-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[InstCombine] Fold xored one-complemented operand comparisons #69882
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: None (elhewaty) Changes
Full diff: https://github.com/llvm/llvm-project/pull/69882.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 66e2b6c72cce46c..4c19edfb27d2f2b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -6929,6 +6929,23 @@ static Instruction *foldReductionIdiom(ICmpInst &I,
return nullptr;
}
+// Returns the predicate with its direction reversed while keeping its
+// signedness and strictness: SLE <-> SGE, SLT <-> SGT, ULE <-> UGE,
+// ULT <-> UGT. Equality predicates are not handled.
+static ICmpInst::Predicate ConvertPred(ICmpInst::Predicate Pred) {
+ switch(Pred) {
+ case ICmpInst::ICMP_SLE: return ICmpInst::ICMP_SGE;
+ case ICmpInst::ICMP_SGE: return ICmpInst::ICMP_SLE;
+ case ICmpInst::ICMP_SLT: return ICmpInst::ICMP_SGT;
+ case ICmpInst::ICMP_SGT: return ICmpInst::ICMP_SLT;
+ case ICmpInst::ICMP_ULE: return ICmpInst::ICMP_UGE;
+ case ICmpInst::ICMP_UGE: return ICmpInst::ICMP_ULE;
+ case ICmpInst::ICMP_ULT: return ICmpInst::ICMP_UGT;
+ case ICmpInst::ICMP_UGT: return ICmpInst::ICMP_ULT;
+ default: llvm_unreachable("Invalid Predicate");
+ }
+}
+
Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -7127,6 +7144,18 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(A, B),
Op1);
+ // Transform (~A ^ B) s< ~A --> (A ^ B) s> A,
+ // (~A ^ B) s> ~A --> (A ^ B) s< A,
+ // (~A ^ B) s<= ~A --> (A ^ B) s>= A,
+ // (~A ^ B) s>= ~A --> (A ^ B) s<= A,
+ // (~A ^ B) u< ~A --> (A ^ B) u> A,
+ // (~A ^ B) u> ~A --> (A ^ B) u< A,
+ // (~A ^ B) u<= ~A --> (A ^ B) u>= A,
+ // and (~A ^ B) u>= ~A --> (A ^ B) u<= A
+ if (match(Op0, m_Xor(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_Not(m_Value(A))) && !I.isEquality())
+ return new ICmpInst(ConvertPred(Pred), Builder.CreateXor(A, B), A);
+
// ~X < ~Y --> Y < X
// ~X < C --> X > ~C
if (match(Op0, m_Not(m_Value(A)))) {
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll
index 9b6572697cf5e8f..893fb868e6adc77 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll
@@ -4,6 +4,118 @@
declare void @llvm.assume(i1)
declare void @barrier()
+define i32 @test_slt_xor(i32 %0, i32 %1) {
+; CHECK-LABEL: @test_slt_xor(
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP0:%.*]], [[TMP1:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %3 = xor i32 %0, -1
+ %4 = xor i32 %3, %1
+ %5 = icmp slt i32 %4, %3
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @test_sle_xor(i32 %0, i32 %1) {
+; CHECK-LABEL: @test_sle_xor(
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP0:%.*]], [[TMP1:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp sge i32 [[TMP3]], [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %3 = xor i32 %0, -1
+ %4 = xor i32 %3, %1
+ %5 = icmp sle i32 %4, %3
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @test_sgt_xor(i32 %0, i32 %1) {
+; CHECK-LABEL: @test_sgt_xor(
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP0:%.*]], [[TMP1:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %3 = xor i32 %0, -1
+ %4 = xor i32 %3, %1
+ %5 = icmp sgt i32 %4, %3
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @test_sge_xor(i32 %0, i32 %1) {
+; CHECK-LABEL: @test_sge_xor(
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP0:%.*]], [[TMP1:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp sle i32 [[TMP3]], [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %3 = xor i32 %0, -1
+ %4 = xor i32 %3, %1
+ %5 = icmp sge i32 %4, %3
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @test_ult_xor(i32 %0, i32 %1) {
+; CHECK-LABEL: @test_ult_xor(
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP0:%.*]], [[TMP1:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %3 = xor i32 %0, -1
+ %4 = xor i32 %3, %1
+ %5 = icmp ult i32 %4, %3
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @test_ule_xor(i32 %0, i32 %1) {
+; CHECK-LABEL: @test_ule_xor(
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP0:%.*]], [[TMP1:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp uge i32 [[TMP3]], [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %3 = xor i32 %0, -1
+ %4 = xor i32 %3, %1
+ %5 = icmp ule i32 %4, %3
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @test_ugt_xor(i32 %0, i32 %1) {
+; CHECK-LABEL: @test_ugt_xor(
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP0:%.*]], [[TMP1:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %3 = xor i32 %0, -1
+ %4 = xor i32 %3, %1
+ %5 = icmp ugt i32 %4, %3
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define i32 @test_uge_xor(i32 %0, i32 %1) {
+; CHECK-LABEL: @test_uge_xor(
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP0:%.*]], [[TMP1:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ule i32 [[TMP3]], [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = zext i1 [[TMP4]] to i32
+; CHECK-NEXT: ret i32 [[TMP5]]
+;
+ %3 = xor i32 %0, -1
+ %4 = xor i32 %3, %1
+ %5 = icmp uge i32 %4, %3
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
define i1 @xor_uge(i8 %x, i8 %y) {
; CHECK-LABEL: @xor_uge(
; CHECK-NEXT: [[YNZ:%.*]] = icmp ne i8 [[Y:%.*]], 0
|
@dtcxzyw please have a look. |
(~A ^ B) s< ~A --> (A ^ B) s> A (~A ^ B) s> ~A --> (A ^ B) s< A (~A ^ B) s<= ~A --> (A ^ B) s>= A (~A ^ B) s>= ~A --> (A ^ B) s<= A (~A ^ B) u< ~A --> (A ^ B) u> A (~A ^ B) u> ~A --> (A ^ B) u< A (~A ^ B) u<= ~A --> (A ^ B) u>= A (~A ^ B) u>= ~A --> (A ^ B) u<= A |
You can test this locally with the following command:git-clang-format --diff 01893b54835ba8c0e97d67361a7b6128cd775565 b348c4631aa4da367e34a77c57c074fd3499f28a -- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp View the diff from clang-format here.diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 83cb65aa0d..588b16a8c6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -7328,13 +7328,12 @@ Instruction *InstCombinerImpl::foldFCmpIntToFPConst(FCmpInst &I,
// [0, UMAX], but it may still be fractional. See if it is fractional by
// casting the FP value to the integer value and back, checking for equality.
// Don't do this for zero, because -0.0 is not fractional.
- Constant *RHSInt = LHSUnsigned
- ? ConstantExpr::getFPToUI(RHSC, IntTy)
- : ConstantExpr::getFPToSI(RHSC, IntTy);
+ Constant *RHSInt = LHSUnsigned ? ConstantExpr::getFPToUI(RHSC, IntTy)
+ : ConstantExpr::getFPToSI(RHSC, IntTy);
if (!RHS.isZero()) {
bool Equal = LHSUnsigned
- ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
- : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
+ ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC
+ : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;
if (!Equal) {
// If we had a comparison against a fractional value, we have to adjust
// the compare predicate and sometimes the value. RHSC is rounded towards
|
%4 = xor i32 %3, %1 | ||
%5 = icmp uge i32 %4, %3 | ||
%6 = zext i1 %5 to i32 | ||
ret i32 %6 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you swap the operand order in some of the tests? I.e., all your tests are of the form:
~A ^ B pred A
. Can you:
- Add tests where its form:
A pred Xor
- Add tests where the xor is of form
A ^ ~B
- Can you add tests for
~A ^ B pred ~C
You don't need to add new tests for all these cases, just flip around operands in some of your current tests.
@dtcxzyw why did the build fail? |
You should add |
Or instead rename variables to X/Y/Z as shadow variables can cause confusion and in llvm |
f58c447
to
a7ec75f
Compare
61c05b7
to
3a58da4
Compare
@dtcxzyw updated. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with the value name fix in test_slt_xor
and test_sle_xor
.
… one-complemented operands(NFC)
3a58da4
to
7b9aaff
Compare
@dtcxzyw I used git clang-format but the check failed |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Emm, it looks like an internal failure of the buildbot. |
So, What is the future of the pull? |
7b9aaff
to
b348c46
Compare
@dtcxzyw Please take a look. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Please wait for additional approval from other reviewers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…9882) - [InstCombine] Add test coverage for comparisons of operands including one-complemented operands(NFC). - [InstCombine] Fold xored one-complemented operand comparisons. Alive2: https://alive2.llvm.org/ce/z/PZMJeB Fixes llvm#69803.
Alive2: https://alive2.llvm.org/ce/z/PZMJeB
Fixes clang is suboptimal for
(~a ^ b) < ~a
#69803.