-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[InstCombine] Add transforms for (or/and (icmp eq/ne X,0),(icmp eq/ne X,Pow2OrZero))
#94648
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: None (goldsteinn) Changes
Full diff: https://github.com/llvm/llvm-project/pull/94648.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 8695e9e69df2..89eb583ee427 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -701,6 +701,38 @@ Value *InstCombinerImpl::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
return Builder.CreateICmp(NewPred, Input, RangeEnd);
}
+// (or (icmp eq X, 0), (icmp eq X, Pow2OrZero))
+// -> (icmp eq (and X, Pow2OrZero), X)
+// (and (icmp ne X, 0), (icmp ne X, Pow2OrZero))
+// -> (icmp ne (and X, Pow2OrZero), X)
+static Value *
+foldAndOrOfICmpsWithPow2AndWithZero(InstCombiner::BuilderTy &Builder,
+ ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
+ const SimplifyQuery &Q) {
+ CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
+ // Make sure we have right compares for our op.
+ if (LHS->getPredicate() != Pred || RHS->getPredicate() != Pred)
+ return nullptr;
+
+ // Make it so we can match LHS against the (icmp eq/ne X, 0) just for
+ // simplicity.
+ if (match(RHS->getOperand(1), m_Zero()))
+ std::swap(LHS, RHS);
+
+ Value *Pow2, *Op;
+ // Match the desired pattern:
+ // LHS: (icmp eq/ne X, 0)
+ // RHS: (icmp eq/ne X, Pow2OrZero)
+ if (!match(LHS, m_ICmp(Pred, m_Value(Op), m_Zero())) ||
+ !match(RHS, m_c_ICmp(Pred, m_Specific(Op), m_Value(Pow2))) ||
+ !isKnownToBeAPowerOfTwo(Pow2, Q.DL, /*OrZero*/ true, /*Depth*/ 0, Q.AC,
+ Q.CxtI, Q.DT))
+ return nullptr;
+
+ Value *And = Builder.CreateAnd(Op, Pow2);
+ return Builder.CreateICmp(Pred, And, Op);
+}
+
// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
// Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS,
@@ -3240,6 +3272,11 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
Value *LHS1 = LHS->getOperand(1), *RHS1 = RHS->getOperand(1);
+ if (!IsLogical)
+ if (Value *V =
+ foldAndOrOfICmpsWithPow2AndWithZero(Builder, LHS, RHS, IsAnd, Q))
+ return V;
+
const APInt *LHSC = nullptr, *RHSC = nullptr;
match(LHS1, m_APInt(LHSC));
match(RHS1, m_APInt(RHSC));
diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
index c20f48a985b3..14ac3e404774 100644
--- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
@@ -3057,9 +3057,9 @@ define i32 @icmp_slt_0_or_icmp_add_1_sge_100_i32_fail(i32 %x) {
define i1 @logical_and_icmps1(i32 %a, i1 %other_cond) {
; CHECK-LABEL: @logical_and_icmps1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i32 [[A:%.*]], 10086
-; CHECK-NEXT: [[RET2:%.*]] = select i1 [[RET1:%.*]], i1 [[CMP3]], i1 false
-; CHECK-NEXT: ret i1 [[RET2]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[A:%.*]], 10086
+; CHECK-NEXT: [[RET:%.*]] = select i1 [[OTHER_COND:%.*]], i1 [[TMP0]], i1 false
+; CHECK-NEXT: ret i1 [[RET]]
;
entry:
%cmp1 = icmp sgt i32 %a, -1
@@ -3085,9 +3085,9 @@ entry:
define <4 x i1> @logical_and_icmps_vec1(<4 x i32> %a, <4 x i1> %other_cond) {
; CHECK-LABEL: @logical_and_icmps_vec1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP3:%.*]] = icmp ult <4 x i32> [[A:%.*]], <i32 10086, i32 10086, i32 10086, i32 10086>
-; CHECK-NEXT: [[RET2:%.*]] = select <4 x i1> [[RET1:%.*]], <4 x i1> [[CMP3]], <4 x i1> zeroinitializer
-; CHECK-NEXT: ret <4 x i1> [[RET2]]
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[A:%.*]], <i32 10086, i32 10086, i32 10086, i32 10086>
+; CHECK-NEXT: [[RET:%.*]] = select <4 x i1> [[OTHER_COND:%.*]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer
+; CHECK-NEXT: ret <4 x i1> [[RET]]
;
entry:
%cmp1 = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1 >
@@ -3113,3 +3113,147 @@ entry:
%ret = select i1 %logical_and, i1 %cmp2, i1 false
ret i1 %ret
}
+
+
+define i1 @icmp_eq_or_z_or_pow2orz(i8 %x, i8 %y) {
+; CHECK-LABEL: @icmp_eq_or_z_or_pow2orz(
+; CHECK-NEXT: [[NY:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT: [[POW2ORZ:%.*]] = and i8 [[NY]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[POW2ORZ]], [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %ny = sub i8 0, %y
+ %pow2orz = and i8 %ny, %y
+
+ %c0 = icmp eq i8 %x, 0
+ %cp2 = icmp eq i8 %x, %pow2orz
+ %r = or i1 %c0, %cp2
+ ret i1 %r
+}
+
+
+define i1 @icmp_eq_or_z_or_pow2orz_fail_logic_or(i8 %x, i8 %y) {
+; CHECK-LABEL: @icmp_eq_or_z_or_pow2orz_fail_logic_or(
+; CHECK-NEXT: [[NY:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT: [[POW2ORZ:%.*]] = and i8 [[NY]], [[Y]]
+; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[X:%.*]], 0
+; CHECK-NEXT: [[CP2:%.*]] = icmp eq i8 [[POW2ORZ]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[C0]], i1 true, i1 [[CP2]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %ny = sub i8 0, %y
+ %pow2orz = and i8 %ny, %y
+
+ %c0 = icmp eq i8 %x, 0
+ %cp2 = icmp eq i8 %x, %pow2orz
+ %r = select i1 %c0, i1 true, i1 %cp2
+ ret i1 %r
+}
+
+
+define <2 x i1> @icmp_ne_and_z_and_pow2orz(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @icmp_ne_and_z_and_pow2orz(
+; CHECK-NEXT: [[NY:%.*]] = sub <2 x i8> zeroinitializer, [[Y:%.*]]
+; CHECK-NEXT: [[POW2ORZ:%.*]] = and <2 x i8> [[NY]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[POW2ORZ]], [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %ny = sub <2 x i8> zeroinitializer, %y
+ %pow2orz = and <2 x i8> %ny, %y
+
+ %c0 = icmp ne <2 x i8> %x, zeroinitializer
+ %cp2 = icmp ne <2 x i8> %x, %pow2orz
+ %r = and <2 x i1> %c0, %cp2
+ ret <2 x i1> %r
+}
+
+define <2 x i1> @icmp_ne_and_z_and_pow2orz_fail_logic_and(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @icmp_ne_and_z_and_pow2orz_fail_logic_and(
+; CHECK-NEXT: [[NY:%.*]] = sub <2 x i8> zeroinitializer, [[Y:%.*]]
+; CHECK-NEXT: [[POW2ORZ:%.*]] = and <2 x i8> [[NY]], [[Y]]
+; CHECK-NEXT: [[C0:%.*]] = icmp ne <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT: [[CP2:%.*]] = icmp ne <2 x i8> [[POW2ORZ]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C0]], <2 x i1> [[CP2]], <2 x i1> zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %ny = sub <2 x i8> zeroinitializer, %y
+ %pow2orz = and <2 x i8> %ny, %y
+
+ %c0 = icmp ne <2 x i8> %x, zeroinitializer
+ %cp2 = icmp ne <2 x i8> %x, %pow2orz
+ %r = select <2 x i1> %c0, <2 x i1> %cp2, <2 x i1> zeroinitializer
+ ret <2 x i1> %r
+}
+
+define i1 @icmp_eq_or_z_or_pow2orz_fail_not_pow2(i8 %x, i8 %y) {
+; CHECK-LABEL: @icmp_eq_or_z_or_pow2orz_fail_not_pow2(
+; CHECK-NEXT: [[NY:%.*]] = sub i8 1, [[Y:%.*]]
+; CHECK-NEXT: [[POW2ORZ:%.*]] = and i8 [[NY]], [[Y]]
+; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[X:%.*]], 0
+; CHECK-NEXT: [[CP2:%.*]] = icmp eq i8 [[POW2ORZ]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = or i1 [[C0]], [[CP2]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %ny = sub i8 1, %y
+ %pow2orz = and i8 %ny, %y
+
+ %c0 = icmp eq i8 %x, 0
+ %cp2 = icmp eq i8 %x, %pow2orz
+ %r = or i1 %c0, %cp2
+ ret i1 %r
+}
+
+define i1 @icmp_eq_or_z_or_pow2orz_fail_nonzero_const(i8 %x, i8 %y) {
+; CHECK-LABEL: @icmp_eq_or_z_or_pow2orz_fail_nonzero_const(
+; CHECK-NEXT: [[NY:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT: [[POW2ORZ:%.*]] = and i8 [[NY]], [[Y]]
+; CHECK-NEXT: [[C0:%.*]] = icmp eq i8 [[X:%.*]], 1
+; CHECK-NEXT: [[CP2:%.*]] = icmp eq i8 [[POW2ORZ]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = or i1 [[C0]], [[CP2]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %ny = sub i8 0, %y
+ %pow2orz = and i8 %ny, %y
+
+ %c0 = icmp eq i8 %x, 1
+ %cp2 = icmp eq i8 %x, %pow2orz
+ %r = or i1 %c0, %cp2
+ ret i1 %r
+}
+
+define <2 x i1> @icmp_ne_and_z_and_pow2orz_fail_bad_pred(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @icmp_ne_and_z_and_pow2orz_fail_bad_pred(
+; CHECK-NEXT: [[NY:%.*]] = sub <2 x i8> zeroinitializer, [[Y:%.*]]
+; CHECK-NEXT: [[POW2ORZ:%.*]] = and <2 x i8> [[NY]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i8> [[POW2ORZ]], [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %ny = sub <2 x i8> zeroinitializer, %y
+ %pow2orz = and <2 x i8> %ny, %y
+
+ %c0 = icmp eq <2 x i8> %x, zeroinitializer
+ %cp2 = icmp eq <2 x i8> %x, %pow2orz
+ %r = and <2 x i1> %c0, %cp2
+ ret <2 x i1> %r
+}
+
+define i1 @icmp_eq_or_z_or_pow2orz_fail_bad_pred2(i8 %x, i8 %y) {
+; CHECK-LABEL: @icmp_eq_or_z_or_pow2orz_fail_bad_pred2(
+; CHECK-NEXT: [[NY:%.*]] = sub i8 0, [[Y:%.*]]
+; CHECK-NEXT: [[POW2ORZ:%.*]] = and i8 [[NY]], [[Y]]
+; CHECK-NEXT: [[C0:%.*]] = icmp slt i8 [[X:%.*]], 1
+; CHECK-NEXT: [[CP2:%.*]] = icmp sge i8 [[POW2ORZ]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = or i1 [[C0]], [[CP2]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %ny = sub i8 0, %y
+ %pow2orz = and i8 %ny, %y
+
+ %c0 = icmp sle i8 %x, 0
+ %cp2 = icmp sle i8 %x, %pow2orz
+ %r = or i1 %c0, %cp2
+ ret i1 %r
+}
|
(or/and (icmp eq/ne X,0),(icmp eq/ne X,Pow2OrZero))
NB: this was approved by nikic a while back: https://reviews.llvm.org/D157312 Hopefully its time to get this in :) |
Implementation still looks good to me. I think @dtcxzyw's tests should give us a much better answer about whether this canonicalization is problematic or not now :) |
We need a one-use check to avoid creating more instructions than before. |
bac02a8
to
52823f5
Compare
@dtcxzyw Could you please rerun tests with the new one-use checks? |
…cmp eq/ne X,Pow2OrZero))`; NFC
52823f5
to
71aa5ab
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The old version 52823f5 LGTM.
if (!IsLogical) | ||
if (Value *V = | ||
foldAndOrOfICmpsWithPow2AndWithZero(Builder, LHS, RHS, IsAnd, Q)) | ||
return V; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why did you add this fold in this position? New exotic folds usually get added to the end, not the start of functions. Especially weird to see this interleaved with a bunch of variable declarations that you aren't using.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moving to the end.
Did this fold produce any positive diffs? It seems like it's always either neutral (inverting a predicate) or a regression, but maybe I didn't see some. |
…e X,Pow2OrZero))` `(or (icmp eq X, 0), (icmp eq X, Pow2OrZero))` --> `(icmp eq (and X, Pow2OrZero), X)` `(and (icmp ne X, 0), (icmp ne X, Pow2OrZero))` --> `(icmp ne (and X, Pow2OrZero), X)` Proofs: https://alive2.llvm.org/ce/z/nPo2BN
71aa5ab
to
d82214b
Compare
`(icmp eq/ne (and X, P2), (and X, -P2))` -> `(icmp ult/uge X, P2 * 2)` If `P2` is constant, we can perform this fold profitably even if the `and` ops are multi-use. If `P2` is not constant, then we only perform the fold if the `and` ops are multi-use. This, however, saves an instruction compared to the `xor` + `and` variant. NB: This came up in some of the diffs resulting from llvm#94648 Proofs: https://alive2.llvm.org/ce/z/mfd3G9
`(icmp eq/ne (and X, P2), (and X, -P2))` -> `(icmp ult/uge X, P2 * 2)` If `P2` is constant, we can perform this fold profitably even if the `and` ops are multi-use. NB: This came up in some of the diffs resulting from llvm#94648 Proofs: https://alive2.llvm.org/ce/z/mfd3G9
See bench/php/optimized/php_date.ll |
ping |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
// be one-use. We don't create additional instructions if only one | ||
// of them is one-use. So cases where one is one-use and the other | ||
// is two-use might be profitable. | ||
if (!match(LHS, m_OneUse(m_ICmp(Pred, m_Value(Op), m_Zero()))) || |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As a side note, we should probably add m_ICmp variants without predicate, as we have a bunch of places where the fetched predicate is just discarded, but kind of looks like it's part of the match. (Or maybe alternatively add something like m_SpecificICmp that matches the passed predicate instead of it being an out parameter.)
…e X,Pow2OrZero))` `(or (icmp eq X, 0), (icmp eq X, Pow2OrZero))` --> `(icmp eq (and X, Pow2OrZero), X)` `(and (icmp ne X, 0), (icmp ne X, Pow2OrZero))` --> `(icmp ne (and X, Pow2OrZero), X)` Proofs: https://alive2.llvm.org/ce/z/nPo2BN Closes llvm#94648
(or/and (icmp eq/ne X,0),(icmp eq/ne X,Pow2OrZero))
; NFC(or/and (icmp eq/ne X,0),(icmp eq/ne X,Pow2OrZero))
(or (icmp eq X, 0), (icmp eq X, Pow2OrZero))
-->
(icmp eq (and X, Pow2OrZero), X)
(and (icmp ne X, 0), (icmp ne X, Pow2OrZero))
-->
(icmp ne (and X, Pow2OrZero), X)
Proofs: https://alive2.llvm.org/ce/z/nPo2BN