-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[InstCombine] Extend (icmp eq/ne (and Z, X), (and Z, Y)) folds #94867
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: None (goldsteinn) Changes
Full diff: https://github.com/llvm/llvm-project/pull/94867.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 4203147bc6a54..e79913500f320 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5548,8 +5548,8 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
}
// (X&Z) == (Y&Z) -> (X^Y) & Z == 0
- if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
- match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_And(m_Value(C), m_Value(D)))) {
Value *X = nullptr, *Y = nullptr, *Z = nullptr;
if (A == C) {
@@ -5570,10 +5570,36 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
Z = B;
}
- if (X) { // Build (X^Y) & Z
- Op1 = Builder.CreateXor(X, Y);
- Op1 = Builder.CreateAnd(Op1, Z);
- return new ICmpInst(Pred, Op1, Constant::getNullValue(Op1->getType()));
+ if (X) {
+ // (X&P2) == (X&-P2)
+ // -> X u< P2*2
+ // (X&P2) != (X&-P2)
+ // -> X u>= P2*2
+ // iff P2 is not INT_MIN
+ const APInt *CP2;
+ ICmpInst::Predicate P2Pred =
+ Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;
+ if (match(X, m_APInt(CP2)) && match(Y, m_SpecificInt(-*CP2)) &&
+ (CP2->isPowerOf2() || CP2->isNegatedPowerOf2()) &&
+ !CP2->isMinSignedValue()) {
+ APInt CMask = CP2->isPowerOf2() ? *CP2 : -*CP2;
+ return new ICmpInst(P2Pred, Z,
+ ConstantInt::get(Z->getType(), CMask + CMask));
+ }
+
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ // nsw neg precludes INT_MIN
+ if (match(X, m_NSWNeg(m_Specific(Y))))
+ std::swap(X, Y);
+ if (match(Y, m_NSWNeg(m_Specific(X))) &&
+ isKnownToBeAPowerOfTwo(X, /*OrZero=*/false, 0, &I))
+ return new ICmpInst(P2Pred, Z, Builder.CreateAdd(X, X));
+
+ // Build (X^Y) & Z
+ Op1 = Builder.CreateXor(X, Y);
+ Op1 = Builder.CreateAnd(Op1, Z);
+ return new ICmpInst(Pred, Op1, Constant::getNullValue(Op1->getType()));
+ }
}
}
diff --git a/llvm/test/Transforms/InstCombine/and-compare.ll b/llvm/test/Transforms/InstCombine/and-compare.ll
index 14379ebf3a905..daadbf36d7577 100644
--- a/llvm/test/Transforms/InstCombine/and-compare.ll
+++ b/llvm/test/Transforms/InstCombine/and-compare.ll
@@ -4,6 +4,8 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
+declare void @use.i8(i8)
+
; Should be optimized to one and.
define i1 @test1(i32 %a, i32 %b) {
; CHECK-LABEL: @test1(
@@ -75,3 +77,154 @@ define <2 x i1> @test3vec(<2 x i64> %A) {
ret <2 x i1> %cmp
}
+define i1 @test_eq_p2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_eq_p2(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i8 2, [[YY:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %y = shl nsw i8 1, %yy
+ %neg_y = sub i8 0, %y
+ %and_x_neg_y = and i8 %x, %neg_y
+ %and_x_y = and i8 %x, %y
+
+ %r = icmp eq i8 %and_x_y, %and_x_neg_y
+ ret i1 %r
+}
+
+define i1 @test_eq_p2_2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_eq_p2_2(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i8 2, [[YY:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %y = shl nsw i8 1, %yy
+ %neg_y = sub i8 0, %y
+ %and_x_neg_y = and i8 %x, %neg_y
+ %and_x_y = and i8 %x, %y
+
+ %r = icmp eq i8 %and_x_neg_y, %and_x_y
+ ret i1 %r
+}
+
+define i1 @test_eq_p2_fail_maybe_zero(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_eq_p2_fail_maybe_zero(
+; CHECK-NEXT: [[Y:%.*]] = shl i8 2, [[YY:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Y]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[Y]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[TMP2]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %yyy = shl nsw i8 1, %yy
+ %y = add i8 %yyy, %yyy
+ %neg_y = sub nsw i8 0, %y
+ %and_x_neg_y = and i8 %x, %neg_y
+ %and_x_y = and i8 %x, %y
+
+ %r = icmp eq i8 %and_x_neg_y, %and_x_y
+ ret i1 %r
+}
+
+define i1 @test_eq_p2_fail_maybe_int_min(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_eq_p2_fail_maybe_int_min(
+; CHECK-NEXT: [[Y:%.*]] = shl nuw i8 1, [[YY:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Y]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[Y]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = icmp uge i8 [[TMP2]], [[X:%.*]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %y = shl i8 1, %yy
+ %neg_y = sub i8 0, %y
+ %and_x_neg_y = and i8 %x, %neg_y
+ %and_x_y = and i8 %x, %y
+
+ %r = icmp eq i8 %and_x_y, %and_x_neg_y
+ ret i1 %r
+}
+
+define i1 @test_eq_p2_fail_multiuse(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_eq_p2_fail_multiuse(
+; CHECK-NEXT: [[Y:%.*]] = shl nuw nsw i8 1, [[YY:%.*]]
+; CHECK-NEXT: [[NEG_Y:%.*]] = sub nsw i8 0, [[Y]]
+; CHECK-NEXT: [[AND_X_NEG_Y:%.*]] = and i8 [[NEG_Y]], [[X:%.*]]
+; CHECK-NEXT: [[AND_X_Y:%.*]] = and i8 [[Y]], [[X]]
+; CHECK-NEXT: call void @use.i8(i8 [[AND_X_Y]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND_X_Y]], [[AND_X_NEG_Y]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %y = shl nsw i8 1, %yy
+ %neg_y = sub i8 0, %y
+ %and_x_neg_y = and i8 %x, %neg_y
+ %and_x_y = and i8 %x, %y
+ call void @use.i8(i8 %and_x_y)
+ %r = icmp eq i8 %and_x_y, %and_x_neg_y
+ ret i1 %r
+}
+
+define i1 @test_ne_cp2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_ne_cp2(
+; CHECK-NEXT: [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -16
+; CHECK-NEXT: [[AND_X_Y:%.*]] = and i8 [[X]], 16
+; CHECK-NEXT: call void @use.i8(i8 [[AND_X_NEG_Y]])
+; CHECK-NEXT: call void @use.i8(i8 [[AND_X_Y]])
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], 31
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %and_x_neg_y = and i8 %x, -16
+ %and_x_y = and i8 %x, 16
+ call void @use.i8(i8 %and_x_neg_y)
+ call void @use.i8(i8 %and_x_y)
+ %r = icmp ne i8 %and_x_neg_y, %and_x_y
+ ret i1 %r
+}
+
+define i1 @test_ne_cp2_2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_ne_cp2_2(
+; CHECK-NEXT: [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -4
+; CHECK-NEXT: [[AND_X_Y:%.*]] = and i8 [[X]], 4
+; CHECK-NEXT: call void @use.i8(i8 [[AND_X_NEG_Y]])
+; CHECK-NEXT: call void @use.i8(i8 [[AND_X_Y]])
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], 7
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %and_x_neg_y = and i8 %x, -4
+ %and_x_y = and i8 %x, 4
+ call void @use.i8(i8 %and_x_neg_y)
+ call void @use.i8(i8 %and_x_y)
+ %r = icmp ne i8 %and_x_y, %and_x_neg_y
+ ret i1 %r
+}
+
+define i1 @test_ne_cp2_other_fail(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_ne_cp2_other_fail(
+; CHECK-NEXT: [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -17
+; CHECK-NEXT: [[AND_X_Y:%.*]] = and i8 [[X]], 16
+; CHECK-NEXT: call void @use.i8(i8 [[AND_X_NEG_Y]])
+; CHECK-NEXT: call void @use.i8(i8 [[AND_X_Y]])
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[AND_X_NEG_Y]], [[AND_X_Y]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %and_x_neg_y = and i8 %x, -17
+ %and_x_y = and i8 %x, 16
+ call void @use.i8(i8 %and_x_neg_y)
+ call void @use.i8(i8 %and_x_y)
+ %r = icmp ne i8 %and_x_neg_y, %and_x_y
+ ret i1 %r
+}
+
+define i1 @test_ne_cp2_other_fail2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_ne_cp2_other_fail2(
+; CHECK-NEXT: [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -16
+; CHECK-NEXT: [[AND_X_Y:%.*]] = and i8 [[X]], 17
+; CHECK-NEXT: call void @use.i8(i8 [[AND_X_NEG_Y]])
+; CHECK-NEXT: call void @use.i8(i8 [[AND_X_Y]])
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[AND_X_NEG_Y]], [[AND_X_Y]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %and_x_neg_y = and i8 %x, -16
+ %and_x_y = and i8 %x, 17
+ call void @use.i8(i8 %and_x_neg_y)
+ call void @use.i8(i8 %and_x_y)
+ %r = icmp ne i8 %and_x_neg_y, %and_x_y
+ ret i1 %r
+}
|
(icmp eq/ne (and X, P2), (and X, -P2))
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If P2 is not constant, then we only perform the fold if the and
ops are multi-use. This, however, saves an instruction compared to the
xor + and variant.
Unless this also came up in the diffs, please remove it from this PR.
A possible alternative way to handle the motivating cases would be to allow the fold if X and Y are constants and only one of the ands is one-use. This is better in some ways (handles any constants) and worse in others (does not handle both ands being multi-use).
970c0d6
to
a89b83d
Compare
Drop non-const case. Did both... (added another commit). If its too much for 1 pr LMK and I'll move to a seperate PR. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
a89b83d
to
057dc9d
Compare
(icmp eq/ne (and X, P2), (and X, -P2))
Okay, retitled/updated message. Basically we can just extend this fold under the special case that |
Two ways to relaxed: 1) Only require one of the `and` ops to be single-use if both `X` and `Y` are constant. 2) Special case, if `X ^ Y` is a negative power of 2, then `Z ^ NegP2 ==/!= 0` will fold to `Z u</u>= -NegP2` which creates no additional instructions so fold irrelivant of use counts.
057dc9d
to
4d2e4f5
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Please add alive2 link to your commit message.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
return new ICmpInst(Pred, Op1, Constant::getNullValue(Op1->getType())); | ||
if (X) { | ||
// If X^Y is a negative power of two, then `icmp eq/ne (Z & NegP2), 0` | ||
// will fold to `icmp ult/uge Z, -NegP2` incuring no additional uses. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// will fold to `icmp ult/uge Z, -NegP2` incuring no additional uses. | |
// will fold to `icmp ult/uge Z, -NegP2` incurring no additional instructions. |
(*C0 ^ *C1).isNegatedPowerOf2(); | ||
|
||
// If either Op0/Op1 are both one use or X^Y will constant fold and one of | ||
// Op0/Op1 are one use proceeed. In those cases we are instruction neutral |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// Op0/Op1 are one use proceeed. In those cases we are instruction neutral | |
// Op0/Op1 are one use, proceed. In those cases we are instruction neutral |
|
||
// If either Op0/Op1 are both one use or X^Y will constant fold and one of | ||
// Op0/Op1 are one use proceeed. In those cases we are instruction neutral | ||
// but `icmp eq/ne A, 0` is easier to analyze than `icmp eq/ne A, B` |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// but `icmp eq/ne A, 0` is easier to analyze than `icmp eq/ne A, B` | |
// but `icmp eq/ne A, 0` is easier to analyze than `icmp eq/ne A, B`. |
// but `icmp eq/ne A, 0` is easier to analyze than `icmp eq/ne A, B` | ||
int UseCnt = | ||
int(Op0->hasOneUse()) + int(Op1->hasOneUse()) + | ||
(int(match(X, m_ImmConstant()) && match(Y, m_ImmConstant()))); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(int(match(X, m_ImmConstant()) && match(Y, m_ImmConstant()))); | |
int(match(X, m_ImmConstant()) && match(Y, m_ImmConstant())); |
Well at this point the commit is not adding any new folds, so not sure why alive2 would be needed? |
Two ways to relaxed: 1) Only require one of the `and` ops to be single-use if both `X` and `Y` are constant. 2) Special case, if `X ^ Y` is a negative power of 2, then `Z ^ NegP2 ==/!= 0` will fold to `Z u</u>= -NegP2` which creates no additional instructions so fold irrelivant of use counts. Closes llvm#94867
(icmp eq/ne (and X, P2), (and X, -P2))
; NFCTwo ways to relaxed:
1) Only require one of the
and
ops to be single-use if bothX
and
Y
are constant.2) Special case, if
X ^ Y
is a negative power of 2, thenZ ^ NegP2 ==/!= 0
will fold toZ u</u>= -NegP2
whichcreates no additional instructions so fold irrelivant of
use counts.