[InstCombine] Extend (icmp eq/ne (and Z, X), (and Z, Y)) folds #94867


Closed
wants to merge 2 commits

Conversation

goldsteinn
Contributor
@goldsteinn goldsteinn commented Jun 8, 2024

  • [InstCombine] Add tests for combining (icmp eq/ne (and X, P2), (and X, -P2)); NFC
  • [InstCombine] Extend (icmp eq/ne (and Z, X), (and Z, Y)) folds

Two relaxations:
1) Only require one of the `and` ops to be single-use if both X
and Y are constant.
2) Special case: if X ^ Y is a negative power of 2, then
`Z & NegP2 ==/!= 0` will fold to `Z u< / u>= -NegP2`, which
creates no additional instructions, so fold irrespective of
use counts.

@goldsteinn goldsteinn requested a review from nikic as a code owner June 8, 2024 19:33
@goldsteinn goldsteinn requested a review from dtcxzyw June 8, 2024 19:34
@llvmbot
Member

llvmbot commented Jun 8, 2024

@llvm/pr-subscribers-llvm-transforms

Author: None (goldsteinn)

Changes
  • [InstCombine] Add tests for combining (icmp eq/ne (and X, P2), (and X, -P2)); NFC
  • [InstCombine] Add combines for (icmp eq/ne (and X, P2), (and X, -P2))

Full diff: https://github.com/llvm/llvm-project/pull/94867.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+32-6)
  • (modified) llvm/test/Transforms/InstCombine/and-compare.ll (+153)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 4203147bc6a54..e79913500f320 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5548,8 +5548,8 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
   }
 
   // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
-  if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
-      match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
+  if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+      match(Op1, m_And(m_Value(C), m_Value(D)))) {
     Value *X = nullptr, *Y = nullptr, *Z = nullptr;
 
     if (A == C) {
@@ -5570,10 +5570,36 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
       Z = B;
     }
 
-    if (X) { // Build (X^Y) & Z
-      Op1 = Builder.CreateXor(X, Y);
-      Op1 = Builder.CreateAnd(Op1, Z);
-      return new ICmpInst(Pred, Op1, Constant::getNullValue(Op1->getType()));
+    if (X) {
+      // (X&P2) == (X&-P2)
+      //    -> X u< P2*2
+      // (X&P2) != (X&-P2)
+      //    -> X u>= P2*2
+      // iff P2 is not INT_MIN
+      const APInt *CP2;
+      ICmpInst::Predicate P2Pred =
+          Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;
+      if (match(X, m_APInt(CP2)) && match(Y, m_SpecificInt(-*CP2)) &&
+          (CP2->isPowerOf2() || CP2->isNegatedPowerOf2()) &&
+          !CP2->isMinSignedValue()) {
+        APInt CMask = CP2->isPowerOf2() ? *CP2 : -*CP2;
+        return new ICmpInst(P2Pred, Z,
+                            ConstantInt::get(Z->getType(), CMask + CMask));
+      }
+
+      if (Op0->hasOneUse() && Op1->hasOneUse()) {
+        // nsw neg precludes INT_MIN
+        if (match(X, m_NSWNeg(m_Specific(Y))))
+          std::swap(X, Y);
+        if (match(Y, m_NSWNeg(m_Specific(X))) &&
+            isKnownToBeAPowerOfTwo(X, /*OrZero=*/false, 0, &I))
+          return new ICmpInst(P2Pred, Z, Builder.CreateAdd(X, X));
+
+        // Build (X^Y) & Z
+        Op1 = Builder.CreateXor(X, Y);
+        Op1 = Builder.CreateAnd(Op1, Z);
+        return new ICmpInst(Pred, Op1, Constant::getNullValue(Op1->getType()));
+      }
     }
   }
 
diff --git a/llvm/test/Transforms/InstCombine/and-compare.ll b/llvm/test/Transforms/InstCombine/and-compare.ll
index 14379ebf3a905..daadbf36d7577 100644
--- a/llvm/test/Transforms/InstCombine/and-compare.ll
+++ b/llvm/test/Transforms/InstCombine/and-compare.ll
@@ -4,6 +4,8 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
+declare void @use.i8(i8)
+
 ; Should be optimized to one and.
 define i1 @test1(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test1(
@@ -75,3 +77,154 @@ define <2 x i1> @test3vec(<2 x i64> %A) {
   ret <2 x i1> %cmp
 }
 
+define i1 @test_eq_p2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_eq_p2(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 2, [[YY:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = shl nsw i8 1, %yy
+  %neg_y = sub i8 0, %y
+  %and_x_neg_y = and i8 %x, %neg_y
+  %and_x_y = and i8 %x, %y
+
+  %r = icmp eq i8 %and_x_y, %and_x_neg_y
+  ret i1 %r
+}
+
+define i1 @test_eq_p2_2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_eq_p2_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 2, [[YY:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = shl nsw i8 1, %yy
+  %neg_y = sub i8 0, %y
+  %and_x_neg_y = and i8 %x, %neg_y
+  %and_x_y = and i8 %x, %y
+
+  %r = icmp eq i8 %and_x_neg_y, %and_x_y
+  ret i1 %r
+}
+
+define i1 @test_eq_p2_fail_maybe_zero(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_eq_p2_fail_maybe_zero(
+; CHECK-NEXT:    [[Y:%.*]] = shl i8 2, [[YY:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[Y]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[Y]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = icmp uge i8 [[TMP2]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %yyy = shl nsw i8 1, %yy
+  %y = add i8 %yyy, %yyy
+  %neg_y = sub nsw i8 0, %y
+  %and_x_neg_y = and i8 %x, %neg_y
+  %and_x_y = and i8 %x, %y
+
+  %r = icmp eq i8 %and_x_neg_y, %and_x_y
+  ret i1 %r
+}
+
+define i1 @test_eq_p2_fail_maybe_int_min(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_eq_p2_fail_maybe_int_min(
+; CHECK-NEXT:    [[Y:%.*]] = shl nuw i8 1, [[YY:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[Y]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[Y]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = icmp uge i8 [[TMP2]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = shl i8 1, %yy
+  %neg_y = sub i8 0, %y
+  %and_x_neg_y = and i8 %x, %neg_y
+  %and_x_y = and i8 %x, %y
+
+  %r = icmp eq i8 %and_x_y, %and_x_neg_y
+  ret i1 %r
+}
+
+define i1 @test_eq_p2_fail_multiuse(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_eq_p2_fail_multiuse(
+; CHECK-NEXT:    [[Y:%.*]] = shl nuw nsw i8 1, [[YY:%.*]]
+; CHECK-NEXT:    [[NEG_Y:%.*]] = sub nsw i8 0, [[Y]]
+; CHECK-NEXT:    [[AND_X_NEG_Y:%.*]] = and i8 [[NEG_Y]], [[X:%.*]]
+; CHECK-NEXT:    [[AND_X_Y:%.*]] = and i8 [[Y]], [[X]]
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_Y]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND_X_Y]], [[AND_X_NEG_Y]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = shl nsw i8 1, %yy
+  %neg_y = sub i8 0, %y
+  %and_x_neg_y = and i8 %x, %neg_y
+  %and_x_y = and i8 %x, %y
+  call void @use.i8(i8 %and_x_y)
+  %r = icmp eq i8 %and_x_y, %and_x_neg_y
+  ret i1 %r
+}
+
+define i1 @test_ne_cp2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_ne_cp2(
+; CHECK-NEXT:    [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -16
+; CHECK-NEXT:    [[AND_X_Y:%.*]] = and i8 [[X]], 16
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_NEG_Y]])
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_Y]])
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[X]], 31
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %and_x_neg_y = and i8 %x, -16
+  %and_x_y = and i8 %x, 16
+  call void @use.i8(i8 %and_x_neg_y)
+  call void @use.i8(i8 %and_x_y)
+  %r = icmp ne i8 %and_x_neg_y, %and_x_y
+  ret i1 %r
+}
+
+define i1 @test_ne_cp2_2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_ne_cp2_2(
+; CHECK-NEXT:    [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -4
+; CHECK-NEXT:    [[AND_X_Y:%.*]] = and i8 [[X]], 4
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_NEG_Y]])
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_Y]])
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[X]], 7
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %and_x_neg_y = and i8 %x, -4
+  %and_x_y = and i8 %x, 4
+  call void @use.i8(i8 %and_x_neg_y)
+  call void @use.i8(i8 %and_x_y)
+  %r = icmp ne i8 %and_x_y, %and_x_neg_y
+  ret i1 %r
+}
+
+define i1 @test_ne_cp2_other_fail(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_ne_cp2_other_fail(
+; CHECK-NEXT:    [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -17
+; CHECK-NEXT:    [[AND_X_Y:%.*]] = and i8 [[X]], 16
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_NEG_Y]])
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_Y]])
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND_X_NEG_Y]], [[AND_X_Y]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %and_x_neg_y = and i8 %x, -17
+  %and_x_y = and i8 %x, 16
+  call void @use.i8(i8 %and_x_neg_y)
+  call void @use.i8(i8 %and_x_y)
+  %r = icmp ne i8 %and_x_neg_y, %and_x_y
+  ret i1 %r
+}
+
+define i1 @test_ne_cp2_other_fail2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_ne_cp2_other_fail2(
+; CHECK-NEXT:    [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -16
+; CHECK-NEXT:    [[AND_X_Y:%.*]] = and i8 [[X]], 17
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_NEG_Y]])
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_Y]])
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND_X_NEG_Y]], [[AND_X_Y]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %and_x_neg_y = and i8 %x, -16
+  %and_x_y = and i8 %x, 17
+  call void @use.i8(i8 %and_x_neg_y)
+  call void @use.i8(i8 %and_x_y)
+  %r = icmp ne i8 %and_x_neg_y, %and_x_y
+  ret i1 %r
+}

@goldsteinn goldsteinn changed the title goldsteinn/cmp eq p2 neg p2 [InstCombine] Add combines for (icmp eq/ne (and X, P2), (and X, -P2)) Jun 8, 2024
Contributor

@nikic nikic left a comment


If P2 is not constant, then we only perform the fold if the and
ops are multi-use. This, however, saves an instruction compared to the
xor + and variant.

Unless this also came up in the diffs, please remove it from this PR.


A possible alternative way to handle the motivating cases would be to allow the fold if X and Y are constants and only one of the ands is one-use. This is better in some ways (handles any constants) and worse in others (does not handle both ands being multi-use).

@goldsteinn goldsteinn force-pushed the goldsteinn/cmp-eq-p2-neg-p2 branch from 970c0d6 to a89b83d on June 8, 2024 20:54
@goldsteinn
Contributor Author

If P2 is not constant, then we only perform the fold if the and
ops are multi-use. This, however, saves an instruction compared to the
xor + and variant.

Unless this also came up in the diffs, please remove it from this PR.

A possible alternative way to handle the motivating cases would be to allow the fold if X and Y are constants and only one of the ands is one-use. This is better in some ways (handles any constants) and worse in others (does not handle both ands being multi-use).

Dropped the non-const case.

Did both... (added another commit). If it's too much for one PR, LMK and I'll move it to a separate PR.

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Jun 8, 2024
Member

@dtcxzyw dtcxzyw left a comment


LGTM.

@goldsteinn goldsteinn force-pushed the goldsteinn/cmp-eq-p2-neg-p2 branch from a89b83d to 057dc9d on June 9, 2024 19:37
@goldsteinn goldsteinn changed the title [InstCombine] Add combines for (icmp eq/ne (and X, P2), (and X, -P2)) [InstCombine] Extend (icmp eq/ne (and Z, X), (and Z, Y)) folds Jun 9, 2024
@goldsteinn
Contributor Author

Okay, retitled/updated the message. Basically we can just extend this fold under the special case that X ^ Y is a negative power of 2 / a constant.

@goldsteinn goldsteinn requested a review from dtcxzyw June 9, 2024 19:39
dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Jun 9, 2024
Two relaxations:
    1) Only require one of the `and` ops to be single-use if both `X`
       and `Y` are constant.
    2) Special case: if `X ^ Y` is a negative power of 2, then
       `Z & NegP2 ==/!= 0` will fold to `Z u< / u>= -NegP2`, which
       creates no additional instructions, so fold irrespective of
       use counts.
@goldsteinn goldsteinn force-pushed the goldsteinn/cmp-eq-p2-neg-p2 branch from 057dc9d to 4d2e4f5 on June 9, 2024 23:07
Member

@dtcxzyw dtcxzyw left a comment


LGTM. Please add alive2 link to your commit message.

Contributor

@nikic nikic left a comment


LGTM

return new ICmpInst(Pred, Op1, Constant::getNullValue(Op1->getType()));
if (X) {
// If X^Y is a negative power of two, then `icmp eq/ne (Z & NegP2), 0`
// will fold to `icmp ult/uge Z, -NegP2` incuring no additional uses.
Contributor


Suggested change
// will fold to `icmp ult/uge Z, -NegP2` incuring no additional uses.
// will fold to `icmp ult/uge Z, -NegP2` incurring no additional instructions.

(*C0 ^ *C1).isNegatedPowerOf2();

// If either Op0/Op1 are both one use or X^Y will constant fold and one of
// Op0/Op1 are one use proceeed. In those cases we are instruction neutral
Contributor


Suggested change
// Op0/Op1 are one use proceeed. In those cases we are instruction neutral
// Op0/Op1 are one use, proceed. In those cases we are instruction neutral


// If either Op0/Op1 are both one use or X^Y will constant fold and one of
// Op0/Op1 are one use proceeed. In those cases we are instruction neutral
// but `icmp eq/ne A, 0` is easier to analyze than `icmp eq/ne A, B`
Contributor


Suggested change
// but `icmp eq/ne A, 0` is easier to analyze than `icmp eq/ne A, B`
// but `icmp eq/ne A, 0` is easier to analyze than `icmp eq/ne A, B`.

// but `icmp eq/ne A, 0` is easier to analyze than `icmp eq/ne A, B`
int UseCnt =
int(Op0->hasOneUse()) + int(Op1->hasOneUse()) +
(int(match(X, m_ImmConstant()) && match(Y, m_ImmConstant())));
Contributor


Suggested change
(int(match(X, m_ImmConstant()) && match(Y, m_ImmConstant())));
int(match(X, m_ImmConstant()) && match(Y, m_ImmConstant()));

@goldsteinn
Contributor Author

LGTM. Please add alive2 link to your commit message.

Well, at this point the commit is not adding any new folds, so I'm not sure why alive2 would be needed?

Lukacma pushed a commit to Lukacma/llvm-project that referenced this pull request Jun 12, 2024
Two relaxations:
    1) Only require one of the `and` ops to be single-use if both `X`
       and `Y` are constant.
    2) Special case: if `X ^ Y` is a negative power of 2, then
       `Z & NegP2 ==/!= 0` will fold to `Z u< / u>= -NegP2`, which
       creates no additional instructions, so fold irrespective of
       use counts.

Closes llvm#94867
4 participants