[InstCombine] Fold umax(smax)/smin(umin) with non-negative constants #82929

dtcxzyw · 2024-02-25T17:23:02Z

This patch extends reassociateMinMaxWithConstants to fold the following patterns:

umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
smin (umin X, nneg C0), nneg C1 --> umin X, (smin/umin C0, C1)

Alive2: https://alive2.llvm.org/ce/z/wfEj-e

Addresses the comment in #82472 (review).

llvmbot · 2024-02-25T17:23:37Z

@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes

This patch extends reassociateMinMaxWithConstants to fold the following patterns:

umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
smin (umin X, nneg C0), nneg C1 --> umin X, (smin/umin C0, C1)

Alive2: https://alive2.llvm.org/ce/z/wfEj-e

Full diff: https://github.com/llvm/llvm-project/pull/82929.diff

3 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+17-6)
(modified) llvm/test/Transforms/InstCombine/minmax-fold.ll (+125-2)
(modified) llvm/test/Transforms/InstCombine/select_meta.ll (+8-9)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0be1495083cebb..60b8243d6ba669 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1220,10 +1220,11 @@ static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
 /// If this min/max has a constant operand and an operand that is a matching
 /// min/max with a constant operand, constant-fold the 2 constant operands.
 static Value *reassociateMinMaxWithConstants(IntrinsicInst *II,
-                                             IRBuilderBase &Builder) {
+                                             IRBuilderBase &Builder,
+                                             const SimplifyQuery &SQ) {
   Intrinsic::ID MinMaxID = II->getIntrinsicID();
-  auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
-  if (!LHS || LHS->getIntrinsicID() != MinMaxID)
+  auto *LHS = dyn_cast<MinMaxIntrinsic>(II->getArgOperand(0));
+  if (!LHS)
     return nullptr;
 
   Constant *C0, *C1;
@@ -1231,11 +1232,21 @@ static Value *reassociateMinMaxWithConstants(IntrinsicInst *II,
       !match(II->getArgOperand(1), m_ImmConstant(C1)))
     return nullptr;
 
-  // max (max X, C0), C1 --> max X, (max C0, C1) --> max X, NewC
+  // max (max X, C0), C1 --> max X, (max C0, C1)
+  // min (min X, C0), C1 --> min X, (min C0, C1)
+  // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
+  // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
+  Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
+  if (InnerMinMaxID != MinMaxID &&
+      !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
+         (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
+        isKnownNonNegative(C0, SQ) && isKnownNonNegative(C1, SQ)))
+    return nullptr;
+
   ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
   Value *CondC = Builder.CreateICmp(Pred, C0, C1);
   Value *NewC = Builder.CreateSelect(CondC, C0, C1);
-  return Builder.CreateIntrinsic(MinMaxID, II->getType(),
+  return Builder.CreateIntrinsic(InnerMinMaxID, II->getType(),
                                  {LHS->getArgOperand(0), NewC});
 }
 
@@ -1786,7 +1797,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     if (Instruction *SAdd = matchSAddSubSat(*II))
       return SAdd;
 
-    if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder))
+    if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
       return replaceInstUsesWith(*II, NewMinMax);
 
     if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll
index 1f7837c109b3f1..8391fe33eb9b59 100644
--- a/llvm/test/Transforms/InstCombine/minmax-fold.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll
@@ -316,8 +316,7 @@ define i32 @test73(i32 %x) {
 ; SMAX(SMAX(X, 36), 75) -> SMAX(X, 75)
 define i32 @test74(i32 %x) {
 ; CHECK-LABEL: @test74(
-; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 36)
-; CHECK-NEXT:    [[RETVAL:%.*]] = call i32 @llvm.umax.i32(i32 [[COND]], i32 75)
+; CHECK-NEXT:    [[RETVAL:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 75)
 ; CHECK-NEXT:    ret i32 [[RETVAL]]
 ;
   %cmp = icmp slt i32 %x, 36
@@ -1419,3 +1418,127 @@ entry:
   %r = select i1 %cmp2, i32 %s1, i32 %k1
   ret i32 %r
 }
+
+define i32 @test_umax_smax1(i32 %x) {
+; CHECK-LABEL: @test_umax_smax1(
+; CHECK-NEXT:    [[UMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 1)
+; CHECK-NEXT:    ret i32 [[UMAX]]
+;
+  %smax = call i32 @llvm.smax.i32(i32 %x, i32 0)
+  %umax = call i32 @llvm.umax.i32(i32 %smax, i32 1)
+  ret i32 %umax
+}
+
+define i32 @test_umax_smax2(i32 %x) {
+; CHECK-LABEL: @test_umax_smax2(
+; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 20)
+; CHECK-NEXT:    ret i32 [[SMAX]]
+;
+  %smax = call i32 @llvm.smax.i32(i32 %x, i32 20)
+  %umax = call i32 @llvm.umax.i32(i32 %smax, i32 10)
+  ret i32 %umax
+}
+
+define <2 x i32> @test_umax_smax_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test_umax_smax_vec(
+; CHECK-NEXT:    [[UMAX:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 1, i32 20>)
+; CHECK-NEXT:    ret <2 x i32> [[UMAX]]
+;
+  %smax = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %x, <2 x i32> <i32 0, i32 20>)
+  %umax = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %smax, <2 x i32> <i32 1, i32 10>)
+  ret <2 x i32> %umax
+}
+
+define i32 @test_smin_umin1(i32 %x) {
+; CHECK-LABEL: @test_smin_umin1(
+; CHECK-NEXT:    [[SMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 10)
+; CHECK-NEXT:    ret i32 [[SMIN]]
+;
+  %smin = call i32 @llvm.umin.i32(i32 %x, i32 10)
+  %umin = call i32 @llvm.smin.i32(i32 %smin, i32 20)
+  ret i32 %umin
+}
+
+define i32 @test_smin_umin2(i32 %x) {
+; CHECK-LABEL: @test_smin_umin2(
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 10)
+; CHECK-NEXT:    ret i32 [[UMIN]]
+;
+  %smin = call i32 @llvm.umin.i32(i32 %x, i32 20)
+  %umin = call i32 @llvm.smin.i32(i32 %smin, i32 10)
+  ret i32 %umin
+}
+
+define <2 x i32> @test_smin_umin_vec(<2 x i32> %x) {
+; CHECK-LABEL: @test_smin_umin_vec(
+; CHECK-NEXT:    [[UMIN:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 10, i32 10>)
+; CHECK-NEXT:    ret <2 x i32> [[UMIN]]
+;
+  %smin = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %x, <2 x i32> <i32 10, i32 20>)
+  %umin = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %smin, <2 x i32> <i32 20, i32 10>)
+  ret <2 x i32> %umin
+}
+
+; Negative tests
+
+define i32 @test_umax_smax3(i32 %x) {
+; CHECK-LABEL: @test_umax_smax3(
+; CHECK-NEXT:    ret i32 -1
+;
+  %smax = call i32 @llvm.smax.i32(i32 %x, i32 0)
+  %umax = call i32 @llvm.umax.i32(i32 %smax, i32 -1)
+  ret i32 %umax
+}
+
+define i32 @test_umax_smax4(i32 %x) {
+; CHECK-LABEL: @test_umax_smax4(
+; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -20)
+; CHECK-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SMAX]], i32 10)
+; CHECK-NEXT:    ret i32 [[UMAX]]
+;
+  %smax = call i32 @llvm.smax.i32(i32 %x, i32 -20)
+  %umax = call i32 @llvm.umax.i32(i32 %smax, i32 10)
+  ret i32 %umax
+}
+
+define i32 @test_smin_umin3(i32 %x) {
+; CHECK-LABEL: @test_smin_umin3(
+; CHECK-NEXT:    ret i32 -20
+;
+  %smin = call i32 @llvm.umin.i32(i32 %x, i32 10)
+  %umin = call i32 @llvm.smin.i32(i32 %smin, i32 -20)
+  ret i32 %umin
+}
+
+define i32 @test_smin_umin4(i32 %x) {
+; CHECK-LABEL: @test_smin_umin4(
+; CHECK-NEXT:    [[SMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 -20)
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[SMIN]], i32 10)
+; CHECK-NEXT:    ret i32 [[UMIN]]
+;
+  %smin = call i32 @llvm.umin.i32(i32 %x, i32 -20)
+  %umin = call i32 @llvm.smin.i32(i32 %smin, i32 10)
+  ret i32 %umin
+}
+
+define i32 @test_umax_nonminmax(i32 %x) {
+; CHECK-LABEL: @test_umax_nonminmax(
+; CHECK-NEXT:    [[Y:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[Y]], i32 1)
+; CHECK-NEXT:    ret i32 [[UMAX]]
+;
+  %y = call i32 @llvm.ctpop.i32(i32 %x)
+  %umax = call i32 @llvm.umax.i32(i32 %y, i32 1)
+  ret i32 %umax
+}
+
+define <2 x i32> @test_umax_smax_vec_neg(<2 x i32> %x) {
+; CHECK-LABEL: @test_umax_smax_vec_neg(
+; CHECK-NEXT:    [[SMAX:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 0, i32 -20>)
+; CHECK-NEXT:    [[UMAX:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[SMAX]], <2 x i32> <i32 1, i32 10>)
+; CHECK-NEXT:    ret <2 x i32> [[UMAX]]
+;
+  %smax = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %x, <2 x i32> <i32 0, i32 -20>)
+  %umax = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %smax, <2 x i32> <i32 1, i32 10>)
+  ret <2 x i32> %umax
+}
diff --git a/llvm/test/Transforms/InstCombine/select_meta.ll b/llvm/test/Transforms/InstCombine/select_meta.ll
index df1e5a82ad5d15..cd133101736cfc 100644
--- a/llvm/test/Transforms/InstCombine/select_meta.ll
+++ b/llvm/test/Transforms/InstCombine/select_meta.ll
@@ -171,8 +171,7 @@ define i32 @test72(i32 %x) {
 ; SMAX(SMAX(X, 36), 75) -> SMAX(X, 75)
 define i32 @test74(i32 %x) {
 ; CHECK-LABEL: @test74(
-; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 36)
-; CHECK-NEXT:    [[RETVAL:%.*]] = call i32 @llvm.umax.i32(i32 [[COND]], i32 75)
+; CHECK-NEXT:    [[RETVAL:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 75)
 ; CHECK-NEXT:    ret i32 [[RETVAL]]
 ;
   %cmp = icmp slt i32 %x, 36
@@ -317,7 +316,7 @@ define <2 x i32> @not_cond_vec_undef(<2 x i1> %c, <2 x i32> %tv, <2 x i32> %fv)
 
 define i64 @select_add(i1 %cond, i64 %x, i64 %y) {
 ; CHECK-LABEL: @select_add(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i64 [[Y:%.*]], i64 0, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i64 [[Y:%.*]], i64 0, !prof [[PROF0]], !unpredictable [[META2:![0-9]+]]
 ; CHECK-NEXT:    [[RET:%.*]] = add i64 [[OP]], [[X:%.*]]
 ; CHECK-NEXT:    ret i64 [[RET]]
 ;
@@ -328,7 +327,7 @@ define i64 @select_add(i1 %cond, i64 %x, i64 %y) {
 
 define <2 x i32> @select_or(<2 x i1> %cond, <2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @select_or(
-; CHECK-NEXT:    [[OP:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> zeroinitializer, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> zeroinitializer, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = or <2 x i32> [[OP]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
@@ -339,7 +338,7 @@ define <2 x i32> @select_or(<2 x i1> %cond, <2 x i32> %x, <2 x i32> %y) {
 
 define i17 @select_sub(i1 %cond, i17 %x, i17 %y) {
 ; CHECK-LABEL: @select_sub(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i17 [[Y:%.*]], i17 0, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i17 [[Y:%.*]], i17 0, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = sub i17 [[X:%.*]], [[OP]]
 ; CHECK-NEXT:    ret i17 [[RET]]
 ;
@@ -350,7 +349,7 @@ define i17 @select_sub(i1 %cond, i17 %x, i17 %y) {
 
 define i128 @select_ashr(i1 %cond, i128 %x, i128 %y) {
 ; CHECK-LABEL: @select_ashr(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i128 [[Y:%.*]], i128 0, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], i128 [[Y:%.*]], i128 0, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = ashr i128 [[X:%.*]], [[OP]]
 ; CHECK-NEXT:    ret i128 [[RET]]
 ;
@@ -361,7 +360,7 @@ define i128 @select_ashr(i1 %cond, i128 %x, i128 %y) {
 
 define double @select_fmul(i1 %cond, double %x, double %y) {
 ; CHECK-LABEL: @select_fmul(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], double [[Y:%.*]], double 1.000000e+00, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], double [[Y:%.*]], double 1.000000e+00, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = fmul double [[OP]], [[X:%.*]]
 ; CHECK-NEXT:    ret double [[RET]]
 ;
@@ -372,7 +371,7 @@ define double @select_fmul(i1 %cond, double %x, double %y) {
 
 define <2 x float> @select_fdiv(i1 %cond, <2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @select_fdiv(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], <2 x float> [[Y:%.*]], <2 x float> <float 1.000000e+00, float 1.000000e+00>, !prof [[PROF0]], !unpredictable !2
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], <2 x float> [[Y:%.*]], <2 x float> <float 1.000000e+00, float 1.000000e+00>, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = fdiv <2 x float> [[X:%.*]], [[OP]]
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
@@ -391,5 +390,5 @@ define <2 x float> @select_fdiv(i1 %cond, <2 x float> %x, <2 x float> %y) {
 ;.
 ; CHECK: [[PROF0]] = !{!"branch_weights", i32 2, i32 10}
 ; CHECK: [[PROF1]] = !{!"branch_weights", i32 10, i32 2}
-; CHECK: [[META2:![0-9]+]] = !{}
+; CHECK: [[META2]] = !{}
 ;.

PR Link: llvm/llvm-project#82929

nikic

LGTM

dtcxzyw added 2 commits February 26, 2024 01:20

[InstCombine] Add pre-commit tests. NFC.

5909cff

[InstCombine] Fold umax(smax)/smin(umin) with non-negative constants

485cea4

dtcxzyw requested a review from goldsteinn February 25, 2024 17:23

dtcxzyw requested a review from nikic as a code owner February 25, 2024 17:23

llvmbot added the llvm:transforms label Feb 25, 2024

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Feb 25, 2024

pre-commit: test PR82929

4bbd103

PR Link: llvm/llvm-project#82929

dtcxzyw mentioned this pull request Feb 25, 2024

pre-commit: test PR82929 dtcxzyw/llvm-opt-benchmark#270

Closed

nikic approved these changes Feb 25, 2024

View reviewed changes

dtcxzyw merged commit 641d160 into llvm:main Feb 25, 2024

dtcxzyw deleted the perf/minmax-mixed branch February 25, 2024 19:26

goldsteinn mentioned this pull request Mar 18, 2024

[InstCombine] Add contributor guide #79007

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[InstCombine] Fold umax(smax)/smin(umin) with non-negative constants #82929

[InstCombine] Fold umax(smax)/smin(umin) with non-negative constants #82929

dtcxzyw commented Feb 25, 2024 •

edited

Loading

llvmbot commented Feb 25, 2024

nikic left a comment

[InstCombine] Fold umax(smax)/smin(umin) with non-negative constants #82929

[InstCombine] Fold umax(smax)/smin(umin) with non-negative constants #82929

Conversation

dtcxzyw commented Feb 25, 2024 • edited Loading

llvmbot commented Feb 25, 2024

nikic left a comment

Choose a reason for hiding this comment

dtcxzyw commented Feb 25, 2024 •

edited

Loading