-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[InstCombine] Fold cttz(lshr(-1, x) + 1)
to width - x
#91244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Monad (YanWQ-monad) Changes: Fold define i64 @src(i64 %50) {
%52 = lshr i64 -1, %50
%53 = add i64 %52, 1
%54 = call i64 @llvm.cttz.i64(i64 %53, i1 false)
ret i64 %54
} to define i64 @tgt(i64 %50) {
%52 = sub i64 64, %50
ret i64 %52
} as #91171 (review) pointed out. Alive2 proof: https://alive2.llvm.org/ce/z/2aHfYa Note: the ctlz version of this pattern does not seem to exist in dtcxzyw's benchmark, so it is put aside for now. Full diff: https://github.com/llvm/llvm-project/pull/91244.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 1913ef92c16c0e..d7433ad3599f91 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -562,6 +562,13 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
return BinaryOperator::CreateSub(ConstCttz, X);
}
+
+ // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
+ if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
+ Value *Width =
+ ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
+ return BinaryOperator::CreateSub(Width, X);
+ }
} else {
// ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
diff --git a/llvm/test/Transforms/InstCombine/cttz.ll b/llvm/test/Transforms/InstCombine/cttz.ll
index 3595cff5f1aed0..66b7a03fe5d7be 100644
--- a/llvm/test/Transforms/InstCombine/cttz.ll
+++ b/llvm/test/Transforms/InstCombine/cttz.ll
@@ -215,3 +215,64 @@ define i32 @cttz_of_lowest_set_bit_wrong_intrinsic(i32 %x) {
%tz = call i32 @llvm.ctlz.i32(i32 %and, i1 false)
ret i32 %tz
}
+
+define i32 @cttz_of_power_of_two(i32 %x) {
+; CHECK-LABEL: @cttz_of_power_of_two(
+; CHECK-NEXT: [[R:%.*]] = sub i32 32, [[X:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %lshr = lshr i32 -1, %x
+ %add = add i32 %lshr, 1
+ %r = call i32 @llvm.cttz.i32(i32 %add, i1 false)
+ ret i32 %r
+}
+
+define i32 @cttz_of_power_of_two_zero_poison(i32 %x) {
+; CHECK-LABEL: @cttz_of_power_of_two_zero_poison(
+; CHECK-NEXT: [[R:%.*]] = sub i32 32, [[X:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %lshr = lshr i32 -1, %x
+ %add = add i32 %lshr, 1
+ %r = call i32 @llvm.cttz.i32(i32 %add, i1 true)
+ ret i32 %r
+}
+
+define i32 @cttz_of_power_of_two_wrong_intrinsic(i32 %x) {
+; CHECK-LABEL: @cttz_of_power_of_two_wrong_intrinsic(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 -1, [[X:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LSHR]], 1
+; CHECK-NEXT: [[R:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[ADD]], i1 false)
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %lshr = lshr i32 -1, %x
+ %add = add i32 %lshr, 1
+ %r = call i32 @llvm.ctlz.i32(i32 %add, i1 false)
+ ret i32 %r
+}
+
+define i32 @cttz_of_power_of_two_wrong_constant_1(i32 %x) {
+; CHECK-LABEL: @cttz_of_power_of_two_wrong_constant_1(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 -2, [[X:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[LSHR]], 1
+; CHECK-NEXT: [[R:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[ADD]], i1 true)
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %lshr = lshr i32 -2, %x
+ %add = add i32 %lshr, 1
+ %r = call i32 @llvm.cttz.i32(i32 %add, i1 false)
+ ret i32 %r
+}
+
+define i32 @cttz_of_power_of_two_wrong_constant_2(i32 %x) {
+; CHECK-LABEL: @cttz_of_power_of_two_wrong_constant_2(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 -1, [[X:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[LSHR]], -1
+; CHECK-NEXT: [[R:%.*]] = call range(i32 1, 33) i32 @llvm.cttz.i32(i32 [[ADD]], i1 false)
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %lshr = lshr i32 -1, %x
+ %add = add i32 %lshr, -1
+ %r = call i32 @llvm.cttz.i32(i32 %add, i1 false)
+ ret i32 %r
+}
|
LGTM. Wait on 1 more. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Fold `cttz(lshr(-1, x) + 1)`
to `width - x`,
as #91171 (review) pointed out.
Alive2 proof: https://alive2.llvm.org/ce/z/2aHfYa
Note: the `ctlz` version of this pattern does not seem to exist in dtcxzyw's benchmark, so it is put aside for now.