Skip to content

Commit dc2b2ae

Browse files
committed
[InstCombine] Fold cttz of lowest set bit
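cttz(-a & a) is the same as cttz(a). The expression -a & a is the usual idiom for extracting the lowest set bit, and isolating that bit naturally does not change the number of trailing zeros (when a is 0 the expression is also 0, so both sides are cttz(0)). Proof: https://alive2.llvm.org/ce/z/Yp26x7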
1 parent c8bc1ab commit dc2b2ae

File tree

4 files changed

+11
-21
lines changed

4 files changed

+11
-21
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,10 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
537537
if (match(Op0, m_Neg(m_Value(X))))
538538
return IC.replaceOperand(II, 0, X);
539539

540+
// cttz(-x & x) -> cttz(x)
541+
if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
542+
return IC.replaceOperand(II, 0, X);
543+
540544
// cttz(sext(x)) -> cttz(zext(x))
541545
if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
542546
auto *Zext = IC.Builder.CreateZExt(X, II.getType());

llvm/test/Transforms/InstCombine/cttz.ll

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,7 @@ define <2 x i64> @cttz_sext_zero_def_vec(<2 x i32> %x) {
122122

123123
define i32 @cttz_of_lowest_set_bit(i32 %x) {
124124
; CHECK-LABEL: @cttz_of_lowest_set_bit(
125-
; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[X:%.*]]
126-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[X]]
127-
; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[AND]], i1 false), !range [[RNG1]]
125+
; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range [[RNG1]]
128126
; CHECK-NEXT: ret i32 [[TZ]]
129127
;
130128
%sub = sub i32 0, %x
@@ -136,9 +134,7 @@ define i32 @cttz_of_lowest_set_bit(i32 %x) {
136134
define i32 @cttz_of_lowest_set_bit_commuted(i32 %xx) {
137135
; CHECK-LABEL: @cttz_of_lowest_set_bit_commuted(
138136
; CHECK-NEXT: [[X:%.*]] = udiv i32 42, [[XX:%.*]]
139-
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
140-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[SUB]]
141-
; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[AND]], i1 false), !range [[RNG1]]
137+
; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X]], i1 false), !range [[RNG1]]
142138
; CHECK-NEXT: ret i32 [[TZ]]
143139
;
144140
%x = udiv i32 42, %xx ; thwart complexity-based canonicalization
@@ -150,9 +146,7 @@ define i32 @cttz_of_lowest_set_bit_commuted(i32 %xx) {
150146

151147
define i32 @cttz_of_lowest_set_bit_poison_flag(i32 %x) {
152148
; CHECK-LABEL: @cttz_of_lowest_set_bit_poison_flag(
153-
; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[X:%.*]]
154-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[X]]
155-
; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[AND]], i1 true), !range [[RNG1]]
149+
; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range [[RNG1]]
156150
; CHECK-NEXT: ret i32 [[TZ]]
157151
;
158152
%sub = sub i32 0, %x
@@ -163,9 +157,7 @@ define i32 @cttz_of_lowest_set_bit_poison_flag(i32 %x) {
163157

164158
define <2 x i64> @cttz_of_lowest_set_bit_vec(<2 x i64> %x) {
165159
; CHECK-LABEL: @cttz_of_lowest_set_bit_vec(
166-
; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[X:%.*]]
167-
; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[SUB]], [[X]]
168-
; CHECK-NEXT: [[TZ:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[AND]], i1 false), !range [[RNG2]]
160+
; CHECK-NEXT: [[TZ:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[X:%.*]], i1 false), !range [[RNG2]]
169161
; CHECK-NEXT: ret <2 x i64> [[TZ]]
170162
;
171163
%sub = sub <2 x i64> zeroinitializer, %x
@@ -176,9 +168,7 @@ define <2 x i64> @cttz_of_lowest_set_bit_vec(<2 x i64> %x) {
176168

177169
define <2 x i64> @cttz_of_lowest_set_bit_vec_undef(<2 x i64> %x) {
178170
; CHECK-LABEL: @cttz_of_lowest_set_bit_vec_undef(
179-
; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[X:%.*]]
180-
; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[SUB]], [[X]]
181-
; CHECK-NEXT: [[TZ:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[AND]], i1 false), !range [[RNG2]]
171+
; CHECK-NEXT: [[TZ:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[X:%.*]], i1 false), !range [[RNG2]]
182172
; CHECK-NEXT: ret <2 x i64> [[TZ]]
183173
;
184174
%sub = sub <2 x i64> zeroinitializer, %x

llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,7 @@ define <2 x i32> @select_clz_to_ctz_vec(<2 x i32> %a) {
7272

7373
define i32 @select_clz_to_ctz_extra_use(i32 %a) {
7474
; CHECK-LABEL: @select_clz_to_ctz_extra_use(
75-
; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A:%.*]]
76-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[A]]
77-
; CHECK-NEXT: [[SUB1:%.*]] = call i32 @llvm.cttz.i32(i32 [[AND]], i1 true), !range [[RNG0]]
75+
; CHECK-NEXT: [[SUB1:%.*]] = call i32 @llvm.cttz.i32(i32 [[A:%.*]], i1 true), !range [[RNG0]]
7876
; CHECK-NEXT: call void @use(i32 [[SUB1]])
7977
; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 true), !range [[RNG0]]
8078
; CHECK-NEXT: ret i32 [[COND]]

llvm/test/Transforms/InstCombine/xor.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,9 +1336,7 @@ define i32 @xor_orn_2use(i32 %a, i32 %b, ptr %s1, ptr %s2) {
13361336

13371337
define i32 @ctlz_pow2(i32 %x) {
13381338
; CHECK-LABEL: @ctlz_pow2(
1339-
; CHECK-NEXT: [[N:%.*]] = sub i32 0, [[X:%.*]]
1340-
; CHECK-NEXT: [[A:%.*]] = and i32 [[N]], [[X]]
1341-
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 true), !range [[RNG0:![0-9]+]]
1339+
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range [[RNG0:![0-9]+]]
13421340
; CHECK-NEXT: ret i32 [[R]]
13431341
;
13441342
%n = sub i32 0, %x

0 commit comments

Comments
 (0)