Skip to content

Commit c727b10

Browse files
committed
[InstCombine] Fold Minimum over Trailing/Leading Bits Counts (#90000)
The new transformation folds `umin(cttz(x), c)` to `cttz(x | (1 << c))` and `umin(ctlz(x), c)` to `ctlz(x | ((1 << (bitwidth - 1)) >> c))`. The transformation is only implemented for constant `c` so that it does not increase the number of instructions. The idea is to set the bit at position `c` in the operand, counted from the least significant bit (for `cttz`) or from the most significant bit (for `ctlz`). With that bit set, the `cttz` or `ctlz` instruction can never return more than `c`, so the `umin` becomes redundant. Alive2 proofs: https://alive2.llvm.org/ce/z/xRZTE7
1 parent 9fdd0b1 commit c727b10

File tree

3 files changed

+73
-28
lines changed

3 files changed

+73
-28
lines changed

llvm/include/llvm/IR/PatternMatch.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2466,6 +2466,18 @@ inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) {
24662466
return m_Intrinsic<Intrinsic::bswap>(Op0);
24672467
}
24682468

2469+
/// Build a matcher for a call to the ctlz intrinsic (Intrinsic::ctlz).
/// \p Op0 and \p Op1 are sub-patterns forwarded, in that order, to
/// m_Intrinsic for the call's two operands.
template <typename Opnd0, typename Opnd1>
2470+
inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_Ctlz(const Opnd0 &Op0,
2471+
const Opnd1 &Op1) {
2472+
return m_Intrinsic<Intrinsic::ctlz>(Op0, Op1);
2473+
}
2474+
2475+
/// Build a matcher for a call to the cttz intrinsic (Intrinsic::cttz).
/// \p Op0 and \p Op1 are sub-patterns forwarded, in that order, to
/// m_Intrinsic for the call's two operands.
template <typename Opnd0, typename Opnd1>
2476+
inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_Cttz(const Opnd0 &Op0,
2477+
const Opnd1 &Op1) {
2478+
return m_Intrinsic<Intrinsic::cttz>(Op0, Op1);
2479+
}
2480+
24692481
template <typename Opnd0>
24702482
inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) {
24712483
return m_Intrinsic<Intrinsic::fabs>(Op0);

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1633,6 +1633,39 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
16331633
Value *Cmp = Builder.CreateICmpNE(I0, Zero);
16341634
return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
16351635
}
1636+
// umin(cttz(x), const) --> cttz(x | (1 << const))
1637+
Value *X;
1638+
const APInt *Y;
1639+
Value *Z;
1640+
if (match(I0, m_OneUse(m_Cttz(m_Value(X), m_Value(Z)))) &&
1641+
match(I1, m_APInt(Y))) {
1642+
Value *CttzOp = X;
1643+
if (Y->ult(I1->getType()->getScalarType()->getIntegerBitWidth())) {
1644+
auto One = APInt::getOneBitSet(
1645+
I1->getType()->getScalarType()->getIntegerBitWidth(), 0);
1646+
Value *NewConst = ConstantInt::get(I1->getType(), One << *Y);
1647+
CttzOp = Builder.CreateOr(X, NewConst);
1648+
}
1649+
return CallInst::Create(Intrinsic::getDeclaration(II->getModule(),
1650+
Intrinsic::cttz,
1651+
II->getType()),
1652+
{CttzOp, Z});
1653+
}
1654+
// umin(ctlz(x), const) --> ctlz(x | ((1 << (bitwidth - 1)) >> const))
1655+
if (match(I0, m_OneUse(m_Ctlz(m_Value(X), m_Value(Z)))) &&
1656+
match(I1, m_APInt(Y))) {
1657+
Value *CtlzOp = X;
1658+
if (Y->ult(I1->getType()->getScalarType()->getIntegerBitWidth())) {
1659+
auto Min = APInt::getSignedMinValue(
1660+
I1->getType()->getScalarType()->getIntegerBitWidth());
1661+
Value *NewConst = ConstantInt::get(I1->getType(), Min.lshr(*Y));
1662+
CtlzOp = Builder.CreateOr(X, NewConst);
1663+
}
1664+
return CallInst::Create(Intrinsic::getDeclaration(II->getModule(),
1665+
Intrinsic::ctlz,
1666+
II->getType()),
1667+
{CtlzOp, Z});
1668+
}
16361669
[[fallthrough]];
16371670
}
16381671
case Intrinsic::umax: {

llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
2525
define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
2626
; CHECK-LABEL: define i8 @umin_cttz_i8_zero_undefined(
2727
; CHECK-SAME: i8 [[X:%.*]]) {
28-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
29-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
28+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
29+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
3030
; CHECK-NEXT: ret i8 [[RET]]
3131
;
3232
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
@@ -37,8 +37,8 @@ define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
3737
define i8 @umin_cttz_i8_zero_defined(i8 %X) {
3838
; CHECK-LABEL: define i8 @umin_cttz_i8_zero_defined(
3939
; CHECK-SAME: i8 [[X:%.*]]) {
40-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 false)
41-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
40+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
41+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
4242
; CHECK-NEXT: ret i8 [[RET]]
4343
;
4444
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 false)
@@ -49,8 +49,8 @@ define i8 @umin_cttz_i8_zero_defined(i8 %X) {
4949
define i8 @umin_cttz_i8_commuted_zero_undefined(i8 %X) {
5050
; CHECK-LABEL: define i8 @umin_cttz_i8_commuted_zero_undefined(
5151
; CHECK-SAME: i8 [[X:%.*]]) {
52-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
53-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
52+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
53+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
5454
; CHECK-NEXT: ret i8 [[RET]]
5555
;
5656
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
@@ -72,8 +72,8 @@ define i8 @umin_cttz_i8_ge_bitwidth_zero_undefined(i8 %X) {
7272
define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
7373
; CHECK-LABEL: define i16 @umin_cttz_i16_zero_undefined(
7474
; CHECK-SAME: i16 [[X:%.*]]) {
75-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true)
76-
; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 6)
75+
; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 64
76+
; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.cttz.i16(i16 [[TMP1]], i1 true)
7777
; CHECK-NEXT: ret i16 [[RET]]
7878
;
7979
%cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true)
@@ -84,8 +84,8 @@ define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
8484
define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
8585
; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undefined(
8686
; CHECK-SAME: i32 [[X:%.*]]) {
87-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
88-
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTTZ]], i32 6)
87+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 64
88+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true)
8989
; CHECK-NEXT: ret i32 [[RET]]
9090
;
9191
%cttz = call i32 @llvm.cttz.i32(i32 %X, i1 true)
@@ -96,8 +96,8 @@ define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
9696
define i64 @umin_cttz_i64_zero_undefined(i64 %X) {
9797
; CHECK-LABEL: define i64 @umin_cttz_i64_zero_undefined(
9898
; CHECK-SAME: i64 [[X:%.*]]) {
99-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i64 0, 65) i64 @llvm.cttz.i64(i64 [[X]], i1 true)
100-
; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTTZ]], i64 6)
99+
; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 64
100+
; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.cttz.i64(i64 [[TMP1]], i1 true)
101101
; CHECK-NEXT: ret i64 [[RET]]
102102
;
103103
%cttz = call i64 @llvm.cttz.i64(i64 %X, i1 true)
@@ -129,8 +129,8 @@ define i1 @umin_cttz_i1_zero_defined(i1 %X) {
129129
define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) {
130130
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(
131131
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
132-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
133-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 6>)
132+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 64>
133+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
134134
; CHECK-NEXT: ret <2 x i32> [[RET]]
135135
;
136136
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -191,8 +191,8 @@ define i16 @umin_cttz_i16_negative_two_uses(i16 %X) {
191191
define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
192192
; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_undefined(
193193
; CHECK-SAME: i8 [[X:%.*]]) {
194-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
195-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
194+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
195+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
196196
; CHECK-NEXT: ret i8 [[RET]]
197197
;
198198
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
@@ -203,8 +203,8 @@ define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
203203
define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
204204
; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_defined(
205205
; CHECK-SAME: i8 [[X:%.*]]) {
206-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 false)
207-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
206+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
207+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
208208
; CHECK-NEXT: ret i8 [[RET]]
209209
;
210210
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 false)
@@ -215,8 +215,8 @@ define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
215215
define i8 @umin_ctlz_i8_commuted_zero_undefined(i8 %X) {
216216
; CHECK-LABEL: define i8 @umin_ctlz_i8_commuted_zero_undefined(
217217
; CHECK-SAME: i8 [[X:%.*]]) {
218-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
219-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
218+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
219+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
220220
; CHECK-NEXT: ret i8 [[RET]]
221221
;
222222
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
@@ -238,8 +238,8 @@ define i8 @umin_ctlz_i8_ge_bitwidth_zero_undefined(i8 %X) {
238238
define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
239239
; CHECK-LABEL: define i16 @umin_ctlz_i16_zero_undefined(
240240
; CHECK-SAME: i16 [[X:%.*]]) {
241-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true)
242-
; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 6)
241+
; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 512
242+
; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.ctlz.i16(i16 [[TMP1]], i1 true)
243243
; CHECK-NEXT: ret i16 [[RET]]
244244
;
245245
%ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true)
@@ -250,8 +250,8 @@ define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
250250
define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
251251
; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undefined(
252252
; CHECK-SAME: i32 [[X:%.*]]) {
253-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
254-
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTLZ]], i32 6)
253+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 33554432
254+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
255255
; CHECK-NEXT: ret i32 [[RET]]
256256
;
257257
%ctlz = call i32 @llvm.ctlz.i32(i32 %X, i1 true)
@@ -262,8 +262,8 @@ define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
262262
define i64 @umin_ctlz_i64_zero_undefined(i64 %X) {
263263
; CHECK-LABEL: define i64 @umin_ctlz_i64_zero_undefined(
264264
; CHECK-SAME: i64 [[X:%.*]]) {
265-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
266-
; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTLZ]], i64 6)
265+
; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 144115188075855872
266+
; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.ctlz.i64(i64 [[TMP1]], i1 true)
267267
; CHECK-NEXT: ret i64 [[RET]]
268268
;
269269
%ctlz = call i64 @llvm.ctlz.i64(i64 %X, i1 true)
@@ -295,8 +295,8 @@ define i1 @umin_ctlz_i1_zero_defined(i1 %X) {
295295
define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) {
296296
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(
297297
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
298-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
299-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 6>)
298+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 33554432>
299+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
300300
; CHECK-NEXT: ret <2 x i32> [[RET]]
301301
;
302302
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)

0 commit comments

Comments
 (0)