Skip to content

Commit 2907f4f

Browse files
committed
[InstCombine] Fold Minimum over Trailing/Leading Bits Counts (#90000)
The new transformation folds `umin(cttz(x), c)` to `cttz(x | (1 << c))` and `umin(ctlz(x), c)` to `ctlz(x | ((1 << (bitwidth - 1)) >> c))`. The transformation is only implemented for constant `c` so that it does not increase the number of instructions. The idea of the transformation is to set bit `c` of the operand, counted from the least significant bit (for `cttz`) or from the most significant bit (for `ctlz`). In this way, the `cttz` or `ctlz` instruction always returns at most `c`. Alive2 proofs: https://alive2.llvm.org/ce/z/7BQLBe
1 parent 3ebfb1a commit 2907f4f

File tree

3 files changed

+103
-44
lines changed

3 files changed

+103
-44
lines changed

llvm/include/llvm/IR/PatternMatch.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2466,6 +2466,18 @@ inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) {
24662466
return m_Intrinsic<Intrinsic::bswap>(Op0);
24672467
}
24682468

2469+
template <typename Opnd0, typename Opnd1>
2470+
inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_Ctlz(const Opnd0 &Op0,
2471+
const Opnd1 &Op1) {
2472+
return m_Intrinsic<Intrinsic::ctlz>(Op0, Op1);
2473+
}
2474+
2475+
template <typename Opnd0, typename Opnd1>
2476+
inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_Cttz(const Opnd0 &Op0,
2477+
const Opnd1 &Op1) {
2478+
return m_Intrinsic<Intrinsic::cttz>(Op0, Op1);
2479+
}
2480+
24692481
template <typename Opnd0>
24702482
inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) {
24712483
return m_Intrinsic<Intrinsic::fabs>(Op0);

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1428,6 +1428,40 @@ static Instruction *foldBitOrderCrossLogicOp(Value *V,
14281428
return nullptr;
14291429
}
14301430

1431+
/// Fold an unsigned minimum of trailing or leading zero bits counts:
1432+
/// umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
1433+
/// umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | ((1 << (bitwidth-1))
1434+
/// >> ConstOp))
1435+
template <Intrinsic::ID IntrID>
1436+
static Instruction *foldMinimumOverTrailingOrLeadingZeroCount(
1437+
Instruction *OrigInst, Value *CtOp, Value *ZeroUndef, Constant *ConstOp,
1438+
const DataLayout &DL, InstCombiner::BuilderTy &Builder) {
1439+
static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1440+
"This helper only supports cttz and ctlz intrinsics");
1441+
1442+
auto BitWidth = ConstOp->getType()->getScalarSizeInBits();
1443+
auto *Ty = ConstOp->getType();
1444+
1445+
Constant *NewConst = ConstantFoldSelectInstruction(
1446+
ConstantFoldCompareInstOperands(CmpInst::ICMP_ULT, ConstOp,
1447+
ConstantInt::get(Ty, BitWidth), DL),
1448+
ConstantFoldBinaryOpOperands(
1449+
IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1450+
IntrID == Intrinsic::cttz
1451+
? ConstantInt::get(Ty, 1)
1452+
: ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1453+
ConstOp, DL),
1454+
Constant::getNullValue(Ty));
1455+
1456+
Value *NewCtOp =
1457+
NewConst->isZeroValue() ? CtOp : Builder.CreateOr(CtOp, NewConst);
1458+
1459+
return CallInst::Create(Intrinsic::getDeclaration(OrigInst->getModule(),
1460+
IntrID,
1461+
OrigInst->getType()),
1462+
{NewCtOp, ZeroUndef});
1463+
}
1464+
14311465
/// CallInst simplification. This mostly only handles folding of intrinsic
14321466
/// instructions. For normal calls, it allows visitCallBase to do the heavy
14331467
/// lifting.
@@ -1633,6 +1667,21 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
16331667
Value *Cmp = Builder.CreateICmpNE(I0, Zero);
16341668
return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
16351669
}
1670+
// umin(cttz(x), const) --> cttz(x | (1 << const))
1671+
Value *X;
1672+
Constant *Y;
1673+
Value *Z;
1674+
if (match(I0, m_OneUse(m_Cttz(m_Value(X), m_Value(Z)))) &&
1675+
match(I1, m_Constant(Y))) {
1676+
return foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::cttz>(
1677+
II, X, Z, Y, DL, Builder);
1678+
}
1679+
// umin(ctlz(x), const) --> ctlz(x | ((1 << (bitwidth - 1)) >> const))
1680+
if (match(I0, m_OneUse(m_Ctlz(m_Value(X), m_Value(Z)))) &&
1681+
match(I1, m_Constant(Y))) {
1682+
return foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>(
1683+
II, X, Z, Y, DL, Builder);
1684+
}
16361685
[[fallthrough]];
16371686
}
16381687
case Intrinsic::umax: {

llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll

Lines changed: 42 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
55
; CHECK-LABEL: define i8 @umin_cttz_i8_zero_undefined(
66
; CHECK-SAME: i8 [[X:%.*]]) {
7-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
8-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
7+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
8+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
99
; CHECK-NEXT: ret i8 [[RET]]
1010
;
1111
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
@@ -16,8 +16,8 @@ define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
1616
define i8 @umin_cttz_i8_zero_defined(i8 %X) {
1717
; CHECK-LABEL: define i8 @umin_cttz_i8_zero_defined(
1818
; CHECK-SAME: i8 [[X:%.*]]) {
19-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 false)
20-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
19+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
20+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
2121
; CHECK-NEXT: ret i8 [[RET]]
2222
;
2323
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 false)
@@ -28,8 +28,8 @@ define i8 @umin_cttz_i8_zero_defined(i8 %X) {
2828
define i8 @umin_cttz_i8_commuted_zero_undefined(i8 %X) {
2929
; CHECK-LABEL: define i8 @umin_cttz_i8_commuted_zero_undefined(
3030
; CHECK-SAME: i8 [[X:%.*]]) {
31-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
32-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
31+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
32+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
3333
; CHECK-NEXT: ret i8 [[RET]]
3434
;
3535
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
@@ -51,8 +51,8 @@ define i8 @umin_cttz_i8_ge_bitwidth_zero_undefined(i8 %X) {
5151
define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
5252
; CHECK-LABEL: define i16 @umin_cttz_i16_zero_undefined(
5353
; CHECK-SAME: i16 [[X:%.*]]) {
54-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true)
55-
; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 6)
54+
; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 64
55+
; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.cttz.i16(i16 [[TMP1]], i1 true)
5656
; CHECK-NEXT: ret i16 [[RET]]
5757
;
5858
%cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true)
@@ -63,8 +63,8 @@ define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
6363
define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
6464
; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undefined(
6565
; CHECK-SAME: i32 [[X:%.*]]) {
66-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
67-
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTTZ]], i32 6)
66+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 64
67+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true)
6868
; CHECK-NEXT: ret i32 [[RET]]
6969
;
7070
%cttz = call i32 @llvm.cttz.i32(i32 %X, i1 true)
@@ -75,8 +75,8 @@ define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
7575
define i64 @umin_cttz_i64_zero_undefined(i64 %X) {
7676
; CHECK-LABEL: define i64 @umin_cttz_i64_zero_undefined(
7777
; CHECK-SAME: i64 [[X:%.*]]) {
78-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i64 0, 65) i64 @llvm.cttz.i64(i64 [[X]], i1 true)
79-
; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTTZ]], i64 6)
78+
; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 64
79+
; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.cttz.i64(i64 [[TMP1]], i1 true)
8080
; CHECK-NEXT: ret i64 [[RET]]
8181
;
8282
%cttz = call i64 @llvm.cttz.i64(i64 %X, i1 true)
@@ -108,8 +108,8 @@ define i1 @umin_cttz_i1_zero_defined(i1 %X) {
108108
define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) {
109109
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(
110110
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
111-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
112-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 6>)
111+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 64>
112+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
113113
; CHECK-NEXT: ret <2 x i32> [[RET]]
114114
;
115115
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -120,8 +120,8 @@ define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) {
120120
define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
121121
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(
122122
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
123-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
124-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 poison>)
123+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 poison>
124+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
125125
; CHECK-NEXT: ret <2 x i32> [[RET]]
126126
;
127127
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -132,8 +132,8 @@ define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
132132
define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i32> %X) {
133133
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(
134134
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
135-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
136-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 0>)
135+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 1>
136+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
137137
; CHECK-NEXT: ret <2 x i32> [[RET]]
138138
;
139139
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -144,8 +144,8 @@ define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i
144144
define <2 x i32> @umin_cttz_2xi32_no_splat_some_lt_bitwidth_zero_undefined(<2 x i32> %X) {
145145
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_no_splat_some_lt_bitwidth_zero_undefined(
146146
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
147-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
148-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 64>)
147+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 0>
148+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
149149
; CHECK-NEXT: ret <2 x i32> [[RET]]
150150
;
151151
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -157,8 +157,7 @@ define <2 x i32> @umin_cttz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(<2 x
157157
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(
158158
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
159159
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
160-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 32, i32 64>)
161-
; CHECK-NEXT: ret <2 x i32> [[RET]]
160+
; CHECK-NEXT: ret <2 x i32> [[CTTZ]]
162161
;
163162
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
164163
%ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> <i32 32, i32 64>)
@@ -194,8 +193,8 @@ define i16 @umin_cttz_i16_negative_two_uses(i16 %X) {
194193
define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
195194
; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_undefined(
196195
; CHECK-SAME: i8 [[X:%.*]]) {
197-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
198-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
196+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
197+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
199198
; CHECK-NEXT: ret i8 [[RET]]
200199
;
201200
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
@@ -206,8 +205,8 @@ define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
206205
define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
207206
; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_defined(
208207
; CHECK-SAME: i8 [[X:%.*]]) {
209-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 false)
210-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
208+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
209+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
211210
; CHECK-NEXT: ret i8 [[RET]]
212211
;
213212
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 false)
@@ -218,8 +217,8 @@ define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
218217
define i8 @umin_ctlz_i8_commuted_zero_undefined(i8 %X) {
219218
; CHECK-LABEL: define i8 @umin_ctlz_i8_commuted_zero_undefined(
220219
; CHECK-SAME: i8 [[X:%.*]]) {
221-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
222-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
220+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
221+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
223222
; CHECK-NEXT: ret i8 [[RET]]
224223
;
225224
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
@@ -241,8 +240,8 @@ define i8 @umin_ctlz_i8_ge_bitwidth_zero_undefined(i8 %X) {
241240
define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
242241
; CHECK-LABEL: define i16 @umin_ctlz_i16_zero_undefined(
243242
; CHECK-SAME: i16 [[X:%.*]]) {
244-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true)
245-
; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 6)
243+
; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 512
244+
; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.ctlz.i16(i16 [[TMP1]], i1 true)
246245
; CHECK-NEXT: ret i16 [[RET]]
247246
;
248247
%ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true)
@@ -253,8 +252,8 @@ define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
253252
define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
254253
; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undefined(
255254
; CHECK-SAME: i32 [[X:%.*]]) {
256-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
257-
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTLZ]], i32 6)
255+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 33554432
256+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
258257
; CHECK-NEXT: ret i32 [[RET]]
259258
;
260259
%ctlz = call i32 @llvm.ctlz.i32(i32 %X, i1 true)
@@ -265,8 +264,8 @@ define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
265264
define i64 @umin_ctlz_i64_zero_undefined(i64 %X) {
266265
; CHECK-LABEL: define i64 @umin_ctlz_i64_zero_undefined(
267266
; CHECK-SAME: i64 [[X:%.*]]) {
268-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
269-
; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTLZ]], i64 6)
267+
; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 144115188075855872
268+
; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.ctlz.i64(i64 [[TMP1]], i1 true)
270269
; CHECK-NEXT: ret i64 [[RET]]
271270
;
272271
%ctlz = call i64 @llvm.ctlz.i64(i64 %X, i1 true)
@@ -298,8 +297,8 @@ define i1 @umin_ctlz_i1_zero_defined(i1 %X) {
298297
define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) {
299298
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(
300299
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
301-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
302-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 6>)
300+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 33554432>
301+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
303302
; CHECK-NEXT: ret <2 x i32> [[RET]]
304303
;
305304
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
@@ -310,8 +309,8 @@ define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) {
310309
define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
311310
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(
312311
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
313-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
314-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 poison>)
312+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 poison>
313+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
315314
; CHECK-NEXT: ret <2 x i32> [[RET]]
316315
;
317316
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
@@ -322,8 +321,8 @@ define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
322321
define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i32> %X) {
323322
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(
324323
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
325-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
326-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 0>)
324+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 -2147483648>
325+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
327326
; CHECK-NEXT: ret <2 x i32> [[RET]]
328327
;
329328
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
@@ -334,8 +333,8 @@ define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i
334333
define <2 x i32> @umin_ctlz_2xi32_no_splat_some_lt_bitwidth_zero_undefined(<2 x i32> %X) {
335334
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_no_splat_some_lt_bitwidth_zero_undefined(
336335
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
337-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
338-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 64>)
336+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 0>
337+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
339338
; CHECK-NEXT: ret <2 x i32> [[RET]]
340339
;
341340
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
@@ -347,8 +346,7 @@ define <2 x i32> @umin_ctlz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(<2 x
347346
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(
348347
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
349348
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
350-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 32, i32 64>)
351-
; CHECK-NEXT: ret <2 x i32> [[RET]]
349+
; CHECK-NEXT: ret <2 x i32> [[CTLZ]]
352350
;
353351
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
354352
%ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> <i32 32, i32 64>)

0 commit comments

Comments
 (0)