Skip to content

Commit 2dc1346

Browse files
committed
[InstCombine] Fold Minimum over Trailing/Leading Bits Counts (#90000)
The new transformation folds `umin(cttz(x), c)` to `cttz(x | (1 << c))` and `umin(ctlz(x), c)` to `ctlz(x | ((1 << (bitwidth - 1)) >> c))`. The transformation is only implemented for constant `c` to not increase the number of instructions. The idea of the transformation is to set the c-th lowest (for `cttz`) or highest (for `ctlz`) bit in the operand. In this way, the `cttz` or `ctlz` instruction always returns at most `c`. Alive2 proofs: https://alive2.llvm.org/ce/z/y8Hdb8
1 parent a2bce7f commit 2dc1346

File tree

2 files changed

+93
-46
lines changed

2 files changed

+93
-46
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1428,6 +1428,43 @@ static Instruction *foldBitOrderCrossLogicOp(Value *V,
14281428
return nullptr;
14291429
}
14301430

1431+
/// Fold an unsigned minimum of trailing or leading zero bits counts:
1432+
/// umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
1433+
/// umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | ((1 << (bitwidth-1))
1434+
/// >> ConstOp))
1435+
template <Intrinsic::ID IntrID>
1436+
static Value *
1437+
foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
1438+
const DataLayout &DL,
1439+
InstCombiner::BuilderTy &Builder) {
1440+
static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1441+
"This helper only supports cttz and ctlz intrinsics");
1442+
1443+
Value *X;
1444+
Value *Z;
1445+
if (match(I0, m_OneUse(m_Intrinsic<IntrID>(m_Value(X), m_Value(Z))))) {
1446+
auto BitWidth = I1->getType()->getScalarSizeInBits();
1447+
auto *Ty = I1->getType();
1448+
1449+
Value *NewCtOp = X;
1450+
auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1451+
if (match(I1, m_CheckedInt(LessBitWidth))) {
1452+
Constant *NewConst = ConstantFoldBinaryOpOperands(
1453+
IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1454+
IntrID == Intrinsic::cttz
1455+
? ConstantInt::get(Ty, 1)
1456+
: ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1457+
cast<Constant>(I1), DL);
1458+
NewCtOp = Builder.CreateOr(X, NewConst);
1459+
} else if (!match(I1, m_CheckedInt(std::not_fn(LessBitWidth)))) {
1460+
return nullptr; // Non-splat vector with elements < and >= BitWidth
1461+
}
1462+
1463+
return Builder.CreateBinaryIntrinsic(IntrID, NewCtOp, Z);
1464+
}
1465+
return nullptr;
1466+
}
1467+
14311468
/// CallInst simplification. This mostly only handles folding of intrinsic
14321469
/// instructions. For normal calls, it allows visitCallBase to do the heavy
14331470
/// lifting.
@@ -1633,6 +1670,18 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
16331670
Value *Cmp = Builder.CreateICmpNE(I0, Zero);
16341671
return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
16351672
}
1673+
// umin(cttz(x), const) --> cttz(x | (1 << const))
1674+
if (Value *FoldedCttz =
1675+
foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::cttz>(
1676+
I0, I1, DL, Builder)) {
1677+
return replaceInstUsesWith(*II, FoldedCttz);
1678+
}
1679+
// umin(ctlz(x), const) --> ctlz(x | ((1 << (bitwidth - 1)) >> const))
1680+
if (Value *FoldedCtlz =
1681+
foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>(
1682+
I0, I1, DL, Builder)) {
1683+
return replaceInstUsesWith(*II, FoldedCtlz);
1684+
}
16361685
[[fallthrough]];
16371686
}
16381687
case Intrinsic::umax: {

llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll

Lines changed: 44 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
55
; CHECK-LABEL: define i8 @umin_cttz_i8_zero_undefined(
66
; CHECK-SAME: i8 [[X:%.*]]) {
7-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
8-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
7+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
8+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
99
; CHECK-NEXT: ret i8 [[RET]]
1010
;
1111
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
@@ -16,8 +16,8 @@ define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
1616
define i8 @umin_cttz_i8_zero_defined(i8 %X) {
1717
; CHECK-LABEL: define i8 @umin_cttz_i8_zero_defined(
1818
; CHECK-SAME: i8 [[X:%.*]]) {
19-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 false)
20-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
19+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
20+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
2121
; CHECK-NEXT: ret i8 [[RET]]
2222
;
2323
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 false)
@@ -28,8 +28,8 @@ define i8 @umin_cttz_i8_zero_defined(i8 %X) {
2828
define i8 @umin_cttz_i8_commuted_zero_undefined(i8 %X) {
2929
; CHECK-LABEL: define i8 @umin_cttz_i8_commuted_zero_undefined(
3030
; CHECK-SAME: i8 [[X:%.*]]) {
31-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
32-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
31+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
32+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
3333
; CHECK-NEXT: ret i8 [[RET]]
3434
;
3535
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
@@ -51,8 +51,8 @@ define i8 @umin_cttz_i8_ge_bitwidth_zero_undefined(i8 %X) {
5151
define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
5252
; CHECK-LABEL: define i16 @umin_cttz_i16_zero_undefined(
5353
; CHECK-SAME: i16 [[X:%.*]]) {
54-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true)
55-
; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 6)
54+
; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 64
55+
; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.cttz.i16(i16 [[TMP1]], i1 true)
5656
; CHECK-NEXT: ret i16 [[RET]]
5757
;
5858
%cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true)
@@ -63,8 +63,8 @@ define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
6363
define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
6464
; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undefined(
6565
; CHECK-SAME: i32 [[X:%.*]]) {
66-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
67-
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTTZ]], i32 6)
66+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 64
67+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true)
6868
; CHECK-NEXT: ret i32 [[RET]]
6969
;
7070
%cttz = call i32 @llvm.cttz.i32(i32 %X, i1 true)
@@ -75,8 +75,8 @@ define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
7575
define i64 @umin_cttz_i64_zero_undefined(i64 %X) {
7676
; CHECK-LABEL: define i64 @umin_cttz_i64_zero_undefined(
7777
; CHECK-SAME: i64 [[X:%.*]]) {
78-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i64 0, 65) i64 @llvm.cttz.i64(i64 [[X]], i1 true)
79-
; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTTZ]], i64 6)
78+
; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 64
79+
; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.cttz.i64(i64 [[TMP1]], i1 true)
8080
; CHECK-NEXT: ret i64 [[RET]]
8181
;
8282
%cttz = call i64 @llvm.cttz.i64(i64 %X, i1 true)
@@ -108,8 +108,8 @@ define i1 @umin_cttz_i1_zero_defined(i1 %X) {
108108
define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) {
109109
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(
110110
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
111-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
112-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 6>)
111+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 64>
112+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
113113
; CHECK-NEXT: ret <2 x i32> [[RET]]
114114
;
115115
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -120,8 +120,8 @@ define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) {
120120
define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
121121
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(
122122
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
123-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
124-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 poison>)
123+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 poison>
124+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
125125
; CHECK-NEXT: ret <2 x i32> [[RET]]
126126
;
127127
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -132,8 +132,8 @@ define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
132132
define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i32> %X) {
133133
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(
134134
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
135-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
136-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 0>)
135+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 1>
136+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
137137
; CHECK-NEXT: ret <2 x i32> [[RET]]
138138
;
139139
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -144,9 +144,9 @@ define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i
144144
define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(<2 x i32> %X) {
145145
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(
146146
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
147-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
148-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 64>)
149-
; CHECK-NEXT: ret <2 x i32> [[RET]]
147+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
148+
; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> <i32 6, i32 64>)
149+
; CHECK-NEXT: ret <2 x i32> [[RET1]]
150150
;
151151
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
152152
%ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> <i32 6, i32 64>)
@@ -156,8 +156,7 @@ define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefi
156156
define <2 x i32> @umin_cttz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(<2 x i32> %X) {
157157
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(
158158
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
159-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
160-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 32, i32 64>)
159+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
161160
; CHECK-NEXT: ret <2 x i32> [[RET]]
162161
;
163162
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -194,8 +193,8 @@ define i16 @umin_cttz_i16_negative_two_uses(i16 %X) {
194193
define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
195194
; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_undefined(
196195
; CHECK-SAME: i8 [[X:%.*]]) {
197-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
198-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
196+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
197+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
199198
; CHECK-NEXT: ret i8 [[RET]]
200199
;
201200
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
@@ -206,8 +205,8 @@ define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
206205
define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
207206
; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_defined(
208207
; CHECK-SAME: i8 [[X:%.*]]) {
209-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 false)
210-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
208+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
209+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
211210
; CHECK-NEXT: ret i8 [[RET]]
212211
;
213212
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 false)
@@ -218,8 +217,8 @@ define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
218217
define i8 @umin_ctlz_i8_commuted_zero_undefined(i8 %X) {
219218
; CHECK-LABEL: define i8 @umin_ctlz_i8_commuted_zero_undefined(
220219
; CHECK-SAME: i8 [[X:%.*]]) {
221-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
222-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
220+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
221+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
223222
; CHECK-NEXT: ret i8 [[RET]]
224223
;
225224
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
@@ -241,8 +240,8 @@ define i8 @umin_ctlz_i8_ge_bitwidth_zero_undefined(i8 %X) {
241240
define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
242241
; CHECK-LABEL: define i16 @umin_ctlz_i16_zero_undefined(
243242
; CHECK-SAME: i16 [[X:%.*]]) {
244-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true)
245-
; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 6)
243+
; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 512
244+
; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.ctlz.i16(i16 [[TMP1]], i1 true)
246245
; CHECK-NEXT: ret i16 [[RET]]
247246
;
248247
%ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true)
@@ -253,8 +252,8 @@ define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
253252
define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
254253
; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undefined(
255254
; CHECK-SAME: i32 [[X:%.*]]) {
256-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
257-
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTLZ]], i32 6)
255+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 33554432
256+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
258257
; CHECK-NEXT: ret i32 [[RET]]
259258
;
260259
%ctlz = call i32 @llvm.ctlz.i32(i32 %X, i1 true)
@@ -265,8 +264,8 @@ define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
265264
define i64 @umin_ctlz_i64_zero_undefined(i64 %X) {
266265
; CHECK-LABEL: define i64 @umin_ctlz_i64_zero_undefined(
267266
; CHECK-SAME: i64 [[X:%.*]]) {
268-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
269-
; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTLZ]], i64 6)
267+
; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 144115188075855872
268+
; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.ctlz.i64(i64 [[TMP1]], i1 true)
270269
; CHECK-NEXT: ret i64 [[RET]]
271270
;
272271
%ctlz = call i64 @llvm.ctlz.i64(i64 %X, i1 true)
@@ -298,8 +297,8 @@ define i1 @umin_ctlz_i1_zero_defined(i1 %X) {
298297
define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) {
299298
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(
300299
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
301-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
302-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 6>)
300+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 33554432>
301+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
303302
; CHECK-NEXT: ret <2 x i32> [[RET]]
304303
;
305304
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
@@ -310,8 +309,8 @@ define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) {
310309
define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
311310
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(
312311
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
313-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
314-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 poison>)
312+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 poison>
313+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
315314
; CHECK-NEXT: ret <2 x i32> [[RET]]
316315
;
317316
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
@@ -322,8 +321,8 @@ define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
322321
define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i32> %X) {
323322
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(
324323
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
325-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
326-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 0>)
324+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 -2147483648>
325+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
327326
; CHECK-NEXT: ret <2 x i32> [[RET]]
328327
;
329328
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
@@ -334,9 +333,9 @@ define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i
334333
define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(<2 x i32> %X) {
335334
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(
336335
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
337-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
338-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 64>)
339-
; CHECK-NEXT: ret <2 x i32> [[RET]]
336+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
337+
; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> <i32 6, i32 64>)
338+
; CHECK-NEXT: ret <2 x i32> [[RET1]]
340339
;
341340
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
342341
%ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> <i32 6, i32 64>)
@@ -346,8 +345,7 @@ define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefi
346345
define <2 x i32> @umin_ctlz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(<2 x i32> %X) {
347346
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(
348347
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
349-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
350-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 32, i32 64>)
348+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
351349
; CHECK-NEXT: ret <2 x i32> [[RET]]
352350
;
353351
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)

0 commit comments

Comments
 (0)