Skip to content

Commit 74dcc56

Browse files
committed
[InstCombine] Fold Minimum over Trailing/Leading Bits Counts (#90000)
The new transformation folds `umin(cttz(x), c)` to `cttz(x | (1 << c))` and `umin(ctlz(x), c)` to `ctlz(x | ((1 << (bitwidth - 1)) >> c))`. The transformation is only implemented for constant `c` so that it does not increase the number of instructions. The idea of the transformation is to set bit `c` of the operand, counting from zero at the least significant bit (for `cttz`) or at the most significant bit (for `ctlz`). In this way, the `cttz` or `ctlz` instruction always returns at most `c`. Alive2 proofs: https://alive2.llvm.org/ce/z/y8Hdb8
1 parent 6b6d4e7 commit 74dcc56

File tree

2 files changed

+95
-48
lines changed

2 files changed

+95
-48
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1456,6 +1456,43 @@ static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
14561456
return UsedIndices.all() ? V : nullptr;
14571457
}
14581458

1459+
/// Fold an unsigned minimum of trailing or leading zero bits counts:
1460+
/// umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
1461+
/// umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | (SignedMin
1462+
/// >> ConstOp))
///
/// \p IntrID selects the count intrinsic and must be Intrinsic::cttz or
/// Intrinsic::ctlz (enforced by the static_assert in the body). \p I0 is
/// matched against a single-use call to that intrinsic, and \p I1 against an
/// integer constant that is unsigned-less-than the scalar bit width of its
/// type. \p DL is forwarded to the constant folder, and \p Builder is used to
/// create the `or` and the new intrinsic call.
///
/// \returns the newly built intrinsic call, or nullptr if either operand does
/// not match.
1463+
template <Intrinsic::ID IntrID>
1464+
static Value *
1465+
foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
1466+
const DataLayout &DL,
1467+
InstCombiner::BuilderTy &Builder) {
1468+
static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1469+
"This helper only supports cttz and ctlz intrinsics");
1470+
1471+
// Capture both arguments of the intrinsic call. The one-use requirement
// means the fold is skipped when the count has other users.
Value *CtOp;
1472+
Value *ZeroUndef;
1473+
if (!match(I0,
1474+
m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp), m_Value(ZeroUndef)))))
1475+
return nullptr;
1476+
1477+
// Only constants strictly below the scalar bit width (unsigned compare) are
// accepted, so the shift used to build the mask below stays in range.
unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1478+
auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1479+
if (!match(I1, m_CheckedInt(LessBitWidth)))
1480+
// We have a constant >= BitWidth (which can be handled by CVP)
1481+
// or a non-splat vector with elements < and >= BitWidth
1482+
return nullptr;
1483+
1484+
// Build the mask to OR into the operand:
//   cttz: 1 << I1            (shift left from the lowest bit)
//   ctlz: SignedMin >> I1    (logical shift right from the highest bit)
Type *Ty = I1->getType();
1485+
Constant *NewConst = ConstantFoldBinaryOpOperands(
1486+
IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1487+
IntrID == Intrinsic::cttz
1488+
? ConstantInt::get(Ty, 1)
1489+
: ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1490+
cast<Constant>(I1), DL);
1491+
// Re-emit the intrinsic on the masked operand. The second argument is always
// set to true, regardless of the original ZeroUndef value; only ZeroUndef's
// type is reused here.
// NOTE(review): presumably valid because the OR'ed-in mask keeps the operand
// non-zero -- confirm against the Alive2 proof referenced by the commit.
return Builder.CreateBinaryIntrinsic(
1492+
IntrID, Builder.CreateOr(CtOp, NewConst),
1493+
ConstantInt::getTrue(ZeroUndef->getType()));
1494+
}
1495+
14591496
/// CallInst simplification. This mostly only handles folding of intrinsic
14601497
/// instructions. For normal calls, it allows visitCallBase to do the heavy
14611498
/// lifting.
@@ -1661,6 +1698,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
16611698
Value *Cmp = Builder.CreateICmpNE(I0, Zero);
16621699
return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
16631700
}
1701+
// umin(cttz(x), const) --> cttz(x | (1 << const))
1702+
if (Value *FoldedCttz =
1703+
foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::cttz>(
1704+
I0, I1, DL, Builder))
1705+
return replaceInstUsesWith(*II, FoldedCttz);
1706+
// umin(ctlz(x), const) --> ctlz(x | ((SignedMin >> const)))
1707+
if (Value *FoldedCtlz =
1708+
foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>(
1709+
I0, I1, DL, Builder))
1710+
return replaceInstUsesWith(*II, FoldedCtlz);
16641711
[[fallthrough]];
16651712
}
16661713
case Intrinsic::umax: {

llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll

Lines changed: 48 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
55
; CHECK-LABEL: define i8 @umin_cttz_i8_zero_undefined(
66
; CHECK-SAME: i8 [[X:%.*]]) {
7-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
8-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
7+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
8+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
99
; CHECK-NEXT: ret i8 [[RET]]
1010
;
1111
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
@@ -16,8 +16,8 @@ define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
1616
define i8 @umin_cttz_i8_zero_defined(i8 %X) {
1717
; CHECK-LABEL: define i8 @umin_cttz_i8_zero_defined(
1818
; CHECK-SAME: i8 [[X:%.*]]) {
19-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 false)
20-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
19+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
20+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
2121
; CHECK-NEXT: ret i8 [[RET]]
2222
;
2323
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 false)
@@ -28,8 +28,8 @@ define i8 @umin_cttz_i8_zero_defined(i8 %X) {
2828
define i8 @umin_cttz_i8_commuted_zero_undefined(i8 %X) {
2929
; CHECK-LABEL: define i8 @umin_cttz_i8_commuted_zero_undefined(
3030
; CHECK-SAME: i8 [[X:%.*]]) {
31-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
32-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
31+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
32+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
3333
; CHECK-NEXT: ret i8 [[RET]]
3434
;
3535
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
@@ -51,8 +51,8 @@ define i8 @umin_cttz_i8_negative_ge_bitwidth_zero_undefined(i8 %X) {
5151
define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
5252
; CHECK-LABEL: define i16 @umin_cttz_i16_zero_undefined(
5353
; CHECK-SAME: i16 [[X:%.*]]) {
54-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true)
55-
; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 6)
54+
; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 64
55+
; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.cttz.i16(i16 [[TMP1]], i1 true)
5656
; CHECK-NEXT: ret i16 [[RET]]
5757
;
5858
%cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true)
@@ -63,8 +63,8 @@ define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
6363
define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
6464
; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undefined(
6565
; CHECK-SAME: i32 [[X:%.*]]) {
66-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
67-
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTTZ]], i32 6)
66+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 64
67+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true)
6868
; CHECK-NEXT: ret i32 [[RET]]
6969
;
7070
%cttz = call i32 @llvm.cttz.i32(i32 %X, i1 true)
@@ -75,8 +75,8 @@ define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
7575
define i64 @umin_cttz_i64_zero_undefined(i64 %X) {
7676
; CHECK-LABEL: define i64 @umin_cttz_i64_zero_undefined(
7777
; CHECK-SAME: i64 [[X:%.*]]) {
78-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i64 0, 65) i64 @llvm.cttz.i64(i64 [[X]], i1 true)
79-
; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTTZ]], i64 6)
78+
; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 64
79+
; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.cttz.i64(i64 [[TMP1]], i1 true)
8080
; CHECK-NEXT: ret i64 [[RET]]
8181
;
8282
%cttz = call i64 @llvm.cttz.i64(i64 %X, i1 true)
@@ -108,8 +108,8 @@ define i1 @umin_cttz_i1_zero_defined(i1 %X) {
108108
define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) {
109109
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(
110110
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
111-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
112-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 6>)
111+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 64>
112+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
113113
; CHECK-NEXT: ret <2 x i32> [[RET]]
114114
;
115115
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -120,8 +120,8 @@ define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) {
120120
define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
121121
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(
122122
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
123-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
124-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 poison>)
123+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 poison>
124+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
125125
; CHECK-NEXT: ret <2 x i32> [[RET]]
126126
;
127127
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -132,8 +132,8 @@ define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
132132
define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i32> %X) {
133133
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(
134134
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
135-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
136-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 0>)
135+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 1>
136+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
137137
; CHECK-NEXT: ret <2 x i32> [[RET]]
138138
;
139139
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -144,9 +144,9 @@ define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i
144144
define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(<2 x i32> %X) {
145145
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(
146146
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
147-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
148-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 64>)
149-
; CHECK-NEXT: ret <2 x i32> [[RET]]
147+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
148+
; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> <i32 6, i32 64>)
149+
; CHECK-NEXT: ret <2 x i32> [[RET1]]
150150
;
151151
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
152152
%ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> <i32 6, i32 64>)
@@ -156,9 +156,9 @@ define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefi
156156
define <2 x i32> @umin_cttz_2xi32_negative_no_splat_none_lt_bitwidth_zero_undefined(<2 x i32> %X) {
157157
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_negative_no_splat_none_lt_bitwidth_zero_undefined(
158158
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
159-
; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
160-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 32, i32 64>)
161-
; CHECK-NEXT: ret <2 x i32> [[RET]]
159+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
160+
; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> <i32 32, i32 64>)
161+
; CHECK-NEXT: ret <2 x i32> [[RET1]]
162162
;
163163
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
164164
%ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> <i32 32, i32 64>)
@@ -194,8 +194,8 @@ define i16 @umin_cttz_i16_negative_two_uses(i16 %X) {
194194
define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
195195
; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_undefined(
196196
; CHECK-SAME: i8 [[X:%.*]]) {
197-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
198-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
197+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
198+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
199199
; CHECK-NEXT: ret i8 [[RET]]
200200
;
201201
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
@@ -206,8 +206,8 @@ define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
206206
define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
207207
; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_defined(
208208
; CHECK-SAME: i8 [[X:%.*]]) {
209-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 false)
210-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
209+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
210+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
211211
; CHECK-NEXT: ret i8 [[RET]]
212212
;
213213
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 false)
@@ -218,8 +218,8 @@ define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
218218
define i8 @umin_ctlz_i8_commuted_zero_undefined(i8 %X) {
219219
; CHECK-LABEL: define i8 @umin_ctlz_i8_commuted_zero_undefined(
220220
; CHECK-SAME: i8 [[X:%.*]]) {
221-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
222-
; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
221+
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
222+
; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
223223
; CHECK-NEXT: ret i8 [[RET]]
224224
;
225225
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
@@ -241,8 +241,8 @@ define i8 @umin_ctlz_i8_negative_ge_bitwidth_zero_undefined(i8 %X) {
241241
define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
242242
; CHECK-LABEL: define i16 @umin_ctlz_i16_zero_undefined(
243243
; CHECK-SAME: i16 [[X:%.*]]) {
244-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true)
245-
; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 6)
244+
; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 512
245+
; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.ctlz.i16(i16 [[TMP1]], i1 true)
246246
; CHECK-NEXT: ret i16 [[RET]]
247247
;
248248
%ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true)
@@ -253,8 +253,8 @@ define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
253253
define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
254254
; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undefined(
255255
; CHECK-SAME: i32 [[X:%.*]]) {
256-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
257-
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTLZ]], i32 6)
256+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 33554432
257+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
258258
; CHECK-NEXT: ret i32 [[RET]]
259259
;
260260
%ctlz = call i32 @llvm.ctlz.i32(i32 %X, i1 true)
@@ -265,8 +265,8 @@ define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
265265
define i64 @umin_ctlz_i64_zero_undefined(i64 %X) {
266266
; CHECK-LABEL: define i64 @umin_ctlz_i64_zero_undefined(
267267
; CHECK-SAME: i64 [[X:%.*]]) {
268-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
269-
; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTLZ]], i64 6)
268+
; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 144115188075855872
269+
; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.ctlz.i64(i64 [[TMP1]], i1 true)
270270
; CHECK-NEXT: ret i64 [[RET]]
271271
;
272272
%ctlz = call i64 @llvm.ctlz.i64(i64 %X, i1 true)
@@ -298,8 +298,8 @@ define i1 @umin_ctlz_i1_zero_defined(i1 %X) {
298298
define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) {
299299
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(
300300
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
301-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
302-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 6>)
301+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 33554432>
302+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
303303
; CHECK-NEXT: ret <2 x i32> [[RET]]
304304
;
305305
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
@@ -310,8 +310,8 @@ define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) {
310310
define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
311311
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(
312312
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
313-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
314-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 poison>)
313+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 poison>
314+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
315315
; CHECK-NEXT: ret <2 x i32> [[RET]]
316316
;
317317
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
@@ -322,8 +322,8 @@ define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
322322
define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i32> %X) {
323323
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(
324324
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
325-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
326-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 0>)
325+
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 -2147483648>
326+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
327327
; CHECK-NEXT: ret <2 x i32> [[RET]]
328328
;
329329
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
@@ -334,9 +334,9 @@ define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i
334334
define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(<2 x i32> %X) {
335335
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(
336336
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
337-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
338-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 64>)
339-
; CHECK-NEXT: ret <2 x i32> [[RET]]
337+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
338+
; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> <i32 6, i32 64>)
339+
; CHECK-NEXT: ret <2 x i32> [[RET1]]
340340
;
341341
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
342342
%ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> <i32 6, i32 64>)
@@ -346,9 +346,9 @@ define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefi
346346
define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_none_lt_bitwidth_zero_undefined(<2 x i32> %X) {
347347
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_none_lt_bitwidth_zero_undefined(
348348
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
349-
; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
350-
; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 32, i32 64>)
351-
; CHECK-NEXT: ret <2 x i32> [[RET]]
349+
; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
350+
; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> <i32 32, i32 64>)
351+
; CHECK-NEXT: ret <2 x i32> [[RET1]]
352352
;
353353
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
354354
%ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> <i32 32, i32 64>)

0 commit comments

Comments
 (0)