Skip to content

Commit 29d05c0

Browse files
committed
[CodeGen] [SelectionDAG] More efficient code for X % C == 0 (UREM case) (try 3)
Summary: I'm submitting a new revision since I don't understand how to reclaim/reopen/take over the existing one, D50222. There is no such action in the "Add Action" menu... This implements an optimization described in Hacker's Delight 10-17: when `C` is constant, the result of `X % C == 0` can be computed more cheaply without actually calculating the remainder. The motivation is discussed here: https://bugs.llvm.org/show_bug.cgi?id=35479. This is a recommit; the original commit rL364563 was reverted in rL364568 because test-suite detected a miscompile - the new comparison constant 'Q' was being computed incorrectly (we divided by `D0` instead of `D`). Original patch D50222 by @hermord (Dmytro Shynkevych) Notes: - In principle, it's possible to also handle the `X % C1 == C2` case, as discussed on bugzilla. This seems to require an extra branch on overflow, so I refrained from implementing this for now. - An explicit check for when the `REM` can be reduced to just its LHS is included: the `X % C == 0` optimization breaks `test1` in `test/CodeGen/X86/jump_sign.ll` otherwise. I haven't managed to find a better way to not generate worse output in this case. - The `test/CodeGen/X86/jump_sign.ll` regresses, and is being fixed by a followup patch D63390. Reviewers: RKSimon, craig.topper, spatel, hermord, xbolva00 Reviewed By: RKSimon, xbolva00 Subscribers: dexonsmith, kristina, xbolva00, javed.absar, llvm-commits, hermord Tags: #llvm Differential Revision: https://reviews.llvm.org/D63391 llvm-svn: 364600
1 parent a59cf87 commit 29d05c0

13 files changed

+284
-316
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4054,6 +4054,12 @@ class TargetLowering : public TargetLoweringBase {
40544054
SDValue N1, ISD::CondCode Cond,
40554055
DAGCombinerInfo &DCI,
40564056
const SDLoc &DL) const;
4057+
4058+
SDValue prepareUREMEqFold(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
4059+
DAGCombinerInfo &DCI, const SDLoc &DL,
4060+
SmallVectorImpl<SDNode *> &Created) const;
4061+
SDValue buildUREMEqFold(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
4062+
DAGCombinerInfo &DCI, const SDLoc &DL) const;
40574063
};
40584064

40594065
/// Given an LLVM IR type and return type attributes, compute the return value

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3460,6 +3460,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
34603460
return V;
34613461
}
34623462

3463+
// Fold remainder of division by a constant.
3464+
if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() &&
3465+
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3466+
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3467+
3468+
// When division is cheap or optimizing for minimum size,
3469+
// fall through to DIVREM creation by skipping this fold.
3470+
if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize))
3471+
if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
3472+
return Folded;
3473+
}
3474+
34633475
// Fold away ALL boolean setcc's.
34643476
if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
34653477
SDValue Temp;
@@ -4445,6 +4457,103 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
44454457
return DAG.getSelect(dl, VT, IsOne, N0, Q);
44464458
}
44474459

4460+
/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
4461+
/// where the divisor is constant and the comparison target is zero,
4462+
/// return a DAG expression that will generate the same comparison result
4463+
/// using only multiplications, additions and shifts/rotations.
4464+
/// Ref: "Hacker's Delight" 10-17.
4465+
SDValue TargetLowering::buildUREMEqFold(EVT VT, SDValue REMNode,
4466+
SDValue CompNode, ISD::CondCode Cond,
4467+
DAGCombinerInfo &DCI,
4468+
const SDLoc &DL) const {
4469+
SmallVector<SDNode *, 2> Built;
4470+
if (SDValue Folded =
4471+
prepareUREMEqFold(VT, REMNode, CompNode, Cond, DCI, DL, Built)) {
4472+
for (SDNode *N : Built)
4473+
DCI.AddToWorklist(N);
4474+
return Folded;
4475+
}
4476+
4477+
return SDValue();
4478+
}
4479+
4480+
/// Build the replacement expression for (seteq/setne (urem N, D), 0).
///
/// \param VT       Result type of the comparison being replaced.
/// \param REMNode  The remainder node; operand 0 is N, operand 1 is D.
/// \param CompNode The value the remainder is compared against; the fold only
///                 applies when it is a constant (or constant splat) zero.
/// \param Cond     ISD::SETEQ or ISD::SETNE (asserted).
/// \param DCI      Combiner state; supplies the SelectionDAG to build in.
/// \param DL       Debug location attached to every node created here.
/// \param Created  Receives the intermediate nodes (MUL and, for even
///                 divisors, ROTR) that were created. The returned comparison
///                 itself is not appended.
/// \returns the new comparison, or an empty SDValue if the fold does not
///          apply. No nodes are created when an empty SDValue is returned.
SDValue
TargetLowering::prepareUREMEqFold(EVT VT, SDValue REMNode, SDValue CompNode,
                                  ISD::CondCode Cond, DAGCombinerInfo &DCI,
                                  const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor((2^W - 1) / D)
  // where W is the width of the common type of N and D.
  // Note that Q is derived from the full divisor D, not from its odd part D0.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  EVT REMVT = REMNode->getValueType(0);

  // If MUL is unavailable, we cannot proceed in any case.
  if (!isOperationLegalOrCustom(ISD::MUL, REMVT))
    return SDValue();

  // TODO: Add non-uniform constant support.
  ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1));
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompNode);
  if (!Divisor || !CompTarget || Divisor->isNullValue() ||
      !CompTarget->isNullValue())
    return SDValue();

  const APInt &D = Divisor->getAPIntValue();

  // Decompose D into D0 * 2^K
  unsigned K = D.countTrailingZeros();
  bool DivisorIsEven = (K != 0);
  APInt D0 = D.lshr(K);

  // The fold is invalid when D0 == 1.
  // This is reachable because visitSetCC happens before visitREM.
  if (D0.isOneValue())
    return SDValue();

  // An even divisor needs ROTR. Check for it before building anything so that
  // bailing out does not leave an unused MUL node behind in the DAG.
  if (DivisorIsEven && !isOperationLegalOrCustom(ISD::ROTR, REMVT))
    return SDValue();

  // P = inv(D0, 2^W)
  // 2^W requires W + 1 bits, so we have to extend and then truncate.
  unsigned W = D.getBitWidth();
  APInt P = D0.zext(W + 1)
                .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                .trunc(W);
  assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
  assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");

  // Q = floor((2^W - 1) / D)
  APInt Q = APInt::getAllOnesValue(W).udiv(D);

  SelectionDAG &DAG = DCI.DAG;

  SDValue PVal = DAG.getConstant(P, DL, REMVT);
  SDValue QVal = DAG.getConstant(Q, DL, REMVT);
  // (mul N, P)
  SDValue Op1 = DAG.getNode(ISD::MUL, DL, REMVT, REMNode->getOperand(0), PVal);
  Created.push_back(Op1.getNode());

  // Rotate right only if D was even (ROTR availability was verified above).
  if (DivisorIsEven) {
    SDValue ShAmt =
        DAG.getConstant(K, DL, getShiftAmountTy(REMVT, DAG.getDataLayout()));
    // NOTE(review): the 'exact' flag is kept as in the original code; it is
    // not obvious that it carries any meaning on a ROTR node - confirm.
    SDNodeFlags Flags;
    Flags.setExact(true);
    // UREM: (rotr (mul N, P), K)
    Op1 = DAG.getNode(ISD::ROTR, DL, REMVT, Op1, ShAmt, Flags);
    Created.push_back(Op1.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  return DAG.getSetCC(DL, VT, Op1, QVal,
                      ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
}
4556+
44484557
bool TargetLowering::
44494558
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
44504559
if (!isa<ConstantSDNode>(Op.getOperand(0))) {

llvm/lib/Support/APInt.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,6 +1095,8 @@ APInt APInt::sqrt() const {
10951095
/// however we simplify it to speed up calculating only the inverse, and take
10961096
/// advantage of div+rem calculations. We also use some tricks to avoid copying
10971097
/// (potentially large) APInts around.
1098+
/// WARNING: a value of '0' may be returned,
1099+
/// signifying that no multiplicative inverse exists!
10981100
APInt APInt::multiplicativeInverse(const APInt& modulo) const {
10991101
assert(ult(modulo) && "This APInt must be smaller than the modulo");
11001102

llvm/test/CodeGen/AArch64/urem-seteq-optsize.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,13 @@ define i32 @test_optsize(i32 %X) optsize nounwind readnone {
2626
; CHECK: // %bb.0:
2727
; CHECK-NEXT: mov w8, #52429
2828
; CHECK-NEXT: movk w8, #52428, lsl #16
29-
; CHECK-NEXT: umull x8, w0, w8
30-
; CHECK-NEXT: lsr x8, x8, #34
31-
; CHECK-NEXT: add w8, w8, w8, lsl #2
32-
; CHECK-NEXT: mov w9, #-10
33-
; CHECK-NEXT: cmp w0, w8
29+
; CHECK-NEXT: mov w9, #13108
30+
; CHECK-NEXT: movk w9, #13107, lsl #16
31+
; CHECK-NEXT: mul w8, w0, w8
32+
; CHECK-NEXT: mov w10, #-10
33+
; CHECK-NEXT: cmp w8, w9
3434
; CHECK-NEXT: mov w8, #42
35-
; CHECK-NEXT: csel w0, w8, w9, eq
35+
; CHECK-NEXT: csel w0, w8, w10, lo
3636
; CHECK-NEXT: ret
3737
%rem = urem i32 %X, 5
3838
%cmp = icmp eq i32 %rem, 0

llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,6 @@ define <4 x i32> @test_urem_one(<4 x i32> %X) nounwind readnone {
123123
ret <4 x i32> %ret
124124
}
125125

126-
; Can't fold due to second line
127126
define <4 x i32> @test_urem_nomulinv(<4 x i32> %X) nounwind readnone {
128127
; CHECK-LABEL: test_urem_nomulinv:
129128
; CHECK: // %bb.0:

llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,9 @@ define <4 x i32> @test_urem_odd_vec_i32(<4 x i32> %X) nounwind readnone {
99
; CHECK-NEXT: mov w8, #52429
1010
; CHECK-NEXT: movk w8, #52428, lsl #16
1111
; CHECK-NEXT: dup v2.4s, w8
12-
; CHECK-NEXT: umull2 v3.2d, v0.4s, v2.4s
13-
; CHECK-NEXT: umull v2.2d, v0.2s, v2.2s
14-
; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s
15-
; CHECK-NEXT: movi v1.4s, #5
16-
; CHECK-NEXT: ushr v2.4s, v2.4s, #2
17-
; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s
18-
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
12+
; CHECK-NEXT: movi v1.16b, #51
13+
; CHECK-NEXT: mul v0.4s, v0.4s, v2.4s
14+
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
1915
; CHECK-NEXT: movi v1.4s, #1
2016
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
2117
; CHECK-NEXT: ret
@@ -31,13 +27,9 @@ define <8 x i16> @test_urem_odd_vec_i16(<8 x i16> %X) nounwind readnone {
3127
; CHECK: // %bb.0:
3228
; CHECK-NEXT: mov w8, #52429
3329
; CHECK-NEXT: dup v2.8h, w8
34-
; CHECK-NEXT: umull2 v3.4s, v0.8h, v2.8h
35-
; CHECK-NEXT: umull v2.4s, v0.4h, v2.4h
36-
; CHECK-NEXT: uzp2 v2.8h, v2.8h, v3.8h
37-
; CHECK-NEXT: movi v1.8h, #5
38-
; CHECK-NEXT: ushr v2.8h, v2.8h, #2
39-
; CHECK-NEXT: mls v0.8h, v2.8h, v1.8h
40-
; CHECK-NEXT: cmeq v0.8h, v0.8h, #0
30+
; CHECK-NEXT: movi v1.16b, #51
31+
; CHECK-NEXT: mul v0.8h, v0.8h, v2.8h
32+
; CHECK-NEXT: cmhs v0.8h, v1.8h, v0.8h
4133
; CHECK-NEXT: movi v1.8h, #1
4234
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
4335
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/urem-seteq.ll

Lines changed: 43 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ define i32 @test_urem_odd(i32 %X) nounwind readnone {
1010
; CHECK: // %bb.0:
1111
; CHECK-NEXT: mov w8, #52429
1212
; CHECK-NEXT: movk w8, #52428, lsl #16
13-
; CHECK-NEXT: umull x8, w0, w8
14-
; CHECK-NEXT: lsr x8, x8, #34
15-
; CHECK-NEXT: add w8, w8, w8, lsl #2
16-
; CHECK-NEXT: cmp w0, w8
17-
; CHECK-NEXT: cset w0, eq
13+
; CHECK-NEXT: mov w9, #13108
14+
; CHECK-NEXT: mul w8, w0, w8
15+
; CHECK-NEXT: movk w9, #13107, lsl #16
16+
; CHECK-NEXT: cmp w8, w9
17+
; CHECK-NEXT: cset w0, lo
1818
; CHECK-NEXT: ret
1919
%urem = urem i32 %X, 5
2020
%cmp = icmp eq i32 %urem, 0
@@ -26,14 +26,11 @@ define i32 @test_urem_odd(i32 %X) nounwind readnone {
2626
define i32 @test_urem_odd_bit30(i32 %X) nounwind readnone {
2727
; CHECK-LABEL: test_urem_odd_bit30:
2828
; CHECK: // %bb.0:
29-
; CHECK-NEXT: mov w8, #-11
30-
; CHECK-NEXT: umull x8, w0, w8
31-
; CHECK-NEXT: mov w9, #3
32-
; CHECK-NEXT: lsr x8, x8, #62
33-
; CHECK-NEXT: movk w9, #16384, lsl #16
34-
; CHECK-NEXT: msub w8, w8, w9, w0
35-
; CHECK-NEXT: cmp w8, #0 // =0
36-
; CHECK-NEXT: cset w0, eq
29+
; CHECK-NEXT: mov w8, #43691
30+
; CHECK-NEXT: movk w8, #27306, lsl #16
31+
; CHECK-NEXT: mul w8, w0, w8
32+
; CHECK-NEXT: cmp w8, #4 // =4
33+
; CHECK-NEXT: cset w0, lo
3734
; CHECK-NEXT: ret
3835
%urem = urem i32 %X, 1073741827
3936
%cmp = icmp eq i32 %urem, 0
@@ -45,14 +42,11 @@ define i32 @test_urem_odd_bit30(i32 %X) nounwind readnone {
4542
define i32 @test_urem_odd_bit31(i32 %X) nounwind readnone {
4643
; CHECK-LABEL: test_urem_odd_bit31:
4744
; CHECK: // %bb.0:
48-
; CHECK-NEXT: mov w8, w0
49-
; CHECK-NEXT: lsl x9, x8, #30
50-
; CHECK-NEXT: sub x8, x9, x8
51-
; CHECK-NEXT: lsr x8, x8, #61
52-
; CHECK-NEXT: mov w9, #-2147483645
53-
; CHECK-NEXT: msub w8, w8, w9, w0
54-
; CHECK-NEXT: cmp w8, #0 // =0
55-
; CHECK-NEXT: cset w0, eq
45+
; CHECK-NEXT: mov w8, #43691
46+
; CHECK-NEXT: movk w8, #10922, lsl #16
47+
; CHECK-NEXT: mul w8, w0, w8
48+
; CHECK-NEXT: cmp w8, #2 // =2
49+
; CHECK-NEXT: cset w0, lo
5650
; CHECK-NEXT: ret
5751
%urem = urem i32 %X, 2147483651
5852
%cmp = icmp eq i32 %urem, 0
@@ -69,16 +63,15 @@ define i32 @test_urem_odd_bit31(i32 %X) nounwind readnone {
6963
define i16 @test_urem_even(i16 %X) nounwind readnone {
7064
; CHECK-LABEL: test_urem_even:
7165
; CHECK: // %bb.0:
72-
; CHECK-NEXT: mov w10, #9363
73-
; CHECK-NEXT: ubfx w9, w0, #1, #15
74-
; CHECK-NEXT: movk w10, #37449, lsl #16
75-
; CHECK-NEXT: umull x9, w9, w10
66+
; CHECK-NEXT: mov w9, #28087
7667
; CHECK-NEXT: and w8, w0, #0xffff
77-
; CHECK-NEXT: lsr x9, x9, #34
78-
; CHECK-NEXT: mov w10, #14
79-
; CHECK-NEXT: msub w8, w9, w10, w8
80-
; CHECK-NEXT: cmp w8, #0 // =0
81-
; CHECK-NEXT: cset w0, ne
68+
; CHECK-NEXT: movk w9, #46811, lsl #16
69+
; CHECK-NEXT: mul w8, w8, w9
70+
; CHECK-NEXT: mov w9, #9362
71+
; CHECK-NEXT: ror w8, w8, #1
72+
; CHECK-NEXT: movk w9, #4681, lsl #16
73+
; CHECK-NEXT: cmp w8, w9
74+
; CHECK-NEXT: cset w0, hi
8275
; CHECK-NEXT: ret
8376
%urem = urem i16 %X, 14
8477
%cmp = icmp ne i16 %urem, 0
@@ -90,14 +83,12 @@ define i16 @test_urem_even(i16 %X) nounwind readnone {
9083
define i32 @test_urem_even_bit30(i32 %X) nounwind readnone {
9184
; CHECK-LABEL: test_urem_even_bit30:
9285
; CHECK: // %bb.0:
93-
; CHECK-NEXT: mov w8, #-415
94-
; CHECK-NEXT: umull x8, w0, w8
95-
; CHECK-NEXT: mov w9, #104
96-
; CHECK-NEXT: lsr x8, x8, #62
97-
; CHECK-NEXT: movk w9, #16384, lsl #16
98-
; CHECK-NEXT: msub w8, w8, w9, w0
99-
; CHECK-NEXT: cmp w8, #0 // =0
100-
; CHECK-NEXT: cset w0, eq
86+
; CHECK-NEXT: mov w8, #20165
87+
; CHECK-NEXT: movk w8, #64748, lsl #16
88+
; CHECK-NEXT: mul w8, w0, w8
89+
; CHECK-NEXT: ror w8, w8, #3
90+
; CHECK-NEXT: cmp w8, #4 // =4
91+
; CHECK-NEXT: cset w0, lo
10192
; CHECK-NEXT: ret
10293
%urem = urem i32 %X, 1073741928
10394
%cmp = icmp eq i32 %urem, 0
@@ -109,15 +100,12 @@ define i32 @test_urem_even_bit30(i32 %X) nounwind readnone {
109100
define i32 @test_urem_even_bit31(i32 %X) nounwind readnone {
110101
; CHECK-LABEL: test_urem_even_bit31:
111102
; CHECK: // %bb.0:
112-
; CHECK-NEXT: mov w8, #65435
113-
; CHECK-NEXT: movk w8, #32767, lsl #16
114-
; CHECK-NEXT: umull x8, w0, w8
115-
; CHECK-NEXT: mov w9, #102
116-
; CHECK-NEXT: lsr x8, x8, #62
117-
; CHECK-NEXT: movk w9, #32768, lsl #16
118-
; CHECK-NEXT: msub w8, w8, w9, w0
119-
; CHECK-NEXT: cmp w8, #0 // =0
120-
; CHECK-NEXT: cset w0, eq
103+
; CHECK-NEXT: mov w8, #64251
104+
; CHECK-NEXT: movk w8, #47866, lsl #16
105+
; CHECK-NEXT: mul w8, w0, w8
106+
; CHECK-NEXT: ror w8, w8, #1
107+
; CHECK-NEXT: cmp w8, #2 // =2
108+
; CHECK-NEXT: cset w0, lo
121109
; CHECK-NEXT: ret
122110
%urem = urem i32 %X, 2147483750
123111
%cmp = icmp eq i32 %urem, 0
@@ -137,19 +125,17 @@ define i32 @test_urem_one(i32 %X) nounwind readnone {
137125
ret i32 %ret
138126
}
139127

140-
; We should not proceed with this fold if we can not compute
141-
; multiplicative inverse
142128
define i32 @test_urem_100(i32 %X) nounwind readnone {
143129
; CHECK-LABEL: test_urem_100:
144130
; CHECK: // %bb.0:
145-
; CHECK-NEXT: mov w8, #34079
146-
; CHECK-NEXT: movk w8, #20971, lsl #16
147-
; CHECK-NEXT: umull x8, w0, w8
148-
; CHECK-NEXT: lsr x8, x8, #37
149-
; CHECK-NEXT: mov w9, #100
150-
; CHECK-NEXT: msub w8, w8, w9, w0
151-
; CHECK-NEXT: cmp w8, #0 // =0
152-
; CHECK-NEXT: cset w0, eq
131+
; CHECK-NEXT: mov w8, #23593
132+
; CHECK-NEXT: movk w8, #49807, lsl #16
133+
; CHECK-NEXT: mul w8, w0, w8
134+
; CHECK-NEXT: mov w9, #23593
135+
; CHECK-NEXT: ror w8, w8, #2
136+
; CHECK-NEXT: movk w9, #655, lsl #16
137+
; CHECK-NEXT: cmp w8, w9
138+
; CHECK-NEXT: cset w0, lo
153139
; CHECK-NEXT: ret
154140
%urem = urem i32 %X, 100
155141
%cmp = icmp eq i32 %urem, 0

llvm/test/CodeGen/X86/jump_sign.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -236,13 +236,11 @@ define void @func_o() nounwind uwtable {
236236
; CHECK-NEXT: jne .LBB12_8
237237
; CHECK-NEXT: # %bb.4: # %if.end29
238238
; CHECK-NEXT: movzwl (%eax), %eax
239+
; CHECK-NEXT: imull $-13107, %eax, %eax # imm = 0xCCCD
240+
; CHECK-NEXT: rorw %ax
239241
; CHECK-NEXT: movzwl %ax, %eax
240-
; CHECK-NEXT: imull $52429, %eax, %ecx # imm = 0xCCCD
241-
; CHECK-NEXT: shrl $18, %ecx
242-
; CHECK-NEXT: andl $-2, %ecx
243-
; CHECK-NEXT: leal (%ecx,%ecx,4), %ecx
244-
; CHECK-NEXT: cmpw %cx, %ax
245-
; CHECK-NEXT: jne .LBB12_5
242+
; CHECK-NEXT: cmpl $6554, %eax # imm = 0x199A
243+
; CHECK-NEXT: jae .LBB12_5
246244
; CHECK-NEXT: .LBB12_8: # %if.then44
247245
; CHECK-NEXT: xorl %eax, %eax
248246
; CHECK-NEXT: testb %al, %al

0 commit comments

Comments
 (0)