Skip to content

Commit e3894f5

Browse files
authored
[CodeGenPrepare] Convert ctpop(X) ==/!= 1 into ctpop(X) u</u> 2/1 (#111284)
Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. After #100899, we set the range of ctpop's return value to indicate the argument/result is non-zero. This patch converts `ctpop(X) == 1` into `ctpop(X) u< 2` and `ctpop(X) != 1` into `ctpop(X) u> 1` in CGP (CodeGenPrepare) when the ctpop result is known to be non-zero, which fixes #95255.
1 parent 6c398ab commit e3894f5

File tree

6 files changed

+258
-15
lines changed

6 files changed

+258
-15
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2111,6 +2111,31 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
21112111
return false;
21122112
}
21132113

2114+
/// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
2115+
/// When the ctpop result is known to be non-zero, this function rewrites
2116+
/// `ctpop(X) == 1` into `ctpop(X) u< 2` and `ctpop(X) != 1` into
/// `ctpop(X) u> 1`. The compare is modified in place.
///
/// \returns true if \p Cmp was changed, false otherwise.
///
/// Note: \p TLI and \p TTI are part of the signature but are not used by this
/// implementation.
2117+
static bool adjustIsPower2Test(CmpInst *Cmp, const TargetLowering &TLI,
2118+
const TargetTransformInfo &TTI,
2119+
const DataLayout &DL) {
2120+
ICmpInst::Predicate Pred;
2121+
// Only consider an integer compare whose first operand is a ctpop intrinsic
// call and whose second operand is the constant one.
if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One())))
2122+
return false;
2123+
// Only the `==` and `!=` forms are rewritten.
if (!ICmpInst::isEquality(Pred))
2124+
return false;
2125+
// The match above established that operand 0 is the ctpop call.
auto *II = cast<IntrinsicInst>(Cmp->getOperand(0));
2126+
2127+
// For a non-zero popcount, `== 1` is equivalent to `u< 2` and `!= 1` is
// equivalent to `u> 1`.
if (isKnownNonZero(II, DL)) {
2128+
if (Pred == ICmpInst::ICMP_EQ) {
2129+
// `== 1` becomes `u< 2`: replace the constant, then the predicate.
Cmp->setOperand(1, ConstantInt::get(II->getType(), 2));
2130+
Cmp->setPredicate(ICmpInst::ICMP_ULT);
2131+
} else {
2132+
// `!= 1` becomes `u> 1`: the constant operand stays 1.
Cmp->setPredicate(ICmpInst::ICMP_UGT);
2133+
}
2134+
return true;
2135+
}
2136+
// The result may be zero, so the compare is left untouched.
return false;
2137+
}
2138+
21142139
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
21152140
if (sinkCmpExpression(Cmp, *TLI))
21162141
return true;
@@ -2130,6 +2155,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
21302155
if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
21312156
return true;
21322157

2158+
if (adjustIsPower2Test(Cmp, *TLI, *TTI, *DL))
2159+
return true;
2160+
21332161
return false;
21342162
}
21352163

llvm/test/CodeGen/AArch64/arm64-popcnt.ll

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
1515
; CHECK-NONEON-LABEL: cnt32_advsimd:
1616
; CHECK-NONEON: // %bb.0:
1717
; CHECK-NONEON-NEXT: lsr w9, w0, #1
18-
; CHECK-NONEON-NEXT: mov w8, #16843009
18+
; CHECK-NONEON-NEXT: mov w8, #16843009 // =0x1010101
1919
; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
2020
; CHECK-NONEON-NEXT: sub w9, w0, w9
2121
; CHECK-NONEON-NEXT: lsr w10, w9, #2
@@ -50,7 +50,7 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
5050
; CHECK-NONEON-LABEL: cnt32_advsimd_2:
5151
; CHECK-NONEON: // %bb.0:
5252
; CHECK-NONEON-NEXT: lsr w9, w0, #1
53-
; CHECK-NONEON-NEXT: mov w8, #16843009
53+
; CHECK-NONEON-NEXT: mov w8, #16843009 // =0x1010101
5454
; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
5555
; CHECK-NONEON-NEXT: sub w9, w0, w9
5656
; CHECK-NONEON-NEXT: lsr w10, w9, #2
@@ -86,7 +86,7 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
8686
; CHECK-NONEON-LABEL: cnt64_advsimd:
8787
; CHECK-NONEON: // %bb.0:
8888
; CHECK-NONEON-NEXT: lsr x9, x0, #1
89-
; CHECK-NONEON-NEXT: mov x8, #72340172838076673
89+
; CHECK-NONEON-NEXT: mov x8, #72340172838076673 // =0x101010101010101
9090
; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
9191
; CHECK-NONEON-NEXT: sub x9, x0, x9
9292
; CHECK-NONEON-NEXT: lsr x10, x9, #2
@@ -114,7 +114,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
114114
; CHECK-LABEL: cnt32:
115115
; CHECK: // %bb.0:
116116
; CHECK-NEXT: lsr w9, w0, #1
117-
; CHECK-NEXT: mov w8, #16843009
117+
; CHECK-NEXT: mov w8, #16843009 // =0x1010101
118118
; CHECK-NEXT: and w9, w9, #0x55555555
119119
; CHECK-NEXT: sub w9, w0, w9
120120
; CHECK-NEXT: lsr w10, w9, #2
@@ -130,7 +130,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
130130
; CHECK-NONEON-LABEL: cnt32:
131131
; CHECK-NONEON: // %bb.0:
132132
; CHECK-NONEON-NEXT: lsr w9, w0, #1
133-
; CHECK-NONEON-NEXT: mov w8, #16843009
133+
; CHECK-NONEON-NEXT: mov w8, #16843009 // =0x1010101
134134
; CHECK-NONEON-NEXT: and w9, w9, #0x55555555
135135
; CHECK-NONEON-NEXT: sub w9, w0, w9
136136
; CHECK-NONEON-NEXT: lsr w10, w9, #2
@@ -155,7 +155,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
155155
; CHECK-LABEL: cnt64:
156156
; CHECK: // %bb.0:
157157
; CHECK-NEXT: lsr x9, x0, #1
158-
; CHECK-NEXT: mov x8, #72340172838076673
158+
; CHECK-NEXT: mov x8, #72340172838076673 // =0x101010101010101
159159
; CHECK-NEXT: and x9, x9, #0x5555555555555555
160160
; CHECK-NEXT: sub x9, x0, x9
161161
; CHECK-NEXT: lsr x10, x9, #2
@@ -171,7 +171,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
171171
; CHECK-NONEON-LABEL: cnt64:
172172
; CHECK-NONEON: // %bb.0:
173173
; CHECK-NONEON-NEXT: lsr x9, x0, #1
174-
; CHECK-NONEON-NEXT: mov x8, #72340172838076673
174+
; CHECK-NONEON-NEXT: mov x8, #72340172838076673 // =0x101010101010101
175175
; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555
176176
; CHECK-NONEON-NEXT: sub x9, x0, x9
177177
; CHECK-NONEON-NEXT: lsr x10, x9, #2
@@ -278,5 +278,59 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
278278
ret i1 %cmp
279279
}
280280

281+
; The ctpop result carries range(i32 1, 33), which excludes zero. All three
; check prefixes expect `== 1` to lower to the (x & (x - 1)) == 0 sequence.
define i1 @ctpop32_eq_one_nonzero(i32 %x) {
282+
; CHECK-LABEL: ctpop32_eq_one_nonzero:
283+
; CHECK: // %bb.0: // %entry
284+
; CHECK-NEXT: sub w8, w0, #1
285+
; CHECK-NEXT: tst w0, w8
286+
; CHECK-NEXT: cset w0, eq
287+
; CHECK-NEXT: ret
288+
;
289+
; CHECK-NONEON-LABEL: ctpop32_eq_one_nonzero:
290+
; CHECK-NONEON: // %bb.0: // %entry
291+
; CHECK-NONEON-NEXT: sub w8, w0, #1
292+
; CHECK-NONEON-NEXT: tst w0, w8
293+
; CHECK-NONEON-NEXT: cset w0, eq
294+
; CHECK-NONEON-NEXT: ret
295+
;
296+
; CHECK-CSSC-LABEL: ctpop32_eq_one_nonzero:
297+
; CHECK-CSSC: // %bb.0: // %entry
298+
; CHECK-CSSC-NEXT: sub w8, w0, #1
299+
; CHECK-CSSC-NEXT: tst w0, w8
300+
; CHECK-CSSC-NEXT: cset w0, eq
301+
; CHECK-CSSC-NEXT: ret
302+
entry:
303+
%popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
304+
%cmp = icmp eq i32 %popcnt, 1
305+
ret i1 %cmp
306+
}
307+
308+
; The ctpop result carries range(i32 1, 33), which excludes zero. All three
; check prefixes expect `!= 1` to lower to the (x & (x - 1)) != 0 sequence.
define i1 @ctpop32_ne_one_nonzero(i32 %x) {
309+
; CHECK-LABEL: ctpop32_ne_one_nonzero:
310+
; CHECK: // %bb.0: // %entry
311+
; CHECK-NEXT: sub w8, w0, #1
312+
; CHECK-NEXT: tst w0, w8
313+
; CHECK-NEXT: cset w0, ne
314+
; CHECK-NEXT: ret
315+
;
316+
; CHECK-NONEON-LABEL: ctpop32_ne_one_nonzero:
317+
; CHECK-NONEON: // %bb.0: // %entry
318+
; CHECK-NONEON-NEXT: sub w8, w0, #1
319+
; CHECK-NONEON-NEXT: tst w0, w8
320+
; CHECK-NONEON-NEXT: cset w0, ne
321+
; CHECK-NONEON-NEXT: ret
322+
;
323+
; CHECK-CSSC-LABEL: ctpop32_ne_one_nonzero:
324+
; CHECK-CSSC: // %bb.0: // %entry
325+
; CHECK-CSSC-NEXT: sub w8, w0, #1
326+
; CHECK-CSSC-NEXT: tst w0, w8
327+
; CHECK-CSSC-NEXT: cset w0, ne
328+
; CHECK-CSSC-NEXT: ret
329+
entry:
330+
%popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
331+
%cmp = icmp ne i32 %popcnt, 1
332+
ret i1 %cmp
333+
}
334+
281335
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
282336
declare i64 @llvm.ctpop.i64(i64) nounwind readnone

llvm/test/CodeGen/RISCV/rv32zbb.ll

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,3 +1441,42 @@ define i32 @srai_slli2(i16 signext %0) {
14411441
%3 = sext i16 %sext to i32
14421442
ret i32 %3
14431443
}
1444+
1445+
; The ctpop result carries range(i32 1, 33), which excludes zero. With Zbb the
; expected code is cpop followed by an unsigned `< 2` test (sltiu); without it
; the expected code tests (x & (x - 1)) == 0.
define i1 @ctpop32_eq_one_nonzero(i32 %x) {
1446+
; RV32I-LABEL: ctpop32_eq_one_nonzero:
1447+
; RV32I: # %bb.0: # %entry
1448+
; RV32I-NEXT: addi a1, a0, -1
1449+
; RV32I-NEXT: and a0, a0, a1
1450+
; RV32I-NEXT: seqz a0, a0
1451+
; RV32I-NEXT: ret
1452+
;
1453+
; RV32ZBB-LABEL: ctpop32_eq_one_nonzero:
1454+
; RV32ZBB: # %bb.0: # %entry
1455+
; RV32ZBB-NEXT: cpop a0, a0
1456+
; RV32ZBB-NEXT: sltiu a0, a0, 2
1457+
; RV32ZBB-NEXT: ret
1458+
entry:
1459+
%popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
1460+
%cmp = icmp eq i32 %popcnt, 1
1461+
ret i1 %cmp
1462+
}
1463+
1464+
; The ctpop result carries range(i32 1, 33), which excludes zero. With Zbb the
; expected code is cpop, an unsigned `< 2` test (sltiu) and an inversion
; (xori); without it the expected code tests (x & (x - 1)) != 0.
define i1 @ctpop32_ne_one_nonzero(i32 %x) {
1465+
; RV32I-LABEL: ctpop32_ne_one_nonzero:
1466+
; RV32I: # %bb.0: # %entry
1467+
; RV32I-NEXT: addi a1, a0, -1
1468+
; RV32I-NEXT: and a0, a0, a1
1469+
; RV32I-NEXT: snez a0, a0
1470+
; RV32I-NEXT: ret
1471+
;
1472+
; RV32ZBB-LABEL: ctpop32_ne_one_nonzero:
1473+
; RV32ZBB: # %bb.0: # %entry
1474+
; RV32ZBB-NEXT: cpop a0, a0
1475+
; RV32ZBB-NEXT: sltiu a0, a0, 2
1476+
; RV32ZBB-NEXT: xori a0, a0, 1
1477+
; RV32ZBB-NEXT: ret
1478+
entry:
1479+
%popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
1480+
%cmp = icmp ne i32 %popcnt, 1
1481+
ret i1 %cmp
1482+
}

llvm/test/CodeGen/RISCV/rv64zbb.ll

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1618,3 +1618,84 @@ entry:
16181618
%5 = add nsw i32 %4, %0
16191619
ret i32 %5
16201620
}
1621+
1622+
; The ctpop result carries range(i32 1, 33), which excludes zero. With Zbb the
; expected code is cpopw followed by an unsigned `< 2` test (sltiu); without
; it the expected code tests (x & (x - 1)) == 0.
define i1 @ctpop32_eq_one_nonzero(i32 %x) {
1623+
; RV64I-LABEL: ctpop32_eq_one_nonzero:
1624+
; RV64I: # %bb.0: # %entry
1625+
; RV64I-NEXT: addi a1, a0, -1
1626+
; RV64I-NEXT: and a0, a0, a1
1627+
; RV64I-NEXT: sext.w a0, a0
1628+
; RV64I-NEXT: seqz a0, a0
1629+
; RV64I-NEXT: ret
1630+
;
1631+
; RV64ZBB-LABEL: ctpop32_eq_one_nonzero:
1632+
; RV64ZBB: # %bb.0: # %entry
1633+
; RV64ZBB-NEXT: cpopw a0, a0
1634+
; RV64ZBB-NEXT: sltiu a0, a0, 2
1635+
; RV64ZBB-NEXT: ret
1636+
entry:
1637+
%popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
1638+
%cmp = icmp eq i32 %popcnt, 1
1639+
ret i1 %cmp
1640+
}
1641+
1642+
; The ctpop result carries range(i32 1, 33), which excludes zero. With Zbb the
; expected code is cpopw, an unsigned `< 2` test (sltiu) and an inversion
; (xori); without it the expected code tests (x & (x - 1)) != 0.
define i1 @ctpop32_ne_one_nonzero(i32 %x) {
1643+
; RV64I-LABEL: ctpop32_ne_one_nonzero:
1644+
; RV64I: # %bb.0: # %entry
1645+
; RV64I-NEXT: addi a1, a0, -1
1646+
; RV64I-NEXT: and a0, a0, a1
1647+
; RV64I-NEXT: sext.w a0, a0
1648+
; RV64I-NEXT: snez a0, a0
1649+
; RV64I-NEXT: ret
1650+
;
1651+
; RV64ZBB-LABEL: ctpop32_ne_one_nonzero:
1652+
; RV64ZBB: # %bb.0: # %entry
1653+
; RV64ZBB-NEXT: cpopw a0, a0
1654+
; RV64ZBB-NEXT: sltiu a0, a0, 2
1655+
; RV64ZBB-NEXT: xori a0, a0, 1
1656+
; RV64ZBB-NEXT: ret
1657+
entry:
1658+
%popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
1659+
%cmp = icmp ne i32 %popcnt, 1
1660+
ret i1 %cmp
1661+
}
1662+
1663+
; 64-bit variant: the ctpop result carries range(i64 1, 65), which excludes
; zero. With Zbb the expected code is cpop followed by an unsigned `< 2` test
; (sltiu); without it the expected code tests (x & (x - 1)) == 0.
define i1 @ctpop64_eq_one_nonzero(i64 %x) {
1664+
; RV64I-LABEL: ctpop64_eq_one_nonzero:
1665+
; RV64I: # %bb.0: # %entry
1666+
; RV64I-NEXT: addi a1, a0, -1
1667+
; RV64I-NEXT: and a0, a0, a1
1668+
; RV64I-NEXT: seqz a0, a0
1669+
; RV64I-NEXT: ret
1670+
;
1671+
; RV64ZBB-LABEL: ctpop64_eq_one_nonzero:
1672+
; RV64ZBB: # %bb.0: # %entry
1673+
; RV64ZBB-NEXT: cpop a0, a0
1674+
; RV64ZBB-NEXT: sltiu a0, a0, 2
1675+
; RV64ZBB-NEXT: ret
1676+
entry:
1677+
%popcnt = call range(i64 1, 65) i64 @llvm.ctpop.i64(i64 %x)
1678+
%cmp = icmp eq i64 %popcnt, 1
1679+
ret i1 %cmp
1680+
}
1681+
1682+
; Negative test: range(i32 0, 16) still allows a zero ctpop result. The Zbb
; checks expect an equality test against 1 (addi -1; seqz), not the unsigned
; `< 2` form.
define i1 @ctpop32_eq_one_maybezero(i32 %x) {
1683+
; RV64I-LABEL: ctpop32_eq_one_maybezero:
1684+
; RV64I: # %bb.0: # %entry
1685+
; RV64I-NEXT: addiw a1, a0, -1
1686+
; RV64I-NEXT: xor a0, a0, a1
1687+
; RV64I-NEXT: sext.w a0, a0
1688+
; RV64I-NEXT: sltu a0, a1, a0
1689+
; RV64I-NEXT: ret
1690+
;
1691+
; RV64ZBB-LABEL: ctpop32_eq_one_maybezero:
1692+
; RV64ZBB: # %bb.0: # %entry
1693+
; RV64ZBB-NEXT: cpopw a0, a0
1694+
; RV64ZBB-NEXT: addi a0, a0, -1
1695+
; RV64ZBB-NEXT: seqz a0, a0
1696+
; RV64ZBB-NEXT: ret
1697+
entry:
1698+
%popcnt = call range(i32 0, 16) i32 @llvm.ctpop.i32(i32 %x)
1699+
%cmp = icmp eq i32 %popcnt, 1
1700+
ret i1 %cmp
1701+
}

llvm/test/CodeGen/X86/ispow2.ll

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ define <4 x i1> @is_pow2_non_zero_4xv64(<4 x i64> %xin) {
102102
; CHECK-AVX512: # %bb.0:
103103
; CHECK-AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
104104
; CHECK-AVX512-NEXT: vpopcntq %ymm0, %ymm0
105-
; CHECK-AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
105+
; CHECK-AVX512-NEXT: vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
106106
; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
107107
; CHECK-AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
108108
; CHECK-AVX512-NEXT: vzeroupper
@@ -155,7 +155,7 @@ define <4 x i1> @neither_pow2_non_zero_4xv64(<4 x i64> %xin) {
155155
; CHECK-AVX512: # %bb.0:
156156
; CHECK-AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
157157
; CHECK-AVX512-NEXT: vpopcntq %ymm0, %ymm0
158-
; CHECK-AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
158+
; CHECK-AVX512-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
159159
; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
160160
; CHECK-AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
161161
; CHECK-AVX512-NEXT: vzeroupper
@@ -220,3 +220,44 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
220220
%r = icmp ne <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
221221
ret <4 x i1> %r
222222
}
223+
224+
225+
; The ctpop result carries range(i32 1, 33), which excludes zero. The expected
; code tests (x & (x - 1)) == 0: lea/test without BMI, blsr with BMI2.
define i1 @ctpop32_eq_one_nonzero(i32 %x) {
226+
; CHECK-NOBMI-LABEL: ctpop32_eq_one_nonzero:
227+
; CHECK-NOBMI: # %bb.0: # %entry
228+
; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
229+
; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax
230+
; CHECK-NOBMI-NEXT: testl %eax, %edi
231+
; CHECK-NOBMI-NEXT: sete %al
232+
; CHECK-NOBMI-NEXT: retq
233+
;
234+
; CHECK-BMI2-LABEL: ctpop32_eq_one_nonzero:
235+
; CHECK-BMI2: # %bb.0: # %entry
236+
; CHECK-BMI2-NEXT: blsrl %edi, %eax
237+
; CHECK-BMI2-NEXT: sete %al
238+
; CHECK-BMI2-NEXT: retq
239+
entry:
240+
%popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
241+
%cmp = icmp eq i32 %popcnt, 1
242+
ret i1 %cmp
243+
}
244+
245+
; The ctpop result carries range(i32 1, 33), which excludes zero. The expected
; code tests (x & (x - 1)) != 0: lea/test without BMI, blsr with BMI2.
define i1 @ctpop32_ne_one_nonzero(i32 %x) {
246+
; CHECK-NOBMI-LABEL: ctpop32_ne_one_nonzero:
247+
; CHECK-NOBMI: # %bb.0: # %entry
248+
; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
249+
; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax
250+
; CHECK-NOBMI-NEXT: testl %eax, %edi
251+
; CHECK-NOBMI-NEXT: setne %al
252+
; CHECK-NOBMI-NEXT: retq
253+
;
254+
; CHECK-BMI2-LABEL: ctpop32_ne_one_nonzero:
255+
; CHECK-BMI2: # %bb.0: # %entry
256+
; CHECK-BMI2-NEXT: blsrl %edi, %eax
257+
; CHECK-BMI2-NEXT: setne %al
258+
; CHECK-BMI2-NEXT: retq
259+
entry:
260+
%popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
261+
%cmp = icmp ne i32 %popcnt, 1
262+
ret i1 %cmp
263+
}

llvm/test/CodeGen/X86/known-never-zero.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -555,9 +555,9 @@ define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
555555
; X86-NEXT: por %xmm2, %xmm0
556556
; X86-NEXT: pcmpeqd %xmm1, %xmm1
557557
; X86-NEXT: paddd %xmm0, %xmm1
558-
; X86-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
559-
; X86-NEXT: pxor %xmm1, %xmm0
560-
; X86-NEXT: pcmpgtd %xmm1, %xmm0
558+
; X86-NEXT: pand %xmm1, %xmm0
559+
; X86-NEXT: pxor %xmm1, %xmm1
560+
; X86-NEXT: pcmpeqd %xmm1, %xmm0
561561
; X86-NEXT: psrld $31, %xmm0
562562
; X86-NEXT: retl
563563
;
@@ -566,10 +566,10 @@ define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
566566
; X64-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
567567
; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
568568
; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1
569-
; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
570-
; X64-NEXT: vpminud %xmm1, %xmm0, %xmm1
569+
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
570+
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
571571
; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
572-
; X64-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
572+
; X64-NEXT: vpsrld $31, %xmm0, %xmm0
573573
; X64-NEXT: retq
574574
%z = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> <i32 54, i32 23, i32 12, i32 1>)
575575
%r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)

0 commit comments

Comments
 (0)