Skip to content

Commit bf11b6d

Browse files
committed
[CodeGenPrepare] Convert ctpop(X) ==/!= 1 -> ctpop(X) u</u> 2/1
1 parent 9449548 commit bf11b6d

File tree

5 files changed

+86
-56
lines changed

5 files changed

+86
-56
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2111,6 +2111,32 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
21112111
return false;
21122112
}
21132113

2114+
/// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
2115+
/// This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the
2116+
/// result cannot be zero.
2117+
static bool adjustIsPower2Test(CmpInst *Cmp) {
2118+
ICmpInst::Predicate Pred;
2119+
if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One())))
2120+
return false;
2121+
if (!ICmpInst::isEquality(Pred))
2122+
return false;
2123+
auto *II = cast<IntrinsicInst>(Cmp->getOperand(0));
2124+
if (auto Range = II->getRange()) {
2125+
Type *Ty = II->getType();
2126+
unsigned BitWidth = Ty->getScalarSizeInBits();
2127+
if (Range->contains(APInt::getZero(BitWidth)))
2128+
return false;
2129+
2130+
if (Pred == ICmpInst::ICMP_EQ) {
2131+
Cmp->setPredicate(ICmpInst::ICMP_ULT);
2132+
Cmp->setOperand(1, ConstantInt::get(Ty, 2));
2133+
} else
2134+
Cmp->setPredicate(ICmpInst::ICMP_UGT);
2135+
return true;
2136+
}
2137+
return false;
2138+
}
2139+
21142140
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
21152141
if (sinkCmpExpression(Cmp, *TLI))
21162142
return true;
@@ -2130,6 +2156,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
21302156
if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
21312157
return true;
21322158

2159+
if (adjustIsPower2Test(Cmp))
2160+
return true;
2161+
21332162
return false;
21342163
}
21352164

llvm/test/CodeGen/AArch64/arm64-popcnt.ll

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -282,23 +282,21 @@ define i1 @ctpop32_eq_one_nonzero(i32 %x) {
282282
; CHECK-LABEL: ctpop32_eq_one_nonzero:
283283
; CHECK: // %bb.0: // %entry
284284
; CHECK-NEXT: sub w8, w0, #1
285-
; CHECK-NEXT: eor w9, w0, w8
286-
; CHECK-NEXT: cmp w9, w8
287-
; CHECK-NEXT: cset w0, hi
285+
; CHECK-NEXT: tst w0, w8
286+
; CHECK-NEXT: cset w0, eq
288287
; CHECK-NEXT: ret
289288
;
290289
; CHECK-NONEON-LABEL: ctpop32_eq_one_nonzero:
291290
; CHECK-NONEON: // %bb.0: // %entry
292291
; CHECK-NONEON-NEXT: sub w8, w0, #1
293-
; CHECK-NONEON-NEXT: eor w9, w0, w8
294-
; CHECK-NONEON-NEXT: cmp w9, w8
295-
; CHECK-NONEON-NEXT: cset w0, hi
292+
; CHECK-NONEON-NEXT: tst w0, w8
293+
; CHECK-NONEON-NEXT: cset w0, eq
296294
; CHECK-NONEON-NEXT: ret
297295
;
298296
; CHECK-CSSC-LABEL: ctpop32_eq_one_nonzero:
299297
; CHECK-CSSC: // %bb.0: // %entry
300-
; CHECK-CSSC-NEXT: cnt w8, w0
301-
; CHECK-CSSC-NEXT: cmp w8, #1
298+
; CHECK-CSSC-NEXT: sub w8, w0, #1
299+
; CHECK-CSSC-NEXT: tst w0, w8
302300
; CHECK-CSSC-NEXT: cset w0, eq
303301
; CHECK-CSSC-NEXT: ret
304302
entry:
@@ -311,23 +309,21 @@ define i1 @ctpop32_ne_one_nonzero(i32 %x) {
311309
; CHECK-LABEL: ctpop32_ne_one_nonzero:
312310
; CHECK: // %bb.0: // %entry
313311
; CHECK-NEXT: sub w8, w0, #1
314-
; CHECK-NEXT: eor w9, w0, w8
315-
; CHECK-NEXT: cmp w9, w8
316-
; CHECK-NEXT: cset w0, ls
312+
; CHECK-NEXT: tst w0, w8
313+
; CHECK-NEXT: cset w0, ne
317314
; CHECK-NEXT: ret
318315
;
319316
; CHECK-NONEON-LABEL: ctpop32_ne_one_nonzero:
320317
; CHECK-NONEON: // %bb.0: // %entry
321318
; CHECK-NONEON-NEXT: sub w8, w0, #1
322-
; CHECK-NONEON-NEXT: eor w9, w0, w8
323-
; CHECK-NONEON-NEXT: cmp w9, w8
324-
; CHECK-NONEON-NEXT: cset w0, ls
319+
; CHECK-NONEON-NEXT: tst w0, w8
320+
; CHECK-NONEON-NEXT: cset w0, ne
325321
; CHECK-NONEON-NEXT: ret
326322
;
327323
; CHECK-CSSC-LABEL: ctpop32_ne_one_nonzero:
328324
; CHECK-CSSC: // %bb.0: // %entry
329-
; CHECK-CSSC-NEXT: cnt w8, w0
330-
; CHECK-CSSC-NEXT: cmp w8, #1
325+
; CHECK-CSSC-NEXT: sub w8, w0, #1
326+
; CHECK-CSSC-NEXT: tst w0, w8
331327
; CHECK-CSSC-NEXT: cset w0, ne
332328
; CHECK-CSSC-NEXT: ret
333329
entry:

llvm/test/CodeGen/RISCV/rv32zbb.ll

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1446,15 +1446,14 @@ define i1 @ctpop32_eq_one_nonzero(i32 %x) {
14461446
; RV32I-LABEL: ctpop32_eq_one_nonzero:
14471447
; RV32I: # %bb.0: # %entry
14481448
; RV32I-NEXT: addi a1, a0, -1
1449-
; RV32I-NEXT: xor a0, a0, a1
1450-
; RV32I-NEXT: sltu a0, a1, a0
1449+
; RV32I-NEXT: and a0, a0, a1
1450+
; RV32I-NEXT: seqz a0, a0
14511451
; RV32I-NEXT: ret
14521452
;
14531453
; RV32ZBB-LABEL: ctpop32_eq_one_nonzero:
14541454
; RV32ZBB: # %bb.0: # %entry
14551455
; RV32ZBB-NEXT: cpop a0, a0
1456-
; RV32ZBB-NEXT: addi a0, a0, -1
1457-
; RV32ZBB-NEXT: seqz a0, a0
1456+
; RV32ZBB-NEXT: sltiu a0, a0, 2
14581457
; RV32ZBB-NEXT: ret
14591458
entry:
14601459
%popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
@@ -1466,16 +1465,15 @@ define i1 @ctpop32_ne_one_nonzero(i32 %x) {
14661465
; RV32I-LABEL: ctpop32_ne_one_nonzero:
14671466
; RV32I: # %bb.0: # %entry
14681467
; RV32I-NEXT: addi a1, a0, -1
1469-
; RV32I-NEXT: xor a0, a0, a1
1470-
; RV32I-NEXT: sltu a0, a1, a0
1471-
; RV32I-NEXT: xori a0, a0, 1
1468+
; RV32I-NEXT: and a0, a0, a1
1469+
; RV32I-NEXT: snez a0, a0
14721470
; RV32I-NEXT: ret
14731471
;
14741472
; RV32ZBB-LABEL: ctpop32_ne_one_nonzero:
14751473
; RV32ZBB: # %bb.0: # %entry
14761474
; RV32ZBB-NEXT: cpop a0, a0
1477-
; RV32ZBB-NEXT: addi a0, a0, -1
1478-
; RV32ZBB-NEXT: snez a0, a0
1475+
; RV32ZBB-NEXT: sltiu a0, a0, 2
1476+
; RV32ZBB-NEXT: xori a0, a0, 1
14791477
; RV32ZBB-NEXT: ret
14801478
entry:
14811479
%popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)

llvm/test/CodeGen/RISCV/rv64zbb.ll

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1622,17 +1622,16 @@ entry:
16221622
define i1 @ctpop32_eq_one_nonzero(i32 %x) {
16231623
; RV64I-LABEL: ctpop32_eq_one_nonzero:
16241624
; RV64I: # %bb.0: # %entry
1625-
; RV64I-NEXT: addiw a1, a0, -1
1626-
; RV64I-NEXT: xor a0, a0, a1
1625+
; RV64I-NEXT: addi a1, a0, -1
1626+
; RV64I-NEXT: and a0, a0, a1
16271627
; RV64I-NEXT: sext.w a0, a0
1628-
; RV64I-NEXT: sltu a0, a1, a0
1628+
; RV64I-NEXT: seqz a0, a0
16291629
; RV64I-NEXT: ret
16301630
;
16311631
; RV64ZBB-LABEL: ctpop32_eq_one_nonzero:
16321632
; RV64ZBB: # %bb.0: # %entry
16331633
; RV64ZBB-NEXT: cpopw a0, a0
1634-
; RV64ZBB-NEXT: addi a0, a0, -1
1635-
; RV64ZBB-NEXT: seqz a0, a0
1634+
; RV64ZBB-NEXT: sltiu a0, a0, 2
16361635
; RV64ZBB-NEXT: ret
16371636
entry:
16381637
%popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
@@ -1643,18 +1642,17 @@ entry:
16431642
define i1 @ctpop32_ne_one_nonzero(i32 %x) {
16441643
; RV64I-LABEL: ctpop32_ne_one_nonzero:
16451644
; RV64I: # %bb.0: # %entry
1646-
; RV64I-NEXT: addiw a1, a0, -1
1647-
; RV64I-NEXT: xor a0, a0, a1
1645+
; RV64I-NEXT: addi a1, a0, -1
1646+
; RV64I-NEXT: and a0, a0, a1
16481647
; RV64I-NEXT: sext.w a0, a0
1649-
; RV64I-NEXT: sltu a0, a1, a0
1650-
; RV64I-NEXT: xori a0, a0, 1
1648+
; RV64I-NEXT: snez a0, a0
16511649
; RV64I-NEXT: ret
16521650
;
16531651
; RV64ZBB-LABEL: ctpop32_ne_one_nonzero:
16541652
; RV64ZBB: # %bb.0: # %entry
16551653
; RV64ZBB-NEXT: cpopw a0, a0
1656-
; RV64ZBB-NEXT: addi a0, a0, -1
1657-
; RV64ZBB-NEXT: snez a0, a0
1654+
; RV64ZBB-NEXT: sltiu a0, a0, 2
1655+
; RV64ZBB-NEXT: xori a0, a0, 1
16581656
; RV64ZBB-NEXT: ret
16591657
entry:
16601658
%popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
@@ -1666,15 +1664,14 @@ define i1 @ctpop64_eq_one_nonzero(i64 %x) {
16661664
; RV64I-LABEL: ctpop64_eq_one_nonzero:
16671665
; RV64I: # %bb.0: # %entry
16681666
; RV64I-NEXT: addi a1, a0, -1
1669-
; RV64I-NEXT: xor a0, a0, a1
1670-
; RV64I-NEXT: sltu a0, a1, a0
1667+
; RV64I-NEXT: and a0, a0, a1
1668+
; RV64I-NEXT: seqz a0, a0
16711669
; RV64I-NEXT: ret
16721670
;
16731671
; RV64ZBB-LABEL: ctpop64_eq_one_nonzero:
16741672
; RV64ZBB: # %bb.0: # %entry
16751673
; RV64ZBB-NEXT: cpop a0, a0
1676-
; RV64ZBB-NEXT: addi a0, a0, -1
1677-
; RV64ZBB-NEXT: seqz a0, a0
1674+
; RV64ZBB-NEXT: sltiu a0, a0, 2
16781675
; RV64ZBB-NEXT: ret
16791676
entry:
16801677
%popcnt = call range(i64 1, 65) i64 @llvm.ctpop.i64(i64 %x)

llvm/test/CodeGen/X86/ispow2.ll

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -223,29 +223,39 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
223223

224224

225225
define i1 @ctpop32_eq_one_nonzero(i32 %x) {
226-
; CHECK-LABEL: ctpop32_eq_one_nonzero:
227-
; CHECK: # %bb.0: # %entry
228-
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
229-
; CHECK-NEXT: leal -1(%rdi), %eax
230-
; CHECK-NEXT: xorl %eax, %edi
231-
; CHECK-NEXT: cmpl %eax, %edi
232-
; CHECK-NEXT: seta %al
233-
; CHECK-NEXT: retq
226+
; CHECK-NOBMI-LABEL: ctpop32_eq_one_nonzero:
227+
; CHECK-NOBMI: # %bb.0: # %entry
228+
; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
229+
; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax
230+
; CHECK-NOBMI-NEXT: testl %eax, %edi
231+
; CHECK-NOBMI-NEXT: sete %al
232+
; CHECK-NOBMI-NEXT: retq
233+
;
234+
; CHECK-BMI2-LABEL: ctpop32_eq_one_nonzero:
235+
; CHECK-BMI2: # %bb.0: # %entry
236+
; CHECK-BMI2-NEXT: blsrl %edi, %eax
237+
; CHECK-BMI2-NEXT: sete %al
238+
; CHECK-BMI2-NEXT: retq
234239
entry:
235240
%popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
236241
%cmp = icmp eq i32 %popcnt, 1
237242
ret i1 %cmp
238243
}
239244

240245
define i1 @ctpop32_ne_one_nonzero(i32 %x) {
241-
; CHECK-LABEL: ctpop32_ne_one_nonzero:
242-
; CHECK: # %bb.0: # %entry
243-
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
244-
; CHECK-NEXT: leal -1(%rdi), %eax
245-
; CHECK-NEXT: xorl %eax, %edi
246-
; CHECK-NEXT: cmpl %eax, %edi
247-
; CHECK-NEXT: setbe %al
248-
; CHECK-NEXT: retq
246+
; CHECK-NOBMI-LABEL: ctpop32_ne_one_nonzero:
247+
; CHECK-NOBMI: # %bb.0: # %entry
248+
; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
249+
; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax
250+
; CHECK-NOBMI-NEXT: testl %eax, %edi
251+
; CHECK-NOBMI-NEXT: setne %al
252+
; CHECK-NOBMI-NEXT: retq
253+
;
254+
; CHECK-BMI2-LABEL: ctpop32_ne_one_nonzero:
255+
; CHECK-BMI2: # %bb.0: # %entry
256+
; CHECK-BMI2-NEXT: blsrl %edi, %eax
257+
; CHECK-BMI2-NEXT: setne %al
258+
; CHECK-BMI2-NEXT: retq
249259
entry:
250260
%popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
251261
%cmp = icmp ne i32 %popcnt, 1

0 commit comments

Comments
 (0)