Skip to content

Commit 42dbb28

Browse files
committed
Handle vectors and ==/!= 1 patterns
1 parent 8dc0fab commit 42dbb28

11 files changed

+471
-506
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 33 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1767,28 +1767,49 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
17671767
bool CodeGenPrepare::unfoldPow2Test(CmpInst *Cmp) {
17681768
CmpPredicate Pred;
17691769
Value *X;
1770-
uint64_t C;
1770+
const APInt *C;
17711771

1772+
// (icmp (ctpop x), c)
17721773
if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)),
1773-
m_ConstantInt(C))))
1774+
m_APIntAllowPoison(C))))
17741775
return false;
17751776

1776-
Type *Ty = X->getType();
1777-
if (Ty->isVectorTy() || TTI->getPopcntSupport(Ty->getIntegerBitWidth()) ==
1778-
TargetTransformInfo::PSK_FastHardware)
1777+
// This transformation increases the number of instructions, don't do it if
1778+
// ctpop is fast.
1779+
Type *OpTy = X->getType();
1780+
if (TLI->isCtpopFast(TLI->getValueType(*DL, OpTy)))
17791781
return false;
17801782

1781-
// (ctpop x) u< 2 -> (x & (x - 1)) == 0
1782-
// (ctpop x) u> 1 -> (x & (x - 1)) != 0
1783-
if ((Pred == CmpInst::ICMP_ULT && C == 2) ||
1784-
(Pred == CmpInst::ICMP_UGT && C == 1)) {
1783+
// ctpop(x) u< 2 -> (x & (x - 1)) == 0
1784+
// ctpop(x) u> 1 -> (x & (x - 1)) != 0
1785+
// Also handles ctpop(x) == 1 and ctpop(x) != 1 if ctpop(x) is known non-zero.
1786+
if ((Pred == CmpInst::ICMP_ULT && *C == 2) ||
1787+
(Pred == CmpInst::ICMP_UGT && *C == 1) ||
1788+
(ICmpInst::isEquality(Pred) && *C == 1 &&
1789+
isKnownNonZero(Cmp->getOperand(0), *DL))) {
17851790
IRBuilder<> Builder(Cmp);
1786-
Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(Ty));
1791+
Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
17871792
Value *And = Builder.CreateAnd(X, Sub);
17881793
CmpInst::Predicate NewPred =
1789-
Pred == CmpInst::ICMP_ULT ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
1794+
(Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
1795+
? CmpInst::ICMP_EQ
1796+
: CmpInst::ICMP_NE;
17901797
Value *NewCmp =
1791-
Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(Ty));
1798+
Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(OpTy));
1799+
Cmp->replaceAllUsesWith(NewCmp);
1800+
RecursivelyDeleteTriviallyDeadInstructions(Cmp);
1801+
return true;
1802+
}
1803+
1804+
// ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1805+
// ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1806+
if (ICmpInst::isEquality(Pred) && *C == 1) {
1807+
IRBuilder<> Builder(Cmp);
1808+
Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1809+
Value *Xor = Builder.CreateXor(X, Sub);
1810+
CmpInst::Predicate NewPred =
1811+
Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1812+
Value *NewCmp = Builder.CreateICmp(NewPred, Xor, Sub);
17921813
Cmp->replaceAllUsesWith(NewCmp);
17931814
RecursivelyDeleteTriviallyDeadInstructions(Cmp);
17941815
return true;

llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -11945,23 +11945,23 @@ define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) {
1194511945
; PWR5-LABEL: ugt_1_v2i64:
1194611946
; PWR5: # %bb.0:
1194711947
; PWR5-NEXT: addi 5, 3, -1
11948+
; PWR5-NEXT: addi 6, 4, -1
1194811949
; PWR5-NEXT: and 3, 3, 5
11949-
; PWR5-NEXT: addi 5, 4, -1
11950+
; PWR5-NEXT: and 4, 4, 6
1195011951
; PWR5-NEXT: subfic 3, 3, 0
1195111952
; PWR5-NEXT: subfe 3, 3, 3
11952-
; PWR5-NEXT: and 4, 4, 5
1195311953
; PWR5-NEXT: subfic 4, 4, 0
1195411954
; PWR5-NEXT: subfe 4, 4, 4
1195511955
; PWR5-NEXT: blr
1195611956
;
1195711957
; PWR6-LABEL: ugt_1_v2i64:
1195811958
; PWR6: # %bb.0:
1195911959
; PWR6-NEXT: addi 5, 3, -1
11960+
; PWR6-NEXT: addi 6, 4, -1
1196011961
; PWR6-NEXT: and 3, 3, 5
11961-
; PWR6-NEXT: addi 5, 4, -1
11962+
; PWR6-NEXT: and 4, 4, 6
1196211963
; PWR6-NEXT: subfic 3, 3, 0
1196311964
; PWR6-NEXT: subfe 3, 3, 3
11964-
; PWR6-NEXT: and 4, 4, 5
1196511965
; PWR6-NEXT: subfic 4, 4, 0
1196611966
; PWR6-NEXT: subfe 4, 4, 4
1196711967
; PWR6-NEXT: blr
@@ -12016,23 +12016,23 @@ define <2 x i64> @ult_2_v2i64(<2 x i64> %0) {
1201612016
; PWR5-LABEL: ult_2_v2i64:
1201712017
; PWR5: # %bb.0:
1201812018
; PWR5-NEXT: addi 5, 3, -1
12019+
; PWR5-NEXT: addi 6, 4, -1
1201912020
; PWR5-NEXT: and 3, 3, 5
12020-
; PWR5-NEXT: addi 5, 4, -1
12021+
; PWR5-NEXT: and 4, 4, 6
1202112022
; PWR5-NEXT: addic 3, 3, -1
1202212023
; PWR5-NEXT: subfe 3, 3, 3
12023-
; PWR5-NEXT: and 4, 4, 5
1202412024
; PWR5-NEXT: addic 4, 4, -1
1202512025
; PWR5-NEXT: subfe 4, 4, 4
1202612026
; PWR5-NEXT: blr
1202712027
;
1202812028
; PWR6-LABEL: ult_2_v2i64:
1202912029
; PWR6: # %bb.0:
1203012030
; PWR6-NEXT: addi 5, 3, -1
12031+
; PWR6-NEXT: addi 6, 4, -1
1203112032
; PWR6-NEXT: and 3, 3, 5
12032-
; PWR6-NEXT: addi 5, 4, -1
12033+
; PWR6-NEXT: and 4, 4, 6
1203312034
; PWR6-NEXT: addic 3, 3, -1
1203412035
; PWR6-NEXT: subfe 3, 3, 3
12035-
; PWR6-NEXT: and 4, 4, 5
1203612036
; PWR6-NEXT: addic 4, 4, -1
1203712037
; PWR6-NEXT: subfe 4, 4, 4
1203812038
; PWR6-NEXT: blr

llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll

Lines changed: 25 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -418,45 +418,18 @@ define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
418418
define i1 @ctpop_i64_eq_one(i64 %a) nounwind {
419419
; RV32I-LABEL: ctpop_i64_eq_one:
420420
; RV32I: # %bb.0:
421-
; RV32I-NEXT: srli a2, a0, 1
422-
; RV32I-NEXT: lui a3, 349525
423-
; RV32I-NEXT: lui a4, 209715
424-
; RV32I-NEXT: srli a5, a1, 1
425-
; RV32I-NEXT: addi a3, a3, 1365
426-
; RV32I-NEXT: and a2, a2, a3
427-
; RV32I-NEXT: and a3, a5, a3
428-
; RV32I-NEXT: lui a5, 61681
429-
; RV32I-NEXT: addi a4, a4, 819
430-
; RV32I-NEXT: addi a5, a5, -241
431-
; RV32I-NEXT: sub a0, a0, a2
432-
; RV32I-NEXT: sub a1, a1, a3
433-
; RV32I-NEXT: srli a2, a0, 2
434-
; RV32I-NEXT: and a0, a0, a4
435-
; RV32I-NEXT: srli a3, a1, 2
436-
; RV32I-NEXT: and a1, a1, a4
437-
; RV32I-NEXT: and a2, a2, a4
438-
; RV32I-NEXT: and a3, a3, a4
439-
; RV32I-NEXT: add a0, a2, a0
440-
; RV32I-NEXT: add a1, a3, a1
441-
; RV32I-NEXT: srli a2, a0, 4
442-
; RV32I-NEXT: srli a3, a1, 4
443-
; RV32I-NEXT: add a0, a2, a0
444-
; RV32I-NEXT: add a1, a3, a1
445-
; RV32I-NEXT: and a0, a0, a5
446-
; RV32I-NEXT: and a1, a1, a5
447-
; RV32I-NEXT: slli a2, a0, 8
448-
; RV32I-NEXT: slli a3, a1, 8
449-
; RV32I-NEXT: add a0, a0, a2
450-
; RV32I-NEXT: add a1, a1, a3
451-
; RV32I-NEXT: slli a2, a0, 16
452-
; RV32I-NEXT: slli a3, a1, 16
453-
; RV32I-NEXT: add a0, a0, a2
454-
; RV32I-NEXT: add a1, a1, a3
455-
; RV32I-NEXT: srli a0, a0, 24
456-
; RV32I-NEXT: srli a1, a1, 24
457-
; RV32I-NEXT: add a0, a1, a0
458-
; RV32I-NEXT: xori a0, a0, 1
459-
; RV32I-NEXT: seqz a0, a0
421+
; RV32I-NEXT: addi a2, a0, -1
422+
; RV32I-NEXT: sltiu a3, a2, -1
423+
; RV32I-NEXT: addi a4, a1, -1
424+
; RV32I-NEXT: add a3, a4, a3
425+
; RV32I-NEXT: xor a1, a1, a3
426+
; RV32I-NEXT: beq a1, a3, .LBB8_2
427+
; RV32I-NEXT: # %bb.1:
428+
; RV32I-NEXT: sltu a0, a3, a1
429+
; RV32I-NEXT: ret
430+
; RV32I-NEXT: .LBB8_2:
431+
; RV32I-NEXT: xor a0, a0, a2
432+
; RV32I-NEXT: sltu a0, a2, a0
460433
; RV32I-NEXT: ret
461434
;
462435
; RV32ZBB-LABEL: ctpop_i64_eq_one:
@@ -475,45 +448,20 @@ define i1 @ctpop_i64_eq_one(i64 %a) nounwind {
475448
define i1 @ctpop_i64_ne_one(i64 %a) nounwind {
476449
; RV32I-LABEL: ctpop_i64_ne_one:
477450
; RV32I: # %bb.0:
478-
; RV32I-NEXT: srli a2, a0, 1
479-
; RV32I-NEXT: lui a3, 349525
480-
; RV32I-NEXT: lui a4, 209715
481-
; RV32I-NEXT: srli a5, a1, 1
482-
; RV32I-NEXT: addi a3, a3, 1365
483-
; RV32I-NEXT: and a2, a2, a3
484-
; RV32I-NEXT: and a3, a5, a3
485-
; RV32I-NEXT: lui a5, 61681
486-
; RV32I-NEXT: addi a4, a4, 819
487-
; RV32I-NEXT: addi a5, a5, -241
488-
; RV32I-NEXT: sub a0, a0, a2
489-
; RV32I-NEXT: sub a1, a1, a3
490-
; RV32I-NEXT: srli a2, a0, 2
491-
; RV32I-NEXT: and a0, a0, a4
492-
; RV32I-NEXT: srli a3, a1, 2
493-
; RV32I-NEXT: and a1, a1, a4
494-
; RV32I-NEXT: and a2, a2, a4
495-
; RV32I-NEXT: and a3, a3, a4
496-
; RV32I-NEXT: add a0, a2, a0
497-
; RV32I-NEXT: add a1, a3, a1
498-
; RV32I-NEXT: srli a2, a0, 4
499-
; RV32I-NEXT: srli a3, a1, 4
500-
; RV32I-NEXT: add a0, a2, a0
501-
; RV32I-NEXT: add a1, a3, a1
502-
; RV32I-NEXT: and a0, a0, a5
503-
; RV32I-NEXT: and a1, a1, a5
504-
; RV32I-NEXT: slli a2, a0, 8
505-
; RV32I-NEXT: slli a3, a1, 8
506-
; RV32I-NEXT: add a0, a0, a2
507-
; RV32I-NEXT: add a1, a1, a3
508-
; RV32I-NEXT: slli a2, a0, 16
509-
; RV32I-NEXT: slli a3, a1, 16
510-
; RV32I-NEXT: add a0, a0, a2
511-
; RV32I-NEXT: add a1, a1, a3
512-
; RV32I-NEXT: srli a0, a0, 24
513-
; RV32I-NEXT: srli a1, a1, 24
514-
; RV32I-NEXT: add a0, a1, a0
451+
; RV32I-NEXT: addi a2, a0, -1
452+
; RV32I-NEXT: sltiu a3, a2, -1
453+
; RV32I-NEXT: addi a4, a1, -1
454+
; RV32I-NEXT: add a3, a4, a3
455+
; RV32I-NEXT: xor a1, a1, a3
456+
; RV32I-NEXT: beq a1, a3, .LBB9_2
457+
; RV32I-NEXT: # %bb.1:
458+
; RV32I-NEXT: sltu a0, a3, a1
459+
; RV32I-NEXT: xori a0, a0, 1
460+
; RV32I-NEXT: ret
461+
; RV32I-NEXT: .LBB9_2:
462+
; RV32I-NEXT: xor a0, a0, a2
463+
; RV32I-NEXT: sltu a0, a2, a0
515464
; RV32I-NEXT: xori a0, a0, 1
516-
; RV32I-NEXT: snez a0, a0
517465
; RV32I-NEXT: ret
518466
;
519467
; RV32ZBB-LABEL: ctpop_i64_ne_one:

llvm/test/CodeGen/RISCV/rv32zbb.ll

Lines changed: 36 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -416,9 +416,9 @@ define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind {
416416
; RV32I-LABEL: ctpop_v2i32_ult_two:
417417
; RV32I: # %bb.0:
418418
; RV32I-NEXT: addi a2, a0, -1
419+
; RV32I-NEXT: addi a3, a1, -1
420+
; RV32I-NEXT: and a1, a1, a3
419421
; RV32I-NEXT: and a0, a0, a2
420-
; RV32I-NEXT: addi a2, a1, -1
421-
; RV32I-NEXT: and a1, a1, a2
422422
; RV32I-NEXT: seqz a0, a0
423423
; RV32I-NEXT: seqz a1, a1
424424
; RV32I-NEXT: ret
@@ -439,9 +439,9 @@ define <2 x i1> @ctpop_v2i32_ugt_one(<2 x i32> %a) nounwind {
439439
; RV32I-LABEL: ctpop_v2i32_ugt_one:
440440
; RV32I: # %bb.0:
441441
; RV32I-NEXT: addi a2, a0, -1
442+
; RV32I-NEXT: addi a3, a1, -1
443+
; RV32I-NEXT: and a1, a1, a3
442444
; RV32I-NEXT: and a0, a0, a2
443-
; RV32I-NEXT: addi a2, a1, -1
444-
; RV32I-NEXT: and a1, a1, a2
445445
; RV32I-NEXT: snez a0, a0
446446
; RV32I-NEXT: snez a1, a1
447447
; RV32I-NEXT: ret
@@ -464,11 +464,11 @@ define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
464464
; RV32I-LABEL: ctpop_v2i32_eq_one:
465465
; RV32I: # %bb.0:
466466
; RV32I-NEXT: addi a2, a0, -1
467+
; RV32I-NEXT: addi a3, a1, -1
468+
; RV32I-NEXT: xor a1, a1, a3
467469
; RV32I-NEXT: xor a0, a0, a2
468470
; RV32I-NEXT: sltu a0, a2, a0
469-
; RV32I-NEXT: addi a2, a1, -1
470-
; RV32I-NEXT: xor a1, a1, a2
471-
; RV32I-NEXT: sltu a1, a2, a1
471+
; RV32I-NEXT: sltu a1, a3, a1
472472
; RV32I-NEXT: ret
473473
;
474474
; RV32ZBB-LABEL: ctpop_v2i32_eq_one:
@@ -489,11 +489,11 @@ define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
489489
; RV32I-LABEL: ctpop_v2i32_ne_one:
490490
; RV32I: # %bb.0:
491491
; RV32I-NEXT: addi a2, a0, -1
492+
; RV32I-NEXT: addi a3, a1, -1
493+
; RV32I-NEXT: xor a1, a1, a3
492494
; RV32I-NEXT: xor a0, a0, a2
493495
; RV32I-NEXT: sltu a0, a2, a0
494-
; RV32I-NEXT: addi a2, a1, -1
495-
; RV32I-NEXT: xor a1, a1, a2
496-
; RV32I-NEXT: sltu a1, a2, a1
496+
; RV32I-NEXT: sltu a1, a3, a1
497497
; RV32I-NEXT: xori a0, a0, 1
498498
; RV32I-NEXT: xori a1, a1, 1
499499
; RV32I-NEXT: ret
@@ -785,20 +785,20 @@ define <2 x i1> @ctpop_v2i64_ult_two(<2 x i64> %a) nounwind {
785785
; RV32I-LABEL: ctpop_v2i64_ult_two:
786786
; RV32I: # %bb.0:
787787
; RV32I-NEXT: lw a1, 0(a0)
788-
; RV32I-NEXT: lw a2, 4(a0)
789-
; RV32I-NEXT: lw a3, 8(a0)
788+
; RV32I-NEXT: lw a2, 8(a0)
789+
; RV32I-NEXT: lw a3, 4(a0)
790790
; RV32I-NEXT: lw a0, 12(a0)
791-
; RV32I-NEXT: addi a4, a1, -1
792-
; RV32I-NEXT: and a4, a1, a4
793-
; RV32I-NEXT: seqz a1, a1
794-
; RV32I-NEXT: sub a1, a2, a1
795-
; RV32I-NEXT: and a1, a2, a1
796-
; RV32I-NEXT: addi a2, a3, -1
797-
; RV32I-NEXT: and a2, a3, a2
798-
; RV32I-NEXT: seqz a3, a3
799-
; RV32I-NEXT: sub a3, a0, a3
800-
; RV32I-NEXT: and a0, a0, a3
801-
; RV32I-NEXT: or a1, a4, a1
791+
; RV32I-NEXT: seqz a4, a1
792+
; RV32I-NEXT: seqz a5, a2
793+
; RV32I-NEXT: addi a6, a1, -1
794+
; RV32I-NEXT: addi a7, a2, -1
795+
; RV32I-NEXT: sub a4, a3, a4
796+
; RV32I-NEXT: sub a5, a0, a5
797+
; RV32I-NEXT: and a2, a2, a7
798+
; RV32I-NEXT: and a1, a1, a6
799+
; RV32I-NEXT: and a0, a0, a5
800+
; RV32I-NEXT: and a3, a3, a4
801+
; RV32I-NEXT: or a1, a1, a3
802802
; RV32I-NEXT: or a2, a2, a0
803803
; RV32I-NEXT: seqz a0, a1
804804
; RV32I-NEXT: seqz a1, a2
@@ -828,20 +828,20 @@ define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind {
828828
; RV32I-LABEL: ctpop_v2i64_ugt_one:
829829
; RV32I: # %bb.0:
830830
; RV32I-NEXT: lw a1, 0(a0)
831-
; RV32I-NEXT: lw a2, 4(a0)
832-
; RV32I-NEXT: lw a3, 8(a0)
831+
; RV32I-NEXT: lw a2, 8(a0)
832+
; RV32I-NEXT: lw a3, 4(a0)
833833
; RV32I-NEXT: lw a0, 12(a0)
834-
; RV32I-NEXT: addi a4, a1, -1
835-
; RV32I-NEXT: and a4, a1, a4
836-
; RV32I-NEXT: seqz a1, a1
837-
; RV32I-NEXT: sub a1, a2, a1
838-
; RV32I-NEXT: and a1, a2, a1
839-
; RV32I-NEXT: addi a2, a3, -1
840-
; RV32I-NEXT: and a2, a3, a2
841-
; RV32I-NEXT: seqz a3, a3
842-
; RV32I-NEXT: sub a3, a0, a3
843-
; RV32I-NEXT: and a0, a0, a3
844-
; RV32I-NEXT: or a1, a4, a1
834+
; RV32I-NEXT: seqz a4, a1
835+
; RV32I-NEXT: seqz a5, a2
836+
; RV32I-NEXT: addi a6, a1, -1
837+
; RV32I-NEXT: addi a7, a2, -1
838+
; RV32I-NEXT: sub a4, a3, a4
839+
; RV32I-NEXT: sub a5, a0, a5
840+
; RV32I-NEXT: and a2, a2, a7
841+
; RV32I-NEXT: and a1, a1, a6
842+
; RV32I-NEXT: and a0, a0, a5
843+
; RV32I-NEXT: and a3, a3, a4
844+
; RV32I-NEXT: or a1, a1, a3
845845
; RV32I-NEXT: or a2, a2, a0
846846
; RV32I-NEXT: snez a0, a1
847847
; RV32I-NEXT: snez a1, a2

0 commit comments

Comments
 (0)