Skip to content

Commit 5080a02

Browse files
authored
[CodeGenPrepare] Unfold slow ctpop when used in power-of-two test (#102731)
The DAG combiner already performs this transformation, but in some cases it does not get the chance because either CodeGenPrepare or SelectionDAGBuilder moves the icmp to a different basic block. https://alive2.llvm.org/ce/z/ARzh99 Fixes #94829. Pull Request: #102731
1 parent 7b68015 commit 5080a02

16 files changed

+814
-646
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 73 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ class CodeGenPrepare {
474474
bool optimizeURem(Instruction *Rem);
475475
bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
476476
bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
477+
bool unfoldPowerOf2Test(CmpInst *Cmp);
477478
void verifyBFIUpdates(Function &F);
478479
bool _run(Function &F);
479480
};
@@ -1762,6 +1763,75 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
17621763
return true;
17631764
}
17641765

1766+
// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow; if ctpop is fast, only the comparison is adjusted.
1767+
// The same transformation exists in DAG combiner, but we repeat it here because
1768+
// DAG builder can break the pattern by moving icmp into a successor block. Returns true if the IR was changed.
1769+
bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
1770+
CmpPredicate Pred;
1771+
Value *X;
1772+
const APInt *C;
1773+
1774+
// Match (icmp pred (ctpop x), c), binding the predicate, x and the constant c.
1775+
if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)),
1776+
m_APIntAllowPoison(C))))
1777+
return false;
1778+
1779+
// We're only interested in "is power of 2 [or zero]" patterns (or their negations): `==/!= 1` is the strict test, while `u< 2` / `u> 1` treat zero like a power of 2.
1780+
bool IsStrictlyPowerOf2Test = ICmpInst::isEquality(Pred) && *C == 1;
1781+
bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == 2) ||
1782+
(Pred == CmpInst::ICMP_UGT && *C == 1);
1783+
if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
1784+
return false;
1785+
1786+
// Some targets have better codegen for `ctpop(x) u</u> 2/1` than for
1787+
// `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1788+
// and otherwise expand ctpop into a few simple instructions.
1789+
Type *OpTy = X->getType();
1790+
if (TLI->isCtpopFast(TLI->getValueType(*DL, OpTy))) {
1791+
// Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
1792+
if (!IsStrictlyPowerOf2Test || !isKnownNonZero(Cmp->getOperand(0), *DL))
1793+
return false;
1794+
1795+
// ctpop(x) == 1 -> ctpop(x) u< 2
1796+
// ctpop(x) != 1 -> ctpop(x) u> 1   (the constant operand stays 1; only the predicate changes)
1797+
if (Pred == ICmpInst::ICMP_EQ) {
1798+
Cmp->setOperand(1, ConstantInt::get(OpTy, 2));
1799+
Cmp->setPredicate(ICmpInst::ICMP_ULT);
1800+
} else {
1801+
Cmp->setPredicate(ICmpInst::ICMP_UGT);
1802+
}
1803+
return true;
1804+
}
1805+
1806+
Value *NewCmp;
1807+
if (IsPowerOf2OrZeroTest ||
1808+
(IsStrictlyPowerOf2Test && isKnownNonZero(Cmp->getOperand(0), *DL))) {
1809+
// ctpop(x) u< 2 -> (x & (x - 1)) == 0   (x - 1 is emitted as x + all-ones)
1810+
// ctpop(x) u> 1 -> (x & (x - 1)) != 0   (also used for ==/!= 1 when ctpop(x) is known non-zero)
1811+
IRBuilder<> Builder(Cmp);
1812+
Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1813+
Value *And = Builder.CreateAnd(X, Sub);
1814+
CmpInst::Predicate NewPred =
1815+
(Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
1816+
? CmpInst::ICMP_EQ
1817+
: CmpInst::ICMP_NE;
1818+
NewCmp = Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(OpTy));
1819+
} else {
1820+
// ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)    (x - 1 is emitted as x + all-ones)
1821+
// ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1822+
IRBuilder<> Builder(Cmp);
1823+
Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1824+
Value *Xor = Builder.CreateXor(X, Sub);
1825+
CmpInst::Predicate NewPred =
1826+
Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1827+
NewCmp = Builder.CreateICmp(NewPred, Xor, Sub);
1828+
}
1829+
1830+
Cmp->replaceAllUsesWith(NewCmp);
1831+
RecursivelyDeleteTriviallyDeadInstructions(Cmp);
1832+
return true;
1833+
}
1834+
17651835
/// Sink the given CmpInst into user blocks to reduce the number of virtual
17661836
/// registers that must be created and coalesced. This is a clear win except on
17671837
/// targets with multiple condition code registers (PowerPC), where it might
@@ -2148,31 +2218,6 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
21482218
return false;
21492219
}
21502220

2151-
/// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
2152-
/// This function rewrites `ctpop(X) ==/!= 1` in place into `ctpop(X) u</u> 2/1` if the
2153-
/// ctpop result is known to be non-zero, returning true only when Cmp was changed. TLI and TTI are not referenced in the body.
2154-
static bool adjustIsPower2Test(CmpInst *Cmp, const TargetLowering &TLI,
2155-
const TargetTransformInfo &TTI,
2156-
const DataLayout &DL) {
2157-
CmpPredicate Pred;
2158-
if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One())))
2159-
return false;
2160-
if (!ICmpInst::isEquality(Pred))
2161-
return false;
2162-
auto *II = cast<IntrinsicInst>(Cmp->getOperand(0));
2163-
2164-
if (isKnownNonZero(II, DL)) {
2165-
if (Pred == ICmpInst::ICMP_EQ) {
2166-
Cmp->setOperand(1, ConstantInt::get(II->getType(), 2));
2167-
Cmp->setPredicate(ICmpInst::ICMP_ULT);
2168-
} else {
2169-
Cmp->setPredicate(ICmpInst::ICMP_UGT);
2170-
}
2171-
return true;
2172-
}
2173-
return false;
2174-
}
2175-
21762221
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
21772222
if (sinkCmpExpression(Cmp, *TLI))
21782223
return true;
@@ -2183,6 +2228,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
21832228
if (combineToUSubWithOverflow(Cmp, ModifiedDT))
21842229
return true;
21852230

2231+
if (unfoldPowerOf2Test(Cmp))
2232+
return true;
2233+
21862234
if (foldICmpWithDominatingICmp(Cmp, *TLI))
21872235
return true;
21882236

@@ -2192,9 +2240,6 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
21922240
if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
21932241
return true;
21942242

2195-
if (adjustIsPower2Test(Cmp, *TLI, *TTI, *DL))
2196-
return true;
2197-
21982243
return false;
21992244
}
22002245

llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11945,23 +11945,23 @@ define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) {
1194511945
; PWR5-LABEL: ugt_1_v2i64:
1194611946
; PWR5: # %bb.0:
1194711947
; PWR5-NEXT: addi 5, 3, -1
11948+
; PWR5-NEXT: addi 6, 4, -1
1194811949
; PWR5-NEXT: and 3, 3, 5
11949-
; PWR5-NEXT: addi 5, 4, -1
11950+
; PWR5-NEXT: and 4, 4, 6
1195011951
; PWR5-NEXT: subfic 3, 3, 0
1195111952
; PWR5-NEXT: subfe 3, 3, 3
11952-
; PWR5-NEXT: and 4, 4, 5
1195311953
; PWR5-NEXT: subfic 4, 4, 0
1195411954
; PWR5-NEXT: subfe 4, 4, 4
1195511955
; PWR5-NEXT: blr
1195611956
;
1195711957
; PWR6-LABEL: ugt_1_v2i64:
1195811958
; PWR6: # %bb.0:
1195911959
; PWR6-NEXT: addi 5, 3, -1
11960+
; PWR6-NEXT: addi 6, 4, -1
1196011961
; PWR6-NEXT: and 3, 3, 5
11961-
; PWR6-NEXT: addi 5, 4, -1
11962+
; PWR6-NEXT: and 4, 4, 6
1196211963
; PWR6-NEXT: subfic 3, 3, 0
1196311964
; PWR6-NEXT: subfe 3, 3, 3
11964-
; PWR6-NEXT: and 4, 4, 5
1196511965
; PWR6-NEXT: subfic 4, 4, 0
1196611966
; PWR6-NEXT: subfe 4, 4, 4
1196711967
; PWR6-NEXT: blr
@@ -12016,23 +12016,23 @@ define <2 x i64> @ult_2_v2i64(<2 x i64> %0) {
1201612016
; PWR5-LABEL: ult_2_v2i64:
1201712017
; PWR5: # %bb.0:
1201812018
; PWR5-NEXT: addi 5, 3, -1
12019+
; PWR5-NEXT: addi 6, 4, -1
1201912020
; PWR5-NEXT: and 3, 3, 5
12020-
; PWR5-NEXT: addi 5, 4, -1
12021+
; PWR5-NEXT: and 4, 4, 6
1202112022
; PWR5-NEXT: addic 3, 3, -1
1202212023
; PWR5-NEXT: subfe 3, 3, 3
12023-
; PWR5-NEXT: and 4, 4, 5
1202412024
; PWR5-NEXT: addic 4, 4, -1
1202512025
; PWR5-NEXT: subfe 4, 4, 4
1202612026
; PWR5-NEXT: blr
1202712027
;
1202812028
; PWR6-LABEL: ult_2_v2i64:
1202912029
; PWR6: # %bb.0:
1203012030
; PWR6-NEXT: addi 5, 3, -1
12031+
; PWR6-NEXT: addi 6, 4, -1
1203112032
; PWR6-NEXT: and 3, 3, 5
12032-
; PWR6-NEXT: addi 5, 4, -1
12033+
; PWR6-NEXT: and 4, 4, 6
1203312034
; PWR6-NEXT: addic 3, 3, -1
1203412035
; PWR6-NEXT: subfe 3, 3, 3
12035-
; PWR6-NEXT: and 4, 4, 5
1203612036
; PWR6-NEXT: addic 4, 4, -1
1203712037
; PWR6-NEXT: subfe 4, 4, 4
1203812038
; PWR6-NEXT: blr

0 commit comments

Comments
 (0)