@@ -474,6 +474,7 @@ class CodeGenPrepare {
474
474
bool optimizeURem (Instruction *Rem);
475
475
bool combineToUSubWithOverflow (CmpInst *Cmp, ModifyDT &ModifiedDT);
476
476
bool combineToUAddWithOverflow (CmpInst *Cmp, ModifyDT &ModifiedDT);
477
+ bool unfoldPowerOf2Test (CmpInst *Cmp);
477
478
void verifyBFIUpdates (Function &F);
478
479
bool _run (Function &F);
479
480
};
@@ -1762,6 +1763,75 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1762
1763
return true ;
1763
1764
}
1764
1765
1766
+ // Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
1767
+ // The same transformation exists in DAG combiner, but we repeat it here because
1768
+ // DAG builder can break the pattern by moving icmp into a successor block.
1769
+ bool CodeGenPrepare::unfoldPowerOf2Test (CmpInst *Cmp) {
1770
+ CmpPredicate Pred;
1771
+ Value *X;
1772
+ const APInt *C;
1773
+
1774
+ // (icmp (ctpop x), c)
1775
+ if (!match (Cmp, m_ICmp (Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value (X)),
1776
+ m_APIntAllowPoison (C))))
1777
+ return false ;
1778
+
1779
+ // We're only interested in "is power of 2 [or zero]" patterns.
1780
+ bool IsStrictlyPowerOf2Test = ICmpInst::isEquality (Pred) && *C == 1 ;
1781
+ bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == 2 ) ||
1782
+ (Pred == CmpInst::ICMP_UGT && *C == 1 );
1783
+ if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
1784
+ return false ;
1785
+
1786
+ // Some targets have better codegen for `ctpop(x) u</u>= 2/1`than for
1787
+ // `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1788
+ // and otherwise expand ctpop into a few simple instructions.
1789
+ Type *OpTy = X->getType ();
1790
+ if (TLI->isCtpopFast (TLI->getValueType (*DL, OpTy))) {
1791
+ // Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
1792
+ if (!IsStrictlyPowerOf2Test || !isKnownNonZero (Cmp->getOperand (0 ), *DL))
1793
+ return false ;
1794
+
1795
+ // ctpop(x) == 1 -> ctpop(x) u< 2
1796
+ // ctpop(x) != 1 -> ctpop(x) u> 1
1797
+ if (Pred == ICmpInst::ICMP_EQ) {
1798
+ Cmp->setOperand (1 , ConstantInt::get (OpTy, 2 ));
1799
+ Cmp->setPredicate (ICmpInst::ICMP_ULT);
1800
+ } else {
1801
+ Cmp->setPredicate (ICmpInst::ICMP_UGT);
1802
+ }
1803
+ return true ;
1804
+ }
1805
+
1806
+ Value *NewCmp;
1807
+ if (IsPowerOf2OrZeroTest ||
1808
+ (IsStrictlyPowerOf2Test && isKnownNonZero (Cmp->getOperand (0 ), *DL))) {
1809
+ // ctpop(x) u< 2 -> (x & (x - 1)) == 0
1810
+ // ctpop(x) u> 1 -> (x & (x - 1)) != 0
1811
+ IRBuilder<> Builder (Cmp);
1812
+ Value *Sub = Builder.CreateAdd (X, Constant::getAllOnesValue (OpTy));
1813
+ Value *And = Builder.CreateAnd (X, Sub);
1814
+ CmpInst::Predicate NewPred =
1815
+ (Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
1816
+ ? CmpInst::ICMP_EQ
1817
+ : CmpInst::ICMP_NE;
1818
+ NewCmp = Builder.CreateICmp (NewPred, And, ConstantInt::getNullValue (OpTy));
1819
+ } else {
1820
+ // ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1821
+ // ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1822
+ IRBuilder<> Builder (Cmp);
1823
+ Value *Sub = Builder.CreateAdd (X, Constant::getAllOnesValue (OpTy));
1824
+ Value *Xor = Builder.CreateXor (X, Sub);
1825
+ CmpInst::Predicate NewPred =
1826
+ Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1827
+ NewCmp = Builder.CreateICmp (NewPred, Xor, Sub);
1828
+ }
1829
+
1830
+ Cmp->replaceAllUsesWith (NewCmp);
1831
+ RecursivelyDeleteTriviallyDeadInstructions (Cmp);
1832
+ return true ;
1833
+ }
1834
+
1765
1835
// / Sink the given CmpInst into user blocks to reduce the number of virtual
1766
1836
// / registers that must be created and coalesced. This is a clear win except on
1767
1837
// / targets with multiple condition code registers (PowerPC), where it might
@@ -2148,31 +2218,6 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2148
2218
return false ;
2149
2219
}
2150
2220
2151
- // / Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
2152
- // / This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the
2153
- // / result cannot be zero.
2154
- static bool adjustIsPower2Test (CmpInst *Cmp, const TargetLowering &TLI,
2155
- const TargetTransformInfo &TTI,
2156
- const DataLayout &DL) {
2157
- CmpPredicate Pred;
2158
- if (!match (Cmp, m_ICmp (Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One ())))
2159
- return false ;
2160
- if (!ICmpInst::isEquality (Pred))
2161
- return false ;
2162
- auto *II = cast<IntrinsicInst>(Cmp->getOperand (0 ));
2163
-
2164
- if (isKnownNonZero (II, DL)) {
2165
- if (Pred == ICmpInst::ICMP_EQ) {
2166
- Cmp->setOperand (1 , ConstantInt::get (II->getType (), 2 ));
2167
- Cmp->setPredicate (ICmpInst::ICMP_ULT);
2168
- } else {
2169
- Cmp->setPredicate (ICmpInst::ICMP_UGT);
2170
- }
2171
- return true ;
2172
- }
2173
- return false ;
2174
- }
2175
-
2176
2221
bool CodeGenPrepare::optimizeCmp (CmpInst *Cmp, ModifyDT &ModifiedDT) {
2177
2222
if (sinkCmpExpression (Cmp, *TLI))
2178
2223
return true ;
@@ -2183,6 +2228,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2183
2228
if (combineToUSubWithOverflow (Cmp, ModifiedDT))
2184
2229
return true ;
2185
2230
2231
+ if (unfoldPowerOf2Test (Cmp))
2232
+ return true ;
2233
+
2186
2234
if (foldICmpWithDominatingICmp (Cmp, *TLI))
2187
2235
return true ;
2188
2236
@@ -2192,9 +2240,6 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2192
2240
if (foldFCmpToFPClassTest (Cmp, *TLI, *DL))
2193
2241
return true ;
2194
2242
2195
- if (adjustIsPower2Test (Cmp, *TLI, *TTI, *DL))
2196
- return true ;
2197
-
2198
2243
return false ;
2199
2244
}
2200
2245
0 commit comments