-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[CodeGenPrepare] Unfold slow ctpop when used in power-of-two test #102731
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
9193702
6524fbe
cfacad3
2ce5f4a
de66d5d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -474,6 +474,7 @@ class CodeGenPrepare { | |
bool optimizeURem(Instruction *Rem); | ||
bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT); | ||
bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT); | ||
bool unfoldPow2Test(CmpInst *Cmp); | ||
void verifyBFIUpdates(Function &F); | ||
bool _run(Function &F); | ||
}; | ||
|
@@ -1762,6 +1763,61 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp, | |
return true; | ||
} | ||
|
||
// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow. | ||
bool CodeGenPrepare::unfoldPow2Test(CmpInst *Cmp) { | ||
CmpPredicate Pred; | ||
Value *X; | ||
const APInt *C; | ||
|
||
// (icmp (ctpop x), c) | ||
if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)), | ||
m_APIntAllowPoison(C)))) | ||
return false; | ||
|
||
// This transformation increases the number of instructions, don't do it if | ||
// ctpop is fast. | ||
Type *OpTy = X->getType(); | ||
if (TLI->isCtpopFast(TLI->getValueType(*DL, OpTy))) | ||
return false; | ||
|
||
// ctpop(x) u< 2 -> (x & (x - 1)) == 0 | ||
// ctpop(x) u> 1 -> (x & (x - 1)) != 0 | ||
// Also handles ctpop(x) == 1 and ctpop(x) != 1 if ctpop(x) is known non-zero. | ||
if ((Pred == CmpInst::ICMP_ULT && *C == 2) || | ||
(Pred == CmpInst::ICMP_UGT && *C == 1) || | ||
(ICmpInst::isEquality(Pred) && *C == 1 && | ||
isKnownNonZero(Cmp->getOperand(0), *DL))) { | ||
IRBuilder<> Builder(Cmp); | ||
Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy)); | ||
Value *And = Builder.CreateAnd(X, Sub); | ||
CmpInst::Predicate NewPred = | ||
(Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ) | ||
? CmpInst::ICMP_EQ | ||
: CmpInst::ICMP_NE; | ||
Value *NewCmp = | ||
Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(OpTy)); | ||
Cmp->replaceAllUsesWith(NewCmp); | ||
RecursivelyDeleteTriviallyDeadInstructions(Cmp); | ||
return true; | ||
} | ||
|
||
// ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1) | ||
// ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1) | ||
if (ICmpInst::isEquality(Pred) && *C == 1) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this really need to test both forms? I would hope these canonicalize one way or the other There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think so, this code compiles to two different icmps (one to void bar();
int test_eq(int x, int y) { if (__builtin_popcount(x) == 1 && y) bar(); }
int test_ne(int x, int y) { if (__builtin_popcount(x) != 1 && y) bar(); } There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I get handling the invert and non-inverted forms. But I mean these two blocks of conditions There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Middle end doesn't do this until CGP. adjustIsPower2Test changes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Merged with |
||
IRBuilder<> Builder(Cmp); | ||
Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy)); | ||
Value *Xor = Builder.CreateXor(X, Sub); | ||
CmpInst::Predicate NewPred = | ||
Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE; | ||
Value *NewCmp = Builder.CreateICmp(NewPred, Xor, Sub); | ||
Cmp->replaceAllUsesWith(NewCmp); | ||
RecursivelyDeleteTriviallyDeadInstructions(Cmp); | ||
return true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you please add an alive2 proof for the expansion? This probably needs freeze due to use duplication. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It does not look it needs a freeze (unless I'm missing something) I've updated the description with the link. |
||
} | ||
|
||
return false; | ||
} | ||
|
||
/// Sink the given CmpInst into user blocks to reduce the number of virtual | ||
/// registers that must be created and coalesced. This is a clear win except on | ||
/// targets with multiple condition code registers (PowerPC), where it might | ||
|
@@ -2183,6 +2239,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) { | |
if (combineToUSubWithOverflow(Cmp, ModifiedDT)) | ||
return true; | ||
|
||
if (unfoldPow2Test(Cmp)) | ||
return true; | ||
|
||
if (foldICmpWithDominatingICmp(Cmp, *TLI)) | ||
return true; | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could sink this after checking the IR is the right shape
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sank it down