-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[LV] Use frozen start value for FindLastIV if needed. #132691
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 16 commits
3e42683
c551166
4591537
ff11744
ab4681a
691d8c3
cc449a8
ab70b60
10d7b09
4e2b58d
4fc8fe6
094607c
6554b64
4a8163c
099676f
5979201
14362e3
3f3306b
50598f1
db60a65
177002a
557a5b9
3df74b7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7656,14 +7656,17 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( | |
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind( | ||
RdxDesc.getRecurrenceKind())) { | ||
using namespace llvm::PatternMatch; | ||
Value *Cmp, *OrigResumeV; | ||
Value *Cmp, *OrigResumeV, *CmpOp; | ||
bool IsExpectedPattern = | ||
match(MainResumeValue, m_Select(m_OneUse(m_Value(Cmp)), | ||
m_Specific(RdxDesc.getSentinelValue()), | ||
m_Value(OrigResumeV))) && | ||
match(Cmp, | ||
m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV), | ||
m_Specific(RdxDesc.getRecurrenceStartValue()))); | ||
(match(Cmp, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV), | ||
m_Value(CmpOp))) && | ||
(match(CmpOp, | ||
m_Freeze(m_Specific(RdxDesc.getRecurrenceStartValue()))) || | ||
(CmpOp == RdxDesc.getRecurrenceStartValue() && | ||
isGuaranteedNotToBeUndefOrPoison(CmpOp)))); | ||
assert(IsExpectedPattern && "Unexpected reduction resume pattern"); | ||
(void)IsExpectedPattern; | ||
MainResumeValue = OrigResumeV; | ||
|
@@ -10377,6 +10380,36 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { | |
VPInstruction::ResumePhi, | ||
{VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {}, | ||
"vec.epilog.resume.val"); | ||
|
||
// When vectorizing the epilogue, FindLastIV reductions can introduce multiple | ||
// uses of undef/poison. If the reduction start value is not guaranteed to be | ||
// undef or poison, we need to freeze it and use the frozen start when | ||
// computing the reduction result. We also need to use the frozen value in the | ||
// resume phi generated by the main vector loop, as this is also used to | ||
// compute the reduction result after the epilogue vector loop. | ||
auto AddFreezeForFindLastIVReductions = [](VPlan &Plan, | ||
bool UpdateResumePhis) { | ||
for (VPRecipeBase &R : *Plan.getMiddleBlock()) { | ||
auto *VPI = dyn_cast<VPInstruction>(&R); | ||
if (!VPI || VPI->getOpcode() != VPInstruction::ComputeFindLastIVResult) | ||
continue; | ||
VPValue *OrigStart = VPI->getOperand(1); | ||
if (isGuaranteedNotToBeUndefOrPoison(OrigStart->getLiveInIRValue())) | ||
continue; | ||
VPBuilder Builder(Plan.getEntry()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe hoist this out of the loop? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done thanks |
||
VPInstruction *Freeze = | ||
Builder.createNaryOp(Instruction::Freeze, {OrigStart}, {}, "fr"); | ||
VPI->setOperand(1, Freeze); | ||
if (UpdateResumePhis) | ||
OrigStart->replaceUsesWithIf(Freeze, [Freeze](VPUser &U, unsigned) { | ||
return Freeze != &U && isa<VPInstruction>(&U) && | ||
cast<VPInstruction>(&U)->getOpcode() == | ||
VPInstruction::ResumePhi; | ||
}); | ||
} | ||
}; | ||
AddFreezeForFindLastIVReductions(MainPlan, true); | ||
AddFreezeForFindLastIVReductions(EpiPlan, false); | ||
} | ||
|
||
/// Prepare \p Plan for vectorizing the epilogue loop. That is, re-use expanded | ||
|
@@ -10389,24 +10422,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, | |
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock(); | ||
Header->setName("vec.epilog.vector.body"); | ||
|
||
// Re-use the trip count and steps expanded for the main loop, as | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess in theory this code could be moved to the end of the function in a separate NFC patch, with this patch then adding the new bits:
However, I don't want to create unnecessary burden as I know this is an important fix. I'll leave it up to you! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I left it included in the patch for now, could also split it off. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's fine. 👍 |
||
// skeleton creation needs it as a value that dominates both the scalar | ||
// and vector epilogue loops | ||
// TODO: This is a workaround needed for epilogue vectorization and it | ||
// should be removed once induction resume value creation is done | ||
// directly in VPlan. | ||
for (auto &R : make_early_inc_range(*Plan.getEntry())) { | ||
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R); | ||
if (!ExpandR) | ||
continue; | ||
auto *ExpandedVal = | ||
Plan.getOrAddLiveIn(ExpandedSCEVs.find(ExpandR->getSCEV())->second); | ||
ExpandR->replaceAllUsesWith(ExpandedVal); | ||
if (Plan.getTripCount() == ExpandR) | ||
Plan.resetTripCount(ExpandedVal); | ||
ExpandR->eraseFromParent(); | ||
} | ||
|
||
DenseMap<Value *, Value *> ToFrozen; | ||
// Ensure that the start values for all header phi recipes are updated before | ||
// vectorizing the epilogue loop. | ||
for (VPRecipeBase &R : Header->phis()) { | ||
|
@@ -10472,6 +10488,10 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, | |
ResumeV = | ||
Builder.CreateICmpNE(ResumeV, RdxDesc.getRecurrenceStartValue()); | ||
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) { | ||
ToFrozen[RdxDesc.getRecurrenceStartValue()] = | ||
cast<PHINode>(ResumeV)->getIncomingValueForBlock( | ||
EPI.MainLoopIterationCountCheck); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This confused me at first because there is no obvious indication that |
||
|
||
// VPReductionPHIRecipe for FindLastIV reductions requires an adjustment | ||
// to the resume value. The resume value is adjusted to the sentinel | ||
// value when the final value from the main vector loop equals the start | ||
|
@@ -10480,8 +10500,8 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, | |
// variable. | ||
BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent(); | ||
IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt()); | ||
Value *Cmp = | ||
Builder.CreateICmpEQ(ResumeV, RdxDesc.getRecurrenceStartValue()); | ||
Value *Cmp = Builder.CreateICmpEQ( | ||
ResumeV, ToFrozen[RdxDesc.getRecurrenceStartValue()]); | ||
ResumeV = | ||
Builder.CreateSelect(Cmp, RdxDesc.getSentinelValue(), ResumeV); | ||
} | ||
|
@@ -10497,6 +10517,31 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, | |
VPValue *StartVal = Plan.getOrAddLiveIn(ResumeV); | ||
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal); | ||
} | ||
|
||
// Re-use the trip count and steps expanded for the main loop, as | ||
// skeleton creation needs it as a value that dominates both the scalar | ||
// and vector epilogue loops | ||
// TODO: This is a workaround needed for epilogue vectorization and it | ||
// should be removed once induction resume value creation is done | ||
// directly in VPlan. | ||
for (auto &R : make_early_inc_range(*Plan.getEntry())) { | ||
auto *VPI = dyn_cast<VPInstruction>(&R); | ||
if (VPI) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What guarantee is there that VPI corresponds to the frozen start value? Do we need to check for VPInstructions with opcode Instruction::Freeze? I assume this is supposed to match up with the VPInstructions added by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At the moment, there can only be freeze VPInstruction in the header, updated to check though |
||
VPI->replaceAllUsesWith(Plan.getOrAddLiveIn( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I understand what you're doing here, but it looks a bit odd at first glance. You're essentially replacing one freeze in the epilogue entry block with another from the MainLoopIterationCountCheck block, right? Perhaps worth a comment explaining what's happening? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, I also moved the comment about re-using the trip count inside the loop, thanks |
||
ToFrozen[VPI->getOperand(0)->getLiveInIRValue()])); | ||
continue; | ||
} | ||
|
||
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R); | ||
if (!ExpandR) | ||
continue; | ||
auto *ExpandedVal = | ||
Plan.getOrAddLiveIn(ExpandedSCEVs.find(ExpandR->getSCEV())->second); | ||
ExpandR->replaceAllUsesWith(ExpandedVal); | ||
if (Plan.getTripCount() == ExpandR) | ||
Plan.resetTripCount(ExpandedVal); | ||
ExpandR->eraseFromParent(); | ||
} | ||
} | ||
|
||
// Generate bypass values from the additional bypass block. Note that when the | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1403,6 +1403,13 @@ void VPValue::replaceUsesWithIf( | |
} | ||
} | ||
|
||
void VPUser::replaceUsesOfWith(VPValue *From, VPValue *To) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we can remove this since we don't use the function in this patch. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, removed again |
||
for (unsigned Idx = 0; Idx != getNumOperands(); ++Idx) { | ||
if (getOperand(Idx) == From) | ||
setOperand(Idx, To); | ||
} | ||
} | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const { | ||
OS << Tracker.getOrCreateName(this); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -423,6 +423,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { | |
if (isSingleScalar() || isVectorToScalar()) | ||
return true; | ||
switch (Opcode) { | ||
case Instruction::Freeze: | ||
case Instruction::ICmp: | ||
case Instruction::PHI: | ||
case Instruction::Select: | ||
|
@@ -474,6 +475,10 @@ Value *VPInstruction::generate(VPTransformState &State) { | |
Value *Idx = State.get(getOperand(1), /*IsScalar=*/true); | ||
return Builder.CreateExtractElement(Vec, Idx, Name); | ||
} | ||
case Instruction::Freeze: { | ||
Value *Op = State.get(getOperand(0), vputils::onlyFirstLaneUsed(this)); | ||
return Builder.CreateFreeze(Op, Name); | ||
} | ||
case Instruction::ICmp: { | ||
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); | ||
Value *A = State.get(getOperand(0), OnlyFirstLaneUsed); | ||
|
@@ -909,6 +914,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { | |
return false; | ||
switch (getOpcode()) { | ||
case Instruction::ExtractElement: | ||
case Instruction::Freeze: | ||
case Instruction::ICmp: | ||
case Instruction::Select: | ||
case VPInstruction::AnyOf: | ||
|
@@ -941,6 +947,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | |
case Instruction::ICmp: | ||
case Instruction::Select: | ||
case Instruction::Or: | ||
case Instruction::Freeze: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should probably also add this opcode to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks |
||
// TODO: Cover additional opcodes. | ||
return vputils::onlyFirstLaneUsed(this); | ||
case VPInstruction::ActiveLaneMask: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -246,6 +246,9 @@ class VPUser { | |
New->addUser(*this); | ||
} | ||
|
||
/// Replaces all uses of \p From in the VPUser with \p To. | ||
void replaceUsesOfWith(VPValue *From, VPValue *To); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, removed again |
||
|
||
typedef SmallVectorImpl<VPValue *>::iterator operand_iterator; | ||
typedef SmallVectorImpl<VPValue *>::const_iterator const_operand_iterator; | ||
typedef iterator_range<operand_iterator> operand_range; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know this makes it even wordier, but shouldn't this be
If the reduction start value is not guaranteed to **not** be undef or poison
? In other words, if the start value could be either undef or poison we need to freeze it?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I update the comment to say
If the reduction start value may be undef or poison....