-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[LV] Use frozen start value for FindLastIV if needed. #132691
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
3e42683
c551166
4591537
ff11744
ab4681a
691d8c3
cc449a8
ab70b60
10d7b09
4e2b58d
4fc8fe6
094607c
6554b64
4a8163c
099676f
5979201
14362e3
3f3306b
50598f1
db60a65
177002a
557a5b9
3df74b7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7612,7 +7612,8 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( | |
BasicBlock *BypassBlock) { | ||
auto *EpiRedResult = dyn_cast<VPInstruction>(R); | ||
if (!EpiRedResult || | ||
EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult) | ||
(EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult && | ||
EpiRedResult->getOpcode() != VPInstruction::ComputeFindLastIVResult)) | ||
return; | ||
|
||
auto *EpiRedHeaderPhi = | ||
|
@@ -7633,14 +7634,17 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( | |
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind( | ||
RdxDesc.getRecurrenceKind())) { | ||
using namespace llvm::PatternMatch; | ||
Value *Cmp, *OrigResumeV; | ||
Value *Cmp, *OrigResumeV, *CmpOp; | ||
bool IsExpectedPattern = | ||
match(MainResumeValue, m_Select(m_OneUse(m_Value(Cmp)), | ||
m_Specific(RdxDesc.getSentinelValue()), | ||
m_Value(OrigResumeV))) && | ||
match(Cmp, | ||
m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV), | ||
m_Specific(RdxDesc.getRecurrenceStartValue()))); | ||
(match(Cmp, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV), | ||
m_Value(CmpOp))) && | ||
(match(CmpOp, | ||
m_Freeze(m_Specific(RdxDesc.getRecurrenceStartValue()))) || | ||
(CmpOp == RdxDesc.getRecurrenceStartValue() && | ||
isGuaranteedNotToBeUndefOrPoison(CmpOp)))); | ||
assert(IsExpectedPattern && "Unexpected reduction resume pattern"); | ||
(void)IsExpectedPattern; | ||
MainResumeValue = OrigResumeV; | ||
|
@@ -9817,8 +9821,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( | |
Builder.createSelect(Cond, OrigExitingVPV, PhiR, {}, "", FMFs); | ||
OrigExitingVPV->replaceUsesWithIf(NewExitingVPV, [](VPUser &U, unsigned) { | ||
return isa<VPInstruction>(&U) && | ||
cast<VPInstruction>(&U)->getOpcode() == | ||
VPInstruction::ComputeReductionResult; | ||
(cast<VPInstruction>(&U)->getOpcode() == | ||
VPInstruction::ComputeReductionResult || | ||
cast<VPInstruction>(&U)->getOpcode() == | ||
VPInstruction::ComputeFindLastIVResult); | ||
}); | ||
if (CM.usePredicatedReductionSelect( | ||
PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy)) | ||
|
@@ -9861,10 +9867,36 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( | |
// bc.merge.rdx phi nodes, hence it needs to be created unconditionally here | ||
// even for in-loop reductions, until the reduction resume value handling is | ||
// also modeled in VPlan. | ||
VPInstruction *FinalReductionResult; | ||
VPBuilder::InsertPointGuard Guard(Builder); | ||
Builder.setInsertPoint(MiddleVPBB, IP); | ||
auto *FinalReductionResult = Builder.createNaryOp( | ||
VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL); | ||
if (RecurrenceDescriptor::isFindLastIVRecurrenceKind( | ||
RdxDesc.getRecurrenceKind())) { | ||
VPValue *Start = PhiR->getStartValue(); | ||
if (!isGuaranteedNotToBeUndefOrPoison( | ||
PhiR->getStartValue()->getLiveInIRValue())) { | ||
Builder.setInsertPoint(cast<VPBasicBlock>(Plan->getEntry())); | ||
Start = Builder.createNaryOp(Instruction::Freeze, {Start}, {}, "fr"); | ||
} | ||
Builder.setInsertPoint(MiddleVPBB, IP); | ||
FinalReductionResult = | ||
Builder.createNaryOp(VPInstruction::ComputeFindLastIVResult, | ||
{PhiR, Start, NewExitingVPV}, ExitDL); | ||
// Update all users outside the vector region. | ||
for (VPUser *U : to_vector(OrigExitingVPV->users())) { | ||
auto *R = cast<VPRecipeBase>(U); | ||
if (R->getParent() && R->getParent()->getParent()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks to be based on the assumption that the parent's parent is a vector loop region. Is it worth asserting this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated to check that the parent's region is the vector loop region, thanks |
||
continue; | ||
|
||
for (unsigned Idx = 0; Idx != R->getNumOperands(); ++Idx) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks like a commonly-used idiom, something we probably have an equivalent IR helper for. Is it worth adding a helper in VPUser? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added VPUser::replaceUsesOfWith in line with |
||
if (R->getOperand(Idx) == PhiR->getStartValue()) | ||
R->setOperand(Idx, Start); | ||
} | ||
} | ||
} else { | ||
Builder.setInsertPoint(MiddleVPBB, IP); | ||
FinalReductionResult = Builder.createNaryOp( | ||
VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL); | ||
} | ||
// Update all users outside the vector region. | ||
OrigExitingVPV->replaceUsesWithIf( | ||
FinalReductionResult, [FinalReductionResult](VPUser &User, unsigned) { | ||
|
@@ -10353,24 +10385,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, | |
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock(); | ||
Header->setName("vec.epilog.vector.body"); | ||
|
||
// Re-use the trip count and steps expanded for the main loop, as | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess in theory this code could be moved to the end of the function in a separate NFC patch, with this patch then adding the new bits:
However, I don't want to create unnecessary burden as I know this is an important fix. I'll leave it up to you! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I left it included in the patch for now, could also split it off. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's fine. 👍 |
||
// skeleton creation needs it as a value that dominates both the scalar | ||
// and vector epilogue loops | ||
// TODO: This is a workaround needed for epilogue vectorization and it | ||
// should be removed once induction resume value creation is done | ||
// directly in VPlan. | ||
for (auto &R : make_early_inc_range(*Plan.getEntry())) { | ||
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R); | ||
if (!ExpandR) | ||
continue; | ||
auto *ExpandedVal = | ||
Plan.getOrAddLiveIn(ExpandedSCEVs.find(ExpandR->getSCEV())->second); | ||
ExpandR->replaceAllUsesWith(ExpandedVal); | ||
if (Plan.getTripCount() == ExpandR) | ||
Plan.resetTripCount(ExpandedVal); | ||
ExpandR->eraseFromParent(); | ||
} | ||
|
||
DenseMap<Value *, Value *> ToFrozen; | ||
// Ensure that the start values for all header phi recipes are updated before | ||
// vectorizing the epilogue loop. | ||
for (VPRecipeBase &R : Header->phis()) { | ||
|
@@ -10436,6 +10451,10 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, | |
ResumeV = | ||
Builder.CreateICmpNE(ResumeV, RdxDesc.getRecurrenceStartValue()); | ||
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) { | ||
ToFrozen[RdxDesc.getRecurrenceStartValue()] = | ||
cast<PHINode>(ResumeV)->getIncomingValueForBlock( | ||
EPI.MainLoopIterationCountCheck); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This confused me at first because there is no obvious indication that |
||
|
||
// VPReductionPHIRecipe for FindLastIV reductions requires an adjustment | ||
// to the resume value. The resume value is adjusted to the sentinel | ||
// value when the final value from the main vector loop equals the start | ||
|
@@ -10444,8 +10463,8 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, | |
// variable. | ||
BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent(); | ||
IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt()); | ||
Value *Cmp = | ||
Builder.CreateICmpEQ(ResumeV, RdxDesc.getRecurrenceStartValue()); | ||
Value *Cmp = Builder.CreateICmpEQ( | ||
ResumeV, ToFrozen[RdxDesc.getRecurrenceStartValue()]); | ||
ResumeV = | ||
Builder.CreateSelect(Cmp, RdxDesc.getSentinelValue(), ResumeV); | ||
} | ||
|
@@ -10461,6 +10480,31 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, | |
VPValue *StartVal = Plan.getOrAddLiveIn(ResumeV); | ||
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal); | ||
} | ||
|
||
// Re-use the trip count and steps expanded for the main loop, as | ||
// skeleton creation needs it as a value that dominates both the scalar | ||
// and vector epilogue loops | ||
// TODO: This is a workaround needed for epilogue vectorization and it | ||
// should be removed once induction resume value creation is done | ||
// directly in VPlan. | ||
for (auto &R : make_early_inc_range(*Plan.getEntry())) { | ||
auto *VPI = dyn_cast<VPInstruction>(&R); | ||
if (VPI) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What guarantee is there that VPI corresponds to the frozen start value? Do we need to check for VPInstructions with opcode Instruction::Freeze? I assume this is supposed to match up with the VPInstructions added by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At the moment, there can only be freeze VPInstruction in the header, updated to check though |
||
VPI->replaceAllUsesWith(Plan.getOrAddLiveIn( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I understand what you're doing here, but it looks a bit odd at first glance. You're essentially replacing one freeze in the epilogue entry block with another from the MainLoopIterationCountCheck block, right? Perhaps worth a comment explaining what's happening? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, I also moved the comment about re-using the trip count inside the loop, thanks |
||
ToFrozen[VPI->getOperand(0)->getLiveInIRValue()])); | ||
continue; | ||
} | ||
|
||
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R); | ||
if (!ExpandR) | ||
continue; | ||
auto *ExpandedVal = | ||
Plan.getOrAddLiveIn(ExpandedSCEVs.find(ExpandR->getSCEV())->second); | ||
ExpandR->replaceAllUsesWith(ExpandedVal); | ||
if (Plan.getTripCount() == ExpandR) | ||
Plan.resetTripCount(ExpandedVal); | ||
ExpandR->eraseFromParent(); | ||
} | ||
} | ||
|
||
// Generate bypass values from the additional bypass block. Note that when the | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -423,6 +423,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { | |
if (isSingleScalar() || isVectorToScalar()) | ||
return true; | ||
switch (Opcode) { | ||
case Instruction::Freeze: | ||
case Instruction::ICmp: | ||
case Instruction::PHI: | ||
case Instruction::Select: | ||
|
@@ -474,6 +475,12 @@ Value *VPInstruction::generate(VPTransformState &State) { | |
Value *Idx = State.get(getOperand(1), /*IsScalar=*/true); | ||
return Builder.CreateExtractElement(Vec, Idx, Name); | ||
} | ||
case Instruction::Freeze: { | ||
if (State.hasVectorValue(this)) | ||
return State.get(this); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will we still generate a freeze instruction if we have a vector value? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The check was left over from earlier version, removed, thanks |
||
Value *Op = State.get(getOperand(0), true); | ||
return Builder.CreateFreeze(Op, Name); | ||
} | ||
case Instruction::ICmp: { | ||
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); | ||
Value *A = State.get(getOperand(0), OnlyFirstLaneUsed); | ||
|
@@ -614,6 +621,28 @@ Value *VPInstruction::generate(VPTransformState &State) { | |
return Builder.CreateVectorSplat( | ||
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast"); | ||
} | ||
case VPInstruction::ComputeFindLastIVResult: { | ||
// The recipe's operands are the reduction phi, followed by one operand for | ||
// each part of the reduction. | ||
unsigned UF = getNumOperands() - 2; | ||
Value *ReducedPartRdx = State.get(getOperand(2)); | ||
for (unsigned Part = 1; Part < UF; ++Part) { | ||
ReducedPartRdx = createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, | ||
State.get(getOperand(2 + Part))); | ||
} | ||
|
||
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary | ||
// and will be removed by breaking up the recipe further. | ||
auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0)); | ||
// Get its reduction variable descriptor. | ||
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); | ||
RecurKind RK = RdxDesc.getRecurrenceKind(); | ||
|
||
assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)); | ||
assert(!PhiR->isInLoop()); | ||
return createFindLastIVReduction(Builder, ReducedPartRdx, | ||
State.get(getOperand(1), true), RdxDesc); | ||
} | ||
case VPInstruction::ComputeReductionResult: { | ||
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary | ||
// and will be removed by breaking up the recipe further. | ||
|
@@ -623,6 +652,8 @@ Value *VPInstruction::generate(VPTransformState &State) { | |
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); | ||
|
||
RecurKind RK = RdxDesc.getRecurrenceKind(); | ||
assert(!RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) && | ||
"should be handled by ComputeFindLastIVResult"); | ||
|
||
Type *PhiTy = OrigPhi->getType(); | ||
// The recipe's operands are the reduction phi, followed by one operand for | ||
|
@@ -658,9 +689,6 @@ Value *VPInstruction::generate(VPTransformState &State) { | |
if (Op != Instruction::ICmp && Op != Instruction::FCmp) | ||
ReducedPartRdx = Builder.CreateBinOp( | ||
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx"); | ||
else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) | ||
ReducedPartRdx = | ||
createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart); | ||
else | ||
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart); | ||
} | ||
|
@@ -669,8 +697,7 @@ Value *VPInstruction::generate(VPTransformState &State) { | |
// Create the reduction after the loop. Note that inloop reductions create | ||
// the target reduction in the loop using a Reduction recipe. | ||
if ((State.VF.isVector() || | ||
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) || | ||
RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) && | ||
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) && | ||
!PhiR->isInLoop()) { | ||
// TODO: Support in-order reductions based on the recurrence descriptor. | ||
// All ops in the reduction inherit fast-math-flags from the recurrence | ||
|
@@ -681,9 +708,6 @@ Value *VPInstruction::generate(VPTransformState &State) { | |
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) | ||
ReducedPartRdx = | ||
createAnyOfReduction(Builder, ReducedPartRdx, RdxDesc, OrigPhi); | ||
else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) | ||
ReducedPartRdx = | ||
createFindLastIVReduction(Builder, ReducedPartRdx, RdxDesc); | ||
else | ||
ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK); | ||
|
||
|
@@ -829,6 +853,7 @@ bool VPInstruction::isVectorToScalar() const { | |
return getOpcode() == VPInstruction::ExtractFromEnd || | ||
getOpcode() == Instruction::ExtractElement || | ||
getOpcode() == VPInstruction::FirstActiveLane || | ||
getOpcode() == VPInstruction::ComputeFindLastIVResult || | ||
getOpcode() == VPInstruction::ComputeReductionResult || | ||
getOpcode() == VPInstruction::AnyOf; | ||
} | ||
|
@@ -921,6 +946,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | |
case Instruction::ICmp: | ||
case Instruction::Select: | ||
case Instruction::Or: | ||
case Instruction::Freeze: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should probably also add this opcode to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks |
||
// TODO: Cover additional opcodes. | ||
return vputils::onlyFirstLaneUsed(this); | ||
case VPInstruction::ActiveLaneMask: | ||
|
@@ -933,6 +959,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | |
return true; | ||
case VPInstruction::PtrAdd: | ||
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this); | ||
case VPInstruction::ComputeFindLastIVResult: | ||
return Op == getOperand(1); | ||
}; | ||
llvm_unreachable("switch should return"); | ||
} | ||
|
@@ -1011,6 +1039,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, | |
case VPInstruction::ExtractFromEnd: | ||
O << "extract-from-end"; | ||
break; | ||
case VPInstruction::ComputeFindLastIVResult: | ||
O << "compute-find-last-iv-result"; | ||
break; | ||
case VPInstruction::ComputeReductionResult: | ||
O << "compute-reduction-result"; | ||
break; | ||
|
@@ -1571,7 +1602,6 @@ void VPWidenRecipe::execute(VPTransformState &State) { | |
} | ||
case Instruction::Freeze: { | ||
Value *Op = State.get(getOperand(0)); | ||
|
||
Mel-Chen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Value *Freeze = Builder.CreateFreeze(Op); | ||
State.set(this, Freeze); | ||
break; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why has this been sunk into the if/else cases when we are inserting into the same location in each case?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Restored, this was left over after moving the code