Skip to content

Commit 1edd220

Browse files
committed
[LV] Retrieve reduction resume values directly for epilogue vec. (NFC)
Retrieve the reduction resume values directly from the phis in the scalar header instead of collecting them in a map. This removes complexity from the general executePlan code path and confines it to the epilogue vectorization code, which is the only consumer of these values.
1 parent 1714b11 commit 1edd220

File tree

2 files changed

+12
-23
lines changed

2 files changed

+12
-23
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -435,11 +435,10 @@ class LoopVectorizationPlanner {
435435
/// \p ExpandedSCEVs is passed during execution of the plan for epilogue loop
436436
/// to re-use expansion results generated during main plan execution.
437437
///
438-
/// Returns a mapping of SCEVs to their expanded IR values and a mapping for
439-
/// the reduction resume values. Note that this is a temporary workaround
440-
/// needed due to the current epilogue handling.
441-
std::pair<DenseMap<const SCEV *, Value *>,
442-
DenseMap<const RecurrenceDescriptor *, Value *>>
438+
/// Returns a mapping of SCEVs to their expanded IR values.
439+
/// Note that this is a temporary workaround needed due to the current
440+
/// epilogue handling.
441+
DenseMap<const SCEV *, Value *>
443442
executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
444443
InnerLoopVectorizer &LB, DominatorTree *DT,
445444
bool IsEpilogueVectorization,

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 8 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -676,11 +676,6 @@ class InnerLoopVectorizer {
676676
/// Structure to hold information about generated runtime checks, responsible
677677
/// for cleaning the checks, if vectorization turns out unprofitable.
678678
GeneratedRTChecks &RTChecks;
679-
680-
// Holds the resume values for reductions in the loops, used to set the
681-
// correct start value of reduction PHIs when vectorizing the epilogue.
682-
SmallMapVector<const RecurrenceDescriptor *, PHINode *, 4>
683-
ReductionResumeValues;
684679
};
685680

686681
/// Encapsulate information regarding vectorization of a loop and its epilogue.
@@ -7426,10 +7421,9 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
74267421
}
74277422

74287423
// Check if \p RedResult is a ComputeReductionResult instruction, and if it is
7429-
// create a merge phi node for it and add it to \p ReductionResumeValues.
7424+
// create a merge phi node for it.
74307425
static void createAndCollectMergePhiForReduction(
74317426
VPInstruction *RedResult,
7432-
DenseMap<const RecurrenceDescriptor *, Value *> &ReductionResumeValues,
74337427
VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock,
74347428
bool VectorizingEpilogue) {
74357429
if (!RedResult ||
@@ -7487,13 +7481,9 @@ static void createAndCollectMergePhiForReduction(
74877481
OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
74887482
Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
74897483
OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
7490-
7491-
ReductionResumeValues[&RdxDesc] = BCBlockPhi;
74927484
}
74937485

7494-
std::pair<DenseMap<const SCEV *, Value *>,
7495-
DenseMap<const RecurrenceDescriptor *, Value *>>
7496-
LoopVectorizationPlanner::executePlan(
7486+
DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
74977487
ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan,
74987488
InnerLoopVectorizer &ILV, DominatorTree *DT, bool IsEpilogueVectorization,
74997489
const DenseMap<const SCEV *, Value *> *ExpandedSCEVs) {
@@ -7579,12 +7569,11 @@ LoopVectorizationPlanner::executePlan(
75797569
BestVPlan.execute(&State);
75807570

75817571
// 2.5 Collect reduction resume values.
7582-
DenseMap<const RecurrenceDescriptor *, Value *> ReductionResumeValues;
75837572
auto *ExitVPBB =
75847573
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
75857574
for (VPRecipeBase &R : *ExitVPBB) {
75867575
createAndCollectMergePhiForReduction(
7587-
dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
7576+
dyn_cast<VPInstruction>(&R), State, OrigLoop,
75887577
State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
75897578
}
75907579

@@ -7634,7 +7623,7 @@ LoopVectorizationPlanner::executePlan(
76347623
setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
76357624
}
76367625

7637-
return {State.ExpandedSCEVs, ReductionResumeValues};
7626+
return State.ExpandedSCEVs;
76387627
}
76397628

76407629
//===--------------------------------------------------------------------===//
@@ -10121,8 +10110,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1012110110
EPI, &LVL, &CM, BFI, PSI, Checks);
1012210111

1012310112
std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());
10124-
const auto &[ExpandedSCEVs, ReductionResumeValues] = LVP.executePlan(
10125-
EPI.MainLoopVF, EPI.MainLoopUF, *BestMainPlan, MainILV, DT, true);
10113+
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
10114+
*BestMainPlan, MainILV, DT, true);
1012610115
++LoopsVectorized;
1012710116

1012810117
// Second pass vectorizes the epilogue and adjusts the control flow
@@ -10167,10 +10156,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1016710156
Value *ResumeV = nullptr;
1016810157
// TODO: Move setting of resume values to prepareToExecute.
1016910158
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
10159+
ResumeV = cast<PHINode>(ReductionPhi->getUnderlyingInstr())
10160+
->getIncomingValueForBlock(L->getLoopPreheader());
1017010161
const RecurrenceDescriptor &RdxDesc =
1017110162
ReductionPhi->getRecurrenceDescriptor();
1017210163
RecurKind RK = RdxDesc.getRecurrenceKind();
10173-
ResumeV = ReductionResumeValues.find(&RdxDesc)->second;
1017410164
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
1017510165
// VPReductionPHIRecipes for AnyOf reductions expect a boolean as
1017610166
// start value; compare the final value from the main vector loop

0 commit comments

Comments
 (0)