Skip to content

Commit f1e88a8

Browse files
committed
[VPlan] Update scalar induction resume values in VPlan. #110577
1 parent 0dbdc6d commit f1e88a8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+750
-736
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 79 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -513,16 +513,17 @@ class InnerLoopVectorizer {
513513
/// Fix the non-induction PHIs in \p Plan.
514514
void fixNonInductionPHIs(VPTransformState &State);
515515

516-
/// Create a new phi node for the induction variable \p OrigPhi to resume
517-
/// iteration count in the scalar epilogue, from where the vectorized loop
518-
/// left off. \p Step is the SCEV-expanded induction step to use. In cases
519-
/// where the loop skeleton is more complicated (i.e., epilogue vectorization)
520-
/// and the resume values can come from an additional bypass block, the \p
521-
/// AdditionalBypass pair provides information about the bypass block and the
522-
/// end value on the edge from bypass to this loop.
523-
PHINode *createInductionResumeValue(
524-
PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
525-
ArrayRef<BasicBlock *> BypassBlocks,
516+
/// Create a ResumePhi VPInstruction for the induction \p PhiIRI to resume
517+
/// iteration count in the scalar epilogue from where the vectorized loop
518+
/// left off, and add it to the scalar preheader of VPlan. \p Step is the
519+
/// SCEV-expanded induction step to use. In cases where the loop skeleton is
520+
/// more complicated (i.e., epilogue vectorization) and the resume values can
521+
/// come from an additional bypass block, the \p AdditionalBypass pair
522+
/// provides this additional bypass block along with the resume value coming
523+
/// from it.
524+
void createInductionResumeVPValue(
525+
VPIRInstruction *PhiIRI, const InductionDescriptor &ID, Value *Step,
526+
ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
526527
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
527528

528529
/// Returns the original loop trip count.
@@ -533,6 +534,11 @@ class InnerLoopVectorizer {
533534
/// count of the original loop for both main loop and epilogue vectorization.
534535
void setTripCount(Value *TC) { TripCount = TC; }
535536

537+
std::pair<BasicBlock *, Value *>
538+
getInductionBypassValue(PHINode *OrigPhi) const {
539+
return InductionBypassValues.at(OrigPhi);
540+
}
541+
536542
protected:
537543
friend class LoopVectorizationPlanner;
538544

@@ -572,7 +578,7 @@ class InnerLoopVectorizer {
572578
/// vectorization) and the resume values can come from an additional bypass
573579
/// block, the \p AdditionalBypass pair provides information about the bypass
574580
/// block and the end value on the edge from bypass to this loop.
575-
void createInductionResumeValues(
581+
void createInductionResumeVPValues(
576582
const SCEV2ValueTy &ExpandedSCEVs,
577583
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
578584

@@ -664,6 +670,11 @@ class InnerLoopVectorizer {
664670
/// for cleaning the checks, if vectorization turns out unprofitable.
665671
GeneratedRTChecks &RTChecks;
666672

673+
/// Mapping of induction phis to their bypass values and bypass blocks. They
674+
/// need to be added to their phi nodes after the epilogue skeleton has been
675+
/// created.
676+
DenseMap<PHINode *, std::pair<BasicBlock *, Value *>> InductionBypassValues;
677+
667678
VPlan &Plan;
668679
};
669680

@@ -2580,10 +2591,11 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
25802591
nullptr, Twine(Prefix) + "scalar.ph");
25812592
}
25822593

2583-
PHINode *InnerLoopVectorizer::createInductionResumeValue(
2584-
PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
2585-
ArrayRef<BasicBlock *> BypassBlocks,
2594+
void InnerLoopVectorizer::createInductionResumeVPValue(
2595+
VPIRInstruction *PhiR, const InductionDescriptor &II, Value *Step,
2596+
ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
25862597
std::pair<BasicBlock *, Value *> AdditionalBypass) {
2598+
auto *OrigPhi = cast<PHINode>(&PhiR->getInstruction());
25872599
Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
25882600
assert(VectorTripCount && "Expected valid arguments");
25892601

@@ -2615,27 +2627,21 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
26152627
}
26162628
}
26172629

2618-
// Create phi nodes to merge from the backedge-taken check block.
2619-
PHINode *BCResumeVal =
2620-
PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
2621-
LoopScalarPreHeader->getFirstNonPHIIt());
2622-
// Copy original phi DL over to the new one.
2623-
BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
2630+
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
2631+
VPInstruction::ResumePhi,
2632+
{Plan.getOrAddLiveIn(EndValue), Plan.getOrAddLiveIn(II.getStartValue())},
2633+
OrigPhi->getDebugLoc(), "bc.resume.val");
2634+
assert(PhiR->getNumOperands() == 0 && "PhiR should not have any operands");
2635+
PhiR->addOperand(ResumePhiRecipe);
26242636

2625-
// The new PHI merges the original incoming value, in case of a bypass,
2626-
// or the value at the end of the vectorized loop.
2627-
BCResumeVal->addIncoming(EndValue, LoopMiddleBlock);
2628-
2629-
// Fix the scalar body counter (PHI node).
2630-
// The old induction's phi node in the scalar body needs the truncated
2631-
// value.
2632-
for (BasicBlock *BB : BypassBlocks)
2633-
BCResumeVal->addIncoming(II.getStartValue(), BB);
2634-
2635-
if (AdditionalBypass.first)
2636-
BCResumeVal->setIncomingValueForBlock(AdditionalBypass.first,
2637-
EndValueFromAdditionalBypass);
2638-
return BCResumeVal;
2637+
if (AdditionalBypass.first) {
2638+
// Store the bypass values here, as they need to be added to their phi nodes
2639+
// after the epilogue skeleton has been created.
2640+
assert(!InductionBypassValues.contains(OrigPhi) &&
2641+
"entry for OrigPhi already exits");
2642+
InductionBypassValues[OrigPhi] = {AdditionalBypass.first,
2643+
EndValueFromAdditionalBypass};
2644+
}
26392645
}
26402646

26412647
/// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -2652,26 +2658,31 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
26522658
return I->second;
26532659
}
26542660

2655-
void InnerLoopVectorizer::createInductionResumeValues(
2661+
void InnerLoopVectorizer::createInductionResumeVPValues(
26562662
const SCEV2ValueTy &ExpandedSCEVs,
26572663
std::pair<BasicBlock *, Value *> AdditionalBypass) {
26582664
assert(((AdditionalBypass.first && AdditionalBypass.second) ||
26592665
(!AdditionalBypass.first && !AdditionalBypass.second)) &&
26602666
"Inconsistent information about additional bypass.");
26612667
// We are going to resume the execution of the scalar loop.
2662-
// Go over all of the induction variables that we found and fix the
2663-
// PHIs that are left in the scalar version of the loop.
2664-
// The starting values of PHI nodes depend on the counter of the last
2665-
// iteration in the vectorized loop.
2666-
// If we come from a bypass edge then we need to start from the original
2668+
// Go over all of the induction variables in the scalar header and fix the
2669+
// PHIs that are left in the scalar version of the loop. The starting values
2670+
// of PHI nodes depend on the counter of the last iteration in the vectorized
2671+
// loop. If we come from a bypass edge then we need to start from the original
26672672
// start value.
2668-
for (const auto &InductionEntry : Legal->getInductionVars()) {
2669-
PHINode *OrigPhi = InductionEntry.first;
2670-
const InductionDescriptor &II = InductionEntry.second;
2671-
PHINode *BCResumeVal = createInductionResumeValue(
2672-
OrigPhi, II, getExpandedStep(II, ExpandedSCEVs), LoopBypassBlocks,
2673-
AdditionalBypass);
2674-
OrigPhi->setIncomingValueForBlock(LoopScalarPreHeader, BCResumeVal);
2673+
VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader();
2674+
VPBuilder ScalarPHBuilder(ScalarPHVPBB, ScalarPHVPBB->begin());
2675+
for (VPRecipeBase &R : *Plan.getScalarHeader()) {
2676+
auto *PhiR = cast<VPIRInstruction>(&R);
2677+
auto *Phi = dyn_cast<PHINode>(&PhiR->getInstruction());
2678+
if (!Phi)
2679+
break;
2680+
if (!Legal->getInductionVars().contains(Phi))
2681+
continue;
2682+
const InductionDescriptor &II = Legal->getInductionVars().find(Phi)->second;
2683+
createInductionResumeVPValue(PhiR, II, getExpandedStep(II, ExpandedSCEVs),
2684+
LoopBypassBlocks, ScalarPHBuilder,
2685+
AdditionalBypass);
26752686
}
26762687
}
26772688

@@ -2734,7 +2745,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
27342745
emitMemRuntimeChecks(LoopScalarPreHeader);
27352746

27362747
// Emit phis for the new starting index of the scalar loop.
2737-
createInductionResumeValues(ExpandedSCEVs);
2748+
createInductionResumeVPValues(ExpandedSCEVs);
27382749

27392750
return {LoopVectorPreHeader, nullptr};
27402751
}
@@ -7745,13 +7756,21 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77457756

77467757
BestVPlan.execute(&State);
77477758

7748-
// 2.5 Collect reduction resume values.
77497759
auto *ExitVPBB = BestVPlan.getMiddleBlock();
7750-
if (VectorizingEpilogue)
7760+
// 2.5 When vectorizing the epilogue, fix reduction resume values and
7761+
// induction resume values from the bypass blocks.
7762+
if (VectorizingEpilogue) {
77517763
for (VPRecipeBase &R : *ExitVPBB) {
77527764
fixReductionScalarResumeWhenVectorizingEpilog(
77537765
&R, State, State.CFG.VPBB2IRBB[ExitVPBB]);
77547766
}
7767+
BasicBlock *PH = OrigLoop->getLoopPreheader();
7768+
for (const auto &[IVPhi, _] : Legal->getInductionVars()) {
7769+
auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock(PH));
7770+
const auto &[BB, V] = ILV.getInductionBypassValue(IVPhi);
7771+
Inc->setIncomingValueForBlock(BB, V);
7772+
}
7773+
}
77557774

77567775
// 2.6. Maintain Loop Hints
77577776
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -7840,10 +7859,10 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
78407859
// Generate the induction variable.
78417860
EPI.VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
78427861

7843-
// Skip induction resume value creation here because they will be created in
7844-
// the second pass for the scalar loop. The induction resume values for the
7845-
// inductions in the epilogue loop are created before executing the plan for
7846-
// the epilogue loop.
7862+
// Create induction resume values and ResumePhis for the inductions in the
7863+
// epilogue loop in the VPlan for the epilogue vector loop.
7864+
VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader();
7865+
createInductionResumeVPValues(ExpandedSCEVs);
78477866

78487867
return {LoopVectorPreHeader, nullptr};
78497868
}
@@ -8024,9 +8043,9 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
80248043
// check, then the resume value for the induction variable comes from
80258044
// the trip count of the main vector loop, hence passing the AdditionalBypass
80268045
// argument.
8027-
createInductionResumeValues(ExpandedSCEVs,
8028-
{VecEpilogueIterationCountCheck,
8029-
EPI.VectorTripCount} /* AdditionalBypass */);
8046+
createInductionResumeVPValues(ExpandedSCEVs,
8047+
{VecEpilogueIterationCountCheck,
8048+
EPI.VectorTripCount} /* AdditionalBypass */);
80308049

80318050
return {LoopVectorPreHeader, EPResumeVal};
80328051
}
@@ -10327,23 +10346,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1032710346
RdxDesc.getRecurrenceStartValue());
1032810347
}
1032910348
} else {
10330-
// Create induction resume values for both widened pointer and
10331-
// integer/fp inductions and update the start value of the induction
10332-
// recipes to use the resume value.
10349+
// Retrieve the induction resume values for wide inductions from
10350+
// their original phi nodes in the scalar loop.
1033310351
PHINode *IndPhi = nullptr;
10334-
const InductionDescriptor *ID;
1033510352
if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
1033610353
IndPhi = cast<PHINode>(Ind->getUnderlyingValue());
10337-
ID = &Ind->getInductionDescriptor();
1033810354
} else {
1033910355
auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
1034010356
IndPhi = WidenInd->getPHINode();
10341-
ID = &WidenInd->getInductionDescriptor();
1034210357
}
10343-
10344-
ResumeV = MainILV.createInductionResumeValue(
10345-
IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
10346-
{EPI.MainLoopIterationCountCheck});
10358+
ResumeV = IndPhi->getIncomingValueForBlock(L->getLoopPreheader());
1034710359
}
1034810360
assert(ResumeV && "Must have a resume value");
1034910361
VPValue *StartVal = BestEpiPlan.getOrAddLiveIn(ResumeV);
@@ -10355,7 +10367,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1035510367
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
1035610368
DT, true, &ExpandedSCEVs);
1035710369
++LoopsEpilogueVectorized;
10358-
1035910370
if (!MainILV.areSafetyChecksAdded())
1036010371
DisableRuntimeUnroll = true;
1036110372
} else {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -629,7 +629,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
629629
State.CFG
630630
.VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
631631
NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
632-
for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
632+
// TODO: Predecessors are temporarily reversed to reduce test changes.
633+
// Remove the reversal and update the remaining tests once the functional change has landed.
634+
for (auto *OtherPred :
635+
reverse(to_vector(predecessors(Builder.GetInsertBlock())))) {
633636
assert(OtherPred != VPlanPred &&
634637
"VPlan predecessors should not be connected yet");
635638
NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,10 +205,10 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
205205
; CHECK: vector.ph:
206206
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
207207
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
208+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START]], [[N_VEC]]
208209
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0
209210
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
210211
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
211-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START]], [[N_VEC]]
212212
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
213213
; CHECK: vector.body:
214214
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -433,7 +433,7 @@ define void @test_widen_extended_induction(ptr %dst) {
433433
; CHECK: vec.epilog.middle.block:
434434
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
435435
; CHECK: vec.epilog.scalar.ph:
436-
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
436+
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
437437
; CHECK-NEXT: br label [[LOOP:%.*]]
438438
; CHECK: loop:
439439
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

0 commit comments

Comments
 (0)