@@ -513,16 +513,17 @@ class InnerLoopVectorizer {
513
513
// / Fix the non-induction PHIs in \p Plan.
514
514
void fixNonInductionPHIs (VPTransformState &State);
515
515
516
- // / Create a new phi node for the induction variable \p OrigPhi to resume
517
- // / iteration count in the scalar epilogue, from where the vectorized loop
518
- // / left off. \p Step is the SCEV-expanded induction step to use. In cases
519
- // / where the loop skeleton is more complicated (i.e., epilogue vectorization)
520
- // / and the resume values can come from an additional bypass block, the \p
521
- // / AdditionalBypass pair provides information about the bypass block and the
522
- // / end value on the edge from bypass to this loop.
523
- PHINode *createInductionResumeValue (
524
- PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
525
- ArrayRef<BasicBlock *> BypassBlocks,
516
+ // / Create a ResumePHI VPInstruction for the induction \p PhiIRI to resume
517
+ // / iteration count in the scalar epilogue from where the vectorized loop
518
+ // / left off, and add it to the scalar preheader of VPlan. \p Step is the
519
+ // / SCEV-expanded induction step to use. In cases where the loop skeleton is
520
+ // / more complicated (i.e., epilogue vectorization) and the resume values can
521
+ // / come from an additional bypass block, the \p AdditionalBypass pair
522
+ // / provides this additional bypass block along with the resume value coming
523
+ // / from it.
524
+ void createInductionResumeVPValue (
525
+ VPIRInstruction *PhiIRI, const InductionDescriptor &ID, Value *Step,
526
+ ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
526
527
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
527
528
528
529
// / Returns the original loop trip count.
@@ -533,6 +534,11 @@ class InnerLoopVectorizer {
533
534
// / count of the original loop for both main loop and epilogue vectorization.
534
535
void setTripCount (Value *TC) { TripCount = TC; }
535
536
537
+ std::pair<BasicBlock *, Value *>
538
+ getInductionBypassValue (PHINode *OrigPhi) const {
539
+ return InductionBypassValues.at (OrigPhi);
540
+ }
541
+
536
542
protected:
537
543
friend class LoopVectorizationPlanner ;
538
544
@@ -572,7 +578,7 @@ class InnerLoopVectorizer {
572
578
// / vectorization) and the resume values can come from an additional bypass
573
579
// / block, the \p AdditionalBypass pair provides information about the bypass
574
580
// / block and the end value on the edge from bypass to this loop.
575
- void createInductionResumeValues (
581
+ void createInductionResumeVPValues (
576
582
const SCEV2ValueTy &ExpandedSCEVs,
577
583
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
578
584
@@ -664,6 +670,11 @@ class InnerLoopVectorizer {
664
670
// / for cleaning the checks, if vectorization turns out unprofitable.
665
671
GeneratedRTChecks &RTChecks;
666
672
673
+ // / Mapping of induction phis to their bypass values and bypass blocks. They
674
+ // / need to be added to their phi nodes after the epilogue skeleton has been
675
+ // / created.
676
+ DenseMap<PHINode *, std::pair<BasicBlock *, Value *>> InductionBypassValues;
677
+
667
678
VPlan &Plan;
668
679
};
669
680
@@ -2580,10 +2591,11 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2580
2591
nullptr , Twine (Prefix) + " scalar.ph" );
2581
2592
}
2582
2593
2583
- PHINode * InnerLoopVectorizer::createInductionResumeValue (
2584
- PHINode *OrigPhi , const InductionDescriptor &II, Value *Step,
2585
- ArrayRef<BasicBlock *> BypassBlocks,
2594
+ void InnerLoopVectorizer::createInductionResumeVPValue (
2595
+ VPIRInstruction *PhiR , const InductionDescriptor &II, Value *Step,
2596
+ ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
2586
2597
std::pair<BasicBlock *, Value *> AdditionalBypass) {
2598
+ auto *OrigPhi = cast<PHINode>(&PhiR->getInstruction ());
2587
2599
Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
2588
2600
assert (VectorTripCount && " Expected valid arguments" );
2589
2601
@@ -2615,27 +2627,21 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
2615
2627
}
2616
2628
}
2617
2629
2618
- // Create phi nodes to merge from the backedge-taken check block.
2619
- PHINode *BCResumeVal =
2620
- PHINode::Create (OrigPhi-> getType ( ), 3 , " bc.resume.val " ,
2621
- LoopScalarPreHeader-> getFirstNonPHIIt () );
2622
- // Copy original phi DL over to the new one.
2623
- BCResumeVal-> setDebugLoc (OrigPhi-> getDebugLoc () );
2630
+ auto *ResumePhiRecipe = ScalarPHBuilder. createNaryOp (
2631
+ VPInstruction::ResumePhi,
2632
+ {Plan. getOrAddLiveIn (EndValue ), Plan. getOrAddLiveIn (II. getStartValue ())} ,
2633
+ OrigPhi-> getDebugLoc (), " bc.resume.val " );
2634
+ assert (PhiR-> getNumOperands () == 0 && " PhiR should not have any operands " );
2635
+ PhiR-> addOperand (ResumePhiRecipe );
2624
2636
2625
- // The new PHI merges the original incoming value, in case of a bypass,
2626
- // or the value at the end of the vectorized loop.
2627
- BCResumeVal->addIncoming (EndValue, LoopMiddleBlock);
2628
-
2629
- // Fix the scalar body counter (PHI node).
2630
- // The old induction's phi node in the scalar body needs the truncated
2631
- // value.
2632
- for (BasicBlock *BB : BypassBlocks)
2633
- BCResumeVal->addIncoming (II.getStartValue (), BB);
2634
-
2635
- if (AdditionalBypass.first )
2636
- BCResumeVal->setIncomingValueForBlock (AdditionalBypass.first ,
2637
- EndValueFromAdditionalBypass);
2638
- return BCResumeVal;
2637
+ if (AdditionalBypass.first ) {
2638
+ // Store the bypass values here, as they need to be added to their phi nodes
2639
+ // after the epilogue skeleton has been created.
2640
+ assert (!InductionBypassValues.contains (OrigPhi) &&
2641
+ " entry for OrigPhi already exits" );
2642
+ InductionBypassValues[OrigPhi] = {AdditionalBypass.first ,
2643
+ EndValueFromAdditionalBypass};
2644
+ }
2639
2645
}
2640
2646
2641
2647
// / Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -2652,26 +2658,31 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
2652
2658
return I->second ;
2653
2659
}
2654
2660
2655
- void InnerLoopVectorizer::createInductionResumeValues (
2661
+ void InnerLoopVectorizer::createInductionResumeVPValues (
2656
2662
const SCEV2ValueTy &ExpandedSCEVs,
2657
2663
std::pair<BasicBlock *, Value *> AdditionalBypass) {
2658
2664
assert (((AdditionalBypass.first && AdditionalBypass.second ) ||
2659
2665
(!AdditionalBypass.first && !AdditionalBypass.second )) &&
2660
2666
" Inconsistent information about additional bypass." );
2661
2667
// We are going to resume the execution of the scalar loop.
2662
- // Go over all of the induction variables that we found and fix the
2663
- // PHIs that are left in the scalar version of the loop.
2664
- // The starting values of PHI nodes depend on the counter of the last
2665
- // iteration in the vectorized loop.
2666
- // If we come from a bypass edge then we need to start from the original
2668
+ // Go over all of the induction variables in the scalar header and fix the
2669
+ // PHIs that are left in the scalar version of the loop. The starting values
2670
+ // of PHI nodes depend on the counter of the last iteration in the vectorized
2671
+ // loop. If we come from a bypass edge then we need to start from the original
2667
2672
// start value.
2668
- for (const auto &InductionEntry : Legal->getInductionVars ()) {
2669
- PHINode *OrigPhi = InductionEntry.first ;
2670
- const InductionDescriptor &II = InductionEntry.second ;
2671
- PHINode *BCResumeVal = createInductionResumeValue (
2672
- OrigPhi, II, getExpandedStep (II, ExpandedSCEVs), LoopBypassBlocks,
2673
- AdditionalBypass);
2674
- OrigPhi->setIncomingValueForBlock (LoopScalarPreHeader, BCResumeVal);
2673
+ VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader ();
2674
+ VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
2675
+ for (VPRecipeBase &R : *Plan.getScalarHeader ()) {
2676
+ auto *PhiR = cast<VPIRInstruction>(&R);
2677
+ auto *Phi = dyn_cast<PHINode>(&PhiR->getInstruction ());
2678
+ if (!Phi)
2679
+ break ;
2680
+ if (!Legal->getInductionVars ().contains (Phi))
2681
+ continue ;
2682
+ const InductionDescriptor &II = Legal->getInductionVars ().find (Phi)->second ;
2683
+ createInductionResumeVPValue (PhiR, II, getExpandedStep (II, ExpandedSCEVs),
2684
+ LoopBypassBlocks, ScalarPHBuilder,
2685
+ AdditionalBypass);
2675
2686
}
2676
2687
}
2677
2688
@@ -2734,7 +2745,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
2734
2745
emitMemRuntimeChecks (LoopScalarPreHeader);
2735
2746
2736
2747
// Emit phis for the new starting index of the scalar loop.
2737
- createInductionResumeValues (ExpandedSCEVs);
2748
+ createInductionResumeVPValues (ExpandedSCEVs);
2738
2749
2739
2750
return {LoopVectorPreHeader, nullptr };
2740
2751
}
@@ -7745,13 +7756,21 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7745
7756
7746
7757
BestVPlan.execute (&State);
7747
7758
7748
- // 2.5 Collect reduction resume values.
7749
7759
auto *ExitVPBB = BestVPlan.getMiddleBlock ();
7750
- if (VectorizingEpilogue)
7760
+ // 2.5 When vectorizing the epilogue, fix reduction resume values and
7761
+ // induction resume values from the bypass blocks.
7762
+ if (VectorizingEpilogue) {
7751
7763
for (VPRecipeBase &R : *ExitVPBB) {
7752
7764
fixReductionScalarResumeWhenVectorizingEpilog (
7753
7765
&R, State, State.CFG .VPBB2IRBB [ExitVPBB]);
7754
7766
}
7767
+ BasicBlock *PH = OrigLoop->getLoopPreheader ();
7768
+ for (const auto &[IVPhi, _] : Legal->getInductionVars ()) {
7769
+ auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
7770
+ const auto &[BB, V] = ILV.getInductionBypassValue (IVPhi);
7771
+ Inc->setIncomingValueForBlock (BB, V);
7772
+ }
7773
+ }
7755
7774
7756
7775
// 2.6. Maintain Loop Hints
7757
7776
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -7840,10 +7859,10 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
7840
7859
// Generate the induction variable.
7841
7860
EPI.VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
7842
7861
7843
- // Skip induction resume value creation here because they will be created in
7844
- // the second pass for the scalar loop. The induction resume values for the
7845
- // inductions in the epilogue loop are created before executing the plan for
7846
- // the epilogue loop.
7862
+ // Create induction resume values and ResumePhis for the inductions in the
7863
+ // epilogue loop in the VPlan for the epilogue vector loop.
7864
+ VPBasicBlock *ScalarPHVPBB = Plan. getScalarPreheader ();
7865
+ createInductionResumeVPValues (ExpandedSCEVs);
7847
7866
7848
7867
return {LoopVectorPreHeader, nullptr };
7849
7868
}
@@ -8024,9 +8043,9 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
8024
8043
// check, then the resume value for the induction variable comes from
8025
8044
// the trip count of the main vector loop, hence passing the AdditionalBypass
8026
8045
// argument.
8027
- createInductionResumeValues (ExpandedSCEVs,
8028
- {VecEpilogueIterationCountCheck,
8029
- EPI.VectorTripCount } /* AdditionalBypass */ );
8046
+ createInductionResumeVPValues (ExpandedSCEVs,
8047
+ {VecEpilogueIterationCountCheck,
8048
+ EPI.VectorTripCount } /* AdditionalBypass */ );
8030
8049
8031
8050
return {LoopVectorPreHeader, EPResumeVal};
8032
8051
}
@@ -10327,23 +10346,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10327
10346
RdxDesc.getRecurrenceStartValue ());
10328
10347
}
10329
10348
} else {
10330
- // Create induction resume values for both widened pointer and
10331
- // integer/fp inductions and update the start value of the induction
10332
- // recipes to use the resume value.
10349
+ // Retrieve the induction resume values for wide inductions from
10350
+ // their original phi nodes in the scalar loop.
10333
10351
PHINode *IndPhi = nullptr ;
10334
- const InductionDescriptor *ID;
10335
10352
if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
10336
10353
IndPhi = cast<PHINode>(Ind->getUnderlyingValue ());
10337
- ID = &Ind->getInductionDescriptor ();
10338
10354
} else {
10339
10355
auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
10340
10356
IndPhi = WidenInd->getPHINode ();
10341
- ID = &WidenInd->getInductionDescriptor ();
10342
10357
}
10343
-
10344
- ResumeV = MainILV.createInductionResumeValue (
10345
- IndPhi, *ID, getExpandedStep (*ID, ExpandedSCEVs),
10346
- {EPI.MainLoopIterationCountCheck });
10358
+ ResumeV = IndPhi->getIncomingValueForBlock (L->getLoopPreheader ());
10347
10359
}
10348
10360
assert (ResumeV && " Must have a resume value" );
10349
10361
VPValue *StartVal = BestEpiPlan.getOrAddLiveIn (ResumeV);
@@ -10355,7 +10367,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10355
10367
LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV,
10356
10368
DT, true , &ExpandedSCEVs);
10357
10369
++LoopsEpilogueVectorized;
10358
-
10359
10370
if (!MainILV.areSafetyChecksAdded ())
10360
10371
DisableRuntimeUnroll = true ;
10361
10372
} else {
0 commit comments