@@ -2610,22 +2610,15 @@ void InnerLoopVectorizer::createInductionResumeValue(
2610
2610
assert (VectorTripCount && " Expected valid arguments" );
2611
2611
2612
2612
Instruction *OldInduction = Legal->getPrimaryInduction ();
2613
- Value *EndValue = nullptr;
2614
2613
Value *EndValueFromAdditionalBypass = AdditionalBypass.second ;
2615
2614
if (OrigPhi == OldInduction) {
2616
- // We know what the end value is.
2617
- EndValue = VectorTripCount;
2618
2615
} else {
2619
2616
IRBuilder<> B (LoopVectorPreHeader->getTerminator ());
2620
2617
2621
2618
// Fast-math-flags propagate from the original induction instruction.
2622
2619
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp ()))
2623
2620
B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
2624
2621
2625
- EndValue = emitTransformedIndex(B, VectorTripCount, II.getStartValue(),
2626
- Step, II.getKind(), II.getInductionBinOp());
2627
- EndValue->setName("ind.end");
2628
-
2629
2622
// Compute the end value for the additional bypass (if applicable).
2630
2623
if (AdditionalBypass.first ) {
2631
2624
B.SetInsertPoint (AdditionalBypass.first ,
@@ -2637,26 +2630,6 @@ void InnerLoopVectorizer::createInductionResumeValue(
2637
2630
}
2638
2631
}
2639
2632
2640
- VPBasicBlock *MiddleVPBB =
2641
- cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
2642
-
2643
- VPBasicBlock *ScalarPHVPBB = nullptr;
2644
- if (MiddleVPBB->getNumSuccessors() == 2) {
2645
- // Order is strict: first is the exit block, second is the scalar preheader.
2646
- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
2647
- } else {
2648
- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
2649
- }
2650
-
2651
- VPBuilder ScalarPHBuilder(ScalarPHVPBB);
2652
- auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
2653
- VPInstruction::ResumePhi,
2654
- {Plan.getOrAddLiveIn(EndValue), Plan.getOrAddLiveIn(II.getStartValue())},
2655
- OrigPhi->getDebugLoc(), "bc.resume.val");
2656
-
2657
- auto *ScalarLoopHeader =
2658
- cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor());
2659
- addOperandToPhiInVPIRBasicBlock(ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
2660
2633
InductionBypassValues[OrigPhi] = {AdditionalBypass.first ,
2661
2634
EndValueFromAdditionalBypass};
2662
2635
}
@@ -7704,10 +7677,22 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7704
7677
ILV.getOrCreateVectorTripCount (nullptr ),
7705
7678
CanonicalIVStartValue, State);
7706
7679
7680
+ VPBasicBlock *MiddleVPBB =
7681
+ cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7682
+
7683
+ VPBasicBlock *ScalarPHVPBB = nullptr ;
7684
+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
7685
+ // Order is strict: first is the exit block, second is the scalar
7686
+ // preheader.
7687
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
7688
+ } else {
7689
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
7690
+ }
7691
+
7707
7692
BestVPlan.execute (&State);
7708
7693
7709
7694
// 2.5 Collect reduction resume values.
7710
- auto *ExitVPBB =
7695
+ VPBasicBlock *ExitVPBB =
7711
7696
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7712
7697
for (VPRecipeBase &R : *ExitVPBB) {
7713
7698
createAndCollectMergePhiForReduction (
@@ -7992,6 +7977,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
7992
7977
// Generate a resume induction for the vector epilogue and put it in the
7993
7978
// vector epilogue preheader
7994
7979
Type *IdxTy = Legal->getWidestInductionType ();
7980
+
7995
7981
PHINode *EPResumeVal = PHINode::Create (IdxTy, 2 , " vec.epilog.resume.val" );
7996
7982
EPResumeVal->insertBefore (LoopVectorPreHeader->getFirstNonPHIIt ());
7997
7983
EPResumeVal->addIncoming (EPI.VectorTripCount , VecEpilogueIterationCountCheck);
@@ -8879,6 +8865,74 @@ addUsersInExitBlock(VPlan &Plan,
8879
8865
}
8880
8866
}
8881
8867
8868
+ static void addResumeValuesForInductions (VPlan &Plan) {
8869
+ VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
8870
+ VPBasicBlock *Header = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
8871
+
8872
+ VPBuilder Builder (
8873
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSinglePredecessor ()));
8874
+ for (VPRecipeBase &R : Header->phis ()) {
8875
+ PHINode *OrigPhi;
8876
+ const InductionDescriptor *ID;
8877
+ VPValue *Start;
8878
+ VPValue *Step;
8879
+ Type *ScalarTy;
8880
+ bool IsCanonical = false ;
8881
+ if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
8882
+ if (WideIV->getTruncInst ())
8883
+ continue ;
8884
+ OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue ());
8885
+ ID = &WideIV->getInductionDescriptor ();
8886
+ Start = WideIV->getStartValue ();
8887
+ Step = WideIV->getStepValue ();
8888
+ ScalarTy = WideIV->getScalarType ();
8889
+ IsCanonical = WideIV->isCanonical ();
8890
+ } else if (auto *WideIV = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
8891
+ OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue ());
8892
+ ID = &WideIV->getInductionDescriptor ();
8893
+ Start = WideIV->getStartValue ();
8894
+ Step = WideIV->getOperand (1 );
8895
+ ScalarTy = Start->getLiveInIRValue ()->getType ();
8896
+ } else {
8897
+ continue ;
8898
+ }
8899
+
8900
+ VPValue *EndValue = &Plan.getVectorTripCount ();
8901
+ if (!IsCanonical) {
8902
+ EndValue = Builder.createDerivedIV (
8903
+ ID->getKind (),
8904
+ dyn_cast_or_null<FPMathOperator>(ID->getInductionBinOp ()), Start,
8905
+ &Plan.getVectorTripCount (), Step);
8906
+ }
8907
+
8908
+ if (ScalarTy != TypeInfo.inferScalarType (EndValue)) {
8909
+ EndValue =
8910
+ Builder.createScalarCast (Instruction::Trunc, EndValue, ScalarTy);
8911
+ }
8912
+
8913
+ VPBasicBlock *MiddleVPBB =
8914
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
8915
+
8916
+ VPBasicBlock *ScalarPHVPBB = nullptr ;
8917
+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
8918
+ // Order is strict: first is the exit block, second is the scalar
8919
+ // preheader.
8920
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
8921
+ } else {
8922
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
8923
+ }
8924
+
8925
+ VPBuilder ScalarPHBuilder (ScalarPHVPBB);
8926
+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
8927
+ VPInstruction::ResumePhi, {EndValue, Start}, OrigPhi->getDebugLoc (),
8928
+ " bc.resume.val" );
8929
+
8930
+ auto *ScalarLoopHeader =
8931
+ cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ());
8932
+ addOperandToPhiInVPIRBasicBlock (ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
8933
+ }
8934
+ }
8935
+
8882
8936
// / Handle live-outs for first order reductions, both in the scalar preheader
8883
8937
// / and the original exit block:
8884
8938
// / 1. Feed a resume value for every FOR from the vector loop to the scalar
@@ -9174,6 +9228,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9174
9228
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
9175
9229
addLiveOutsForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9176
9230
addUsersInExitBlock (*Plan, ExitUsersToFix);
9231
+ addResumeValuesForInductions (*Plan);
9177
9232
9178
9233
// ---------------------------------------------------------------------------
9179
9234
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9279,6 +9334,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
9279
9334
bool HasNUW = true ;
9280
9335
addCanonicalIVRecipes (*Plan, Legal->getWidestInductionType (), HasNUW,
9281
9336
DebugLoc ());
9337
+ addResumeValuesForInductions (*Plan);
9282
9338
assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
9283
9339
return Plan;
9284
9340
}
@@ -9562,7 +9618,8 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
9562
9618
State.Builder , CanonicalIV, getStartValue ()->getLiveInIRValue (), Step,
9563
9619
Kind, cast_if_present<BinaryOperator>(FPBinOp));
9564
9620
DerivedIV->setName (" offset.idx" );
9565
- assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");
9621
+ assert ((isa<Constant>(CanonicalIV) || DerivedIV != CanonicalIV) &&
9622
+ " IV didn't need transforming?" );
9566
9623
9567
9624
State.set (this , DerivedIV, VPLane (0 ));
9568
9625
}
@@ -10231,6 +10288,50 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10231
10288
EPI, &LVL, &CM, BFI, PSI, Checks,
10232
10289
*BestMainPlan);
10233
10290
10291
+ VPlan &BestEpiPlan = LVP.getPlanFor (EPI.EpilogueVF );
10292
+ // Collect PHI nodes of wide inductions in the VPlan for the epilogue. Those will need their resume-values computed from the main vector loop. Others can be removed in the main VPlan.
10293
+ SmallPtrSet<PHINode *, 2 > WidenedPhis;
10294
+ for (VPRecipeBase &R :
10295
+ BestEpiPlan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
10296
+ if (!isa<VPWidenIntOrFpInductionRecipe,
10297
+ VPWidenPointerInductionRecipe>(&R))
10298
+ continue ;
10299
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R))
10300
+ WidenedPhis.insert (
10301
+ cast<VPWidenIntOrFpInductionRecipe>(&R)->getPHINode ());
10302
+ else
10303
+ WidenedPhis.insert (
10304
+ cast<PHINode>(R.getVPSingleValue ()->getUnderlyingValue ()));
10305
+ }
10306
+ VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(
10307
+ BestMainPlan->getVectorLoopRegion ()->getSingleSuccessor ());
10308
+
10309
+ VPBasicBlock *ScalarPHVPBB = nullptr ;
10310
+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
10311
+ // Order is strict: first is the exit block, second is the scalar
10312
+ // preheader.
10313
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
10314
+ } else {
10315
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
10316
+ }
10317
+
10318
+ for (VPRecipeBase &R :
10319
+ *cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ())) {
10320
+ auto *VPIRInst = cast<VPIRInstruction>(&R);
10321
+ auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction ());
10322
+ if (!IRI)
10323
+ break ;
10324
+ if (WidenedPhis.contains (IRI) ||
10325
+ !LVL.getInductionVars ().contains (IRI))
10326
+ continue ;
10327
+ VPRecipeBase *ResumePhi =
10328
+ VPIRInst->getOperand (0 )->getDefiningRecipe ();
10329
+ VPIRInst->setOperand (0 , BestMainPlan->getOrAddLiveIn (
10330
+ Constant::getNullValue (IRI->getType ())));
10331
+ ResumePhi->eraseFromParent ();
10332
+ }
10333
+ VPlanTransforms::removeDeadRecipes (*BestMainPlan);
10334
+
10234
10335
auto ExpandedSCEVs = LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF ,
10235
10336
*BestMainPlan, MainILV, DT, true );
10236
10337
++LoopsVectorized;
@@ -10239,7 +10340,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10239
10340
// edges from the first pass.
10240
10341
EPI.MainLoopVF = EPI.EpilogueVF ;
10241
10342
EPI.MainLoopUF = EPI.EpilogueUF ;
10242
- VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
10243
10343
EpilogueVectorizerEpilogueLoop EpilogILV (L, PSE, LI, DT, TLI, TTI, AC,
10244
10344
ORE, EPI, &LVL, &CM, BFI, PSI,
10245
10345
Checks, BestEpiPlan);
0 commit comments