@@ -523,7 +523,7 @@ class InnerLoopVectorizer {
523
523
/// and the resume values can come from an additional bypass block, the \p
524
524
/// AdditionalBypass pair provides information about the bypass block and the
525
525
/// end value on the edge from bypass to this loop.
526
- void createInductionResumeValue (
526
+ void createInductionBypassValue (
527
527
PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
528
528
ArrayRef<BasicBlock *> BypassBlocks,
529
529
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
@@ -574,15 +574,11 @@ class InnerLoopVectorizer {
574
574
/// vector loop preheader, middle block and scalar preheader.
575
575
void createVectorLoopSkeleton(StringRef Prefix);
576
576
577
- /// Create new phi nodes for the induction variables to resume iteration count
578
- /// in the scalar epilogue, from where the vectorized loop left off.
579
- /// In cases where the loop skeleton is more complicated (eg. epilogue
580
- /// vectorization) and the resume values can come from an additional bypass
581
- /// block, the \p AdditionalBypass pair provides information about the bypass
582
- /// block and the end value on the edge from bypass to this loop.
583
- void createInductionResumeValues(
577
+ /// Create values for the induction variables to resume iteration count
578
+ /// in bypass block.
579
+ void createInductionBypassValues(
584
580
const SCEV2ValueTy &ExpandedSCEVs,
585
- std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr} );
581
+ std::pair<BasicBlock *, Value *> AdditionalBypass);
586
582
587
583
/// Allow subclasses to override and print debug traces before/after vplan
588
584
/// execution, when trace information is requested.
@@ -2602,30 +2598,19 @@ static void addOperandToPhiInVPIRBasicBlock(VPIRBasicBlock *VPBB, PHINode *P,
2602
2598
}
2603
2599
}
2604
2600
2605
- void InnerLoopVectorizer::createInductionResumeValue (
2601
+ void InnerLoopVectorizer::createInductionBypassValue (
2606
2602
PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
2607
2603
ArrayRef<BasicBlock *> BypassBlocks,
2608
2604
std::pair<BasicBlock *, Value *> AdditionalBypass) {
2609
- Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
2610
- assert(VectorTripCount && "Expected valid arguments");
2611
-
2612
2605
Instruction *OldInduction = Legal->getPrimaryInduction();
2613
- Value *EndValue = nullptr;
2614
2606
Value *EndValueFromAdditionalBypass = AdditionalBypass.second;
2615
- if (OrigPhi == OldInduction) {
2616
- // We know what the end value is.
2617
- EndValue = VectorTripCount;
2618
- } else {
2607
+ if (OrigPhi != OldInduction) {
2619
2608
IRBuilder<> B(LoopVectorPreHeader->getTerminator());
2620
2609
2621
2610
// Fast-math-flags propagate from the original induction instruction.
2622
2611
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
2623
2612
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
2624
2613
2625
- EndValue = emitTransformedIndex(B, VectorTripCount, II.getStartValue(),
2626
- Step, II.getKind(), II.getInductionBinOp());
2627
- EndValue->setName("ind.end");
2628
-
2629
2614
// Compute the end value for the additional bypass (if applicable).
2630
2615
if (AdditionalBypass.first) {
2631
2616
B.SetInsertPoint(AdditionalBypass.first,
@@ -2637,26 +2622,6 @@ void InnerLoopVectorizer::createInductionResumeValue(
2637
2622
}
2638
2623
}
2639
2624
2640
- VPBasicBlock *MiddleVPBB =
2641
- cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
2642
-
2643
- VPBasicBlock *ScalarPHVPBB = nullptr;
2644
- if (MiddleVPBB->getNumSuccessors() == 2) {
2645
- // Order is strict: first is the exit block, second is the scalar preheader.
2646
- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
2647
- } else {
2648
- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
2649
- }
2650
-
2651
- VPBuilder ScalarPHBuilder(ScalarPHVPBB);
2652
- auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
2653
- VPInstruction::ResumePhi,
2654
- {Plan.getOrAddLiveIn(EndValue), Plan.getOrAddLiveIn(II.getStartValue())},
2655
- OrigPhi->getDebugLoc(), "bc.resume.val");
2656
-
2657
- auto *ScalarLoopHeader =
2658
- cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor());
2659
- addOperandToPhiInVPIRBasicBlock(ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
2660
2625
InductionBypassValues[OrigPhi] = {AdditionalBypass.first,
2661
2626
EndValueFromAdditionalBypass};
2662
2627
}
@@ -2675,23 +2640,16 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
2675
2640
return I->second;
2676
2641
}
2677
2642
2678
- void InnerLoopVectorizer::createInductionResumeValues (
2643
+ void InnerLoopVectorizer::createInductionBypassValues (
2679
2644
const SCEV2ValueTy &ExpandedSCEVs,
2680
2645
std::pair<BasicBlock *, Value *> AdditionalBypass) {
2681
- assert(((AdditionalBypass.first && AdditionalBypass.second) ||
2682
- (!AdditionalBypass.first && !AdditionalBypass.second)) &&
2683
- "Inconsistent information about additional bypass.");
2684
- // We are going to resume the execution of the scalar loop.
2685
- // Go over all of the induction variables that we found and fix the
2686
- // PHIs that are left in the scalar version of the loop.
2687
- // The starting values of PHI nodes depend on the counter of the last
2688
- // iteration in the vectorized loop.
2689
- // If we come from a bypass edge then we need to start from the original
2690
- // start value.
2646
+ assert(AdditionalBypass.first && AdditionalBypass.second &&
2647
+ "Must have bypass information");
2648
+
2691
2649
for (const auto &InductionEntry : Legal->getInductionVars()) {
2692
2650
PHINode *OrigPhi = InductionEntry.first;
2693
2651
const InductionDescriptor &II = InductionEntry.second;
2694
- createInductionResumeValue (OrigPhi, II, getExpandedStep(II, ExpandedSCEVs),
2652
+ createInductionBypassValue (OrigPhi, II, getExpandedStep(II, ExpandedSCEVs),
2695
2653
LoopBypassBlocks, AdditionalBypass);
2696
2654
}
2697
2655
}
@@ -2754,8 +2712,8 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
2754
2712
// faster.
2755
2713
emitMemRuntimeChecks(LoopScalarPreHeader);
2756
2714
2757
- // Emit phis for the new starting index of the scalar loop.
2758
- createInductionResumeValues(ExpandedSCEVs );
2715
+ Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
2716
+ assert(VectorTripCount && "Expected valid arguments" );
2759
2717
2760
2718
return {LoopVectorPreHeader, nullptr};
2761
2719
}
@@ -7719,6 +7677,18 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7719
7677
ILV.getOrCreateVectorTripCount(nullptr),
7720
7678
CanonicalIVStartValue, State);
7721
7679
7680
+ VPBasicBlock *MiddleVPBB =
7681
+ cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7682
+
7683
+ VPBasicBlock *ScalarPHVPBB = nullptr;
7684
+ if (MiddleVPBB->getNumSuccessors() == 2) {
7685
+ // Order is strict: first is the exit block, second is the scalar
7686
+ // preheader.
7687
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
7688
+ } else {
7689
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
7690
+ }
7691
+
7722
7692
BestVPlan.execute(&State);
7723
7693
7724
7694
// 2.5 Collect reduction resume values.
@@ -7836,7 +7806,7 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
7836
7806
} else
7837
7807
continue;
7838
7808
7839
- createInductionResumeValue (IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
7809
+ createInductionBypassValue (IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
7840
7810
LoopBypassBlocks);
7841
7811
}
7842
7812
@@ -8006,20 +7976,22 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
8006
7976
// Generate a resume induction for the vector epilogue and put it in the
8007
7977
// vector epilogue preheader
8008
7978
Type *IdxTy = Legal->getWidestInductionType();
7979
+
8009
7980
PHINode *EPResumeVal = PHINode::Create(IdxTy, 2, "vec.epilog.resume.val");
8010
7981
EPResumeVal->insertBefore(LoopVectorPreHeader->getFirstNonPHIIt());
8011
7982
EPResumeVal->addIncoming(EPI.VectorTripCount, VecEpilogueIterationCountCheck);
8012
7983
EPResumeVal->addIncoming(ConstantInt::get(IdxTy, 0),
8013
7984
EPI.MainLoopIterationCountCheck);
8014
7985
8015
- // Generate induction resume values. These variables save the new starting
8016
- // indexes for the scalar loop. They are used to test if there are any tail
8017
- // iterations left once the vector loop has completed.
7986
+ Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
7987
+ assert(VectorTripCount && "Expected valid arguments");
7988
+
7989
+ // Generate induction resume values for the bypass blocks.
8018
7990
// Note that when the vectorized epilogue is skipped due to iteration count
8019
7991
// check, then the resume value for the induction variable comes from
8020
7992
// the trip count of the main vector loop, hence passing the AdditionalBypass
8021
7993
// argument.
8022
- createInductionResumeValues (ExpandedSCEVs,
7994
+ createInductionBypassValues (ExpandedSCEVs,
8023
7995
{VecEpilogueIterationCountCheck,
8024
7996
EPI.VectorTripCount} /* AdditionalBypass */);
8025
7997
@@ -8932,6 +8904,74 @@ addUsersInExitBlock(VPlan &Plan,
8932
8904
}
8933
8905
}
8934
8906
8907
+ static void addResumeValuesForInductions(VPlan &Plan) {
8908
+ VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
8909
+ VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
8910
+
8911
+ VPBuilder Builder(
8912
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor()));
8913
+ for (VPRecipeBase &R : Header->phis()) {
8914
+ PHINode *OrigPhi;
8915
+ const InductionDescriptor *ID;
8916
+ VPValue *Start;
8917
+ VPValue *Step;
8918
+ Type *ScalarTy;
8919
+ bool IsCanonical = false;
8920
+ if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
8921
+ if (WideIV->getTruncInst())
8922
+ continue;
8923
+ OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue());
8924
+ ID = &WideIV->getInductionDescriptor();
8925
+ Start = WideIV->getStartValue();
8926
+ Step = WideIV->getStepValue();
8927
+ ScalarTy = WideIV->getScalarType();
8928
+ IsCanonical = WideIV->isCanonical();
8929
+ } else if (auto *WideIV = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
8930
+ OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue());
8931
+ ID = &WideIV->getInductionDescriptor();
8932
+ Start = WideIV->getStartValue();
8933
+ Step = WideIV->getOperand(1);
8934
+ ScalarTy = Start->getLiveInIRValue()->getType();
8935
+ } else {
8936
+ continue;
8937
+ }
8938
+
8939
+ VPValue *EndValue = &Plan.getVectorTripCount();
8940
+ if (!IsCanonical) {
8941
+ EndValue = Builder.createDerivedIV(
8942
+ ID->getKind(),
8943
+ dyn_cast_or_null<FPMathOperator>(ID->getInductionBinOp()), Start,
8944
+ &Plan.getVectorTripCount(), Step);
8945
+ }
8946
+
8947
+ if (ScalarTy != TypeInfo.inferScalarType(EndValue)) {
8948
+ EndValue =
8949
+ Builder.createScalarCast(Instruction::Trunc, EndValue, ScalarTy);
8950
+ }
8951
+
8952
+ VPBasicBlock *MiddleVPBB =
8953
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
8954
+
8955
+ VPBasicBlock *ScalarPHVPBB = nullptr;
8956
+ if (MiddleVPBB->getNumSuccessors() == 2) {
8957
+ // Order is strict: first is the exit block, second is the scalar
8958
+ // preheader.
8959
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
8960
+ } else {
8961
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
8962
+ }
8963
+
8964
+ VPBuilder ScalarPHBuilder(ScalarPHVPBB);
8965
+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
8966
+ VPInstruction::ResumePhi, {EndValue, Start}, OrigPhi->getDebugLoc(),
8967
+ "bc.resume.val");
8968
+
8969
+ auto *ScalarLoopHeader =
8970
+ cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor());
8971
+ addOperandToPhiInVPIRBasicBlock(ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
8972
+ }
8973
+ }
8974
+
8935
8975
/// Handle users in the exit block for first order reductions in the original
8936
8976
/// exit block. The penultimate value of recurrences is fed to their LCSSA phi
8937
8977
/// users in the original exit block using the VPIRInstruction wrapping to the
@@ -9205,6 +9245,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9205
9245
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
9206
9246
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9207
9247
addUsersInExitBlock(*Plan, ExitUsersToFix);
9248
+ addResumeValuesForInductions(*Plan);
9249
+
9208
9250
// ---------------------------------------------------------------------------
9209
9251
// Transform initial VPlan: Apply previously taken decisions, in order, to
9210
9252
// bring the VPlan to its final state.
@@ -9315,6 +9357,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
9315
9357
bool HasNUW = true;
9316
9358
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
9317
9359
DebugLoc());
9360
+ addResumeValuesForInductions(*Plan);
9318
9361
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
9319
9362
return Plan;
9320
9363
}
@@ -9599,7 +9642,8 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
9599
9642
State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
9600
9643
Kind, cast_if_present<BinaryOperator>(FPBinOp));
9601
9644
DerivedIV->setName(Name);
9602
- assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");
9645
+ /* assert((isa<Constant>(CanonicalIV) || DerivedIV != CanonicalIV) &&*/
9646
+ /*"IV didn't need transforming?");*/
9603
9647
9604
9648
State.set(this, DerivedIV, VPLane(0));
9605
9649
}
@@ -10268,6 +10312,52 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10268
10312
EPI, &LVL, &CM, BFI, PSI, Checks,
10269
10313
*BestMainPlan);
10270
10314
10315
+ VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
10316
+ // Collect PHI nodes of wide inductions in the VPlan for the epilogue.
10317
+ // Those will need their resume-values computed from the main vector
10318
+ // loop. Others can be removed in the main VPlan.
10319
+ SmallPtrSet<PHINode *, 2> WidenedPhis;
10320
+ for (VPRecipeBase &R :
10321
+ BestEpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
10322
+ if (!isa<VPWidenIntOrFpInductionRecipe,
10323
+ VPWidenPointerInductionRecipe>(&R))
10324
+ continue;
10325
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R))
10326
+ WidenedPhis.insert(
10327
+ cast<VPWidenIntOrFpInductionRecipe>(&R)->getPHINode());
10328
+ else
10329
+ WidenedPhis.insert(
10330
+ cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
10331
+ }
10332
+ VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(
10333
+ BestMainPlan->getVectorLoopRegion()->getSingleSuccessor());
10334
+
10335
+ VPBasicBlock *ScalarPHVPBB = nullptr;
10336
+ if (MiddleVPBB->getNumSuccessors() == 2) {
10337
+ // Order is strict: first is the exit block, second is the scalar
10338
+ // preheader.
10339
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
10340
+ } else {
10341
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
10342
+ }
10343
+
10344
+ for (VPRecipeBase &R :
10345
+ *cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
10346
+ auto *VPIRInst = cast<VPIRInstruction>(&R);
10347
+ auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction());
10348
+ if (!IRI)
10349
+ break;
10350
+ if (WidenedPhis.contains(IRI) ||
10351
+ !LVL.getInductionVars().contains(IRI))
10352
+ continue;
10353
+ VPRecipeBase *ResumePhi =
10354
+ VPIRInst->getOperand(0)->getDefiningRecipe();
10355
+ VPIRInst->setOperand(0, BestMainPlan->getOrAddLiveIn(
10356
+ Constant::getNullValue(IRI->getType())));
10357
+ ResumePhi->eraseFromParent();
10358
+ }
10359
+ VPlanTransforms::removeDeadRecipes(*BestMainPlan);
10360
+
10271
10361
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
10272
10362
*BestMainPlan, MainILV, DT, false);
10273
10363
++LoopsVectorized;
@@ -10276,7 +10366,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10276
10366
// edges from the first pass.
10277
10367
EPI.MainLoopVF = EPI.EpilogueVF;
10278
10368
EPI.MainLoopUF = EPI.EpilogueUF;
10279
- VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
10280
10369
EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
10281
10370
ORE, EPI, &LVL, &CM, BFI, PSI,
10282
10371
Checks, BestEpiPlan);
0 commit comments