@@ -9163,6 +9163,31 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
9163
9163
}
9164
9164
}
9165
9165
9166
+ // Add the necessary canonical IV and branch recipes required to control the
9167
+ // loop.
9168
+ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
9169
+ DebugLoc DL) {
9170
+ Value *StartIdx = ConstantInt::get(IdxTy, 0);
9171
+ auto *StartV = Plan.getOrAddLiveIn(StartIdx);
9172
+
9173
+ // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
9174
+ auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
9175
+ VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
9176
+ VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
9177
+ Header->insert(CanonicalIVPHI, Header->begin());
9178
+
9179
+ VPBuilder Builder(TopRegion->getExitingBasicBlock());
9180
+ // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
9181
+ auto *CanonicalIVIncrement = Builder.createOverflowingOp(
9182
+ Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {HasNUW, false}, DL,
9183
+ "index.next");
9184
+ CanonicalIVPHI->addOperand(CanonicalIVIncrement);
9185
+
9186
+ // Add the BranchOnCount VPInstruction to the latch.
9187
+ Builder.createNaryOp(VPInstruction::BranchOnCount,
9188
+ {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
9189
+ }
9190
+
9166
9191
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
9167
9192
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
9168
9193
/// the end value of the induction.
@@ -9434,8 +9459,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9434
9459
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
9435
9460
VPlanTransforms::prepareForVectorization(
9436
9461
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
9437
- CM.foldTailByMasking(), OrigLoop,
9438
- getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
9462
+ CM.foldTailByMasking(), OrigLoop);
9439
9463
VPlanTransforms::createLoopRegions(*Plan);
9440
9464
9441
9465
// Don't use getDecisionAndClampRange here, because we don't know the UF
@@ -9446,22 +9470,14 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9446
9470
for (ElementCount VF : Range)
9447
9471
IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF);
9448
9472
9473
+ DebugLoc DL = getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
9449
9474
TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow);
9450
9475
// Use NUW for the induction increment if we proved that it won't overflow in
9451
9476
// the vector loop or when not folding the tail. In the latter case, we know
9452
9477
// that the canonical induction increment will not overflow as the vector trip
9453
9478
// count is >= increment and a multiple of the increment.
9454
9479
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
9455
- if (!HasNUW) {
9456
- auto *IVInc = Plan->getVectorLoopRegion()
9457
- ->getExitingBasicBlock()
9458
- ->getTerminator()
9459
- ->getOperand(0);
9460
- assert(match(IVInc, m_VPInstruction<Instruction::Add>(
9461
- m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
9462
- "Did not find the canonical IV increment");
9463
- cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
9464
- }
9480
+ addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);
9465
9481
9466
9482
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
9467
9483
Builder);
@@ -9735,13 +9751,19 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
9735
9751
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
9736
9752
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
9737
9753
VPlanTransforms::prepareForVectorization(
9738
- *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
9739
- getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
9754
+ *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
9740
9755
VPlanTransforms::createLoopRegions(*Plan);
9741
9756
9742
9757
for (ElementCount VF : Range)
9743
9758
Plan->addVF(VF);
9744
9759
9760
+ // Tail folding is not supported for outer loops, so the induction increment
9761
+ // is guaranteed to not wrap.
9762
+ bool HasNUW = true;
9763
+ addCanonicalIVRecipes(
9764
+ *Plan, Legal->getWidestInductionType(), HasNUW,
9765
+ getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
9766
+
9745
9767
if (!VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
9746
9768
Plan,
9747
9769
[this](PHINode *P) {
0 commit comments