Skip to content

Commit e17122f

Browse files
committed
[VPlan] Add canonical IV during construction (NFC).
This addresses an existing TODO by moving the existing code that adds the canonical IV recipes into the initial skeleton construction, at the same place where the corresponding region will be introduced.
1 parent 6a16da7 commit e17122f

File tree

5 files changed

+61
-39
lines changed

5 files changed

+61
-39
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 14 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -9163,31 +9163,6 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
91639163
}
91649164
}
91659165

9166-
// Add the necessary canonical IV and branch recipes required to control the
9167-
// loop.
9168-
static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
9169-
DebugLoc DL) {
9170-
Value *StartIdx = ConstantInt::get(IdxTy, 0);
9171-
auto *StartV = Plan.getOrAddLiveIn(StartIdx);
9172-
9173-
// Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
9174-
auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
9175-
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
9176-
VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
9177-
Header->insert(CanonicalIVPHI, Header->begin());
9178-
9179-
VPBuilder Builder(TopRegion->getExitingBasicBlock());
9180-
// Add a VPInstruction to increment the scalar canonical IV by VF * UF.
9181-
auto *CanonicalIVIncrement = Builder.createOverflowingOp(
9182-
Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {HasNUW, false}, DL,
9183-
"index.next");
9184-
CanonicalIVPHI->addOperand(CanonicalIVIncrement);
9185-
9186-
// Add the BranchOnCount VPInstruction to the latch.
9187-
Builder.createNaryOp(VPInstruction::BranchOnCount,
9188-
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
9189-
}
9190-
91919166
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
91929167
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
91939168
/// the end value of the induction.
@@ -9459,7 +9434,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
94599434
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
94609435
VPlanTransforms::prepareForVectorization(
94619436
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
9462-
CM.foldTailByMasking(), OrigLoop);
9437+
CM.foldTailByMasking(), OrigLoop,
9438+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
94639439
VPlanTransforms::createLoopRegions(*Plan);
94649440

94659441
// Don't use getDecisionAndClampRange here, because we don't know the UF
@@ -9470,14 +9446,22 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
94709446
for (ElementCount VF : Range)
94719447
IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF);
94729448

9473-
DebugLoc DL = getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
94749449
TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow);
94759450
// Use NUW for the induction increment if we proved that it won't overflow in
94769451
// the vector loop or when not folding the tail. In the latter case, we know
94779452
// that the canonical induction increment will not overflow as the vector trip
94789453
// count is >= increment and a multiple of the increment.
94799454
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
9480-
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);
9455+
if (!HasNUW) {
9456+
auto *IVInc = Plan->getVectorLoopRegion()
9457+
->getExitingBasicBlock()
9458+
->getTerminator()
9459+
->getOperand(0);
9460+
assert(match(IVInc, m_VPInstruction<Instruction::Add>(
9461+
m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
9462+
"Did not find the canonical IV increment");
9463+
cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
9464+
}
94819465

94829466
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
94839467
Builder);
@@ -9751,19 +9735,13 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
97519735
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
97529736
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
97539737
VPlanTransforms::prepareForVectorization(
9754-
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
9738+
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
9739+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
97559740
VPlanTransforms::createLoopRegions(*Plan);
97569741

97579742
for (ElementCount VF : Range)
97589743
Plan->addVF(VF);
97599744

9760-
// Tail folding is not supported for outer loops, so the induction increment
9761-
// is guaranteed to not wrap.
9762-
bool HasNUW = true;
9763-
addCanonicalIVRecipes(
9764-
*Plan, Legal->getWidestInductionType(), HasNUW,
9765-
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
9766-
97679745
if (!VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
97689746
Plan,
97699747
[this](PHINode *P) {

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -736,6 +736,11 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
736736
return R && classof(R);
737737
}
738738

739+
static inline bool classof(const VPValue *V) {
740+
auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
741+
return R && classof(R);
742+
}
743+
739744
/// Drop all poison-generating flags.
740745
void dropPoisonGeneratingFlags() {
741746
// NOTE: This needs to be kept in-sync with

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "VPlan.h"
1616
#include "VPlanCFG.h"
1717
#include "VPlanDominatorTree.h"
18+
#include "VPlanPatternMatch.h"
1819
#include "VPlanTransforms.h"
1920
#include "llvm/Analysis/LoopInfo.h"
2021
#include "llvm/Analysis/LoopIterator.h"
@@ -461,10 +462,44 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
461462
VPBlockUtils::connectBlocks(R, Succ);
462463
}
463464

465+
// Add the necessary canonical IV and branch recipes required to control the
466+
// loop.
467+
static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
468+
VPBasicBlock *LatchVPBB, Type *IdxTy,
469+
DebugLoc DL) {
470+
using namespace VPlanPatternMatch;
471+
Value *StartIdx = ConstantInt::get(IdxTy, 0);
472+
auto *StartV = Plan.getOrAddLiveIn(StartIdx);
473+
474+
// Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
475+
auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
476+
HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
477+
478+
// We are about to replace the branch to exit the region. Remove the original
479+
// BranchOnCond, if there is any.
480+
if (!LatchVPBB->empty() &&
481+
match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue())))
482+
LatchVPBB->getTerminator()->eraseFromParent();
483+
484+
VPBuilder Builder(LatchVPBB);
485+
// Add a VPInstruction to increment the scalar canonical IV by VF * UF.
486+
// Initially the induction increment is guaranteed to not wrap, but that may
487+
// change later, e.g. when tail-folding, in which case the flags need to be dropped.
488+
auto *CanonicalIVIncrement = Builder.createOverflowingOp(
489+
Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {true, false}, DL,
490+
"index.next");
491+
CanonicalIVPHI->addOperand(CanonicalIVIncrement);
492+
493+
// Add the BranchOnCount VPInstruction to the latch.
494+
Builder.createNaryOp(VPInstruction::BranchOnCount,
495+
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
496+
}
497+
464498
void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
465499
PredicatedScalarEvolution &PSE,
466500
bool RequiresScalarEpilogueCheck,
467-
bool TailFolded, Loop *TheLoop) {
501+
bool TailFolded, Loop *TheLoop,
502+
DebugLoc IVDL) {
468503
VPDominatorTree VPDT;
469504
VPDT.recalculate(Plan);
470505

@@ -479,6 +514,9 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
479514
VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB);
480515
LatchVPB->swapSuccessors();
481516

517+
addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
518+
cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
519+
482520
// Create SCEV and VPValue for the trip count.
483521
// We use the symbolic max backedge-taken-count, which works also when
484522
// vectorizing loops with uncountable early exits.

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ struct VPlanTransforms {
6868
static void prepareForVectorization(VPlan &Plan, Type *InductionTy,
6969
PredicatedScalarEvolution &PSE,
7070
bool RequiresScalarEpilogueCheck,
71-
bool TailFolded, Loop *TheLoop);
71+
bool TailFolded, Loop *TheLoop,
72+
DebugLoc IVDL);
7273

7374
/// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
7475
/// flat CFG into a hierarchical CFG.

llvm/unittests/Transforms/Vectorize/VPlanTestBase.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ class VPlanTestIRBase : public testing::Test {
7373
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
7474
auto Plan = VPlanTransforms::buildPlainCFG(L, *LI, VPB2IRBB);
7575
VPlanTransforms::prepareForVectorization(*Plan, IntegerType::get(*Ctx, 64),
76-
PSE, true, false, L);
76+
PSE, true, false, L, {});
7777
VPlanTransforms::createLoopRegions(*Plan);
7878
return Plan;
7979
}

0 commit comments

Comments
 (0)