Skip to content

Commit edb690d

Browse files
committed
Reapply "[VPlan] Add canonical IV during construction (NFC)."
This reverts commit d431921. Missing gtests have been updated. Original message: This addresses an existing TODO and simply moves the current code to add canonical IV recipes to the initial skeleton construction, at the same place where the corresponding region will be introduced.
1 parent 5b02a26 commit edb690d

File tree

8 files changed

+115
-80
lines changed

8 files changed

+115
-80
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 14 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -9159,31 +9159,6 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
91599159
}
91609160
}
91619161

9162-
// Add the necessary canonical IV and branch recipes required to control the
9163-
// loop.
9164-
static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
9165-
DebugLoc DL) {
9166-
Value *StartIdx = ConstantInt::get(IdxTy, 0);
9167-
auto *StartV = Plan.getOrAddLiveIn(StartIdx);
9168-
9169-
// Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
9170-
auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
9171-
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
9172-
VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
9173-
Header->insert(CanonicalIVPHI, Header->begin());
9174-
9175-
VPBuilder Builder(TopRegion->getExitingBasicBlock());
9176-
// Add a VPInstruction to increment the scalar canonical IV by VF * UF.
9177-
auto *CanonicalIVIncrement = Builder.createOverflowingOp(
9178-
Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {HasNUW, false}, DL,
9179-
"index.next");
9180-
CanonicalIVPHI->addOperand(CanonicalIVIncrement);
9181-
9182-
// Add the BranchOnCount VPInstruction to the latch.
9183-
Builder.createNaryOp(VPInstruction::BranchOnCount,
9184-
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
9185-
}
9186-
91879162
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
91889163
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
91899164
/// the end value of the induction.
@@ -9455,7 +9430,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
94559430
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
94569431
VPlanTransforms::prepareForVectorization(
94579432
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
9458-
CM.foldTailByMasking(), OrigLoop);
9433+
CM.foldTailByMasking(), OrigLoop,
9434+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
94599435
VPlanTransforms::createLoopRegions(*Plan);
94609436

94619437
// Don't use getDecisionAndClampRange here, because we don't know the UF
@@ -9466,14 +9442,22 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
94669442
for (ElementCount VF : Range)
94679443
IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF);
94689444

9469-
DebugLoc DL = getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
94709445
TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow);
94719446
// Use NUW for the induction increment if we proved that it won't overflow in
94729447
// the vector loop or when not folding the tail. In the latter case, we know
94739448
// that the canonical induction increment will not overflow as the vector trip
94749449
// count is >= increment and a multiple of the increment.
94759450
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
9476-
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);
9451+
if (!HasNUW) {
9452+
auto *IVInc = Plan->getVectorLoopRegion()
9453+
->getExitingBasicBlock()
9454+
->getTerminator()
9455+
->getOperand(0);
9456+
assert(match(IVInc, m_VPInstruction<Instruction::Add>(
9457+
m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
9458+
"Did not find the canonical IV increment");
9459+
cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
9460+
}
94779461

94789462
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
94799463
Builder);
@@ -9747,19 +9731,13 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
97479731
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
97489732
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
97499733
VPlanTransforms::prepareForVectorization(
9750-
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
9734+
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
9735+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
97519736
VPlanTransforms::createLoopRegions(*Plan);
97529737

97539738
for (ElementCount VF : Range)
97549739
Plan->addVF(VF);
97559740

9756-
// Tail folding is not supported for outer loops, so the induction increment
9757-
// is guaranteed to not wrap.
9758-
bool HasNUW = true;
9759-
addCanonicalIVRecipes(
9760-
*Plan, Legal->getWidestInductionType(), HasNUW,
9761-
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
9762-
97639741
if (!VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
97649742
Plan,
97659743
[this](PHINode *P) {

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,11 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
722722
return R && classof(R);
723723
}
724724

725+
static inline bool classof(const VPValue *V) {
726+
auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
727+
return R && classof(R);
728+
}
729+
725730
/// Drop all poison-generating flags.
726731
void dropPoisonGeneratingFlags() {
727732
// NOTE: This needs to be kept in-sync with

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "VPlan.h"
1616
#include "VPlanCFG.h"
1717
#include "VPlanDominatorTree.h"
18+
#include "VPlanPatternMatch.h"
1819
#include "VPlanTransforms.h"
1920
#include "llvm/Analysis/LoopInfo.h"
2021
#include "llvm/Analysis/LoopIterator.h"
@@ -461,10 +462,44 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
461462
VPBlockUtils::connectBlocks(R, Succ);
462463
}
463464

465+
// Add the necessary canonical IV and branch recipes required to control the
466+
// loop.
467+
static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
468+
VPBasicBlock *LatchVPBB, Type *IdxTy,
469+
DebugLoc DL) {
470+
using namespace VPlanPatternMatch;
471+
Value *StartIdx = ConstantInt::get(IdxTy, 0);
472+
auto *StartV = Plan.getOrAddLiveIn(StartIdx);
473+
474+
// Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
475+
auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
476+
HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
477+
478+
// We are about to replace the branch to exit the region. Remove the original
479+
// BranchOnCond, if there is any.
480+
if (!LatchVPBB->empty() &&
481+
match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue())))
482+
LatchVPBB->getTerminator()->eraseFromParent();
483+
484+
VPBuilder Builder(LatchVPBB);
485+
// Add a VPInstruction to increment the scalar canonical IV by VF * UF.
486+
// Initially the induction increment is guaranteed to not wrap, but that may
487+
// change later, e.g. when tail-folding, in which case the flags are dropped.
488+
auto *CanonicalIVIncrement = Builder.createOverflowingOp(
489+
Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {true, false}, DL,
490+
"index.next");
491+
CanonicalIVPHI->addOperand(CanonicalIVIncrement);
492+
493+
// Add the BranchOnCount VPInstruction to the latch.
494+
Builder.createNaryOp(VPInstruction::BranchOnCount,
495+
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
496+
}
497+
464498
void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
465499
PredicatedScalarEvolution &PSE,
466500
bool RequiresScalarEpilogueCheck,
467-
bool TailFolded, Loop *TheLoop) {
501+
bool TailFolded, Loop *TheLoop,
502+
DebugLoc IVDL) {
468503
VPDominatorTree VPDT;
469504
VPDT.recalculate(Plan);
470505

@@ -479,6 +514,9 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
479514
VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB);
480515
LatchVPB->swapSuccessors();
481516

517+
addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
518+
cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
519+
482520
// Create SCEV and VPValue for the trip count.
483521
// We use the symbolic max backedge-taken-count, which works also when
484522
// vectorizing loops with uncountable early exits.

llvm/lib/Transforms/Vectorize/VPlanSLP.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,9 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
5858
for (VPRecipeBase &VPI : *VPBB) {
5959
if (isa<VPWidenPHIRecipe>(&VPI))
6060
continue;
61-
auto *VPInst = cast<VPInstruction>(&VPI);
61+
auto *VPInst = dyn_cast<VPInstruction>(&VPI);
62+
if (!VPInst)
63+
continue;
6264
auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
6365
if (!Inst)
6466
continue;

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ struct VPlanTransforms {
6868
static void prepareForVectorization(VPlan &Plan, Type *InductionTy,
6969
PredicatedScalarEvolution &PSE,
7070
bool RequiresScalarEpilogueCheck,
71-
bool TailFolded, Loop *TheLoop);
71+
bool TailFolded, Loop *TheLoop,
72+
DebugLoc IVDL);
7273

7374
/// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
7475
/// flat CFG into a hierarchical CFG.

llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,15 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) {
5151
// Check that the region following the preheader consists of a block for the
5252
// original header and a separate latch.
5353
VPBasicBlock *VecBB = Plan->getVectorLoopRegion()->getEntryBasicBlock();
54-
EXPECT_EQ(7u, VecBB->size());
54+
EXPECT_EQ(10u, VecBB->size());
5555
EXPECT_EQ(0u, VecBB->getNumPredecessors());
5656
EXPECT_EQ(0u, VecBB->getNumSuccessors());
5757
EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB);
5858
EXPECT_EQ(&*Plan, VecBB->getPlan());
5959

6060
auto Iter = VecBB->begin();
61+
auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&*Iter++);
62+
EXPECT_NE(nullptr, CanIV);
6163
VPWidenPHIRecipe *Phi = dyn_cast<VPWidenPHIRecipe>(&*Iter++);
6264
EXPECT_NE(nullptr, Phi);
6365

@@ -100,7 +102,7 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) {
100102
raw_string_ostream OS(FullDump);
101103
Plan->printDOT(OS);
102104
const char *ExpectedStr = R"(digraph VPlan {
103-
graph [labelloc=t, fontsize=30; label="Vectorization Plan\n for UF\>=1\nLive-in vp\<%0\> = vector-trip-count\nLive-in ir\<%N\> = original trip-count\n"]
105+
graph [labelloc=t, fontsize=30; label="Vectorization Plan\n for UF\>=1\nLive-in vp\<%0\> = VF * UF\nLive-in vp\<%1\> = vector-trip-count\nLive-in ir\<%N\> = original trip-count\n"]
104106
node [shape=rect, fontname=Courier, fontsize=30]
105107
edge [fontname=Courier, fontsize=30]
106108
compound=true
@@ -119,20 +121,23 @@ compound=true
119121
label="\<x1\> vector loop"
120122
N2 [label =
121123
"vector.body:\l" +
124+
" EMIT vp\<%2\> = CANONICAL-INDUCTION ir\<0\>, vp\<%index.next\>\l" +
122125
" WIDEN-PHI ir\<%indvars.iv\> = phi ir\<0\>, ir\<%indvars.iv.next\>\l" +
123126
" EMIT ir\<%arr.idx\> = getelementptr ir\<%A\>, ir\<%indvars.iv\>\l" +
124127
" EMIT ir\<%l1\> = load ir\<%arr.idx\>\l" +
125128
" EMIT ir\<%res\> = add ir\<%l1\>, ir\<10\>\l" +
126129
" EMIT store ir\<%res\>, ir\<%arr.idx\>\l" +
127130
" EMIT ir\<%indvars.iv.next\> = add ir\<%indvars.iv\>, ir\<1\>\l" +
128131
" EMIT ir\<%exitcond\> = icmp ir\<%indvars.iv.next\>, ir\<%N\>\l" +
132+
" EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" +
133+
" EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" +
129134
"No successors\l"
130135
]
131136
}
132137
N2 -> N4 [ label="" ltail=cluster_N3]
133138
N4 [label =
134139
"middle.block:\l" +
135-
" EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%0\>\l" +
140+
" EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%1\>\l" +
136141
" EMIT branch-on-cond vp\<%cmp.n\>\l" +
137142
"Successor(s): ir-bb\<for.end\>, scalar.ph\l"
138143
]
@@ -207,12 +212,13 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
207212
// Check that the region following the preheader consists of a block for the
208213
// original header and a separate latch.
209214
VPBasicBlock *VecBB = Plan->getVectorLoopRegion()->getEntryBasicBlock();
210-
EXPECT_EQ(8u, VecBB->size());
215+
EXPECT_EQ(11u, VecBB->size());
211216
EXPECT_EQ(0u, VecBB->getNumPredecessors());
212217
EXPECT_EQ(0u, VecBB->getNumSuccessors());
213218
EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB);
214219

215220
auto Iter = VecBB->begin();
221+
EXPECT_NE(nullptr, dyn_cast<VPCanonicalIVPHIRecipe>(&*Iter++));
216222
EXPECT_NE(nullptr, dyn_cast<VPWidenPHIRecipe>(&*Iter++));
217223
EXPECT_NE(nullptr, dyn_cast<VPWidenGEPRecipe>(&*Iter++));
218224
EXPECT_NE(nullptr, dyn_cast<VPWidenMemoryRecipe>(&*Iter++));
@@ -221,6 +227,8 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
221227
EXPECT_NE(nullptr, dyn_cast<VPWidenRecipe>(&*Iter++));
222228
EXPECT_NE(nullptr, dyn_cast<VPWidenRecipe>(&*Iter++));
223229
EXPECT_NE(nullptr, dyn_cast<VPInstruction>(&*Iter++));
230+
EXPECT_NE(nullptr, dyn_cast<VPInstruction>(&*Iter++));
231+
EXPECT_NE(nullptr, dyn_cast<VPInstruction>(&*Iter++));
224232
EXPECT_EQ(VecBB->end(), Iter);
225233
}
226234

@@ -261,7 +269,7 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoopMultiExit) {
261269
raw_string_ostream OS(FullDump);
262270
Plan->printDOT(OS);
263271
const char *ExpectedStr = R"(digraph VPlan {
264-
graph [labelloc=t, fontsize=30; label="Vectorization Plan\n for UF\>=1\nLive-in vp\<%0\> = vector-trip-count\nLive-in ir\<%N\> = original trip-count\n"]
272+
graph [labelloc=t, fontsize=30; label="Vectorization Plan\n for UF\>=1\nLive-in vp\<%0\> = VF * UF\nLive-in vp\<%1\> = vector-trip-count\nLive-in ir\<%N\> = original trip-count\n"]
265273
node [shape=rect, fontname=Courier, fontsize=30]
266274
edge [fontname=Courier, fontsize=30]
267275
compound=true
@@ -280,6 +288,7 @@ compound=true
280288
label="\<x1\> vector loop"
281289
N2 [label =
282290
"vector.body:\l" +
291+
" EMIT vp\<%2\> = CANONICAL-INDUCTION ir\<0\>, vp\<%index.next\>\l" +
283292
" WIDEN-PHI ir\<%iv\> = phi ir\<0\>, ir\<%iv.next\>\l" +
284293
" EMIT ir\<%arr.idx\> = getelementptr ir\<%A\>, ir\<%iv\>\l" +
285294
" EMIT ir\<%l1\> = load ir\<%arr.idx\>\l" +
@@ -293,13 +302,15 @@ compound=true
293302
" EMIT store ir\<%res\>, ir\<%arr.idx\>\l" +
294303
" EMIT ir\<%iv.next\> = add ir\<%iv\>, ir\<1\>\l" +
295304
" EMIT ir\<%exitcond\> = icmp ir\<%iv.next\>, ir\<%N\>\l" +
305+
" EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" +
306+
" EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" +
296307
"No successors\l"
297308
]
298309
}
299310
N4 -> N5 [ label="" ltail=cluster_N3]
300311
N5 [label =
301312
"middle.block:\l" +
302-
" EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%0\>\l" +
313+
" EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%1\>\l" +
303314
" EMIT branch-on-cond vp\<%cmp.n\>\l" +
304315
"Successor(s): ir-bb\<exit.2\>, scalar.ph\l"
305316
]

0 commit comments

Comments
 (0)