Skip to content

Commit db6c1ee

Browse files
committed
[VPlan] Handle early exit before forming regions. (NFC)
Move early-exit handling up front to the original VPlan construction, before forming regions. This builds on llvm#137709, which adds exiting edges to the original VPlan instead of adding exit blocks later. That retains the exit conditions early and means we can handle early exits before forming regions, without relying on VPRecipeBuilder. Once all exits are retained initially, handling early exits before region construction ensures the regions are valid; otherwise we would leave edges exiting the region from somewhere other than the latch. Removing the reliance on VPRecipeBuilder removes the dependence on mapping IR BBs to VPBBs and unblocks predication as a VPlan transform: llvm#128420. Depends on llvm#137709.
1 parent 83f946a commit db6c1ee

File tree

5 files changed

+81
-70
lines changed

5 files changed

+81
-70
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9366,7 +9366,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93669366
VPlanTransforms::prepareForVectorization(
93679367
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
93689368
CM.foldTailByMasking(), OrigLoop,
9369-
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
9369+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()),
9370+
Legal->hasUncountableEarlyExit(), Range);
93709371
VPlanTransforms::createLoopRegions(*Plan);
93719372

93729373
// Don't use getDecisionAndClampRange here, because we don't know the UF
@@ -9564,12 +9565,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
95649565
R->setOperand(1, WideIV->getStepValue());
95659566
}
95669567

9567-
if (auto *UncountableExitingBlock =
9568-
Legal->getUncountableEarlyExitingBlock()) {
9569-
VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
9570-
OrigLoop, UncountableExitingBlock, RecipeBuilder,
9571-
Range);
9572-
}
95739568
DenseMap<VPValue *, VPValue *> IVEndValues;
95749569
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
95759570
SetVector<VPIRInstruction *> ExitUsersToFix =
@@ -9667,7 +9662,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
96679662
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
96689663
VPlanTransforms::prepareForVectorization(
96699664
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
9670-
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
9665+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false,
9666+
Range);
96719667
VPlanTransforms::createLoopRegions(*Plan);
96729668

96739669
for (ElementCount VF : Range)

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -474,11 +474,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
474474
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
475475
}
476476

477-
void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
478-
PredicatedScalarEvolution &PSE,
479-
bool RequiresScalarEpilogueCheck,
480-
bool TailFolded, Loop *TheLoop,
481-
DebugLoc IVDL) {
477+
void VPlanTransforms::prepareForVectorization(
478+
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
479+
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop,
480+
DebugLoc IVDL, bool HandleUncountableExit, VFRange &Range) {
482481
VPDominatorTree VPDT;
483482
VPDT.recalculate(Plan);
484483

@@ -505,15 +504,18 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
505504

506505
addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
507506
cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
508-
509-
// Disconnect all edges between exit blocks other than from the latch.
510-
// TODO: Uncountable exit blocks should be handled here.
511-
for (VPBlockBase *EB : to_vector(Plan.getExitBlocks())) {
512-
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
513-
if (Pred == MiddleVPBB)
514-
continue;
515-
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
516-
VPBlockUtils::disconnectBlocks(Pred, EB);
507+
if (HandleUncountableExit) {
508+
handleUncountableEarlyExit(Plan, cast<VPBasicBlock>(HeaderVPB),
509+
cast<VPBasicBlock>(LatchVPB), Range);
510+
} else {
511+
// Disconnect all edges between exit blocks other than from the latch.
512+
for (VPBlockBase *EB : to_vector(Plan.getExitBlocks())) {
513+
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
514+
if (Pred == MiddleVPBB)
515+
continue;
516+
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
517+
VPBlockUtils::disconnectBlocks(Pred, EB);
518+
}
517519
}
518520
}
519521

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 54 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2488,64 +2488,75 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
24882488
R->eraseFromParent();
24892489
}
24902490

2491-
void VPlanTransforms::handleUncountableEarlyExit(
2492-
VPlan &Plan, Loop *OrigLoop, BasicBlock *UncountableExitingBlock,
2493-
VPRecipeBuilder &RecipeBuilder, VFRange &Range) {
2494-
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
2495-
auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting());
2491+
void VPlanTransforms::handleUncountableEarlyExit(VPlan &Plan,
2492+
VPBasicBlock *HeaderVPBB,
2493+
VPBasicBlock *LatchVPBB,
2494+
VFRange &Range) {
2495+
// First find the uncountable early exiting block by looking at the
2496+
// predecessors of the exit blocks.
2497+
VPBlockBase *MiddleVPBB = LatchVPBB->getSuccessors()[0];
2498+
VPBasicBlock *EarlyExitingVPBB = nullptr;
2499+
VPIRBasicBlock *EarlyExitVPBB = nullptr;
2500+
for (auto *EB : Plan.getExitBlocks()) {
2501+
for (VPBlockBase *Pred : EB->getPredecessors()) {
2502+
if (Pred != MiddleVPBB) {
2503+
EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
2504+
EarlyExitVPBB = EB;
2505+
break;
2506+
}
2507+
}
2508+
}
2509+
24962510
VPBuilder Builder(LatchVPBB->getTerminator());
2497-
auto *MiddleVPBB = Plan.getMiddleBlock();
2498-
VPValue *IsEarlyExitTaken = nullptr;
2499-
2500-
// Process the uncountable exiting block. Update IsEarlyExitTaken, which
2501-
// tracks if the uncountable early exit has been taken. Also split the middle
2502-
// block and have it conditionally branch to the early exit block if
2503-
// EarlyExitTaken.
2504-
auto *EarlyExitingBranch =
2505-
cast<BranchInst>(UncountableExitingBlock->getTerminator());
2506-
BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0);
2507-
BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1);
2508-
BasicBlock *EarlyExitIRBB =
2509-
!OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc;
2510-
VPIRBasicBlock *VPEarlyExitBlock = Plan.getExitBlock(EarlyExitIRBB);
2511-
2512-
VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask(
2513-
OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
2514-
auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond);
2515-
IsEarlyExitTaken =
2516-
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
2511+
VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
2512+
VPValue *EarlyExitCond = EarlyExitingVPBB->getTerminator()->getOperand(0);
2513+
auto *EarlyExitTakenCond = TrueSucc == EarlyExitVPBB
2514+
? EarlyExitCond
2515+
: Builder.createNot(EarlyExitCond);
2516+
2517+
if (!EarlyExitVPBB->getSinglePredecessor() &&
2518+
EarlyExitVPBB->getPredecessors()[0] != MiddleVPBB) {
2519+
for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
2520+
// Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
2521+
// a single predecessor and 1 if it has two.
2522+
// If EarlyExitVPBB has two predecessors, they are already ordered such
2523+
// that early exit is second (and latch exit is first), by construction.
2524+
// But its underlying IRBB (EarlyExitIRBB) may have its predecessors
2525+
// ordered the other way around, and it is the order of the latter which
2526+
// corresponds to the order of operands of EarlyExitVPBB's phi recipes.
2527+
// Therefore, if early exit (UncountableExitingBlock) is the first
2528+
// predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
2529+
// thereby bringing them to match EarlyExitVPBB's predecessor order,
2530+
// with early exit being last (second). Otherwise they already match.
2531+
cast<VPIRPhi>(&R)->swapOperands();
2532+
}
2533+
}
25172534

2535+
EarlyExitingVPBB->getTerminator()->eraseFromParent();
2536+
VPBlockUtils::disconnectBlocks(EarlyExitingVPBB, EarlyExitVPBB);
2537+
2538+
// Split the middle block and have it conditionally branch to the early exit
2539+
// block if EarlyExitTaken.
2540+
VPValue *IsEarlyExitTaken =
2541+
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
25182542
VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
25192543
VPBasicBlock *VectorEarlyExitVPBB =
25202544
Plan.createVPBasicBlock("vector.early.exit");
2521-
VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle);
2545+
VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle);
25222546
VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
25232547
NewMiddle->swapSuccessors();
25242548

2525-
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, VPEarlyExitBlock);
2549+
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
25262550

25272551
// Update the exit phis in the early exit block.
25282552
VPBuilder MiddleBuilder(NewMiddle);
25292553
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
2530-
for (VPRecipeBase &R : VPEarlyExitBlock->phis()) {
2554+
for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
25312555
auto *ExitIRI = cast<VPIRPhi>(&R);
2532-
// Early exit operand should always be last, i.e., 0 if VPEarlyExitBlock has
2556+
// Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
25332557
// a single predecessor and 1 if it has two.
25342558
unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
2535-
if (!VPEarlyExitBlock->getSinglePredecessor()) {
2536-
// If VPEarlyExitBlock has two predecessors, they are already ordered such
2537-
// that early exit is second (and latch exit is first), by construction.
2538-
// But its underlying IRBB (EarlyExitIRBB) may have its predecessors
2539-
// ordered the other way around, and it is the order of the latter which
2540-
// corresponds to the order of operands of VPEarlyExitBlock's phi recipes.
2541-
// Therefore, if early exit (UncountableExitingBlock) is the first
2542-
// predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
2543-
// thereby bringing them to match VPEarlyExitBlock's predecessor order,
2544-
// with early exit being last (second). Otherwise they already match.
2545-
if (*pred_begin(VPEarlyExitBlock->getIRBasicBlock()) ==
2546-
UncountableExitingBlock)
2547-
ExitIRI->swapOperands();
2548-
2559+
if (!EarlyExitVPBB->getSinglePredecessor()) {
25492560
// The first of two operands corresponds to the latch exit, via MiddleVPBB
25502561
// predecessor. Extract its last lane.
25512562
ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ struct VPlanTransforms {
6969
PredicatedScalarEvolution &PSE,
7070
bool RequiresScalarEpilogueCheck,
7171
bool TailFolded, Loop *TheLoop,
72-
DebugLoc IVDL);
72+
DebugLoc IVDL, bool HandleUncountableExit,
73+
VFRange &Range);
7374

7475
/// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
7576
/// flat CFG into a hierarchical CFG.
@@ -179,9 +180,8 @@ struct VPlanTransforms {
179180
/// exit conditions
180181
/// * splitting the original middle block to branch to the early exit block
181182
/// if taken.
182-
static void handleUncountableEarlyExit(VPlan &Plan, Loop *OrigLoop,
183-
BasicBlock *UncountableExitingBlock,
184-
VPRecipeBuilder &RecipeBuilder,
183+
static void handleUncountableEarlyExit(VPlan &Plan, VPBasicBlock *HeaderVPBB,
184+
VPBasicBlock *LatchVPBB,
185185
VFRange &Range);
186186

187187
/// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p

llvm/unittests/Transforms/Vectorize/VPlanTestBase.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#define LLVM_UNITTESTS_TRANSFORMS_VECTORIZE_VPLANTESTBASE_H
1414

1515
#include "../lib/Transforms/Vectorize/VPlan.h"
16+
#include "../lib/Transforms/Vectorize/VPlanHelpers.h"
1617
#include "../lib/Transforms/Vectorize/VPlanTransforms.h"
1718
#include "llvm/Analysis/AssumptionCache.h"
1819
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -72,8 +73,9 @@ class VPlanTestIRBase : public testing::Test {
7273
PredicatedScalarEvolution PSE(*SE, *L);
7374
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
7475
auto Plan = VPlanTransforms::buildPlainCFG(L, *LI, VPB2IRBB);
76+
VFRange R(ElementCount::getFixed(1), ElementCount::getFixed(2));
7577
VPlanTransforms::prepareForVectorization(*Plan, IntegerType::get(*Ctx, 64),
76-
PSE, true, false, L, {});
78+
PSE, true, false, L, {}, false, R);
7779
VPlanTransforms::createLoopRegions(*Plan);
7880
return Plan;
7981
}

0 commit comments

Comments
 (0)