Skip to content

Commit d0d2c2e

Browse files
committed
[VPlan] Handle early exit before forming regions. (NFC)
Move early-exit handling up front to original VPlan construction, before introducing regions. This builds on llvm#137709, which adds exiting edges to the original VPlan, instead of adding exit blocks later. This retains the exit conditions early, and means we can handle early exits before forming regions, without relying on VPRecipeBuilder. Once we retain all exits initially, handling early exits before region construction ensures the regions are valid; otherwise we would leave edges exiting the region from elsewhere than the latch. Removing the reliance on VPRecipeBuilder removes the dependence on mapping IR BBs to VPBBs and unblocks predication as a VPlan transform: llvm#128420. Depends on llvm#137709.
1 parent 7feba5f commit d0d2c2e

File tree

5 files changed

+94
-71
lines changed

5 files changed

+94
-71
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9384,7 +9384,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93849384
VPlanTransforms::prepareForVectorization(
93859385
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
93869386
CM.foldTailByMasking(), OrigLoop,
9387-
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
9387+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()),
9388+
Legal->hasUncountableEarlyExit(), Range);
93889389
VPlanTransforms::createLoopRegions(*Plan);
93899390

93909391
// Don't use getDecisionAndClampRange here, because we don't know the UF
@@ -9582,12 +9583,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
95829583
R->setOperand(1, WideIV->getStepValue());
95839584
}
95849585

9585-
if (auto *UncountableExitingBlock =
9586-
Legal->getUncountableEarlyExitingBlock()) {
9587-
VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
9588-
OrigLoop, UncountableExitingBlock, RecipeBuilder,
9589-
Range);
9590-
}
95919586
DenseMap<VPValue *, VPValue *> IVEndValues;
95929587
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
95939588
SetVector<VPIRInstruction *> ExitUsersToFix =
@@ -9685,7 +9680,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
96859680
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
96869681
VPlanTransforms::prepareForVectorization(
96879682
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
9688-
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
9683+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false,
9684+
Range);
96899685
VPlanTransforms::createLoopRegions(*Plan);
96909686

96919687
for (ElementCount VF : Range)

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -460,11 +460,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
460460
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
461461
}
462462

463-
void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
464-
PredicatedScalarEvolution &PSE,
465-
bool RequiresScalarEpilogueCheck,
466-
bool TailFolded, Loop *TheLoop,
467-
DebugLoc IVDL) {
463+
void VPlanTransforms::prepareForVectorization(
464+
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
465+
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop,
466+
DebugLoc IVDL, bool HandleUncountableExit, VFRange &Range) {
468467
VPDominatorTree VPDT;
469468
VPDT.recalculate(Plan);
470469

@@ -491,16 +490,20 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
491490
addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
492491
cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
493492

494-
// Disconnect all edges to exit blocks other than from the middle block.
495-
// TODO: VPlans with early exits should be explicitly converted to a form
496-
// exiting only via the latch here, including adjusting the exit condition,
497-
// instead of simply disconnecting the edges and adjusting the VPlan later.
498-
for (VPBlockBase *EB : Plan.getExitBlocks()) {
499-
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
500-
if (Pred == MiddleVPBB)
501-
continue;
502-
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
503-
VPBlockUtils::disconnectBlocks(Pred, EB);
493+
if (HandleUncountableExit) {
494+
// Convert VPlans with early exits to a form only exiting via the latch
495+
// here, including adjusting the exit condition.
496+
handleUncountableEarlyExit(Plan, cast<VPBasicBlock>(HeaderVPB),
497+
cast<VPBasicBlock>(LatchVPB), Range);
498+
} else {
499+
// Disconnect all edges to exit blocks other than from the middle block.
500+
for (VPBlockBase *EB : to_vector(Plan.getExitBlocks())) {
501+
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
502+
if (Pred == MiddleVPBB)
503+
continue;
504+
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
505+
VPBlockUtils::disconnectBlocks(Pred, EB);
506+
}
504507
}
505508
}
506509

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 65 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2458,64 +2458,86 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
24582458
R->eraseFromParent();
24592459
}
24602460

2461-
void VPlanTransforms::handleUncountableEarlyExit(
2462-
VPlan &Plan, Loop *OrigLoop, BasicBlock *UncountableExitingBlock,
2463-
VPRecipeBuilder &RecipeBuilder, VFRange &Range) {
2464-
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
2465-
auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting());
2461+
void VPlanTransforms::handleUncountableEarlyExit(VPlan &Plan,
2462+
VPBasicBlock *HeaderVPBB,
2463+
VPBasicBlock *LatchVPBB,
2464+
VFRange &Range) {
2465+
// First find the uncountable early exiting block by looking at the
2466+
// predecessors of the exit blocks.
2467+
VPBlockBase *MiddleVPBB = LatchVPBB->getSuccessors()[0];
2468+
VPBasicBlock *EarlyExitingVPBB = nullptr;
2469+
VPIRBasicBlock *EarlyExitVPBB = nullptr;
2470+
for (auto *EB : Plan.getExitBlocks()) {
2471+
for (VPBlockBase *Pred : EB->getPredecessors()) {
2472+
if (Pred != MiddleVPBB) {
2473+
EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
2474+
EarlyExitVPBB = EB;
2475+
break;
2476+
}
2477+
}
2478+
}
2479+
assert(EarlyExitVPBB && "Must have a early exiting block.");
2480+
assert(all_of(Plan.getExitBlocks(),
2481+
[EarlyExitingVPBB, MiddleVPBB](VPIRBasicBlock *EB) {
2482+
return all_of(
2483+
EB->getPredecessors(),
2484+
[EarlyExitingVPBB, MiddleVPBB](VPBlockBase *Pred) {
2485+
return Pred == EarlyExitingVPBB || Pred == MiddleVPBB;
2486+
});
2487+
}) &&
2488+
"All exit blocks must only have EarlyExitingVPBB or MiddleVPBB as "
2489+
"predecessors.");
2490+
24662491
VPBuilder Builder(LatchVPBB->getTerminator());
2467-
auto *MiddleVPBB = Plan.getMiddleBlock();
2468-
VPValue *IsEarlyExitTaken = nullptr;
2469-
2470-
// Process the uncountable exiting block. Update IsEarlyExitTaken, which
2471-
// tracks if the uncountable early exit has been taken. Also split the middle
2472-
// block and have it conditionally branch to the early exit block if
2473-
// EarlyExitTaken.
2474-
auto *EarlyExitingBranch =
2475-
cast<BranchInst>(UncountableExitingBlock->getTerminator());
2476-
BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0);
2477-
BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1);
2478-
BasicBlock *EarlyExitIRBB =
2479-
!OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc;
2480-
VPIRBasicBlock *VPEarlyExitBlock = Plan.getExitBlock(EarlyExitIRBB);
2481-
2482-
VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask(
2483-
OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
2484-
auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond);
2485-
IsEarlyExitTaken =
2486-
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
2492+
VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
2493+
VPValue *EarlyExitCond = EarlyExitingVPBB->getTerminator()->getOperand(0);
2494+
auto *EarlyExitTakenCond = TrueSucc == EarlyExitVPBB
2495+
? EarlyExitCond
2496+
: Builder.createNot(EarlyExitCond);
2497+
2498+
if (!EarlyExitVPBB->getSinglePredecessor() &&
2499+
EarlyExitVPBB->getPredecessors()[0] != MiddleVPBB) {
2500+
for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
2501+
// Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
2502+
// a single predecessor and 1 if it has two.
2503+
// If EarlyExitVPBB has two predecessors, they are already ordered such
2504+
// that early exit is second (and latch exit is first), by construction.
2505+
// But its underlying IRBB (EarlyExitIRBB) may have its predecessors
2506+
// ordered the other way around, and it is the order of the latter which
2507+
// corresponds to the order of operands of EarlyExitVPBB's phi recipes.
2508+
// Therefore, if early exit (UncountableExitingBlock) is the first
2509+
// predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
2510+
// thereby bringing them to match EarlyExitVPBB's predecessor order,
2511+
// with early exit being last (second). Otherwise they already match.
2512+
cast<VPIRPhi>(&R)->swapOperands();
2513+
}
2514+
}
24872515

2516+
EarlyExitingVPBB->getTerminator()->eraseFromParent();
2517+
VPBlockUtils::disconnectBlocks(EarlyExitingVPBB, EarlyExitVPBB);
2518+
2519+
// Split the middle block and have it conditionally branch to the early exit
2520+
// block if EarlyExitTaken.
2521+
VPValue *IsEarlyExitTaken =
2522+
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
24882523
VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
24892524
VPBasicBlock *VectorEarlyExitVPBB =
24902525
Plan.createVPBasicBlock("vector.early.exit");
2491-
VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle);
2526+
VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle);
24922527
VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
24932528
NewMiddle->swapSuccessors();
24942529

2495-
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, VPEarlyExitBlock);
2530+
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
24962531

24972532
// Update the exit phis in the early exit block.
24982533
VPBuilder MiddleBuilder(NewMiddle);
24992534
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
2500-
for (VPRecipeBase &R : VPEarlyExitBlock->phis()) {
2535+
for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
25012536
auto *ExitIRI = cast<VPIRPhi>(&R);
2502-
// Early exit operand should always be last, i.e., 0 if VPEarlyExitBlock has
2537+
// Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
25032538
// a single predecessor and 1 if it has two.
25042539
unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
2505-
if (!VPEarlyExitBlock->getSinglePredecessor()) {
2506-
// If VPEarlyExitBlock has two predecessors, they are already ordered such
2507-
// that early exit is second (and latch exit is first), by construction.
2508-
// But its underlying IRBB (EarlyExitIRBB) may have its predecessors
2509-
// ordered the other way around, and it is the order of the latter which
2510-
// corresponds to the order of operands of VPEarlyExitBlock's phi recipes.
2511-
// Therefore, if early exit (UncountableExitingBlock) is the first
2512-
// predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
2513-
// thereby bringing them to match VPEarlyExitBlock's predecessor order,
2514-
// with early exit being last (second). Otherwise they already match.
2515-
if (*pred_begin(VPEarlyExitBlock->getIRBasicBlock()) ==
2516-
UncountableExitingBlock)
2517-
ExitIRI->swapOperands();
2518-
2540+
if (!EarlyExitVPBB->getSinglePredecessor()) {
25192541
// The first of two operands corresponds to the latch exit, via MiddleVPBB
25202542
// predecessor. Extract its last lane.
25212543
ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ struct VPlanTransforms {
6969
PredicatedScalarEvolution &PSE,
7070
bool RequiresScalarEpilogueCheck,
7171
bool TailFolded, Loop *TheLoop,
72-
DebugLoc IVDL);
72+
DebugLoc IVDL, bool HandleUncountableExit,
73+
VFRange &Range);
7374

7475
/// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
7576
/// flat CFG into a hierarchical CFG.
@@ -179,9 +180,8 @@ struct VPlanTransforms {
179180
/// exit conditions
180181
/// * splitting the original middle block to branch to the early exit block
181182
/// if taken.
182-
static void handleUncountableEarlyExit(VPlan &Plan, Loop *OrigLoop,
183-
BasicBlock *UncountableExitingBlock,
184-
VPRecipeBuilder &RecipeBuilder,
183+
static void handleUncountableEarlyExit(VPlan &Plan, VPBasicBlock *HeaderVPBB,
184+
VPBasicBlock *LatchVPBB,
185185
VFRange &Range);
186186

187187
/// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p

llvm/unittests/Transforms/Vectorize/VPlanTestBase.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#define LLVM_UNITTESTS_TRANSFORMS_VECTORIZE_VPLANTESTBASE_H
1414

1515
#include "../lib/Transforms/Vectorize/VPlan.h"
16+
#include "../lib/Transforms/Vectorize/VPlanHelpers.h"
1617
#include "../lib/Transforms/Vectorize/VPlanTransforms.h"
1718
#include "llvm/Analysis/AssumptionCache.h"
1819
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -72,8 +73,9 @@ class VPlanTestIRBase : public testing::Test {
7273
PredicatedScalarEvolution PSE(*SE, *L);
7374
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
7475
auto Plan = VPlanTransforms::buildPlainCFG(L, *LI, VPB2IRBB);
76+
VFRange R(ElementCount::getFixed(1), ElementCount::getFixed(2));
7577
VPlanTransforms::prepareForVectorization(*Plan, IntegerType::get(*Ctx, 64),
76-
PSE, true, false, L, {});
78+
PSE, true, false, L, {}, false, R);
7779
VPlanTransforms::createLoopRegions(*Plan);
7880
return Plan;
7981
}

0 commit comments

Comments
 (0)