Skip to content

Commit 2f55123

Browse files
authored
[VPlan] Handle early exit before forming regions. (NFC) (#138393)
Move early-exit handling up front to original VPlan construction, before introducing regions. This builds on #137709, which adds exiting edges to the original VPlan, instead of adding exit blocks later. This retains the exit conditions early, and means we can handle early exits before forming regions, without the reliance on VPRecipeBuilder. Once we retain all exits initially, handling early exits before region construction ensures the regions are valid; otherwise we would leave edges exiting the region from elsewhere than the latch. Removing the reliance on VPRecipeBuilder removes the dependence on mapping IR BBs to VPBBs and unblocks predication as a VPlan transform: #128420. Depends on #137709 (included in PR). PR: #138393
1 parent 3aacd74 commit 2f55123

File tree

5 files changed

+78
-72
lines changed

5 files changed

+78
-72
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9383,7 +9383,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
93839383
VPlanTransforms::prepareForVectorization(
93849384
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
93859385
CM.foldTailByMasking(), OrigLoop,
9386-
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
9386+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()),
9387+
Legal->hasUncountableEarlyExit(), Range);
93879388
VPlanTransforms::createLoopRegions(*Plan);
93889389

93899390
// Don't use getDecisionAndClampRange here, because we don't know the UF
@@ -9584,12 +9585,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
95849585
R->setOperand(1, WideIV->getStepValue());
95859586
}
95869587

9587-
if (auto *UncountableExitingBlock =
9588-
Legal->getUncountableEarlyExitingBlock()) {
9589-
VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
9590-
OrigLoop, UncountableExitingBlock, RecipeBuilder,
9591-
Range);
9592-
}
95939588
DenseMap<VPValue *, VPValue *> IVEndValues;
95949589
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
95959590
SetVector<VPIRInstruction *> ExitUsersToFix =
@@ -9687,7 +9682,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
96879682
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
96889683
VPlanTransforms::prepareForVectorization(
96899684
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
9690-
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()));
9685+
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false,
9686+
Range);
96919687
VPlanTransforms::createLoopRegions(*Plan);
96929688

96939689
for (ElementCount VF : Range)

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -460,11 +460,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
460460
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
461461
}
462462

463-
void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
464-
PredicatedScalarEvolution &PSE,
465-
bool RequiresScalarEpilogueCheck,
466-
bool TailFolded, Loop *TheLoop,
467-
DebugLoc IVDL) {
463+
void VPlanTransforms::prepareForVectorization(
464+
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
465+
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop,
466+
DebugLoc IVDL, bool HasUncountableEarlyExit, VFRange &Range) {
468467
VPDominatorTree VPDT;
469468
VPDT.recalculate(Plan);
470469

@@ -491,19 +490,33 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy,
491490
addCanonicalIVRecipes(Plan, cast<VPBasicBlock>(HeaderVPB),
492491
cast<VPBasicBlock>(LatchVPB), InductionTy, IVDL);
493492

494-
// Disconnect all edges to exit blocks other than from the middle block.
495-
// TODO: VPlans with early exits should be explicitly converted to a form
496-
// exiting only via the latch here, including adjusting the exit condition,
497-
// instead of simply disconnecting the edges and adjusting the VPlan later.
498-
for (VPBlockBase *EB : Plan.getExitBlocks()) {
493+
[[maybe_unused]] bool HandledUncountableEarlyExit = false;
494+
// Disconnect all early exits from the loop leaving it with a single exit from
495+
// the latch. Early exits that are countable are left for a scalar epilog. The
496+
// condition of uncountable early exits (currently at most one is supported)
497+
// is fused into the latch exit, and used to branch from middle block to the
498+
// early exit destination.
499+
for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
499500
for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
500501
if (Pred == MiddleVPBB)
501502
continue;
503+
if (HasUncountableEarlyExit) {
504+
assert(!HandledUncountableEarlyExit &&
505+
"can handle exactly one uncountable early exit");
506+
handleUncountableEarlyExit(cast<VPBasicBlock>(Pred), EB, Plan,
507+
cast<VPBasicBlock>(HeaderVPB),
508+
cast<VPBasicBlock>(LatchVPB), Range);
509+
HandledUncountableEarlyExit = true;
510+
}
511+
502512
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
503513
VPBlockUtils::disconnectBlocks(Pred, EB);
504514
}
505515
}
506516

517+
assert((!HasUncountableEarlyExit || HandledUncountableEarlyExit) &&
518+
"missed an uncountable exit that must be handled");
519+
507520
// Create SCEV and VPValue for the trip count.
508521
// We use the symbolic max backedge-taken-count, which works also when
509522
// vectorizing loops with uncountable early exits.

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 37 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -2461,63 +2461,56 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
24612461
}
24622462

24632463
void VPlanTransforms::handleUncountableEarlyExit(
2464-
VPlan &Plan, Loop *OrigLoop, BasicBlock *UncountableExitingBlock,
2465-
VPRecipeBuilder &RecipeBuilder, VFRange &Range) {
2466-
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
2467-
auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting());
2468-
VPBuilder Builder(LatchVPBB->getTerminator());
2469-
auto *MiddleVPBB = Plan.getMiddleBlock();
2470-
VPValue *IsEarlyExitTaken = nullptr;
2471-
2472-
// Process the uncountable exiting block. Update IsEarlyExitTaken, which
2473-
// tracks if the uncountable early exit has been taken. Also split the middle
2474-
// block and have it conditionally branch to the early exit block if
2475-
// EarlyExitTaken.
2476-
auto *EarlyExitingBranch =
2477-
cast<BranchInst>(UncountableExitingBlock->getTerminator());
2478-
BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0);
2479-
BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1);
2480-
BasicBlock *EarlyExitIRBB =
2481-
!OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc;
2482-
VPIRBasicBlock *VPEarlyExitBlock = Plan.getExitBlock(EarlyExitIRBB);
2483-
2484-
VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask(
2485-
OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
2486-
auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond);
2487-
IsEarlyExitTaken =
2488-
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
2464+
VPBasicBlock *EarlyExitingVPBB, VPBasicBlock *EarlyExitVPBB, VPlan &Plan,
2465+
VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, VFRange &Range) {
2466+
using namespace llvm::VPlanPatternMatch;
24892467

2468+
VPBlockBase *MiddleVPBB = LatchVPBB->getSuccessors()[0];
2469+
if (!EarlyExitVPBB->getSinglePredecessor() &&
2470+
EarlyExitVPBB->getPredecessors()[1] == MiddleVPBB) {
2471+
assert(EarlyExitVPBB->getNumPredecessors() == 2 &&
2472+
EarlyExitVPBB->getPredecessors()[0] == EarlyExitingVPBB &&
2473+
"unsupported early exit VPBB");
2474+
// Early exit operand should always be last phi operand. If EarlyExitVPBB
2475+
// has two predecessors and EarlyExitingVPBB is the first, swap the operands
2476+
// of the phis.
2477+
for (VPRecipeBase &R : EarlyExitVPBB->phis())
2478+
cast<VPIRPhi>(&R)->swapOperands();
2479+
}
2480+
2481+
VPBuilder Builder(LatchVPBB->getTerminator());
2482+
VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
2483+
assert(
2484+
match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond(m_VPValue())) &&
2485+
"Terminator must be be BranchOnCond");
2486+
VPValue *CondOfEarlyExitingVPBB =
2487+
EarlyExitingVPBB->getTerminator()->getOperand(0);
2488+
auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
2489+
? CondOfEarlyExitingVPBB
2490+
: Builder.createNot(CondOfEarlyExitingVPBB);
2491+
2492+
// Split the middle block and have it conditionally branch to the early exit
2493+
// block if CondToEarlyExit.
2494+
VPValue *IsEarlyExitTaken =
2495+
Builder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit});
24902496
VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
24912497
VPBasicBlock *VectorEarlyExitVPBB =
24922498
Plan.createVPBasicBlock("vector.early.exit");
2493-
VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle);
2499+
VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle);
24942500
VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
24952501
NewMiddle->swapSuccessors();
24962502

2497-
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, VPEarlyExitBlock);
2503+
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
24982504

24992505
// Update the exit phis in the early exit block.
25002506
VPBuilder MiddleBuilder(NewMiddle);
25012507
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
2502-
for (VPRecipeBase &R : VPEarlyExitBlock->phis()) {
2508+
for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
25032509
auto *ExitIRI = cast<VPIRPhi>(&R);
2504-
// Early exit operand should always be last, i.e., 0 if VPEarlyExitBlock has
2510+
// Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
25052511
// a single predecessor and 1 if it has two.
25062512
unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
2507-
if (!VPEarlyExitBlock->getSinglePredecessor()) {
2508-
// If VPEarlyExitBlock has two predecessors, they are already ordered such
2509-
// that early exit is second (and latch exit is first), by construction.
2510-
// But its underlying IRBB (EarlyExitIRBB) may have its predecessors
2511-
// ordered the other way around, and it is the order of the latter which
2512-
// corresponds to the order of operands of VPEarlyExitBlock's phi recipes.
2513-
// Therefore, if early exit (UncountableExitingBlock) is the first
2514-
// predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
2515-
// thereby bringing them to match VPEarlyExitBlock's predecessor order,
2516-
// with early exit being last (second). Otherwise they already match.
2517-
if (*pred_begin(VPEarlyExitBlock->getIRBasicBlock()) ==
2518-
UncountableExitingBlock)
2519-
ExitIRI->swapOperands();
2520-
2513+
if (ExitIRI->getNumOperands() != 1) {
25212514
// The first of two operands corresponds to the latch exit, via MiddleVPBB
25222515
// predecessor. Extract its last lane.
25232516
ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
@@ -2533,7 +2526,7 @@ void VPlanTransforms::handleUncountableEarlyExit(
25332526
LoopVectorizationPlanner::getDecisionAndClampRange(IsVector, Range)) {
25342527
// Update the incoming value from the early exit.
25352528
VPValue *FirstActiveLane = EarlyExitB.createNaryOp(
2536-
VPInstruction::FirstActiveLane, {EarlyExitTakenCond}, nullptr,
2529+
VPInstruction::FirstActiveLane, {CondToEarlyExit}, nullptr,
25372530
"first.active.lane");
25382531
IncomingFromEarlyExit = EarlyExitB.createNaryOp(
25392532
Instruction::ExtractElement, {IncomingFromEarlyExit, FirstActiveLane},

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ struct VPlanTransforms {
6969
PredicatedScalarEvolution &PSE,
7070
bool RequiresScalarEpilogueCheck,
7171
bool TailFolded, Loop *TheLoop,
72-
DebugLoc IVDL);
72+
DebugLoc IVDL, bool HasUncountableExit,
73+
VFRange &Range);
7374

7475
/// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's
7576
/// flat CFG into a hierarchical CFG.
@@ -173,15 +174,16 @@ struct VPlanTransforms {
173174
/// Remove dead recipes from \p Plan.
174175
static void removeDeadRecipes(VPlan &Plan);
175176

176-
/// Update \p Plan to account for the uncountable early exit block in \p
177-
/// UncountableExitingBlock by
178-
/// * updating the condition exiting the vector loop to include the early
179-
/// exit conditions
177+
/// Update \p Plan to account for the uncountable early exit from \p
178+
/// EarlyExitingVPBB to \p EarlyExitVPBB by
179+
/// * updating the condition exiting the loop via the latch to include the
180+
/// early exit condition,
180181
/// * splitting the original middle block to branch to the early exit block
181-
/// if taken.
182-
static void handleUncountableEarlyExit(VPlan &Plan, Loop *OrigLoop,
183-
BasicBlock *UncountableExitingBlock,
184-
VPRecipeBuilder &RecipeBuilder,
182+
/// conditionally - according to the early exit condition.
183+
static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
184+
VPBasicBlock *EarlyExitVPBB,
185+
VPlan &Plan, VPBasicBlock *HeaderVPBB,
186+
VPBasicBlock *LatchVPBB,
185187
VFRange &Range);
186188

187189
/// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p

llvm/unittests/Transforms/Vectorize/VPlanTestBase.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#define LLVM_UNITTESTS_TRANSFORMS_VECTORIZE_VPLANTESTBASE_H
1414

1515
#include "../lib/Transforms/Vectorize/VPlan.h"
16+
#include "../lib/Transforms/Vectorize/VPlanHelpers.h"
1617
#include "../lib/Transforms/Vectorize/VPlanTransforms.h"
1718
#include "llvm/Analysis/AssumptionCache.h"
1819
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -72,8 +73,9 @@ class VPlanTestIRBase : public testing::Test {
7273
PredicatedScalarEvolution PSE(*SE, *L);
7374
DenseMap<const VPBlockBase *, BasicBlock *> VPB2IRBB;
7475
auto Plan = VPlanTransforms::buildPlainCFG(L, *LI, VPB2IRBB);
76+
VFRange R(ElementCount::getFixed(1), ElementCount::getFixed(2));
7577
VPlanTransforms::prepareForVectorization(*Plan, IntegerType::get(*Ctx, 64),
76-
PSE, true, false, L, {});
78+
PSE, true, false, L, {}, false, R);
7779
VPlanTransforms::createLoopRegions(*Plan);
7880
return Plan;
7981
}

0 commit comments

Comments
 (0)