Skip to content

Commit 317d975

Browse files
committed
[VPlan] Retain exit conditions early
1 parent 5652db7 commit 317d975

File tree

6 files changed

+81
-58
lines changed

6 files changed

+81
-58
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+29-6
Original file line numberDiff line numberDiff line change
@@ -9350,6 +9350,24 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93509350
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
93519351
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);
93529352

9353+
if (auto *UncountableExitingBlock =
9354+
Legal->getUncountableEarlyExitingBlock()) {
9355+
VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
9356+
*PSE.getSE(), OrigLoop, UncountableExitingBlock);
9357+
} else {
9358+
SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan->getExitBlocks().begin(),
9359+
Plan->getExitBlocks().end());
9360+
for (VPBlockBase *VPBB : to_vector(
9361+
vp_depth_first_shallow(Plan->getVectorLoopRegion()->getEntry()))) {
9362+
for (VPBlockBase *EB : ExitBlocks) {
9363+
if (is_contained(VPBB->getSuccessors(), EB)) {
9364+
cast<VPBasicBlock>(VPBB)->getTerminator()->eraseFromParent();
9365+
VPBlockUtils::disconnectBlocks(VPBB, EB);
9366+
}
9367+
}
9368+
}
9369+
}
9370+
93539371
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
93549372
Builder);
93559373

@@ -9528,12 +9546,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
95289546
R->setOperand(1, WideIV->getStepValue());
95299547
}
95309548

9531-
if (auto *UncountableExitingBlock =
9532-
Legal->getUncountableEarlyExitingBlock()) {
9533-
VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
9534-
*PSE.getSE(), OrigLoop, UncountableExitingBlock,
9535-
RecipeBuilder);
9536-
}
95379549
DenseMap<VPValue *, VPValue *> IVEndValues;
95389550
addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
95399551
SetVector<VPIRInstruction *> ExitUsersToFix =
@@ -9631,6 +9643,17 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
96319643
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
96329644
VPlanTransforms::introduceRegions(*Plan, Legal->getWidestInductionType(), PSE,
96339645
true, false, OrigLoop);
9646+
SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan->getExitBlocks().begin(),
9647+
Plan->getExitBlocks().end());
9648+
for (VPBlockBase *VPBB : to_vector(
9649+
vp_depth_first_shallow(Plan->getVectorLoopRegion()->getEntry()))) {
9650+
for (VPBlockBase *EB : ExitBlocks) {
9651+
if (is_contained(VPBB->getSuccessors(), EB)) {
9652+
cast<VPBasicBlock>(VPBB)->getTerminator()->eraseFromParent();
9653+
VPBlockUtils::disconnectBlocks(VPBB, EB);
9654+
}
9655+
}
9656+
}
96349657

96359658
for (ElementCount VF : Range)
96369659
Plan->addVF(VF);

llvm/lib/Transforms/Vectorize/VPlan.cpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -655,9 +655,10 @@ VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const {
655655
return std::nullopt;
656656

657657
for (unsigned Idx : {0, 1}) {
658-
auto *PreheaderVPBB = cast<VPBasicBlock>(Preds[Idx]);
659-
auto *LatchVPBB = cast<VPBasicBlock>(Preds[1 - Idx]);
660-
if (VPDT.dominates(PreheaderVPBB, this) && VPDT.dominates(this, LatchVPBB))
658+
auto *PreheaderVPBB = dyn_cast<VPBasicBlock>(Preds[Idx]);
659+
auto *LatchVPBB = dyn_cast<VPBasicBlock>(Preds[1 - Idx]);
660+
if (PreheaderVPBB && LatchVPBB && VPDT.dominates(PreheaderVPBB, this) &&
661+
VPDT.dominates(this, LatchVPBB))
661662
return {std::make_pair(PreheaderVPBB, LatchVPBB)};
662663
}
663664

@@ -871,7 +872,7 @@ VPlan::VPlan(Loop *L) {
871872
ScalarHeader = createVPIRBasicBlock(L->getHeader());
872873

873874
SmallVector<BasicBlock *> IRExitBlocks;
874-
L->getExitBlocks(IRExitBlocks);
875+
L->getUniqueExitBlocks(IRExitBlocks);
875876
for (BasicBlock *EB : IRExitBlocks)
876877
ExitBlocks.push_back(createVPIRBasicBlock(EB));
877878
}

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

+18-33
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,9 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
122122
return VPBB;
123123
}
124124

125+
if (!TheLoop->contains(BB))
126+
return Plan.getExitBlock(BB);
127+
125128
// Create new VPBB.
126129
StringRef Name = isHeaderBB(BB, TheLoop) ? "vector.body" : BB->getName();
127130
LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
@@ -155,14 +158,6 @@ bool PlainCFGBuilder::isExternalDef(Value *Val) {
155158
// Instruction definition is in outermost loop PH.
156159
return false;
157160

158-
// Check whether Instruction definition is in a loop exit.
159-
SmallVector<BasicBlock *> ExitBlocks;
160-
TheLoop->getExitBlocks(ExitBlocks);
161-
if (is_contained(ExitBlocks, InstParent)) {
162-
// Instruction definition is in outermost loop exit.
163-
return false;
164-
}
165-
166161
// Check whether Instruction definition is in loop body.
167162
return !TheLoop->contains(Inst);
168163
}
@@ -211,11 +206,8 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
211206
"Instruction shouldn't have been visited.");
212207

213208
if (auto *Br = dyn_cast<BranchInst>(Inst)) {
214-
if (TheLoop->getLoopLatch() == BB ||
215-
any_of(successors(BB),
216-
[this](BasicBlock *Succ) { return !TheLoop->contains(Succ); }))
209+
if (TheLoop->getLoopLatch() == BB)
217210
continue;
218-
219211
// Conditional branch instructions are represented using BranchOnCond
220212
// recipes.
221213
if (Br->isConditional()) {
@@ -305,7 +297,6 @@ void PlainCFGBuilder::buildPlainCFG(
305297
for (BasicBlock *BB : RPO) {
306298
// Create or retrieve the VPBasicBlock for this BB.
307299
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
308-
Loop *LoopForBB = LI.getLoopFor(BB);
309300
// Set VPBB predecessors in the same order as they are in the incoming BB.
310301
setVPBBPredsFromBB(VPBB, BB);
311302

@@ -339,24 +330,12 @@ void PlainCFGBuilder::buildPlainCFG(
339330
BasicBlock *IRSucc1 = BI->getSuccessor(1);
340331
VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
341332
VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
342-
343-
// Don't connect any blocks outside the current loop except the latch, which
344-
// is handled below.
345-
if (LoopForBB &&
346-
(LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) {
347-
if (!LoopForBB->contains(IRSucc0)) {
348-
VPBB->setOneSuccessor(Successor1);
349-
continue;
350-
}
351-
if (!LoopForBB->contains(IRSucc1)) {
352-
VPBB->setOneSuccessor(Successor0);
353-
continue;
354-
}
355-
}
356-
357333
VPBB->setTwoSuccessors(Successor0, Successor1);
358334
}
359335

336+
for (auto *EB : Plan.getExitBlocks()) {
337+
setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
338+
}
360339
// 2. The whole CFG has been built at this point so all the input Values must
361340
// have a VPlan counterpart. Fix VPlan header phi by adding their
362341
// corresponding VPlan operands.
@@ -413,10 +392,15 @@ static VPRegionBlock *introduceRegion(VPlan &Plan, VPBasicBlock *PreheaderVPBB,
413392
auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "",
414393
false /*isReplicator*/);
415394
R->setParent(HeaderVPBB->getParent());
395+
416396
// All VPBBs reachable shallowly from HeaderVPBB belong to the top-level loop,
417397
// because VPlan is expected to end at top level latch disconnected above.
418-
for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
419-
VPBB->setParent(R);
398+
SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan.getExitBlocks().begin(),
399+
Plan.getExitBlocks().end());
400+
for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB)) {
401+
if (!ExitBlocks.contains(VPBB))
402+
VPBB->setParent(R);
403+
}
420404

421405
VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
422406
if (Succ)
@@ -466,7 +450,11 @@ void VPlanTransforms::introduceRegions(VPlan &Plan, Type *InductionTy,
466450

467451
VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
468452
VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader());
453+
BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock();
454+
auto *VPExitBlock = IRExitBlock ? Plan.getExitBlock(IRExitBlock) : nullptr;
469455
if (!RequiresScalarEpilogueCheck) {
456+
if (VPExitBlock)
457+
VPBlockUtils::disconnectBlocks(MiddleVPBB, VPExitBlock);
470458
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
471459
for (auto *EB : Plan.getExitBlocks()) {
472460
for (VPRecipeBase &R : *EB)
@@ -484,10 +472,7 @@ void VPlanTransforms::introduceRegions(VPlan &Plan, Type *InductionTy,
484472
// 2) If we require a scalar epilogue, there is no conditional branch as
485473
// we unconditionally branch to the scalar preheader. Do nothing.
486474
// 3) Otherwise, construct a runtime check.
487-
BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock();
488-
auto *VPExitBlock = Plan.getExitBlock(IRExitBlock);
489475
// The connection order corresponds to the operands of the conditional branch.
490-
VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB);
491476
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
492477

493478
auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

+24-12
Original file line numberDiff line numberDiff line change
@@ -2368,7 +2368,7 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
23682368

23692369
void VPlanTransforms::handleUncountableEarlyExit(
23702370
VPlan &Plan, ScalarEvolution &SE, Loop *OrigLoop,
2371-
BasicBlock *UncountableExitingBlock, VPRecipeBuilder &RecipeBuilder) {
2371+
BasicBlock *UncountableExitingBlock) {
23722372
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
23732373
auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting());
23742374
VPBuilder Builder(LatchVPBB->getTerminator());
@@ -2379,17 +2379,29 @@ void VPlanTransforms::handleUncountableEarlyExit(
23792379
// tracks if the uncountable early exit has been taken. Also split the middle
23802380
// block and have it conditionally branch to the early exit block if
23812381
// EarlyExitTaken.
2382-
auto *EarlyExitingBranch =
2383-
cast<BranchInst>(UncountableExitingBlock->getTerminator());
2384-
BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0);
2385-
BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1);
2386-
BasicBlock *EarlyExitIRBB =
2387-
!OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc;
2388-
VPIRBasicBlock *VPEarlyExitBlock = Plan.getExitBlock(EarlyExitIRBB);
2389-
2390-
VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask(
2391-
OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
2392-
auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond);
2382+
VPBasicBlock *EEB = nullptr;
2383+
for (auto *EB : Plan.getExitBlocks()) {
2384+
for (VPBlockBase *Pred : EB->getPredecessors()) {
2385+
if (Pred != MiddleVPBB) {
2386+
EEB = cast<VPBasicBlock>(Pred);
2387+
break;
2388+
}
2389+
}
2390+
}
2391+
2392+
VPBlockBase *TrueSucc = EEB->getSuccessors()[0];
2393+
VPBlockBase *FalseSucc = EEB->getSuccessors()[1];
2394+
auto *VPEarlyExitBlock =
2395+
cast<VPIRBasicBlock>(TrueSucc->getParent() ? FalseSucc : TrueSucc);
2396+
2397+
VPValue *EarlyExitCond = EEB->getTerminator()->getOperand(0);
2398+
auto *EarlyExitTakenCond = TrueSucc == VPEarlyExitBlock
2399+
? EarlyExitCond
2400+
: Builder.createNot(EarlyExitCond);
2401+
2402+
EEB->getTerminator()->eraseFromParent();
2403+
VPBlockUtils::disconnectBlocks(EEB, VPEarlyExitBlock);
2404+
23932405
IsEarlyExitTaken =
23942406
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
23952407

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

+1-2
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,7 @@ struct VPlanTransforms {
176176
/// if taken.
177177
static void handleUncountableEarlyExit(VPlan &Plan, ScalarEvolution &SE,
178178
Loop *OrigLoop,
179-
BasicBlock *UncountableExitingBlock,
180-
VPRecipeBuilder &RecipeBuilder);
179+
BasicBlock *UncountableExitingBlock);
181180

182181
/// Lower abstract recipes to concrete ones, that can be codegen'd.
183182
static void convertToConcreteRecipes(VPlan &Plan);

llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll

+4-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@ define void @foo(i64 %n) {
3131
; CHECK-NEXT: outer.latch:
3232
; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
3333
; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8>
34-
; CHECK-NEXT: Successor(s): vector.body
34+
; CHECK-NEXT: Successor(s): ir-bb<exit>, vector.body
35+
; CHECK-EMPTY:
36+
; CHECK-NEXT: ir-bb<exit>:
37+
; CHECK-NEXT: No successors
3538
; CHECK-NEXT: }
3639
entry:
3740
br label %outer.header

0 commit comments

Comments
 (0)