Skip to content

Commit 74d093d

Browse files
committed
[VPlan] Replace VPRegionBlock with explicit CFG before execute (NFCI).
Building on top of llvm#114305, replace VPRegionBlocks with explicit CFG before executing. This will enable further simplifications of phi handling during execution, as well as transformations that do not have to preserve the canonical IV required by loop regions. For example, this could include replacing the canonical IV with an EVL-based phi while completely removing the original canonical IV.
1 parent 65a5601 commit 74d093d

File tree

6 files changed

+128
-96
lines changed

6 files changed

+128
-96
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2368,12 +2368,6 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
23682368
// End if-block.
23692369
VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
23702370
bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
2371-
assert((Parent || all_of(RepRecipe->operands(),
2372-
[](VPValue *Op) {
2373-
return Op->isDefinedOutsideLoopRegions();
2374-
})) &&
2375-
"Expected a recipe is either within a region or all of its operands "
2376-
"are defined outside the vectorized region.");
23772371
if (IfPredicateInstr)
23782372
PredicatedInstructions.push_back(Cloned);
23792373
}
@@ -2969,8 +2963,8 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29692963
for (Instruction *PI : PredicatedInstructions)
29702964
sinkScalarOperands(&*PI);
29712965

2972-
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
2973-
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
2966+
VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
2967+
State.Plan->getVectorPreheader()->getSingleSuccessor());
29742968
BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB];
29752969

29762970
// Remove redundant induction instructions.
@@ -7764,7 +7758,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77647758
LLVMLoopVectorizeFollowupVectorized});
77657759

77667760
VPBasicBlock *HeaderVPBB =
7767-
BestVPlan.getVectorLoopRegion()->getEntryBasicBlock();
7761+
cast<VPBasicBlock>(BestVPlan.getVectorPreheader()->getSingleSuccessor());
77687762
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
77697763
if (VectorizedLoopID)
77707764
L->setLoopID(*VectorizedLoopID);

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 95 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -352,8 +352,8 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
352352
}
353353

354354
BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
355-
VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
356-
return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
355+
356+
return VPBB2IRBB[cast<VPBasicBlock>(R->getParent()->getPredecessors()[0])];
357357
}
358358

359359
void VPTransformState::addNewMetadata(Instruction *To,
@@ -425,13 +425,17 @@ void VPBasicBlock::connectToPredecessors(VPTransformState::CFGState &CFG) {
425425
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
426426
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
427427
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
428+
if (!PredBB)
429+
continue;
428430

429431
assert(PredBB && "Predecessor basic-block not found building successor.");
430432
auto *PredBBTerminator = PredBB->getTerminator();
431433
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
432434

433435
auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
434436
if (isa<UnreachableInst>(PredBBTerminator)) {
437+
if (PredVPSuccessors.size() == 2)
438+
continue;
435439
assert(PredVPSuccessors.size() == 1 &&
436440
"Predecessor ending w/o branch must have single successor.");
437441
DebugLoc DL = PredBBTerminator->getDebugLoc();
@@ -480,6 +484,21 @@ void VPBasicBlock::execute(VPTransformState *State) {
480484
bool Replica = bool(State->Lane);
481485
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
482486

487+
if (isHeader()) {
488+
// Create and register the new vector loop.
489+
State->CurrentVectorLoop = State->LI->AllocateLoop();
490+
BasicBlock *VectorPH =
491+
State->CFG.VPBB2IRBB[cast<VPBasicBlock>(getPredecessors()[0])];
492+
Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
493+
494+
// Insert the new loop into the loop nest and register the new basic blocks
495+
// before calling any utilities such as SCEV that require valid LoopInfo.
496+
if (ParentLoop)
497+
ParentLoop->addChildLoop(State->CurrentVectorLoop);
498+
else
499+
State->LI->addTopLevelLoop(State->CurrentVectorLoop);
500+
}
501+
483502
auto IsReplicateRegion = [](VPBlockBase *BB) {
484503
auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
485504
return R && R->isReplicator();
@@ -718,37 +737,13 @@ void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
718737
}
719738

720739
void VPRegionBlock::execute(VPTransformState *State) {
721-
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
722-
RPOT(Entry);
723-
724-
if (!isReplicator()) {
725-
// Create and register the new vector loop.
726-
Loop *PrevLoop = State->CurrentVectorLoop;
727-
State->CurrentVectorLoop = State->LI->AllocateLoop();
728-
BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
729-
Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
730-
731-
// Insert the new loop into the loop nest and register the new basic blocks
732-
// before calling any utilities such as SCEV that require valid LoopInfo.
733-
if (ParentLoop)
734-
ParentLoop->addChildLoop(State->CurrentVectorLoop);
735-
else
736-
State->LI->addTopLevelLoop(State->CurrentVectorLoop);
737-
738-
// Visit the VPBlocks connected to "this", starting from it.
739-
for (VPBlockBase *Block : RPOT) {
740-
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
741-
Block->execute(State);
742-
}
743-
744-
State->CurrentVectorLoop = PrevLoop;
745-
return;
746-
}
747-
740+
assert(isReplicator() &&
741+
"Loop regions should have been lowered to plain CFG");
748742
assert(!State->Lane && "Replicating a Region with non-null instance.");
749-
750-
// Enter replicating mode.
751743
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
744+
745+
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
746+
Entry);
752747
State->Lane = VPLane(0);
753748
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
754749
++Lane) {
@@ -823,6 +818,26 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
823818
}
824819
#endif
825820

821+
void VPRegionBlock::removeRegion() {
822+
auto *Header = cast<VPBasicBlock>(getEntry());
823+
VPBlockBase *Preheader = getSinglePredecessor();
824+
auto *Exiting = cast<VPBasicBlock>(getExiting());
825+
826+
VPBlockBase *Middle = getSingleSuccessor();
827+
VPBlockUtils::disconnectBlocks(Preheader, this);
828+
VPBlockUtils::disconnectBlocks(this, Middle);
829+
830+
for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
831+
VPB->setParent(nullptr);
832+
833+
VPBlockUtils::connectBlocks(Preheader, Header);
834+
VPBlockUtils::connectBlocks(Exiting, Middle);
835+
836+
// Set LoopRegion's Entry to nullptr, as the CFG from LoopRegion shouldn't
837+
// be deleted when the region is deleted.
838+
Entry = nullptr;
839+
}
840+
826841
VPlan::~VPlan() {
827842
if (Entry) {
828843
VPValue DummyValue;
@@ -1032,50 +1047,55 @@ void VPlan::execute(VPTransformState *State) {
10321047
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
10331048
Block->execute(State);
10341049

1035-
VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
1036-
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1037-
10381050
// Fix the latch value of canonical, reduction and first-order recurrences
10391051
// phis in the vector loop.
1040-
VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
1041-
for (VPRecipeBase &R : Header->phis()) {
1042-
// Skip phi-like recipes that generate their backedege values themselves.
1043-
if (isa<VPWidenPHIRecipe>(&R))
1052+
for (VPBasicBlock *Header :
1053+
VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry))) {
1054+
if (!Header->isHeader())
10441055
continue;
1056+
for (VPRecipeBase &R : Header->phis()) {
1057+
VPBasicBlock *LatchVPBB =
1058+
cast<VPBasicBlock>(Header->getPredecessors()[1]);
1059+
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
10451060

1046-
if (isa<VPWidenPointerInductionRecipe>(&R) ||
1047-
isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1048-
PHINode *Phi = nullptr;
1049-
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1050-
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1051-
} else {
1052-
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1053-
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1054-
"recipe generating only scalars should have been replaced");
1055-
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1056-
Phi = cast<PHINode>(GEP->getPointerOperand());
1057-
}
1058-
1059-
Phi->setIncomingBlock(1, VectorLatchBB);
1061+
// Skip phi-like recipes that generate their backedge values themselves.
1062+
if (isa<VPWidenPHIRecipe>(&R))
1063+
continue;
10601064

1061-
// Move the last step to the end of the latch block. This ensures
1062-
// consistent placement of all induction updates.
1063-
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1064-
Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
1065+
if (isa<VPWidenPointerInductionRecipe>(&R) ||
1066+
isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1067+
PHINode *Phi = nullptr;
1068+
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1069+
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1070+
} else {
1071+
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1072+
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1073+
"recipe generating only scalars should have been replaced");
1074+
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1075+
Phi = cast<PHINode>(GEP->getPointerOperand());
1076+
}
1077+
1078+
Phi->setIncomingBlock(1, VectorLatchBB);
1079+
1080+
// Move the last step to the end of the latch block. This ensures
1081+
// consistent placement of all induction updates.
1082+
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1083+
Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
1084+
1085+
// Use the steps for the last part as backedge value for the induction.
1086+
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1087+
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1088+
continue;
1089+
}
10651090

1066-
// Use the steps for the last part as backedge value for the induction.
1067-
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1068-
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1069-
continue;
1091+
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1092+
bool NeedsScalar = isa<VPScalarPHIRecipe>(PhiR) ||
1093+
(isa<VPReductionPHIRecipe>(PhiR) &&
1094+
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1095+
Value *Phi = State->get(PhiR, NeedsScalar);
1096+
Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
1097+
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
10701098
}
1071-
1072-
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1073-
bool NeedsScalar = isa<VPScalarPHIRecipe>(PhiR) ||
1074-
(isa<VPReductionPHIRecipe>(PhiR) &&
1075-
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1076-
Value *Phi = State->get(PhiR, NeedsScalar);
1077-
Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
1078-
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
10791099
}
10801100

10811101
State->CFG.DTU.flush();
@@ -1417,8 +1437,13 @@ void VPlanIngredient::print(raw_ostream &O) const {
14171437
#endif
14181438

14191439
bool VPValue::isDefinedOutsideLoopRegions() const {
1420-
return !hasDefiningRecipe() ||
1421-
!getDefiningRecipe()->getParent()->getEnclosingLoopRegion();
1440+
auto *DefR = getDefiningRecipe();
1441+
if (!DefR)
1442+
return true;
1443+
1444+
const VPBasicBlock *DefVPBB = DefR->getParent();
1445+
auto *Plan = DefVPBB->getPlan();
1446+
return DefVPBB == Plan->getPreheader() || DefVPBB == Plan->getEntry();
14221447
}
14231448

14241449
void VPValue::replaceAllUsesWith(VPValue *New) {

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3585,6 +3585,8 @@ class VPBasicBlock : public VPBlockBase {
35853585
return NewBlock;
35863586
}
35873587

3588+
bool isHeader() { return any_of(phis(), IsaPred<VPHeaderPHIRecipe>); }
3589+
35883590
protected:
35893591
/// Execute the recipes in the IR basic block \p BB.
35903592
void executeRecipes(VPTransformState *State, BasicBlock *BB);
@@ -3742,6 +3744,10 @@ class VPRegionBlock : public VPBlockBase {
37423744
/// Clone all blocks in the single-entry single-exit region of the block and
37433745
/// their recipes without updating the operands of the cloned recipes.
37443746
VPRegionBlock *clone() override;
3747+
3748+
/// Remove the current region from its VPlan, connecting its predecessor to
3749+
/// its entry and exiting block to its successor.
3750+
void removeRegion();
37453751
};
37463752

37473753
/// VPlan models a candidate for vectorization, encoding various decisions take
@@ -3875,10 +3881,10 @@ class VPlan {
38753881
/// whether to execute the scalar tail loop or the exit block from the loop
38763882
/// latch.
38773883
const VPBasicBlock *getMiddleBlock() const {
3878-
return cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
3884+
return cast<VPBasicBlock>(getScalarPreheader()->getSinglePredecessor());
38793885
}
38803886
VPBasicBlock *getMiddleBlock() {
3881-
return cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
3887+
return cast<VPBasicBlock>(getScalarPreheader()->getSinglePredecessor());
38823888
}
38833889

38843890
/// Return an iterator range over the VPIRBasicBlock wrapping the exit blocks
@@ -4000,9 +4006,7 @@ class VPlan {
40004006
}
40014007

40024008
/// Returns the preheader of the vector loop region.
4003-
VPBasicBlock *getVectorPreheader() {
4004-
return cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor());
4005-
}
4009+
VPBasicBlock *getVectorPreheader() { return cast<VPBasicBlock>(getEntry()); }
40064010

40074011
/// Returns the canonical induction recipe of the vector loop.
40084012
VPCanonicalIVPHIRecipe *getCanonicalIV() {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -491,11 +491,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
491491
CondBr->setSuccessor(0, nullptr);
492492
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
493493

494-
if (!getParent()->isExiting())
494+
VPBasicBlock *Header = cast<VPBasicBlock>(getParent()->getSuccessors()[1]);
495+
if (!State.CFG.VPBB2IRBB.contains(Header))
495496
return CondBr;
496497

497-
VPRegionBlock *ParentRegion = getParent()->getParent();
498-
VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
499498
CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
500499
return CondBr;
501500
}
@@ -506,9 +505,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
506505
Value *Cond = Builder.CreateICmpEQ(IV, TC);
507506

508507
// Now create the branch.
509-
auto *Plan = getParent()->getPlan();
510-
VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
511-
VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
508+
VPBasicBlock *Header = cast<VPBasicBlock>(getParent()->getSuccessors()[1]);
512509

513510
// Replace the temporary unreachable terminator with a new conditional
514511
// branch, hooking it up to backward destination (the header) now and to the
@@ -3150,9 +3147,6 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
31503147
PHINode *NewPointerPhi = nullptr;
31513148
if (CurrentPart == 0) {
31523149
auto *IVR = cast<VPHeaderPHIRecipe>(&getParent()
3153-
->getPlan()
3154-
->getVectorLoopRegion()
3155-
->getEntryBasicBlock()
31563150
->front());
31573151
PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, /*IsScalar*/ true));
31583152
NewPointerPhi = PHINode::Create(ScStValueType, 2, "pointer.phi",

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1802,8 +1802,22 @@ void VPlanTransforms::createInterleaveGroups(
18021802
}
18031803

18041804
void VPlanTransforms::prepareToExecute(VPlan &Plan) {
1805-
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
1806-
Plan.getVectorLoopRegion());
1805+
// Replace loop regions with explicit CFG.
1806+
SmallVector<VPRegionBlock *> LoopRegions;
1807+
for (VPRegionBlock *R : VPBlockUtils::blocksOnly<VPRegionBlock>(
1808+
vp_depth_first_deep(Plan.getEntry()))) {
1809+
if (!R->isReplicator())
1810+
LoopRegions.push_back(R);
1811+
}
1812+
for (VPRegionBlock *R : LoopRegions) {
1813+
VPBlockBase *Header = R->getEntry();
1814+
VPBlockBase *Latch = R->getExiting();
1815+
R->removeRegion();
1816+
// Add explicit backedge.
1817+
VPBlockUtils::connectBlocks(Latch, Header);
1818+
delete R;
1819+
}
1820+
18071821
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
18081822
vp_depth_first_deep(Plan.getEntry()))) {
18091823
for (VPRecipeBase &R : make_early_inc_range(VPBB->phis())) {

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ struct VPlanTransforms {
124124
/// Remove dead recipes from \p Plan.
125125
static void removeDeadRecipes(VPlan &Plan);
126126

127-
/// Lower abstract recipes to concrete ones, that can be codegen'd.
127+
/// Lower abstract recipes to concrete ones that can be codegen'd, and replace
128+
/// loop regions with explicit CFG.
128129
static void prepareToExecute(VPlan &Plan);
129130
};
130131

0 commit comments

Comments
 (0)