Skip to content

[VPlan] Introduce all loop regions as VPlan transform. (NFC) #129402

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Apr 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9544,14 +9544,14 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
Range);
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG.
// Convert to VPlan-transform and consoliate all transforms for VPlan
// TODO: Convert to VPlan-transform and consolidate all transforms for VPlan
// creation.
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
HCFGBuilder.buildHierarchicalCFG();
HCFGBuilder.buildPlainCFG();

VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
CM.foldTailByMasking(), OrigLoop);
VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(),
PSE, RequiresScalarEpilogueCheck,
CM.foldTailByMasking(), OrigLoop);

// Don't use getDecisionAndClampRange here, because we don't know the UF
// so this function is better to be conservative, rather than to split
Expand Down Expand Up @@ -9851,10 +9851,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
HCFGBuilder.buildHierarchicalCFG();
HCFGBuilder.buildPlainCFG();

VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(),
PSE, true, false, OrigLoop);

for (ElementCount VF : Range)
Plan->addVF(VF);
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class SCEV;
class Type;
class VPBasicBlock;
class VPBuilder;
class VPDominatorTree;
class VPRegionBlock;
class VPlan;
class VPLane;
Expand Down Expand Up @@ -303,6 +304,13 @@ class VPBlockBase {
/// Remove all the successors of this block.
void clearSuccessors() { Successors.clear(); }

/// Swap predecessors of the block. The block must have exactly 2
/// predecessors.
void swapPredecessors() {
// Precondition check: swapping is only defined for exactly two predecessors.
assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
// Exchange the entries at positions 0 and 1 of the predecessor list; nothing
// else is modified by this function.
std::swap(Predecessors[0], Predecessors[1]);
}

/// Swap successors of the block. The block must have exactly 2 successors.
// TODO: This should be part of introducing conditional branch recipes rather
// than being independent.
Expand Down
103 changes: 78 additions & 25 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,88 @@
#include "LoopVectorizationPlanner.h"
#include "VPlan.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "VPlanTransforms.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"

using namespace llvm;

void VPlanTransforms::introduceTopLevelVectorLoopRegion(
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
// TODO: Generalize to introduce all loop regions.
auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
VPBlockUtils::disconnectBlocks(Plan.getEntry(), HeaderVPBB);
/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
/// has exactly 2 predecessors (preheader and latch), where the block
/// dominates the latch and the preheader dominates the block. If it is a
/// header block return true, making sure the preheader appears first and
/// the latch second. Otherwise return false.
static bool canonicalHeader(VPBlockBase *HeaderVPB,
const VPDominatorTree &VPDT) {
ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
if (Preds.size() != 2)
return false;

VPBasicBlock *OriginalLatch =
cast<VPBasicBlock>(HeaderVPBB->getSinglePredecessor());
VPBlockUtils::disconnectBlocks(OriginalLatch, HeaderVPBB);
VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
VPBlockUtils::connectBlocks(Plan.getEntry(), VecPreheader);
assert(OriginalLatch->getNumSuccessors() == 0 &&
"Plan should end at top level latch");
auto *PreheaderVPBB = Preds[0];
auto *LatchVPBB = Preds[1];
if (VPDT.dominates(PreheaderVPBB, HeaderVPB) &&
VPDT.dominates(HeaderVPB, LatchVPBB))
return true;

std::swap(PreheaderVPBB, LatchVPBB);

if (VPDT.dominates(PreheaderVPBB, HeaderVPB) &&
VPDT.dominates(HeaderVPB, LatchVPBB)) {
// Canonicalize predecessors of header so that preheader is first and latch
// second.
HeaderVPB->swapPredecessors();
for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
R.swapOperands();
return true;
}

return false;
}

/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
auto *LatchVPBB = HeaderVPB->getPredecessors()[1];

VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
assert(LatchVPBB->getNumSuccessors() <= 1 && "Latch has more than one successor");
VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();

?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks!

assert(LatchVPBB->getNumSuccessors() <= 1 &&
"Latch has more than one successor");
if (Succ)
VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);

auto *R = Plan.createVPRegionBlock(HeaderVPB, LatchVPBB, "",
false /*isReplicator*/);
R->setParent(HeaderVPB->getParent());
// All VPBB's reachable shallowly from HeaderVPB belong to top level loop,
// because VPlan is expected to end at top level latch disconnected above.
for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
VPBB->setParent(R);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about setting the parent of the newly introduced region itself?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks!


VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
if (Succ)
VPBlockUtils::connectBlocks(R, Succ);
}

void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck,
bool TailFolded, Loop *TheLoop) {
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
for (VPBlockBase *HeaderVPB : vp_depth_first_shallow(Plan.getEntry()))
if (canonicalHeader(HeaderVPB, VPDT))
createLoopRegion(Plan, HeaderVPB);

VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
auto *OrigExiting = TopRegion->getExiting();
VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting);
TopRegion->setExiting(LatchVPBB);
Comment on lines +93 to +96
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this additional latch block really needed, or can it be removed (independently)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is used only for convenience: when adjusting reductions, it is used to place selects there if needed. I'll look into removing it separately.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps worth a comment, along with explaining that this section assigns distinct names to the (just created) Top Region and its latch block, which is introduced as a convenience.
Can also reset the name of Top Region's header to "vector.body" here, instead of setting it during plain CFG construction?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks!

TopRegion->setName("vector loop");
TopRegion->getEntryBasicBlock()->setName("vector.body");

// Create SCEV and VPValue for the trip count.
// We use the symbolic max backedge-taken-count, which works also when
Expand All @@ -47,18 +109,9 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
Plan.setTripCount(
vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE));

// Create VPRegionBlock, with existing header and new empty latch block, to be
// filled.
VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
VPBlockUtils::insertBlockAfter(LatchVPBB, OriginalLatch);
auto *TopRegion = Plan.createVPRegionBlock(
HeaderVPBB, LatchVPBB, "vector loop", false /*isReplicator*/);
// All VPBB's reachable shallowly from HeaderVPBB belong to top level loop,
// because VPlan is expected to end at top level latch.
for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
VPBB->setParent(TopRegion);

VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader);
VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());

VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);

Expand Down
Loading
Loading