Skip to content

[LoopVectorize] Add initial support to VPRegionBlock for multiple successors #108563

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,11 @@ const VPRecipeBase *VPBasicBlock::getTerminator() const {
}

bool VPBasicBlock::isExiting() const {
return getParent() && getParent()->getExitingBasicBlock() == this;
const VPRegionBlock *VPRB = getParent();
if (!VPRB)
return false;
return VPRB->getExitingBasicBlock() == this ||
VPRB->getEarlyExiting() == this;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down Expand Up @@ -989,6 +993,9 @@ static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
/// Assumes a single pre-header basic-block was created for this. Introduce
/// additional basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
assert(!getVectorLoopRegion()->getEarlyExiting() &&
"Executing vplans with an early exit not yet supported");

// Initialize CFG state.
State->CFG.PrevVPBB = nullptr;
State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
Expand All @@ -1007,6 +1014,7 @@ void VPlan::execute(VPTransformState *State) {
BasicBlock *MiddleBB = State->CFG.ExitBB;
VPBasicBlock *MiddleVPBB =
cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());

// Find the VPBB for the scalar preheader, relying on the current structure
// when creating the middle block and its successrs: if there's a single
// predecessor, it must be the scalar preheader. Otherwise, the second
Expand Down
44 changes: 37 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -3236,21 +3236,33 @@ class VPIRBasicBlock : public VPBasicBlock {
};

/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
/// which form a Single-Entry-Single-Exiting or Single-Entry-Multiple-Exiting
/// subgraph of the output IR CFG. For the multiple-exiting case only a total
/// of two exits are currently supported and the early exit is tracked
/// separately. The first successor should always correspond to the normal
/// exiting block, i.e. vector latch -> middle.block. An optional second
/// successor corresponds to the early exit.
/// A VPRegionBlock may indicate that its contents are to be replicated several
/// times. This is designed to support predicated scalarization, in which a
/// scalar if-then code structure needs to be generated VF * UF times. Having
/// this replication indicator helps to keep a single model for multiple
/// candidate VF's. The actual replication takes place only once the desired VF
/// and UF have been determined.
/// TODO: The SEME case is a work in progress and any attempt to execute a
/// VPlan containing a region with multiple exits will assert.
class VPRegionBlock : public VPBlockBase {
/// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
/// Hold the Single Entry of the SESE/SEME region modelled by the
/// VPRegionBlock.
VPBlockBase *Entry;

/// Hold the Single Exiting block of the SESE region modelled by the
/// Hold the normal Exiting block of the SESE/SEME region modelled by the
/// VPRegionBlock.
VPBlockBase *Exiting;

/// Hold the Early Exiting block of the SEME region. If this is a SESE region
/// this value should be nullptr.
VPBlockBase *EarlyExiting;

/// An indicator whether this region is to generate multiple replicated
/// instances of output IR corresponding to its VPBlockBases.
bool IsReplicator;
Expand All @@ -3259,15 +3271,15 @@ class VPRegionBlock : public VPBlockBase {
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
const std::string &Name = "", bool IsReplicator = false)
: VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
IsReplicator(IsReplicator) {
EarlyExiting(nullptr), IsReplicator(IsReplicator) {
assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
Entry->setParent(this);
Exiting->setParent(this);
}
VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
: VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
IsReplicator(IsReplicator) {}
EarlyExiting(nullptr), IsReplicator(IsReplicator) {}

~VPRegionBlock() override {
if (Entry) {
Expand Down Expand Up @@ -3297,15 +3309,33 @@ class VPRegionBlock : public VPBlockBase {
const VPBlockBase *getExiting() const { return Exiting; }
VPBlockBase *getExiting() { return Exiting; }

/// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
/// ExitingBlock must have no successors.
/// Set \p ExitingBlock as the normal exiting VPBlockBase of this
/// VPRegionBlock. \p ExitingBlock must have no successors.
void setExiting(VPBlockBase *ExitingBlock) {
assert(ExitingBlock->getSuccessors().empty() &&
"Exit block cannot have successors.");
Exiting = ExitingBlock;
ExitingBlock->setParent(this);
}

/// Set \p EarlyExitingBlock as the early exiting VPBlockBase of this
/// VPRegionBlock. \p EarlyExitingBlock must have a successor, since
/// it cannot be the latch.
void setEarlyExiting(VPBlockBase *EarlyExitingBlock) {
assert(EarlyExitingBlock->getNumSuccessors() == 1 &&
"Early exit block must have a successor.");
assert(EarlyExitingBlock->getParent() == this &&
"Early exit block should already be in loop region");
EarlyExiting = EarlyExitingBlock;
}

const VPBlockBase *getEarlyExiting() const { return EarlyExiting; }
VPBlockBase *getEarlyExiting() { return EarlyExiting; }

/// Return the number of exiting blocks from this region. It should match
/// the number of successors.
unsigned getNumExitingBlocks() const { return EarlyExiting ? 2 : 1; }

/// Returns the pre-header VPBasicBlock of the loop region.
VPBasicBlock *getPreheaderVPBB() {
assert(!isReplicator() && "should only get pre-header of loop regions");
Expand Down
17 changes: 16 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,9 @@ static bool hasDuplicates(const SmallVectorImpl<VPBlockBase *> &VPBlockVec) {
bool VPlanVerifier::verifyBlock(const VPBlockBase *VPB) {
auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
// Check block's condition bit.
if (VPB->getNumSuccessors() > 1 ||
// NOTE: A VPRegionBlock can legally have multiple successors due to
// early exits from the loop.
if ((VPB->getNumSuccessors() > 1 && !isa<VPRegionBlock>(VPB)) ||
(VPBB && VPBB->getParent() && VPBB->isExiting() &&
!VPBB->getParent()->isReplicator())) {
if (!VPBB || !VPBB->getTerminator()) {
Expand All @@ -210,6 +212,19 @@ bool VPlanVerifier::verifyBlock(const VPBlockBase *VPB) {
return false;
}

// If this is a loop region with multiple successors it must have as many
// exiting blocks as successors, even if the original scalar loop only had a
// single exit block. That's because in the vector loop we always create a
// middle block for the vector latch exit, which is distinct from the early
// exit.
auto *VPRB = dyn_cast<VPRegionBlock>(VPB);
if (VPRB && VPRB->getNumExitingBlocks() != VPB->getNumSuccessors()) {
errs() << "Number of exiting blocks (" << VPRB->getNumExitingBlocks()
<< ") does not match number of successors ("
<< VPB->getNumSuccessors() << ")!\n";
return false;
}

for (const VPBlockBase *Succ : Successors) {
// There must be a bi-directional link between block and successor.
const auto &SuccPreds = Succ->getPredecessors();
Expand Down
133 changes: 133 additions & 0 deletions llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "../lib/Transforms/Vectorize/VPlanVerifier.h"
#include "../lib/Transforms/Vectorize/VPlan.h"
#include "../lib/Transforms/Vectorize/VPlanTransforms.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "gtest/gtest.h"
Expand All @@ -28,6 +29,10 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefSameBB) {
VPBasicBlock *VPBB2 = new VPBasicBlock();
VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1");
VPBlockUtils::connectBlocks(VPBB1, R1);

VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
VPBlockUtils::connectBlocks(R1, VPMIDDLE);

VPlan Plan(VPPH, &*TC, VPBB1);

#if GTEST_HAS_STREAM_REDIRECTION
Expand Down Expand Up @@ -59,6 +64,9 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) {
VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1");
VPBlockUtils::connectBlocks(VPBB1, R1);

VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
VPBlockUtils::connectBlocks(R1, VPMIDDLE);

auto TC = std::make_unique<VPValue>();
VPlan Plan(VPPH, &*TC, VPBB1);

Expand Down Expand Up @@ -102,6 +110,9 @@ TEST(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) {
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBB3->setParent(R1);

VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
VPBlockUtils::connectBlocks(R1, VPMIDDLE);

auto TC = std::make_unique<VPValue>();
VPlan Plan(VPPH, &*TC, VPBB1);

Expand Down Expand Up @@ -138,6 +149,9 @@ TEST(VPVerifierTest, DuplicateSuccessorsOutsideRegion) {
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBlockUtils::connectBlocks(VPBB1, R1);

VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
VPBlockUtils::connectBlocks(R1, VPMIDDLE);

auto TC = std::make_unique<VPValue>();
VPlan Plan(VPPH, &*TC, VPBB1);

Expand Down Expand Up @@ -175,6 +189,9 @@ TEST(VPVerifierTest, DuplicateSuccessorsInsideRegion) {
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBB3->setParent(R1);

VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
VPBlockUtils::connectBlocks(R1, VPMIDDLE);

auto TC = std::make_unique<VPValue>();
VPlan Plan(VPPH, &*TC, VPBB1);

Expand Down Expand Up @@ -204,6 +221,9 @@ TEST(VPVerifierTest, BlockOutsideRegionWithParent) {
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBB1->setParent(R1);

VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
VPBlockUtils::connectBlocks(R1, VPMIDDLE);

auto TC = std::make_unique<VPValue>();
VPlan Plan(VPPH, &*TC, VPBB1);

Expand All @@ -217,4 +237,117 @@ TEST(VPVerifierTest, BlockOutsideRegionWithParent) {
#endif
}

TEST(VPVerifierTest, LoopRegionMultipleSuccessors1) {
VPInstruction *TC = new VPInstruction(Instruction::Add, {});
VPBasicBlock *VPBBPH = new VPBasicBlock("preheader");
VPBBPH->appendRecipe(TC);

VPInstruction *TC2 = new VPInstruction(Instruction::Add, {});
VPBasicBlock *VPBBENTRY = new VPBasicBlock("entry");
VPBBENTRY->appendRecipe(TC2);

// Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(TC2, {});
VPInstruction *I1 = new VPInstruction(Instruction::Add, {});
VPInstruction *I2 = new VPInstruction(Instruction::Sub, {I1});
VPInstruction *I3 = new VPInstruction(VPInstruction::BranchOnCond, {I1});

VPBasicBlock *RBB1 = new VPBasicBlock();
RBB1->appendRecipe(CanonicalIVPHI);
RBB1->appendRecipe(I1);
RBB1->appendRecipe(I2);
RBB1->appendRecipe(I3);
RBB1->setName("bb1");

VPInstruction *I4 = new VPInstruction(Instruction::Mul, {I2, I1});
VPInstruction *I5 = new VPInstruction(VPInstruction::BranchOnCond, {I4});
VPBasicBlock *RBB2 = new VPBasicBlock();
RBB2->appendRecipe(I4);
RBB2->appendRecipe(I5);
RBB2->setName("bb2");

VPRegionBlock *R1 = new VPRegionBlock(RBB1, RBB2, "R1");
VPBlockUtils::connectBlocks(RBB1, RBB2);
VPBlockUtils::connectBlocks(VPBBENTRY, R1);

VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
VPBasicBlock *VPEARLYEXIT = new VPBasicBlock("early.exit");
VPBlockUtils::connectBlocks(R1, VPMIDDLE);
VPBlockUtils::connectBlocks(R1, VPEARLYEXIT);

VPlan Plan(VPBBPH, TC, VPBBENTRY);
Plan.setName("TestPlan");
Plan.addVF(ElementCount::getFixed(4));
Plan.getVectorLoopRegion()->setEarlyExiting(RBB1);

EXPECT_TRUE(verifyVPlanIsValid(Plan));
}

TEST(VPVerifierTest, LoopRegionMultipleSuccessors2) {
VPInstruction *TC = new VPInstruction(Instruction::Add, {});
VPBasicBlock *VPBBPH = new VPBasicBlock("preheader");
VPBBPH->appendRecipe(TC);

VPInstruction *TC2 = new VPInstruction(Instruction::Add, {});
VPBasicBlock *VPBBENTRY = new VPBasicBlock("entry");
VPBBENTRY->appendRecipe(TC2);

// We can't create a live-in without a VPlan, but we can't create
// a VPlan without the blocks. So we initialize this to a silly
// value here, then fix it up later.
auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(TC2, {});
VPInstruction *I1 = new VPInstruction(Instruction::Add, {});
VPInstruction *I2 = new VPInstruction(Instruction::Sub, {I1});
VPInstruction *I3 = new VPInstruction(VPInstruction::BranchOnCond, {I1});

VPBasicBlock *RBB1 = new VPBasicBlock();
RBB1->appendRecipe(CanonicalIVPHI);
RBB1->appendRecipe(I1);
RBB1->appendRecipe(I2);
RBB1->appendRecipe(I3);
RBB1->setName("vector.body");

// This really is what the vplan cfg looks like before optimising!
VPBasicBlock *RBB2 = new VPBasicBlock();
RBB2->setName("loop.inc");
// A block that inherits the latch name from the original scalar loop.

VPBasicBlock *RBB3 = new VPBasicBlock();
// No name

VPInstruction *I4 = new VPInstruction(Instruction::Mul, {I2, I1});
VPInstruction *I5 = new VPInstruction(VPInstruction::BranchOnCond, {I4});
VPBasicBlock *RBB4 = new VPBasicBlock();
RBB4->appendRecipe(I4);
RBB4->appendRecipe(I5);
RBB4->setName("vector.latch");

VPRegionBlock *R1 = new VPRegionBlock(RBB1, RBB4, "R1");
VPBlockUtils::insertBlockAfter(RBB2, RBB1);
VPBlockUtils::insertBlockAfter(RBB3, RBB2);
VPBlockUtils::insertBlockAfter(RBB4, RBB3);
VPBlockUtils::connectBlocks(VPBBENTRY, R1);

VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block");
VPBasicBlock *VPEARLYEXIT = new VPBasicBlock("early.exit");
VPBlockUtils::connectBlocks(R1, VPMIDDLE);
VPBlockUtils::connectBlocks(R1, VPEARLYEXIT);

VPlan Plan(VPBBPH, TC, VPBBENTRY);
Plan.setName("TestPlan");
Plan.addVF(ElementCount::getFixed(4));
Plan.getVectorLoopRegion()->setEarlyExiting(RBB1);

// Update the VPCanonicalIVPHIRecipe to have a live-in IR value.
LLVMContext C;
IntegerType *Int32 = IntegerType::get(C, 32);
Value *StartIV = PoisonValue::get(Int32);
CanonicalIVPHI->setStartValue(Plan.getOrAddLiveIn(StartIV));

EXPECT_TRUE(verifyVPlanIsValid(Plan));

VPlanTransforms::optimize(Plan, C);
EXPECT_TRUE(verifyVPlanIsValid(Plan));
}

} // namespace
Loading