Skip to content

Commit a3812ba

Browse files
committed
[VPlan] Introduce scalar loop header in plan, remove VPLiveOut.
Update VPlan to include the scalar loop header. This allows retiring VPLiveOut, as the remaining live-outs can now be handled by adding operands to the wrapped phis in the scalar loop header. Note that the current version only includes the scalar loop header: it does not include any other loop blocks, and it does not wrap the header in a region block. Either of these can be added in this PR or in follow-ups as needed.
1 parent 74e1554 commit a3812ba

26 files changed

+440
-205
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2969,10 +2969,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
29692969
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
29702970
}
29712971

2972-
// Fix live-out phis not already fixed earlier.
2973-
for (const auto &KV : Plan.getLiveOuts())
2974-
KV.second->fixPhi(Plan, State);
2975-
29762972
for (Instruction *PI : PredicatedInstructions)
29772973
sinkScalarOperands(&*PI);
29782974

@@ -8860,21 +8856,9 @@ static void addLiveOutsForFirstOrderRecurrences(
88608856
// Start by finding out if middle block branches to scalar preheader, which is
88618857
// not a VPIRBasicBlock, unlike Exit block - the other possible successor of
88628858
// middle block.
8863-
// TODO: Should be replaced by
8864-
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
8865-
// scalar region is modeled as well.
8866-
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
8867-
VPBasicBlock *ScalarPHVPBB = nullptr;
8868-
if (MiddleVPBB->getNumSuccessors() == 2) {
8869-
// Order is strict: first is the exit block, second is the scalar preheader.
8870-
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
8871-
} else if (ExitUsersToFix.empty()) {
8872-
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
8873-
} else {
8874-
llvm_unreachable("unsupported CFG in VPlan");
8875-
}
8876-
8859+
VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader();
88778860
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
8861+
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
88788862
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
88798863
VPValue *OneVPV = Plan.getOrAddLiveIn(
88808864
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
@@ -8961,7 +8945,14 @@ static void addLiveOutsForFirstOrderRecurrences(
89618945
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
89628946
"scalar.recur.init");
89638947
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
8964-
Plan.addLiveOut(FORPhi, ResumePhiRecipe);
8948+
for (VPRecipeBase &R :
8949+
*cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
8950+
auto *IRI = cast<VPIRInstruction>(&R);
8951+
if (&IRI->getInstruction() == FORPhi) {
8952+
IRI->addOperand(ResumePhiRecipe);
8953+
break;
8954+
}
8955+
}
89658956

89668957
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
89678958
// Extract the penultimate value of the recurrence and use it as operand for

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 30 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -456,10 +456,17 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
456456
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
457457
executeRecipes(State, getIRBasicBlock());
458458
if (getSingleSuccessor()) {
459-
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
460-
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
461-
Br->setOperand(0, nullptr);
462-
getIRBasicBlock()->getTerminator()->eraseFromParent();
459+
auto *SuccVPIRBB = dyn_cast<VPIRBasicBlock>(getSingleSuccessor());
460+
if (SuccVPIRBB && SuccVPIRBB->getIRBasicBlock() ==
461+
getIRBasicBlock()->getSingleSuccessor()) {
462+
cast<BranchInst>(getIRBasicBlock()->getTerminator())
463+
->setOperand(0, nullptr);
464+
} else {
465+
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
466+
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
467+
Br->setOperand(0, nullptr);
468+
getIRBasicBlock()->getTerminator()->eraseFromParent();
469+
}
463470
}
464471

465472
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -663,14 +670,16 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
663670
}
664671
#endif
665672

666-
static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
673+
static std::tuple<VPBlockBase *, VPBlockBase *, VPIRBasicBlock *>
674+
cloneFrom(VPBlockBase *Entry, VPIRBasicBlock *ScalarHeader = nullptr);
667675

668676
// Clone the CFG for all nodes reachable from \p Entry, this includes cloning
669677
// the blocks and their recipes. Operands of cloned recipes will NOT be updated.
670678
// Remapping of operands must be done separately. Returns a tuple with the new
671679
// entry and exiting blocks of the cloned region. If \p Entry isn't part of a
672680
// region, return nullptr for the exiting block. The third element is the clone
// of \p ScalarHeader, or nullptr if no \p ScalarHeader was passed.
673-
static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
681+
static std::tuple<VPBlockBase *, VPBlockBase *, VPIRBasicBlock *>
682+
cloneFrom(VPBlockBase *Entry, VPIRBasicBlock *ScalarHeader) {
674683
DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
675684
VPBlockBase *Exiting = nullptr;
676685
bool InRegion = Entry->getParent();
@@ -716,12 +725,14 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
716725
}
717726
#endif
718727

719-
return std::make_pair(Old2NewVPBlocks[Entry],
720-
Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
728+
return std::tuple(
729+
Old2NewVPBlocks[Entry], Exiting ? Old2NewVPBlocks[Exiting] : nullptr,
730+
ScalarHeader ? cast<VPIRBasicBlock>(Old2NewVPBlocks[ScalarHeader])
731+
: nullptr);
721732
}
722733

723734
VPRegionBlock *VPRegionBlock::clone() {
724-
const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
735+
const auto &[NewEntry, NewExiting, _] = cloneFrom(getEntry());
725736
auto *NewRegion =
726737
new VPRegionBlock(NewEntry, NewExiting, getName(), isReplicator());
727738
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
@@ -843,10 +854,6 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
843854
#endif
844855

845856
VPlan::~VPlan() {
846-
for (auto &KV : LiveOuts)
847-
delete KV.second;
848-
LiveOuts.clear();
849-
850857
if (Entry) {
851858
VPValue DummyValue;
852859
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -878,7 +885,9 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
878885
VPIRBasicBlock *Entry =
879886
VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader());
880887
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
881-
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
888+
VPIRBasicBlock *ScalarHeader =
889+
VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader());
890+
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader);
882891

883892
// Create SCEV and VPValue for the trip count.
884893

@@ -909,6 +918,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
909918
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
910919

911920
VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
921+
VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader);
912922
if (!RequiresScalarEpilogueCheck) {
913923
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
914924
return Plan;
@@ -1054,6 +1064,8 @@ void VPlan::execute(VPTransformState *State) {
10541064
BrInst->insertBefore(MiddleBB->getTerminator());
10551065
MiddleBB->getTerminator()->eraseFromParent();
10561066
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
1067+
State->CFG.DTU.applyUpdates(
1068+
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
10571069

10581070
// Generate code in the loop pre-header and body.
10591071
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -1172,12 +1184,6 @@ void VPlan::print(raw_ostream &O) const {
11721184
Block->print(O, "", SlotTracker);
11731185
}
11741186

1175-
if (!LiveOuts.empty())
1176-
O << "\n";
1177-
for (const auto &KV : LiveOuts) {
1178-
KV.second->print(O, SlotTracker);
1179-
}
1180-
11811187
O << "}\n";
11821188
}
11831189

@@ -1214,11 +1220,6 @@ LLVM_DUMP_METHOD
12141220
void VPlan::dump() const { print(dbgs()); }
12151221
#endif
12161222

1217-
void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
1218-
assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
1219-
LiveOuts.insert({PN, new VPLiveOut(PN, V)});
1220-
}
1221-
12221223
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
12231224
DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
12241225
// Update the operands of all cloned recipes starting at NewEntry. This
@@ -1260,10 +1261,12 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
12601261
VPlan *VPlan::duplicate() {
12611262
// Clone blocks.
12621263
VPBasicBlock *NewPreheader = Preheader->clone();
1263-
const auto &[NewEntry, __] = cloneFrom(Entry);
1264+
const auto &[NewEntry, __, NewScalarHeader] =
1265+
cloneFrom(Entry, getScalarHeader());
12641266

12651267
// Create VPlan, clone live-ins and remap operands in the cloned blocks.
1266-
auto *NewPlan = new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry));
1268+
auto *NewPlan =
1269+
new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry), NewScalarHeader);
12671270
DenseMap<VPValue *, VPValue *> Old2NewVPValues;
12681271
for (VPValue *OldLiveIn : VPLiveInsToFree) {
12691272
Old2NewVPValues[OldLiveIn] =
@@ -1286,10 +1289,6 @@ VPlan *VPlan::duplicate() {
12861289
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
12871290
remapOperands(Entry, NewEntry, Old2NewVPValues);
12881291

1289-
// Clone live-outs.
1290-
for (const auto &[_, LO] : LiveOuts)
1291-
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
1292-
12931292
// Initialize remaining fields of cloned VPlan.
12941293
NewPlan->VFs = VFs;
12951294
NewPlan->UFs = UFs;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 19 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -675,48 +675,6 @@ class VPBlockBase {
675675
virtual VPBlockBase *clone() = 0;
676676
};
677677

678-
/// A value that is used outside the VPlan. The operand of the user needs to be
679-
/// added to the associated phi node. The incoming block from VPlan is
680-
/// determined by where the VPValue is defined: if it is defined by a recipe
681-
/// outside a region, its parent block is used, otherwise the middle block is
682-
/// used.
683-
class VPLiveOut : public VPUser {
684-
PHINode *Phi;
685-
686-
public:
687-
VPLiveOut(PHINode *Phi, VPValue *Op)
688-
: VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
689-
690-
static inline bool classof(const VPUser *U) {
691-
return U->getVPUserID() == VPUser::VPUserID::LiveOut;
692-
}
693-
694-
/// Fix the wrapped phi node. This means adding an incoming value to exit
695-
/// block phi's from the vector loop via middle block (values from scalar loop
696-
/// already reach these phi's), and updating the value to scalar header phi's
697-
/// from the scalar preheader.
698-
void fixPhi(VPlan &Plan, VPTransformState &State);
699-
700-
/// Returns true if the VPLiveOut uses scalars of operand \p Op.
701-
bool usesScalars(const VPValue *Op) const override {
702-
assert(is_contained(operands(), Op) &&
703-
"Op must be an operand of the recipe");
704-
return true;
705-
}
706-
707-
PHINode *getPhi() const { return Phi; }
708-
709-
/// Live-outs are marked as only using the first part during the transition
710-
/// to unrolling directly on VPlan.
711-
/// TODO: Remove after unroller transition.
712-
bool onlyFirstPartUsed(const VPValue *Op) const override { return true; }
713-
714-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
715-
/// Print the VPLiveOut to \p O.
716-
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
717-
#endif
718-
};
719-
720678
/// Struct to hold various analysis needed for cost computations.
721679
struct VPCostContext {
722680
const TargetTransformInfo &TTI;
@@ -3592,6 +3550,9 @@ class VPlan {
35923550
/// rest of VPlan execution.
35933551
VPBasicBlock *Preheader;
35943552

3553+
/// VPIRBasicBlock wrapping the header of the original scalar loop.
3554+
VPIRBasicBlock *ScalarHeader;
3555+
35953556
/// Holds the VFs applicable to this VPlan.
35963557
SmallSetVector<ElementCount, 2> VFs;
35973558

@@ -3627,11 +3588,6 @@ class VPlan {
36273588
/// definitions are VPValues that hold a pointer to their underlying IR.
36283589
SmallVector<VPValue *, 16> VPLiveInsToFree;
36293590

3630-
/// Values used outside the plan. It contains live-outs that need fixing. Any
3631-
/// live-out that is fixed outside VPlan needs to be removed. The remaining
3632-
/// live-outs are fixed via VPLiveOut::fixPhi.
3633-
MapVector<PHINode *, VPLiveOut *> LiveOuts;
3634-
36353591
/// Mapping from SCEVs to the VPValues representing their expansions.
36363592
/// NOTE: This mapping is temporary and will be removed once all users have
36373593
/// been modeled in VPlan directly.
@@ -3642,22 +3598,26 @@ class VPlan {
36423598
/// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
36433599
/// be disconnected, as the bypass blocks between them are not yet modeled in
36443600
/// VPlan.
3645-
VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
3646-
: VPlan(Preheader, Entry) {
3601+
VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry,
3602+
VPIRBasicBlock *ScalarHeader)
3603+
: VPlan(Preheader, Entry, ScalarHeader) {
36473604
TripCount = TC;
36483605
}
36493606

36503607
/// Construct a VPlan with original preheader \p Preheader and \p Entry to
36513608
/// the plan. At the moment, \p Preheader and \p Entry need to be
36523609
/// disconnected, as the bypass blocks between them are not yet modeled in
36533610
/// VPlan. \p ScalarHeader wraps the header of the original scalar loop and
/// must not have any successors.
3654-
VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
3655-
: Entry(Entry), Preheader(Preheader) {
3611+
VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry,
3612+
VPIRBasicBlock *ScalarHeader)
3613+
: Entry(Entry), Preheader(Preheader), ScalarHeader(ScalarHeader) {
36563614
Entry->setPlan(this);
36573615
Preheader->setPlan(this);
36583616
assert(Preheader->getNumSuccessors() == 0 &&
36593617
Preheader->getNumPredecessors() == 0 &&
36603618
"preheader must be disconnected");
3619+
assert(ScalarHeader->getNumSuccessors() == 0 &&
3620+
"scalar header must be a leaf node");
36613621
}
36623622

36633623
~VPlan();
@@ -3689,6 +3649,14 @@ class VPlan {
36893649
VPBasicBlock *getEntry() { return Entry; }
36903650
const VPBasicBlock *getEntry() const { return Entry; }
36913651

3652+
/// Return the VPIRBasicBlock wrapping the header of the scalar loop.
3653+
VPIRBasicBlock *getScalarHeader() { return ScalarHeader; }
3654+
3655+
/// Return the VPBasicBlock for the preheader of the scalar loop.
3656+
VPBasicBlock *getScalarPreheader() {
3657+
return cast<VPBasicBlock>(ScalarHeader->getSinglePredecessor());
3658+
}
3659+
36923660
/// The trip count of the original loop.
36933661
VPValue *getTripCount() const {
36943662
assert(TripCount && "trip count needs to be set before accessing it");
@@ -3811,12 +3779,6 @@ class VPlan {
38113779
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
38123780
}
38133781

3814-
void addLiveOut(PHINode *PN, VPValue *V);
3815-
3816-
const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
3817-
return LiveOuts;
3818-
}
3819-
38203782
VPValue *getSCEVExpansion(const SCEV *S) const {
38213783
return SCEVToExpansion.lookup(S);
38223784
}

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -210,35 +210,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
210210
}
211211
}
212212

213-
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
214-
VPValue *ExitValue = getOperand(0);
215-
VPBasicBlock *MiddleVPBB =
216-
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
217-
VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
218-
auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
219-
// Values leaving the vector loop reach live out phi's in the exiting block
220-
// via middle block.
221-
auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion()
222-
? MiddleVPBB
223-
: ExitingVPBB;
224-
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
225-
Value *V = State.get(ExitValue, VPLane(0));
226-
if (Phi->getBasicBlockIndex(PredBB) != -1)
227-
Phi->setIncomingValueForBlock(PredBB, V);
228-
else
229-
Phi->addIncoming(V, PredBB);
230-
}
231-
232-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
233-
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
234-
O << "Live-out ";
235-
getPhi()->printAsOperand(O);
236-
O << " = ";
237-
getOperand(0)->printAsOperand(O, SlotTracker);
238-
O << "\n";
239-
}
240-
#endif
241-
242213
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
243214
assert(!Parent && "Recipe already in some VPBasicBlock");
244215
assert(InsertPos->getParent() &&
@@ -869,7 +840,10 @@ void VPIRInstruction::execute(VPTransformState &State) {
869840
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
870841
Value *V = State.get(ExitValue, VPLane(Lane));
871842
auto *Phi = cast<PHINode>(&I);
872-
Phi->addIncoming(V, PredBB);
843+
if (Phi->getBasicBlockIndex(PredBB) == -1)
844+
Phi->addIncoming(V, PredBB);
845+
else
846+
Phi->setIncomingValueForBlock(PredBB, V);
873847
}
874848

875849
// Advance the insert point after the wrapped IR instruction. This allows

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
379379
// Don't fold the exit block of the Plan into its single predecessor for
380380
// now.
381381
// TODO: Remove restriction once more of the skeleton is modeled in VPlan.
382-
if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
382+
if (!VPBB->getParent())
383383
continue;
384384
auto *PredVPBB =
385385
dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());

0 commit comments

Comments
 (0)