Skip to content

Commit 08b76d3

Browse files
committed
[VPlan] Introduce scalar loop header in plan, remove VPLiveOut.
Update VPlan to include the scalar loop header. This allows retiring VPLiveOut, as the remaining live-outs can now be handled by adding operands to the wrapped phis in the scalar loop header. Note that the current version includes only the scalar loop header: it does not model any other blocks of the scalar loop, and it does not wrap the header in a region block. These can be added either in this PR or in follow-ups, as needed.
1 parent 2c5dd03 commit 08b76d3

File tree

8 files changed

+54
-132
lines changed

8 files changed

+54
-132
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2929,10 +2929,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
29292929
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
29302930
}
29312931

2932-
// Fix live-out phis not already fixed earlier.
2933-
for (const auto &KV : Plan.getLiveOuts())
2934-
KV.second->fixPhi(Plan, State);
2935-
29362932
for (Instruction *PI : PredicatedInstructions)
29372933
sinkScalarOperands(&*PI);
29382934

@@ -8915,7 +8911,14 @@ static void addLiveOutsForFirstOrderRecurrences(
89158911
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
89168912
"scalar.recur.init");
89178913
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
8918-
Plan.addLiveOut(FORPhi, ResumePhiRecipe);
8914+
for (VPRecipeBase &R :
8915+
*cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
8916+
auto *IRI = cast<VPIRInstruction>(&R);
8917+
if (&IRI->getInstruction() == FORPhi) {
8918+
IRI->addOperand(ResumePhiRecipe);
8919+
break;
8920+
}
8921+
}
89198922

89208923
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
89218924
// Extract the penultimate value of the recurrence and use it as operand for

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -456,10 +456,17 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
456456
State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
457457
executeRecipes(State, getIRBasicBlock());
458458
if (getSingleSuccessor()) {
459-
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
460-
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
461-
Br->setOperand(0, nullptr);
462-
getIRBasicBlock()->getTerminator()->eraseFromParent();
459+
auto *SuccVPIRBB = dyn_cast<VPIRBasicBlock>(getSingleSuccessor());
460+
if (SuccVPIRBB && SuccVPIRBB->getIRBasicBlock() ==
461+
getIRBasicBlock()->getSingleSuccessor()) {
462+
cast<BranchInst>(getIRBasicBlock()->getTerminator())
463+
->setOperand(0, nullptr);
464+
} else {
465+
assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
466+
auto *Br = State->Builder.CreateBr(getIRBasicBlock());
467+
Br->setOperand(0, nullptr);
468+
getIRBasicBlock()->getTerminator()->eraseFromParent();
469+
}
463470
}
464471

465472
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -843,10 +850,6 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
843850
#endif
844851

845852
VPlan::~VPlan() {
846-
for (auto &KV : LiveOuts)
847-
delete KV.second;
848-
LiveOuts.clear();
849-
850853
if (Entry) {
851854
VPValue DummyValue;
852855
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -909,6 +912,9 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
909912
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
910913

911914
VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
915+
VPBasicBlock *ScalarHeader =
916+
VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader());
917+
VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader);
912918
if (!RequiresScalarEpilogueCheck) {
913919
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
914920
return Plan;
@@ -1058,6 +1064,8 @@ void VPlan::execute(VPTransformState *State) {
10581064
BrInst->insertBefore(MiddleBB->getTerminator());
10591065
MiddleBB->getTerminator()->eraseFromParent();
10601066
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
1067+
State->CFG.DTU.applyUpdates(
1068+
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
10611069

10621070
// Generate code in the loop pre-header and body.
10631071
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
@@ -1176,12 +1184,6 @@ void VPlan::print(raw_ostream &O) const {
11761184
Block->print(O, "", SlotTracker);
11771185
}
11781186

1179-
if (!LiveOuts.empty())
1180-
O << "\n";
1181-
for (const auto &KV : LiveOuts) {
1182-
KV.second->print(O, SlotTracker);
1183-
}
1184-
11851187
O << "}\n";
11861188
}
11871189

@@ -1218,11 +1220,6 @@ LLVM_DUMP_METHOD
12181220
void VPlan::dump() const { print(dbgs()); }
12191221
#endif
12201222

1221-
void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
1222-
assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
1223-
LiveOuts.insert({PN, new VPLiveOut(PN, V)});
1224-
}
1225-
12261223
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
12271224
DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
12281225
// Update the operands of all cloned recipes starting at NewEntry. This
@@ -1290,10 +1287,6 @@ VPlan *VPlan::duplicate() {
12901287
remapOperands(Preheader, NewPreheader, Old2NewVPValues);
12911288
remapOperands(Entry, NewEntry, Old2NewVPValues);
12921289

1293-
// Clone live-outs.
1294-
for (const auto &[_, LO] : LiveOuts)
1295-
NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
1296-
12971290
// Initialize remaining fields of cloned VPlan.
12981291
NewPlan->VFs = VFs;
12991292
NewPlan->UFs = UFs;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 0 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -655,48 +655,6 @@ class VPBlockBase {
655655
virtual VPBlockBase *clone() = 0;
656656
};
657657

658-
/// A value that is used outside the VPlan. The operand of the user needs to be
659-
/// added to the associated phi node. The incoming block from VPlan is
660-
/// determined by where the VPValue is defined: if it is defined by a recipe
661-
/// outside a region, its parent block is used, otherwise the middle block is
662-
/// used.
663-
class VPLiveOut : public VPUser {
664-
PHINode *Phi;
665-
666-
public:
667-
VPLiveOut(PHINode *Phi, VPValue *Op)
668-
: VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
669-
670-
static inline bool classof(const VPUser *U) {
671-
return U->getVPUserID() == VPUser::VPUserID::LiveOut;
672-
}
673-
674-
/// Fix the wrapped phi node. This means adding an incoming value to exit
675-
/// block phi's from the vector loop via middle block (values from scalar loop
676-
/// already reach these phi's), and updating the value to scalar header phi's
677-
/// from the scalar preheader.
678-
void fixPhi(VPlan &Plan, VPTransformState &State);
679-
680-
/// Returns true if the VPLiveOut uses scalars of operand \p Op.
681-
bool usesScalars(const VPValue *Op) const override {
682-
assert(is_contained(operands(), Op) &&
683-
"Op must be an operand of the recipe");
684-
return true;
685-
}
686-
687-
PHINode *getPhi() const { return Phi; }
688-
689-
/// Live-outs are marked as only using the first part during the transition
690-
/// to unrolling directly on VPlan.
691-
/// TODO: Remove after unroller transition.
692-
bool onlyFirstPartUsed(const VPValue *Op) const override { return true; }
693-
694-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
695-
/// Print the VPLiveOut to \p O.
696-
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
697-
#endif
698-
};
699-
700658
/// Struct to hold various analysis needed for cost computations.
701659
struct VPCostContext {
702660
const TargetTransformInfo &TTI;
@@ -3583,11 +3541,6 @@ class VPlan {
35833541
/// definitions are VPValues that hold a pointer to their underlying IR.
35843542
SmallVector<VPValue *, 16> VPLiveInsToFree;
35853543

3586-
/// Values used outside the plan. It contains live-outs that need fixing. Any
3587-
/// live-out that is fixed outside VPlan needs to be removed. The remaining
3588-
/// live-outs are fixed via VPLiveOut::fixPhi.
3589-
MapVector<PHINode *, VPLiveOut *> LiveOuts;
3590-
35913544
/// Mapping from SCEVs to the VPValues representing their expansions.
35923545
/// NOTE: This mapping is temporary and will be removed once all users have
35933546
/// been modeled in VPlan directly.
@@ -3767,12 +3720,6 @@ class VPlan {
37673720
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
37683721
}
37693722

3770-
void addLiveOut(PHINode *PN, VPValue *V);
3771-
3772-
const MapVector<PHINode *, VPLiveOut *> &getLiveOuts() const {
3773-
return LiveOuts;
3774-
}
3775-
37763723
VPValue *getSCEVExpansion(const SCEV *S) const {
37773724
return SCEVToExpansion.lookup(S);
37783725
}

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -208,35 +208,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
208208
}
209209
}
210210

211-
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
212-
VPValue *ExitValue = getOperand(0);
213-
VPBasicBlock *MiddleVPBB =
214-
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
215-
VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
216-
auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
217-
// Values leaving the vector loop reach live out phi's in the exiting block
218-
// via middle block.
219-
auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion()
220-
? MiddleVPBB
221-
: ExitingVPBB;
222-
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
223-
Value *V = State.get(ExitValue, VPLane(0));
224-
if (Phi->getBasicBlockIndex(PredBB) != -1)
225-
Phi->setIncomingValueForBlock(PredBB, V);
226-
else
227-
Phi->addIncoming(V, PredBB);
228-
}
229-
230-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
231-
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
232-
O << "Live-out ";
233-
getPhi()->printAsOperand(O);
234-
O << " = ";
235-
getOperand(0)->printAsOperand(O, SlotTracker);
236-
O << "\n";
237-
}
238-
#endif
239-
240211
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
241212
assert(!Parent && "Recipe already in some VPBasicBlock");
242213
assert(InsertPos->getParent() &&
@@ -860,7 +831,10 @@ void VPIRInstruction::execute(VPTransformState &State) {
860831
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
861832
Value *V = State.get(ExitValue, VPLane(Lane));
862833
auto *Phi = cast<PHINode>(&I);
863-
Phi->addIncoming(V, PredBB);
834+
if (Phi->getBasicBlockIndex(PredBB) == -1)
835+
Phi->addIncoming(V, PredBB);
836+
else
837+
Phi->setIncomingValueForBlock(PredBB, V);
864838
}
865839

866840
// Advance the insert point after the wrapped IR instruction. This allows

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
379379
// Don't fold the exit block of the Plan into its single predecessor for
380380
// now.
381381
// TODO: Remove restriction once more of the skeleton is modeled in VPlan.
382-
if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
382+
if (!VPBB->getParent())
383383
continue;
384384
auto *PredVPBB =
385385
dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,13 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
264264
return;
265265

266266
if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
267+
VPValue *Op0, *Op1;
268+
if (match(VPI, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(Op0),
269+
m_VPValue(Op1)))) {
270+
addUniformForAllParts(VPI);
271+
return;
272+
}
273+
267274
if (vputils::onlyFirstPartUsed(VPI)) {
268275
addUniformForAllParts(VPI);
269276
return;
@@ -449,11 +456,5 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
449456
Part++;
450457
}
451458

452-
// Remap the operand of live-outs to the last part.
453-
for (const auto &[_, LO] : Plan.getLiveOuts()) {
454-
VPValue *In = Unroller.getValueForPart(LO->getOperand(0), UF - 1);
455-
LO->setOperand(0, In);
456-
}
457-
458459
VPlanTransforms::removeDeadRecipes(Plan);
459460
}

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -244,14 +244,6 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
244244
return false;
245245
}
246246

247-
VPBlockBase *MiddleBB =
248-
IRBB->getPlan()->getVectorLoopRegion()->getSingleSuccessor();
249-
if (IRBB != IRBB->getPlan()->getPreheader() &&
250-
IRBB->getSinglePredecessor() != MiddleBB) {
251-
errs() << "VPIRBasicBlock can only be used as pre-header or a successor of "
252-
"middle-block at the moment!\n";
253-
return false;
254-
}
255247
return true;
256248
}
257249

@@ -416,12 +408,6 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
416408
return false;
417409
}
418410

419-
for (const auto &KV : Plan.getLiveOuts())
420-
if (KV.second->getNumOperands() != 1) {
421-
errs() << "live outs must have a single operand\n";
422-
return false;
423-
}
424-
425411
return true;
426412
}
427413

llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,6 +1077,17 @@ define void @merge_with_dead_gep_between_regions(i32 %n, ptr noalias %src, ptr n
10771077
; CHECK-NEXT: No successors
10781078
; CHECK-EMPTY:
10791079
; CHECK-NEXT: scalar.ph
1080+
; CHECK-NEXT: Successor(s): ir-bb<loop>
1081+
; CHECK-EMPTY:
1082+
; CHECK-NEXT: ir-bb<loop>:
1083+
; CHECK-NEXT: IR %iv = phi i32 [ %n, %entry ], [ %iv.next, %loop ]
1084+
; CHECK-NEXT: IR %iv.next = add nsw i32 %iv, -1
1085+
; CHECK-NEXT: IR %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
1086+
; CHECK-NEXT: IR %l = load i32, ptr %gep.src, align 16
1087+
; CHECK-NEXT: IR %dead_gep = getelementptr inbounds i32, ptr %dst, i64 1
1088+
; CHECK-NEXT: IR %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
1089+
; CHECK-NEXT: IR store i32 %l, ptr %gep.dst, align 16
1090+
; CHECK-NEXT: IR %ec = icmp eq i32 %iv.next, 0
10801091
; CHECK-NEXT: No successors
10811092
; CHECK-NEXT: }
10821093
;
@@ -1156,6 +1167,13 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
11561167
; CHECK-NEXT: No successors
11571168
; CHECK-EMPTY:
11581169
; CHECK-NEXT: scalar.ph:
1170+
; CHECK-NEXT: Successor(s): ir-bb<loop.header>
1171+
; CHECK-EMPTY:
1172+
; CHECK-NEXT: ir-bb<loop.header>:
1173+
; CHECK-NEXT: IR %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
1174+
; CHECK-NEXT: IR %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 -1
1175+
; CHECK-NEXT: IR %l = load i8, ptr %ptr.iv.next, align 1
1176+
; CHECK-NEXT: IR %c.1 = icmp eq i8 %l, 0
11591177
; CHECK-NEXT: No successors
11601178
; CHECK-NEXT: }
11611179
;

0 commit comments

Comments
 (0)