Skip to content

Commit 0f00a96

Browse files
authored
[VPlan] Simplify branch on False in VPlan transform (NFC). (#140409)
Simplify branch on false, starting with the branch from the middle block to the scalar preheader. Initially, this helps simplify the initial VPlan construction. Depends on #140405. PR: #140409
1 parent 29f79ea commit 0f00a96

File tree

6 files changed

+106
-79
lines changed

6 files changed

+106
-79
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2380,10 +2380,12 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
23802380

23812381
// We just connected a new block to the scalar preheader. Update all
23822382
// VPPhis by adding an incoming value for it, replicating the last value.
2383+
unsigned NumPredecessors = ScalarPH->getNumPredecessors();
23832384
for (VPRecipeBase &R : cast<VPBasicBlock>(ScalarPH)->phis()) {
2384-
auto *ResumePhi = cast<VPPhi>(&R);
2385-
ResumePhi->addOperand(
2386-
ResumePhi->getOperand(ResumePhi->getNumOperands() - 1));
2385+
assert(isa<VPPhi>(&R) && "Phi expected to be VPPhi");
2386+
assert(cast<VPPhi>(&R)->getNumIncoming() == NumPredecessors - 1 &&
2387+
"must have incoming values for all operands");
2388+
R.addOperand(R.getOperand(NumPredecessors - 2));
23872389
}
23882390
}
23892391

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,6 +1136,10 @@ class VPPhiAccessors {
11361136
return getAsRecipe()->getNumOperands();
11371137
}
11381138

1139+
/// Removes the incoming value for \p IncomingBlock, which must be a
1140+
/// predecessor.
1141+
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1142+
11391143
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
11401144
/// Print the recipe.
11411145
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
@@ -3545,14 +3549,13 @@ template <> struct CastIsPossible<VPPhiAccessors, const VPRecipeBase *> {
35453549
};
35463550
/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
35473551
/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
3548-
template <>
3549-
struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
3550-
: public CastIsPossible<VPPhiAccessors, const VPRecipeBase *> {
3552+
template <typename SrcTy>
3553+
struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
35513554

3552-
using Self = CastInfo<VPPhiAccessors, const VPRecipeBase *>;
3555+
using Self = CastInfo<VPPhiAccessors, SrcTy>;
35533556

35543557
/// doCast is used by cast<>.
3555-
static inline VPPhiAccessors *doCast(const VPRecipeBase *R) {
3558+
static inline VPPhiAccessors *doCast(SrcTy R) {
35563559
return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
35573560
switch (R->getVPDefID()) {
35583561
case VPDef::VPInstructionSC:
@@ -3568,12 +3571,18 @@ struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
35683571
}
35693572

35703573
/// doCastIfPossible is used by dyn_cast<>.
3571-
static inline VPPhiAccessors *doCastIfPossible(const VPRecipeBase *f) {
3574+
static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
35723575
if (!Self::isPossible(f))
35733576
return nullptr;
35743577
return doCast(f);
35753578
}
35763579
};
3580+
template <>
3581+
struct CastInfo<VPPhiAccessors, VPRecipeBase *>
3582+
: CastInfoVPPhiAccessors<VPRecipeBase *> {};
3583+
template <>
3584+
struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
3585+
: CastInfoVPPhiAccessors<const VPRecipeBase *> {};
35773586

35783587
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
35793588
/// holds a sequence of zero or more VPRecipe's each representing a sequence of

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 41 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -501,8 +501,10 @@ void VPlanTransforms::prepareForVectorization(
501501
cast<VPBasicBlock>(HeaderVPB),
502502
cast<VPBasicBlock>(LatchVPB), Range);
503503
HandledUncountableEarlyExit = true;
504+
} else {
505+
for (VPRecipeBase &R : EB->phis())
506+
cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
504507
}
505-
506508
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
507509
VPBlockUtils::disconnectBlocks(Pred, EB);
508510
}
@@ -526,32 +528,6 @@ void VPlanTransforms::prepareForVectorization(
526528
VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
527529
VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader());
528530

529-
// If needed, add a check in the middle block to see if we have completed
530-
// all of the iterations in the first vector loop. Three cases:
531-
// 1) If we require a scalar epilogue, there is no conditional branch as
532-
// we unconditionally branch to the scalar preheader. Remove the recipes
533-
// from the exit blocks.
534-
// 2) If (N - N%VF) == N, then we *don't* need to run the remainder.
535-
// Thus if tail is to be folded, we know we don't need to run the
536-
// remainder and we can set the condition to true.
537-
// 3) Otherwise, construct a runtime check.
538-
539-
if (!RequiresScalarEpilogueCheck) {
540-
if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor())
541-
VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB);
542-
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
543-
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
544-
Plan.getEntry()->swapSuccessors();
545-
546-
// The exit blocks are unreachable, remove their recipes to make sure no
547-
// users remain that may pessimize transforms.
548-
for (auto *EB : Plan.getExitBlocks()) {
549-
for (VPRecipeBase &R : make_early_inc_range(*EB))
550-
R.eraseFromParent();
551-
}
552-
return;
553-
}
554-
555531
// The connection order corresponds to the operands of the conditional branch,
556532
// with the middle block already connected to the exit block.
557533
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
@@ -561,21 +537,45 @@ void VPlanTransforms::prepareForVectorization(
561537
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
562538
Plan.getEntry()->swapSuccessors();
563539

564-
auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
565-
// Here we use the same DebugLoc as the scalar loop latch terminator instead
566-
// of the corresponding compare because they may have ended up with
567-
// different line numbers and we want to avoid awkward line stepping while
568-
// debugging. Eg. if the compare has got a line number inside the loop.
540+
// If MiddleVPBB has a single successor then the original loop does not exit
541+
// via the latch and the single successor must be the scalar preheader.
542+
// There's no need to add a runtime check to MiddleVPBB.
543+
if (MiddleVPBB->getNumSuccessors() == 1) {
544+
assert(MiddleVPBB->getSingleSuccessor() == ScalarPH &&
545+
"must have ScalarPH as single successor");
546+
return;
547+
}
548+
549+
assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
550+
551+
// Add a check in the middle block to see if we have completed all of the
552+
// iterations in the first vector loop.
553+
//
554+
// Three cases:
555+
// 1) If we require a scalar epilogue, the scalar ph must execute. Set the
556+
// condition to false.
557+
// 2) If (N - N%VF) == N, then we *don't* need to run the
558+
// remainder. Thus if tail is to be folded, we know we don't need to run
559+
// the remainder and we can set the condition to true.
560+
// 3) Otherwise, construct a runtime check.
561+
562+
// We use the same DebugLoc as the scalar loop latch terminator instead of
563+
// the corresponding compare because they may have ended up with different
564+
// line numbers and we want to avoid awkward line stepping while debugging.
565+
// E.g., if the compare has got a line number inside the loop.
566+
DebugLoc LatchDL = TheLoop->getLoopLatch()->getTerminator()->getDebugLoc();
569567
VPBuilder Builder(MiddleVPBB);
570-
VPValue *Cmp =
571-
TailFolded
572-
? Plan.getOrAddLiveIn(ConstantInt::getTrue(
573-
IntegerType::getInt1Ty(TripCount->getType()->getContext())))
574-
: Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
575-
&Plan.getVectorTripCount(),
576-
ScalarLatchTerm->getDebugLoc(), "cmp.n");
577-
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
578-
ScalarLatchTerm->getDebugLoc());
568+
VPValue *Cmp;
569+
if (!RequiresScalarEpilogueCheck)
570+
Cmp = Plan.getOrAddLiveIn(ConstantInt::getFalse(
571+
IntegerType::getInt1Ty(TripCount->getType()->getContext())));
572+
else if (TailFolded)
573+
Cmp = Plan.getOrAddLiveIn(ConstantInt::getTrue(
574+
IntegerType::getInt1Ty(TripCount->getType()->getContext())));
575+
else
576+
Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
577+
&Plan.getVectorTripCount(), LatchDL, "cmp.n");
578+
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
579579
}
580580

581581
void VPlanTransforms::createLoopRegions(VPlan &Plan) {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1185,6 +1185,14 @@ void VPIRPhi::execute(VPTransformState &State) {
11851185
State.Builder.SetInsertPoint(Phi->getParent(), std::next(Phi->getIterator()));
11861186
}
11871187

1188+
void VPPhiAccessors::removeIncomingValueFor(VPBlockBase *IncomingBlock) const {
1189+
VPRecipeBase *R = const_cast<VPRecipeBase *>(getAsRecipe());
1190+
assert(R->getNumOperands() == R->getParent()->getNumPredecessors() &&
1191+
"Number of phi operands must match number of predecessors");
1192+
unsigned Position = R->getParent()->getIndexForPredecessor(IncomingBlock);
1193+
R->removeOperand(Position);
1194+
}
1195+
11881196
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
11891197
void VPPhiAccessors::printPhiOperands(raw_ostream &O,
11901198
VPSlotTracker &SlotTracker) const {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 26 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1841,40 +1841,37 @@ void VPlanTransforms::truncateToMinimalBitwidths(
18411841
}
18421842
}
18431843

1844-
/// Remove BranchOnCond recipes with true conditions together with removing
1845-
/// dead edges to their successors.
1846-
static void removeBranchOnCondTrue(VPlan &Plan) {
1844+
/// Remove BranchOnCond recipes with true or false conditions together with
1845+
/// removing dead edges to their successors.
1846+
static void removeBranchOnConst(VPlan &Plan) {
18471847
using namespace llvm::VPlanPatternMatch;
18481848
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
18491849
vp_depth_first_shallow(Plan.getEntry()))) {
1850+
VPValue *Cond;
18501851
if (VPBB->getNumSuccessors() != 2 || VPBB == Plan.getEntry() ||
1851-
!match(&VPBB->back(), m_BranchOnCond(m_True())))
1852+
!match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
18521853
continue;
18531854

1854-
VPBasicBlock *RemovedSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
1855-
unsigned DeadIdx = RemovedSucc->getIndexForPredecessor(VPBB);
1856-
1857-
// Values coming from VPBB into ResumePhi recipes of RemoveSucc are removed
1858-
// from these recipes.
1859-
for (VPRecipeBase &R : make_early_inc_range(*RemovedSucc)) {
1860-
assert((!isa<VPIRInstruction>(&R) ||
1861-
!isa<PHINode>(cast<VPIRInstruction>(&R)->getInstruction())) &&
1862-
!isa<VPHeaderPHIRecipe>(&R) &&
1863-
"Cannot update VPIRInstructions wrapping phis or header phis yet");
1864-
auto *VPI = dyn_cast<VPPhi>(&R);
1865-
if (!VPI)
1866-
break;
1867-
VPBuilder B(VPI);
1868-
SmallVector<VPValue *> NewOperands;
1869-
// Create new operand list, with the dead incoming value filtered out.
1870-
for (const auto &[Idx, Op] : enumerate(VPI->operands())) {
1871-
if (Idx == DeadIdx)
1872-
continue;
1873-
NewOperands.push_back(Op);
1874-
}
1875-
VPI->replaceAllUsesWith(
1876-
B.createScalarPhi(NewOperands, VPI->getDebugLoc(), VPI->getName()));
1877-
VPI->eraseFromParent();
1855+
unsigned RemovedIdx;
1856+
if (match(Cond, m_True()))
1857+
RemovedIdx = 1;
1858+
else if (match(Cond, m_False()))
1859+
RemovedIdx = 0;
1860+
else
1861+
continue;
1862+
1863+
VPBasicBlock *RemovedSucc =
1864+
cast<VPBasicBlock>(VPBB->getSuccessors()[RemovedIdx]);
1865+
const auto &Preds = RemovedSucc->getPredecessors();
1866+
assert(count(Preds, VPBB) == 1 &&
1867+
"There must be a single edge between VPBB and its successor");
1868+
// Values coming from VPBB into phi recipes of RemovedSucc are removed from
1869+
// these recipes.
1870+
for (VPRecipeBase &R : RemovedSucc->phis()) {
1871+
auto *Phi = cast<VPPhiAccessors>(&R);
1872+
assert((!isa<VPIRPhi>(&R) || RemovedSucc->getNumPredecessors() == 1) &&
1873+
"VPIRPhis must have a single predecessor");
1874+
Phi->removeIncomingValueFor(VPBB);
18781875
}
18791876
// Disconnect blocks and remove the terminator. RemovedSucc will be deleted
18801877
// automatically on VPlan destruction if it becomes unreachable.
@@ -1894,7 +1891,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
18941891
runPass(legalizeAndOptimizeInductions, Plan);
18951892
runPass(removeRedundantExpandSCEVRecipes, Plan);
18961893
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
1897-
runPass(removeBranchOnCondTrue, Plan);
1894+
runPass(removeBranchOnConst, Plan);
18981895
runPass(removeDeadRecipes, Plan);
18991896

19001897
runPass(createAndOptimizeReplicateRegions, Plan);

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class VPSlotTracker;
3838
class VPUser;
3939
class VPRecipeBase;
4040
class VPInterleaveRecipe;
41+
class VPPhiAccessors;
4142

4243
// This is the base class of the VPlan Def/Use graph, used for modeling the data
4344
// flow into, within and out of the VPlan. VPValues can stand for live-ins
@@ -199,8 +200,18 @@ raw_ostream &operator<<(raw_ostream &OS, const VPRecipeBase &R);
199200
/// This class augments VPValue with operands which provide the inverse def-use
200201
/// edges from VPValue's users to their defs.
201202
class VPUser {
203+
/// Grant access to removeOperand for VPPhiAccessors, the only supported user.
204+
friend class VPPhiAccessors;
205+
202206
SmallVector<VPValue *, 2> Operands;
203207

208+
/// Removes the operand at index \p Idx. This also removes the VPUser from the
209+
/// use-list of the operand.
210+
void removeOperand(unsigned Idx) {
211+
getOperand(Idx)->removeUser(*this);
212+
Operands.erase(Operands.begin() + Idx);
213+
}
214+
204215
protected:
205216
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
206217
/// Print the operands to \p O.

0 commit comments

Comments
 (0)