-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[VPlan] Invert condition if needed when creating inner regions. #132292
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) ChangesAs pointed out by @iamlouk in #129402, the current code doesn't handle latches with different successor orders correctly. Introduce a Depends on #129402 Full diff: https://github.com/llvm/llvm-project/pull/132292.diff 9 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0d2d5e1c312a1..99203ac0e1acd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9333,10 +9333,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
Range);
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG.
- // Convert to VPlan-transform and consoliate all transforms for VPlan
+ // TODO: Convert to VPlan-transform and consoliate all transforms for VPlan
// creation.
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
@@ -9640,7 +9640,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f58f0290b5fa9..56a68e024cb94 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -14,12 +14,50 @@
#include "LoopVectorizationPlanner.h"
#include "VPlan.h"
#include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
#include "VPlanTransforms.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;
+/// Introduce VPRegionBlocks for each loop modeled using a plain CFG in \p Plan.
+static void introduceInnerLoopRegions(VPlan &Plan) {
+ VPDominatorTree VPDT;
+ VPDT.recalculate(Plan);
+
+ for (VPBlockBase *HeaderVPBB :
+ vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) {
+ if (HeaderVPBB->getNumPredecessors() != 2)
+ continue;
+ VPBlockBase *PreheaderVPBB = HeaderVPBB->getPredecessors()[0];
+ VPBlockBase *LatchVPBB = HeaderVPBB->getPredecessors()[1];
+ if (!VPDT.dominates(HeaderVPBB, LatchVPBB))
+ continue;
+ assert(VPDT.dominates(PreheaderVPBB, HeaderVPBB) &&
+ "preheader must dominate header");
+ if (LatchVPBB->getSuccessors()[1] != HeaderVPBB) {
+ auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
+ auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
+ Not->insertBefore(Term);
+ Term->setOperand(0, Not);
+ }
+
+ VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB);
+ VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB);
+ VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
+ VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
+
+ auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "",
+ false /*isReplicator*/);
+ for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
+ VPBB->setParent(R);
+
+ VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
+ VPBlockUtils::connectBlocks(R, Succ);
+ }
+}
+
void VPlanTransforms::introduceTopLevelVectorLoopRegion(
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
@@ -98,4 +136,6 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
ScalarLatchTerm->getDebugLoc(), "cmp.n");
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
ScalarLatchTerm->getDebugLoc());
+
+ introduceInnerLoopRegions(Plan);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 4b8a2420b3037..5e31b09bcd7d3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -12,9 +12,7 @@
/// components and steps:
//
/// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that
-/// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top
-/// Region) is created to enclose and serve as parent of all the VPBasicBlocks
-/// in the plain CFG.
+/// faithfully represents the CFG in the incoming IR.
/// NOTE: At this point, there is a direct correspondence between all the
/// VPBasicBlocks created for the initial plain CFG and the incoming
/// BasicBlocks. However, this might change in the future.
@@ -57,12 +55,8 @@ class PlainCFGBuilder {
// Hold phi node's that need to be fixed once the plain CFG has been built.
SmallVector<PHINode *, 8> PhisToFix;
- /// Maps loops in the original IR to their corresponding region.
- DenseMap<Loop *, VPRegionBlock *> Loop2Region;
-
// Utility functions.
void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
- void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB);
void fixHeaderPhis();
VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
#ifndef NDEBUG
@@ -83,25 +77,6 @@ class PlainCFGBuilder {
// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
// must have no predecessors.
void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
- auto GetLatchOfExit = [this](BasicBlock *BB) -> BasicBlock * {
- auto *SinglePred = BB->getSinglePredecessor();
- Loop *LoopForBB = LI->getLoopFor(BB);
- if (!SinglePred || LI->getLoopFor(SinglePred) == LoopForBB)
- return nullptr;
- // The input IR must be in loop-simplify form, ensuring a single predecessor
- // for exit blocks.
- assert(SinglePred == LI->getLoopFor(SinglePred)->getLoopLatch() &&
- "SinglePred must be the only loop latch");
- return SinglePred;
- };
- if (auto *LatchBB = GetLatchOfExit(BB)) {
- auto *PredRegion = getOrCreateVPBB(LatchBB)->getParent();
- assert(VPBB == cast<VPBasicBlock>(PredRegion->getSingleSuccessor()) &&
- "successor must already be set for PredRegion; it must have VPBB "
- "as single successor");
- VPBB->setPredecessors({PredRegion});
- return;
- }
// Collect VPBB predecessors.
SmallVector<VPBlockBase *, 2> VPBBPreds;
for (BasicBlock *Pred : predecessors(BB))
@@ -113,13 +88,6 @@ static bool isHeaderBB(BasicBlock *BB, Loop *L) {
return L && BB == L->getHeader();
}
-void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region,
- BasicBlock *BB) {
- // BB is a loop header block. Connect the region to the loop preheader.
- Loop *LoopOfBB = LI->getLoopFor(BB);
- Region->setPredecessors({getOrCreateVPBB(LoopOfBB->getLoopPredecessor())});
-}
-
// Add operands to VPInstructions representing phi nodes from the input IR.
void PlainCFGBuilder::fixHeaderPhis() {
for (auto *Phi : PhisToFix) {
@@ -150,19 +118,6 @@ static bool isHeaderVPBB(VPBasicBlock *VPBB) {
return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB;
}
-/// Return true of \p L loop is contained within \p OuterLoop.
-static bool doesContainLoop(const Loop *L, const Loop *OuterLoop) {
- if (L->getLoopDepth() < OuterLoop->getLoopDepth())
- return false;
- const Loop *P = L;
- while (P) {
- if (P == OuterLoop)
- return true;
- P = P->getParentLoop();
- }
- return false;
-}
-
// Create a new empty VPBasicBlock for an incoming BasicBlock in the region
// corresponding to the containing loop or retrieve an existing one if it was
// already created. If no region exists yet for the loop containing \p BB, a new
@@ -178,28 +133,6 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name);
BB2VPBB[BB] = VPBB;
-
- // Get or create a region for the loop containing BB, except for the top
- // region of TheLoop which is created later.
- Loop *LoopOfBB = LI->getLoopFor(BB);
- if (!LoopOfBB || LoopOfBB == TheLoop || !doesContainLoop(LoopOfBB, TheLoop))
- return VPBB;
-
- auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB);
- if (!isHeaderBB(BB, LoopOfBB)) {
- assert(RegionOfVPBB &&
- "Region should have been created by visiting header earlier");
- VPBB->setParent(RegionOfVPBB);
- return VPBB;
- }
-
- assert(!RegionOfVPBB &&
- "First visit of a header basic block expects to register its region.");
- // Handle a header - take care of its Region.
- RegionOfVPBB = Plan.createVPRegionBlock(Name.str(), false /*isReplicator*/);
- RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]);
- RegionOfVPBB->setEntry(VPBB);
- Loop2Region[LoopOfBB] = RegionOfVPBB;
return VPBB;
}
@@ -376,15 +309,13 @@ void PlainCFGBuilder::buildPlainCFG(
for (BasicBlock *BB : RPO) {
// Create or retrieve the VPBasicBlock for this BB.
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
- VPRegionBlock *Region = VPBB->getParent();
Loop *LoopForBB = LI->getLoopFor(BB);
// Set VPBB predecessors in the same order as they are in the incoming BB.
if (!isHeaderBB(BB, LoopForBB)) {
setVPBBPredsFromBB(VPBB, BB);
- } else if (Region) {
- // BB is a loop header and there's a corresponding region, set the
- // predecessor for it.
- setRegionPredsFromBB(Region, BB);
+ } else if (LoopForBB != TheLoop) {
+ VPBB->setPredecessors({getOrCreateVPBB(LoopForBB->getLoopPredecessor()),
+ getOrCreateVPBB(LoopForBB->getLoopLatch())});
}
// Create VPInstructions for BB.
@@ -423,21 +354,11 @@ void PlainCFGBuilder::buildPlainCFG(
BasicBlock *IRSucc1 = BI->getSuccessor(1);
VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
- if (BB == LoopForBB->getLoopLatch()) {
- // For a latch we need to set the successor of the region rather than that
- // of VPBB and it should be set to the exit, i.e., non-header successor,
- // except for the top region, which is handled elsewhere.
- assert(LoopForBB != TheLoop &&
- "Latch of the top region should have been handled earlier");
- Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1
- : Successor0);
- Region->setExiting(VPBB);
- continue;
- }
- // Don't connect any blocks outside the current loop except the latch for
- // now. The latch is handled above.
- if (LoopForBB) {
+ // Don't connect any blocks outside the current loop except the latch, which
+ // is handled below.
+ if (LoopForBB &&
+ (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) {
if (!LoopForBB->contains(IRSucc0)) {
VPBB->setOneSuccessor(Successor1);
continue;
@@ -461,16 +382,11 @@ void PlainCFGBuilder::buildPlainCFG(
for (const auto &[IRBB, VPB] : BB2VPBB)
VPB2IRBB[VPB] = IRBB;
+
+ LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan);
}
void VPlanHCFGBuilder::buildPlainCFG() {
PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
PCFGBuilder.buildPlainCFG(VPB2IRBB);
}
-
-// Public interface to build a H-CFG.
-void VPlanHCFGBuilder::buildHierarchicalCFG() {
- // Build Top Region enclosing the plain CFG.
- buildPlainCFG();
- LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
-}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index f7f98ed7b1755..f2e90d3f4d9b3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -30,7 +30,6 @@ namespace llvm {
class Loop;
class LoopInfo;
-class VPRegionBlock;
class VPlan;
class VPlanTestIRBase;
class VPBlockBase;
@@ -54,15 +53,12 @@ class VPlanHCFGBuilder {
/// created for a input IR basic block.
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
- /// Build plain CFG for TheLoop and connects it to Plan's entry.
- void buildPlainCFG();
-
public:
VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
: TheLoop(Lp), LI(LI), Plan(P) {}
- /// Build H-CFG for TheLoop and update Plan accordingly.
- void buildHierarchicalCFG();
+ /// Build plain CFG for TheLoop and connects it to Plan's entry.
+ void buildPlainCFG();
/// Return the input IR BasicBlock corresponding to \p VPB. Returns nullptr if
/// there is no such corresponding block.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d3ccd704b63dd..88edb03a3f330 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -51,6 +51,9 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
make_early_inc_range(make_range(VPBB->begin(), EndIter))) {
VPValue *VPV = Ingredient.getVPSingleValue();
+ if (!VPV->getUnderlyingValue())
+ continue;
+
Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
VPRecipeBase *NewRecipe = nullptr;
diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll
index 388da8540646f..afd1308a2d24a 100644
--- a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll
@@ -4,7 +4,6 @@
@A = common global [1024 x i64] zeroinitializer, align 16
@B = common global [1024 x i64] zeroinitializer, align 16
-; FIXME: The exit condition of the inner loop is incorrect when vectorizing.
define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) {
; CHECK-LABEL: define void @inner_latch_header_first_successor(
; CHECK-SAME: i64 [[N:%.*]], i32 [[C:%.*]], i64 [[M:%.*]]) {
@@ -35,8 +34,9 @@ define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) {
; CHECK-NEXT: [[TMP3]] = add nsw <4 x i64> [[TMP2]], [[VEC_PHI4]]
; CHECK-NEXT: [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1)
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[TMP4]], [[BROADCAST_SPLAT2]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
-; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[INNER3]]
+; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
+; CHECK-NEXT: br i1 [[TMP9]], label %[[VECTOR_LATCH]], label %[[INNER3]]
; CHECK: [[VECTOR_LATCH]]:
; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER3]] ]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[VEC_PHI6]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true))
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 625a32c098f94..b4b6d3d760349 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -6,7 +6,7 @@
@arr = external global [8 x [8 x i64]], align 16
define void @foo(i64 %n) {
-; CHECK: VPlan 'HCFGBuilder: Plain CFG
+; CHECK: VPlan 'Plain CFG
; CHECK-NEXT: {
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
@@ -19,17 +19,14 @@ define void @foo(i64 %n) {
; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n>
; CHECK-NEXT: Successor(s): inner
; CHECK-EMPTY:
-; CHECK-NEXT: <x1> inner: {
-; CHECK-NEXT: inner:
-; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
-; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
-; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
-; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
-; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
-; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): outer.latch
+; CHECK-NEXT: inner:
+; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
+; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
+; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
+; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
+; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
+; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
+; CHECK-NEXT: Successor(s): outer.latch, inner
; CHECK-EMPTY:
; CHECK-NEXT: outer.latch:
; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
index 89eaca0cfa8c8..29aeb7c4e97f9 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
@@ -4,7 +4,7 @@
; Verify that the stress testing flag for the VPlan H-CFG builder works as
; expected with and without enabling the VPlan H-CFG Verifier.
-; CHECK: VPlan 'HCFGBuilder: Plain CFG
+; CHECK: VPlan 'Plain CFG
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index caf5d2357411d..92961e44c5e54 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -73,7 +73,7 @@ class VPlanTestIRBase : public testing::Test {
PredicatedScalarEvolution PSE(*SE, *L);
auto Plan = std::make_unique<VPlan>(L);
VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, IntegerType::get(*Ctx, 64), PSE, true, false, L);
return Plan;
|
@llvm/pr-subscribers-vectorizers Author: Florian Hahn (fhahn) ChangesAs pointed out by @iamlouk in #129402, the current code doesn't handle latches with different successor orders correctly. Introduce a Depends on #129402 Full diff: https://github.com/llvm/llvm-project/pull/132292.diff 9 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0d2d5e1c312a1..99203ac0e1acd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9333,10 +9333,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
Range);
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG.
- // Convert to VPlan-transform and consoliate all transforms for VPlan
+ // TODO: Convert to VPlan-transform and consoliate all transforms for VPlan
// creation.
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
@@ -9640,7 +9640,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
auto Plan = std::make_unique<VPlan>(OrigLoop);
// Build hierarchical CFG
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f58f0290b5fa9..56a68e024cb94 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -14,12 +14,50 @@
#include "LoopVectorizationPlanner.h"
#include "VPlan.h"
#include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
#include "VPlanTransforms.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;
+/// Introduce VPRegionBlocks for each loop modeled using a plain CFG in \p Plan.
+static void introduceInnerLoopRegions(VPlan &Plan) {
+ VPDominatorTree VPDT;
+ VPDT.recalculate(Plan);
+
+ for (VPBlockBase *HeaderVPBB :
+ vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) {
+ if (HeaderVPBB->getNumPredecessors() != 2)
+ continue;
+ VPBlockBase *PreheaderVPBB = HeaderVPBB->getPredecessors()[0];
+ VPBlockBase *LatchVPBB = HeaderVPBB->getPredecessors()[1];
+ if (!VPDT.dominates(HeaderVPBB, LatchVPBB))
+ continue;
+ assert(VPDT.dominates(PreheaderVPBB, HeaderVPBB) &&
+ "preheader must dominate header");
+ if (LatchVPBB->getSuccessors()[1] != HeaderVPBB) {
+ auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
+ auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
+ Not->insertBefore(Term);
+ Term->setOperand(0, Not);
+ }
+
+ VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB);
+ VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB);
+ VPBlockBase *Succ = LatchVPBB->getSingleSuccessor();
+ VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
+
+ auto *R = Plan.createVPRegionBlock(HeaderVPBB, LatchVPBB, "",
+ false /*isReplicator*/);
+ for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB))
+ VPBB->setParent(R);
+
+ VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
+ VPBlockUtils::connectBlocks(R, Succ);
+ }
+}
+
void VPlanTransforms::introduceTopLevelVectorLoopRegion(
VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) {
@@ -98,4 +136,6 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion(
ScalarLatchTerm->getDebugLoc(), "cmp.n");
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
ScalarLatchTerm->getDebugLoc());
+
+ introduceInnerLoopRegions(Plan);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 4b8a2420b3037..5e31b09bcd7d3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -12,9 +12,7 @@
/// components and steps:
//
/// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that
-/// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top
-/// Region) is created to enclose and serve as parent of all the VPBasicBlocks
-/// in the plain CFG.
+/// faithfully represents the CFG in the incoming IR.
/// NOTE: At this point, there is a direct correspondence between all the
/// VPBasicBlocks created for the initial plain CFG and the incoming
/// BasicBlocks. However, this might change in the future.
@@ -57,12 +55,8 @@ class PlainCFGBuilder {
// Hold phi node's that need to be fixed once the plain CFG has been built.
SmallVector<PHINode *, 8> PhisToFix;
- /// Maps loops in the original IR to their corresponding region.
- DenseMap<Loop *, VPRegionBlock *> Loop2Region;
-
// Utility functions.
void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
- void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB);
void fixHeaderPhis();
VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
#ifndef NDEBUG
@@ -83,25 +77,6 @@ class PlainCFGBuilder {
// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
// must have no predecessors.
void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
- auto GetLatchOfExit = [this](BasicBlock *BB) -> BasicBlock * {
- auto *SinglePred = BB->getSinglePredecessor();
- Loop *LoopForBB = LI->getLoopFor(BB);
- if (!SinglePred || LI->getLoopFor(SinglePred) == LoopForBB)
- return nullptr;
- // The input IR must be in loop-simplify form, ensuring a single predecessor
- // for exit blocks.
- assert(SinglePred == LI->getLoopFor(SinglePred)->getLoopLatch() &&
- "SinglePred must be the only loop latch");
- return SinglePred;
- };
- if (auto *LatchBB = GetLatchOfExit(BB)) {
- auto *PredRegion = getOrCreateVPBB(LatchBB)->getParent();
- assert(VPBB == cast<VPBasicBlock>(PredRegion->getSingleSuccessor()) &&
- "successor must already be set for PredRegion; it must have VPBB "
- "as single successor");
- VPBB->setPredecessors({PredRegion});
- return;
- }
// Collect VPBB predecessors.
SmallVector<VPBlockBase *, 2> VPBBPreds;
for (BasicBlock *Pred : predecessors(BB))
@@ -113,13 +88,6 @@ static bool isHeaderBB(BasicBlock *BB, Loop *L) {
return L && BB == L->getHeader();
}
-void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region,
- BasicBlock *BB) {
- // BB is a loop header block. Connect the region to the loop preheader.
- Loop *LoopOfBB = LI->getLoopFor(BB);
- Region->setPredecessors({getOrCreateVPBB(LoopOfBB->getLoopPredecessor())});
-}
-
// Add operands to VPInstructions representing phi nodes from the input IR.
void PlainCFGBuilder::fixHeaderPhis() {
for (auto *Phi : PhisToFix) {
@@ -150,19 +118,6 @@ static bool isHeaderVPBB(VPBasicBlock *VPBB) {
return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB;
}
-/// Return true of \p L loop is contained within \p OuterLoop.
-static bool doesContainLoop(const Loop *L, const Loop *OuterLoop) {
- if (L->getLoopDepth() < OuterLoop->getLoopDepth())
- return false;
- const Loop *P = L;
- while (P) {
- if (P == OuterLoop)
- return true;
- P = P->getParentLoop();
- }
- return false;
-}
-
// Create a new empty VPBasicBlock for an incoming BasicBlock in the region
// corresponding to the containing loop or retrieve an existing one if it was
// already created. If no region exists yet for the loop containing \p BB, a new
@@ -178,28 +133,6 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name);
BB2VPBB[BB] = VPBB;
-
- // Get or create a region for the loop containing BB, except for the top
- // region of TheLoop which is created later.
- Loop *LoopOfBB = LI->getLoopFor(BB);
- if (!LoopOfBB || LoopOfBB == TheLoop || !doesContainLoop(LoopOfBB, TheLoop))
- return VPBB;
-
- auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB);
- if (!isHeaderBB(BB, LoopOfBB)) {
- assert(RegionOfVPBB &&
- "Region should have been created by visiting header earlier");
- VPBB->setParent(RegionOfVPBB);
- return VPBB;
- }
-
- assert(!RegionOfVPBB &&
- "First visit of a header basic block expects to register its region.");
- // Handle a header - take care of its Region.
- RegionOfVPBB = Plan.createVPRegionBlock(Name.str(), false /*isReplicator*/);
- RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]);
- RegionOfVPBB->setEntry(VPBB);
- Loop2Region[LoopOfBB] = RegionOfVPBB;
return VPBB;
}
@@ -376,15 +309,13 @@ void PlainCFGBuilder::buildPlainCFG(
for (BasicBlock *BB : RPO) {
// Create or retrieve the VPBasicBlock for this BB.
VPBasicBlock *VPBB = getOrCreateVPBB(BB);
- VPRegionBlock *Region = VPBB->getParent();
Loop *LoopForBB = LI->getLoopFor(BB);
// Set VPBB predecessors in the same order as they are in the incoming BB.
if (!isHeaderBB(BB, LoopForBB)) {
setVPBBPredsFromBB(VPBB, BB);
- } else if (Region) {
- // BB is a loop header and there's a corresponding region, set the
- // predecessor for it.
- setRegionPredsFromBB(Region, BB);
+ } else if (LoopForBB != TheLoop) {
+ VPBB->setPredecessors({getOrCreateVPBB(LoopForBB->getLoopPredecessor()),
+ getOrCreateVPBB(LoopForBB->getLoopLatch())});
}
// Create VPInstructions for BB.
@@ -423,21 +354,11 @@ void PlainCFGBuilder::buildPlainCFG(
BasicBlock *IRSucc1 = BI->getSuccessor(1);
VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
- if (BB == LoopForBB->getLoopLatch()) {
- // For a latch we need to set the successor of the region rather than that
- // of VPBB and it should be set to the exit, i.e., non-header successor,
- // except for the top region, which is handled elsewhere.
- assert(LoopForBB != TheLoop &&
- "Latch of the top region should have been handled earlier");
- Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1
- : Successor0);
- Region->setExiting(VPBB);
- continue;
- }
- // Don't connect any blocks outside the current loop except the latch for
- // now. The latch is handled above.
- if (LoopForBB) {
+ // Don't connect any blocks outside the current loop except the latch, which
+ // is handled below.
+ if (LoopForBB &&
+ (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch())) {
if (!LoopForBB->contains(IRSucc0)) {
VPBB->setOneSuccessor(Successor1);
continue;
@@ -461,16 +382,11 @@ void PlainCFGBuilder::buildPlainCFG(
for (const auto &[IRBB, VPB] : BB2VPBB)
VPB2IRBB[VPB] = IRBB;
+
+ LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan);
}
void VPlanHCFGBuilder::buildPlainCFG() {
PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
PCFGBuilder.buildPlainCFG(VPB2IRBB);
}
-
-// Public interface to build a H-CFG.
-void VPlanHCFGBuilder::buildHierarchicalCFG() {
- // Build Top Region enclosing the plain CFG.
- buildPlainCFG();
- LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
-}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index f7f98ed7b1755..f2e90d3f4d9b3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -30,7 +30,6 @@ namespace llvm {
class Loop;
class LoopInfo;
-class VPRegionBlock;
class VPlan;
class VPlanTestIRBase;
class VPBlockBase;
@@ -54,15 +53,12 @@ class VPlanHCFGBuilder {
/// created for a input IR basic block.
DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
- /// Build plain CFG for TheLoop and connects it to Plan's entry.
- void buildPlainCFG();
-
public:
VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
: TheLoop(Lp), LI(LI), Plan(P) {}
- /// Build H-CFG for TheLoop and update Plan accordingly.
- void buildHierarchicalCFG();
+ /// Build plain CFG for TheLoop and connects it to Plan's entry.
+ void buildPlainCFG();
/// Return the input IR BasicBlock corresponding to \p VPB. Returns nullptr if
/// there is no such corresponding block.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d3ccd704b63dd..88edb03a3f330 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -51,6 +51,9 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
make_early_inc_range(make_range(VPBB->begin(), EndIter))) {
VPValue *VPV = Ingredient.getVPSingleValue();
+ if (!VPV->getUnderlyingValue())
+ continue;
+
Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
VPRecipeBase *NewRecipe = nullptr;
diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll
index 388da8540646f..afd1308a2d24a 100644
--- a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll
@@ -4,7 +4,6 @@
@A = common global [1024 x i64] zeroinitializer, align 16
@B = common global [1024 x i64] zeroinitializer, align 16
-; FIXME: The exit condition of the inner loop is incorrect when vectorizing.
define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) {
; CHECK-LABEL: define void @inner_latch_header_first_successor(
; CHECK-SAME: i64 [[N:%.*]], i32 [[C:%.*]], i64 [[M:%.*]]) {
@@ -35,8 +34,9 @@ define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) {
; CHECK-NEXT: [[TMP3]] = add nsw <4 x i64> [[TMP2]], [[VEC_PHI4]]
; CHECK-NEXT: [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1)
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[TMP4]], [[BROADCAST_SPLAT2]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
-; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[INNER3]]
+; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
+; CHECK-NEXT: br i1 [[TMP9]], label %[[VECTOR_LATCH]], label %[[INNER3]]
; CHECK: [[VECTOR_LATCH]]:
; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER3]] ]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[VEC_PHI6]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true))
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 625a32c098f94..b4b6d3d760349 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -6,7 +6,7 @@
@arr = external global [8 x [8 x i64]], align 16
define void @foo(i64 %n) {
-; CHECK: VPlan 'HCFGBuilder: Plain CFG
+; CHECK: VPlan 'Plain CFG
; CHECK-NEXT: {
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
@@ -19,17 +19,14 @@ define void @foo(i64 %n) {
; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n>
; CHECK-NEXT: Successor(s): inner
; CHECK-EMPTY:
-; CHECK-NEXT: <x1> inner: {
-; CHECK-NEXT: inner:
-; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
-; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
-; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
-; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
-; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
-; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
-; CHECK-NEXT: No successors
-; CHECK-NEXT: }
-; CHECK-NEXT: Successor(s): outer.latch
+; CHECK-NEXT: inner:
+; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next>
+; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
+; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2>
+; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
+; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
+; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec>
+; CHECK-NEXT: Successor(s): outer.latch, inner
; CHECK-EMPTY:
; CHECK-NEXT: outer.latch:
; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
index 89eaca0cfa8c8..29aeb7c4e97f9 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll
@@ -4,7 +4,7 @@
; Verify that the stress testing flag for the VPlan H-CFG builder works as
; expected with and without enabling the VPlan H-CFG Verifier.
-; CHECK: VPlan 'HCFGBuilder: Plain CFG
+; CHECK: VPlan 'Plain CFG
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index caf5d2357411d..92961e44c5e54 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -73,7 +73,7 @@ class VPlanTestIRBase : public testing::Test {
PredicatedScalarEvolution PSE(*SE, *L);
auto Plan = std::make_unique<VPlan>(L);
VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan);
- HCFGBuilder.buildHierarchicalCFG();
+ HCFGBuilder.buildPlainCFG();
VPlanTransforms::introduceTopLevelVectorLoopRegion(
*Plan, IntegerType::get(*Ctx, 64), PSE, true, false, L);
return Plan;
|
7c836e2
to
f263854
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ping :)
rebased now that #129402 has landed
f263854
to
29bfcd0
Compare
@@ -55,6 +55,9 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( | |||
make_early_inc_range(make_range(VPBB->begin(), EndIter))) { | |||
|
|||
VPValue *VPV = Ingredient.getVPSingleValue(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Independent: using Ingredient
as the name of a recipe? Which (now) may even be ingredient-less...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can rename separately, thanks
// We are canonicalizing the successors of the latch when introducing the | ||
// region. We will exit the region of the latch condition is true; invert the | ||
// original condition if the original CFG branches to the header on true. | ||
if (!LatchVPBB->getSingleSuccessor() && | ||
LatchVPBB->getSuccessors()[0] == HeaderVPB) { | ||
auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator(); | ||
auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)}); | ||
Not->insertBefore(Term); | ||
Term->setOperand(0, Not); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better to perform this canonicalization in a separate canonicalizeLatch(), analogous to canonical[ize]Header()?
E.g., something like
if (canonicalizeHeader(HeaderVPB, VPDT)) {
canonicalizeLatchOfHeader(HeaderVPB);
createLoopRegion(Plan, HeaderVPB);
}
or possibly fused into canonicalizeHeaderAndLatch(HeaderVPB, VPDT)
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fused, thanks!
// We are canonicalizing the successors of the latch when introducing the | ||
// region. We will exit the region of the latch condition is true; invert the | ||
// original condition if the original CFG branches to the header on true. | ||
if (!LatchVPBB->getSingleSuccessor() && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can LatchVPBB have a single successor, should we assert that it has two, following the check that header has two predecessors?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At the moment, it may have 1 successor (the header, when vectorizing inner loops, where exit edges are not yet connected; that should be coming soon thought) or 2 (for inner loops when vectorizing outer loops)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, and in that case there's no terminating conditional branch yet to reverse? May be worth leaving a note.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done,t hanks
@@ -420,6 +420,17 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) { | |||
auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0]; | |||
auto *LatchVPBB = HeaderVPB->getPredecessors()[1]; | |||
|
|||
// We are canonicalizing the successors of the latch when introducing the | |||
// region. We will exit the region of the latch condition is true; invert the |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// region. We will exit the region of the latch condition is true; invert the | |
// region, such that the latch exits the region when its condition is true; invert the |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done thanks
// original condition if the original CFG branches to the header on true. | ||
if (!LatchVPBB->getSingleSuccessor() && | ||
LatchVPBB->getSuccessors()[0] == HeaderVPB) { | ||
auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Assert that Term is a conditional branch, so that we're certain who's first operand we're resetting?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done thanks
@@ -420,6 +420,17 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) { | |||
auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0]; | |||
auto *LatchVPBB = HeaderVPB->getPredecessors()[1]; | |||
|
|||
// We are canonicalizing the successors of the latch when introducing the |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// We are canonicalizing the successors of the latch when introducing the | |
// The two successors of conditional branches match the condition, with the first successor corresponding to true and the second to false. | |
// We canonicalize the successors of the latch when introducing the |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done thanks
auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator(); | ||
auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)}); | ||
Not->insertBefore(Term); | ||
Term->setOperand(0, Not); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also swap the successors to keep successors and branch condition in sync, even though both successors are soon to be removed. As in canonicalizing the two predecessors of header.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done thanks,
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, LGTM!
!VPDT.dominates(HeaderVPB, LatchVPBB)) { | ||
return false; | ||
} else { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
!VPDT.dominates(HeaderVPB, LatchVPBB)) { | |
return false; | |
} else { | |
!VPDT.dominates(HeaderVPB, LatchVPBB)) | |
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done thanks
// We are canonicalizing the successors of the latch when introducing the | ||
// region. We will exit the region of the latch condition is true; invert the | ||
// original condition if the original CFG branches to the header on true. | ||
if (!LatchVPBB->getSingleSuccessor() && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, and in that case there's no terminating conditional branch yet to reverse? May be worth leaving a note.
// canonicalize the successors of the latch when introducing the region, such | ||
// that the latch exits the region when its condition is true; invert the | ||
// original condition if the original CFG branches to the header on true. | ||
if (!LatchVPBB->getSingleSuccessor() && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: consider reversing (the above conditional branch which is also terminating ;-) to early-return.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done thanks
…#132292) As pointed out by @iamlouk in llvm#129402, the current code doesn't handle latches with different successor orders correctly. Introduce a `NOT`, if needed. Depends on llvm#129402 PR: llvm#132292
…#132292) As pointed out by @iamlouk in llvm#129402, the current code doesn't handle latches with different successor orders correctly. Introduce a `NOT`, if needed. Depends on llvm#129402 PR: llvm#132292
…#132292) As pointed out by @iamlouk in llvm#129402, the current code doesn't handle latches with different successor orders correctly. Introduce a `NOT`, if needed. Depends on llvm#129402 PR: llvm#132292
…ions. (#132292) As pointed out by @iamlouk in llvm/llvm-project#129402, the current code doesn't handle latches with different successor orders correctly. Introduce a `NOT`, if needed. Depends on llvm/llvm-project#129402 PR: llvm/llvm-project#132292
…#132292) As pointed out by @iamlouk in llvm#129402, the current code doesn't handle latches with different successor orders correctly. Introduce a `NOT`, if needed. Depends on llvm#129402 PR: llvm#132292
As pointed out by @iamlouk in #129402, the current code doesn't handle latches with different successor orders correctly. Introduce a
NOT
, if needed.Depends on #129402