@@ -1362,9 +1362,11 @@ class LoopVectorizationCostModel {
1362
1362
// If we might exit from anywhere but the latch, must run the exiting
1363
1363
// iteration in scalar form.
1364
1364
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1365
- LLVM_DEBUG(
1366
- dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n");
1367
- return true;
1365
+ if (!Legal->canVectorizeMultiCond()) {
1366
+ LLVM_DEBUG(
1367
+ dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n");
1368
+ return true;
1369
+ }
1368
1370
}
1369
1371
if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue()) {
1370
1372
LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: "
@@ -2535,8 +2537,17 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2535
2537
LoopVectorPreHeader = OrigLoop->getLoopPreheader();
2536
2538
assert(LoopVectorPreHeader && "Invalid loop structure");
2537
2539
LoopExitBlock = OrigLoop->getUniqueExitBlock(); // may be nullptr
2538
- assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector())) &&
2539
- "multiple exit loop without required epilogue?");
2540
+ if (Legal->canVectorizeMultiCond()) {
2541
+ BasicBlock *Latch = OrigLoop->getLoopLatch();
2542
+ BasicBlock *TrueSucc =
2543
+ cast<BranchInst>(Latch->getTerminator())->getSuccessor(0);
2544
+ BasicBlock *FalseSucc =
2545
+ cast<BranchInst>(Latch->getTerminator())->getSuccessor(1);
2546
+ LoopExitBlock = OrigLoop->contains(TrueSucc) ? FalseSucc : TrueSucc;
2547
+ } else {
2548
+ assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector())) &&
2549
+ "multiple exit loop without required epilogue?");
2550
+ }
2540
2551
2541
2552
LoopMiddleBlock =
2542
2553
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
@@ -2910,7 +2921,8 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
2910
2921
for (PHINode &PN : Exit->phis())
2911
2922
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
2912
2923
2913
- if (Cost->requiresScalarEpilogue(VF.isVector())) {
2924
+ if (Legal->canVectorizeMultiCond() ||
2925
+ Cost->requiresScalarEpilogue(VF.isVector())) {
2914
2926
// No edge from the middle block to the unique exit block has been inserted
2915
2927
// and there is nothing to fix from vector loop; phis should have incoming
2916
2928
// from scalar loop only.
@@ -3554,7 +3566,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
3554
3566
TheLoop->getExitingBlocks(Exiting);
3555
3567
for (BasicBlock *E : Exiting) {
3556
3568
auto *Cmp = dyn_cast<Instruction>(E->getTerminator()->getOperand(0));
3557
- if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse())
3569
+ if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse() &&
3570
+ (TheLoop->getLoopLatch() == E || !Legal->canVectorizeMultiCond()))
3558
3571
AddToWorklistIfAllowed(Cmp);
3559
3572
}
3560
3573
@@ -7643,12 +7656,15 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7643
7656
BestVPlan.execute(&State);
7644
7657
7645
7658
// 2.5 Collect reduction resume values.
7646
- auto *ExitVPBB =
7647
- cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7648
- for (VPRecipeBase &R : *ExitVPBB) {
7649
- createAndCollectMergePhiForReduction(
7650
- dyn_cast<VPInstruction>(&R), State, OrigLoop,
7651
- State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
7659
+ VPBasicBlock *ExitVPBB = nullptr;
7660
+ if (BestVPlan.getVectorLoopRegion()->getSingleSuccessor()) {
7661
+ ExitVPBB = cast<VPBasicBlock>(
7662
+ BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7663
+ for (VPRecipeBase &R : *ExitVPBB) {
7664
+ createAndCollectMergePhiForReduction(
7665
+ dyn_cast<VPInstruction>(&R), State, OrigLoop,
7666
+ State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
7667
+ }
7652
7668
}
7653
7669
7654
7670
// 2.6. Maintain Loop Hints
@@ -7674,6 +7690,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7674
7690
LoopVectorizeHints Hints(L, true, *ORE);
7675
7691
Hints.setAlreadyVectorized();
7676
7692
}
7693
+
7677
7694
TargetTransformInfo::UnrollingPreferences UP;
7678
7695
TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE);
7679
7696
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
@@ -7686,15 +7703,17 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7686
7703
ILV.printDebugTracesAtEnd();
7687
7704
7688
7705
// 4. Adjust branch weight of the branch in the middle block.
7689
- auto *MiddleTerm =
7690
- cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
7691
- if (MiddleTerm->isConditional() &&
7692
- hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
7693
- // Assume that `Count % VectorTripCount` is equally distributed.
7694
- unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue();
7695
- assert(TripCount > 0 && "trip count should not be zero");
7696
- const uint32_t Weights[] = {1, TripCount - 1};
7697
- setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
7706
+ if (ExitVPBB) {
7707
+ auto *MiddleTerm =
7708
+ cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
7709
+ if (MiddleTerm->isConditional() &&
7710
+ hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
7711
+ // Assume that `Count % VectorTripCount` is equally distributed.
7712
+ unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue();
7713
+ assert(TripCount > 0 && "trip count should not be zero");
7714
+ const uint32_t Weights[] = {1, TripCount - 1};
7715
+ setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
7716
+ }
7698
7717
}
7699
7718
7700
7719
return State.ExpandedSCEVs;
@@ -8079,7 +8098,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
8079
8098
// If source is an exiting block, we know the exit edge is dynamically dead
8080
8099
// in the vector loop, and thus we don't need to restrict the mask. Avoid
8081
8100
// adding uses of an otherwise potentially dead instruction.
8082
- if (OrigLoop->isLoopExiting(Src))
8101
+ if (!Legal->canVectorizeMultiCond() && OrigLoop->isLoopExiting(Src))
8083
8102
return EdgeMaskCache[Edge] = SrcMask;
8084
8103
8085
8104
VPValue *EdgeMask = getVPValueOrAddLiveIn(BI->getCondition());
@@ -8729,6 +8748,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
8729
8748
static SetVector<VPIRInstruction *> collectUsersInExitBlock(
8730
8749
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
8731
8750
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8751
+ if (!Plan.getVectorLoopRegion()->getSingleSuccessor())
8752
+ return {};
8732
8753
auto *MiddleVPBB =
8733
8754
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
8734
8755
// No edge from the middle block to the unique exit block has been inserted
@@ -8814,6 +8835,8 @@ static void addLiveOutsForFirstOrderRecurrences(
8814
8835
// TODO: Should be replaced by
8815
8836
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
8816
8837
// scalar region is modeled as well.
8838
+ if (!VectorRegion->getSingleSuccessor())
8839
+ return;
8817
8840
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
8818
8841
VPBasicBlock *ScalarPHVPBB = nullptr;
8819
8842
if (MiddleVPBB->getNumSuccessors() == 2) {
@@ -9100,10 +9123,15 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9100
9123
"VPBasicBlock");
9101
9124
RecipeBuilder.fixHeaderPhis();
9102
9125
9103
- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock(
9104
- OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
9105
- addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9106
- addUsersInExitBlock(*Plan, ExitUsersToFix);
9126
+ if (Legal->canVectorizeMultiCond()) {
9127
+ VPlanTransforms::convertToMultiCond(*Plan, *PSE.getSE(), OrigLoop,
9128
+ RecipeBuilder);
9129
+ } else {
9130
+ SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock(
9131
+ OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
9132
+ addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9133
+ addUsersInExitBlock(*Plan, ExitUsersToFix);
9134
+ }
9107
9135
9108
9136
// ---------------------------------------------------------------------------
9109
9137
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9231,8 +9259,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9231
9259
using namespace VPlanPatternMatch;
9232
9260
VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion();
9233
9261
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock();
9234
- VPBasicBlock *MiddleVPBB =
9235
- cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor());
9236
9262
for (VPRecipeBase &R : Header->phis()) {
9237
9263
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9238
9264
if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
@@ -9251,8 +9277,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9251
9277
for (VPUser *U : Cur->users()) {
9252
9278
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
9253
9279
if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
9254
- assert(UserRecipe->getParent() == MiddleVPBB &&
9255
- "U must be either in the loop region or the middle block.");
9256
9280
continue;
9257
9281
}
9258
9282
Worklist.insert(UserRecipe);
@@ -9357,6 +9381,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9357
9381
}
9358
9382
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock();
9359
9383
Builder.setInsertPoint(&*LatchVPBB->begin());
9384
+ if (!VectorLoopRegion->getSingleSuccessor())
9385
+ return;
9386
+ VPBasicBlock *MiddleVPBB =
9387
+ cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor());
9360
9388
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi();
9361
9389
for (VPRecipeBase &R :
9362
9390
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
0 commit comments