@@ -385,6 +385,11 @@ static cl::opt<bool> UseWiderVFIfCallVariantsPresent(
385
385
cl::Hidden,
386
386
cl::desc(" Try wider VFs if they enable the use of vector variants" ));
387
387
388
+ static cl::opt<bool > EnableEarlyExitVectorization (
389
+ " enable-early-exit-vectorization" , cl::init(false ), cl::Hidden,
390
+ cl::desc(
391
+ " Enable vectorization of early exit loops with uncountable exits." ));
392
+
388
393
// Likelihood of bypassing the vectorized loop because assumptions about SCEV
389
394
// variables not overflowing do not hold. See `emitSCEVChecks`.
390
395
static constexpr uint32_t SCEVCheckBypassWeights[] = {1 , 127 };
@@ -1350,9 +1355,10 @@ class LoopVectorizationCostModel {
1350
1355
LLVM_DEBUG (dbgs () << " LV: Loop does not require scalar epilogue\n " );
1351
1356
return false ;
1352
1357
}
1353
- // If we might exit from anywhere but the latch, must run the exiting
1354
- // iteration in scalar form.
1355
- if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch ()) {
1358
+ // If we might exit from anywhere but the latch, we must run the exiting
1359
+ // iteration in scalar form, unless early exit vectorization is enabled and
+ // the loop has an uncountable early exit.
1360
+ if (TheLoop->getExitingBlock () != TheLoop->getLoopLatch () &&
1361
+ !(EnableEarlyExitVectorization && Legal->hasUncountableEarlyExit ())) {
1356
1362
LLVM_DEBUG (dbgs () << " LV: Loop requires scalar epilogue: not exiting "
1357
1363
" from latch block\n " );
1358
1364
return true ;
@@ -2568,9 +2574,9 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
2568
2574
void InnerLoopVectorizer::createVectorLoopSkeleton (StringRef Prefix) {
2569
2575
LoopVectorPreHeader = OrigLoop->getLoopPreheader ();
2570
2576
assert (LoopVectorPreHeader && " Invalid loop structure" );
2571
- assert ((OrigLoop->getUniqueExitBlock () ||
2577
+ assert ((OrigLoop->getUniqueLatchExitBlock () ||
2572
2578
Cost->requiresScalarEpilogue (VF.isVector ())) &&
2573
- " multiple exit loop without required epilogue?" );
2579
+ " loops not exiting via the latch without required epilogue?" );
2574
2580
2575
2581
LoopMiddleBlock =
2576
2582
SplitBlock (LoopVectorPreHeader, LoopVectorPreHeader->getTerminator (), DT,
@@ -2753,8 +2759,6 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2753
2759
// value (the value that feeds into the phi from the loop latch).
2754
2760
// We allow both, but they, obviously, have different values.
2755
2761
2756
- assert (OrigLoop->getUniqueExitBlock () && " Expected a single exit block" );
2757
-
2758
2762
DenseMap<Value *, Value *> MissingVals;
2759
2763
2760
2764
Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock (
@@ -2808,6 +2812,8 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
2808
2812
}
2809
2813
}
2810
2814
2815
+ assert ((MissingVals.empty () || OrigLoop->getUniqueExitBlock ()) &&
2816
+ " Expected a single exit block for escaping values" );
2811
2817
for (auto &I : MissingVals) {
2812
2818
PHINode *PHI = cast<PHINode>(I.first );
2813
2819
// One corner case we have to handle is two IVs "chasing" each other,
@@ -3591,7 +3597,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
3591
3597
TheLoop->getExitingBlocks (Exiting);
3592
3598
for (BasicBlock *E : Exiting) {
3593
3599
auto *Cmp = dyn_cast<Instruction>(E->getTerminator ()->getOperand (0 ));
3594
- if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse ())
3600
+ if (Cmp && TheLoop->contains (Cmp) && Cmp->hasOneUse () &&
3601
+ (TheLoop->getLoopLatch () == E || !Legal->hasUncountableEarlyExit ()))
3595
3602
AddToWorklistIfAllowed (Cmp);
3596
3603
}
3597
3604
@@ -7775,6 +7782,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7775
7782
LoopVectorizeHints Hints (L, true , *ORE);
7776
7783
Hints.setAlreadyVectorized ();
7777
7784
}
7785
+
7778
7786
TargetTransformInfo::UnrollingPreferences UP;
7779
7787
TTI.getUnrollingPreferences (L, *PSE.getSE (), UP, ORE);
7780
7788
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
@@ -7787,15 +7795,17 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7787
7795
ILV.printDebugTracesAtEnd ();
7788
7796
7789
7797
// 4. Adjust branch weight of the branch in the middle block.
7790
- auto *MiddleTerm =
7791
- cast<BranchInst>(State.CFG .VPBB2IRBB [ExitVPBB]->getTerminator ());
7792
- if (MiddleTerm->isConditional () &&
7793
- hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
7794
- // Assume that `Count % VectorTripCount` is equally distributed.
7795
- unsigned TripCount = BestVPlan.getUF () * State.VF .getKnownMinValue ();
7796
- assert (TripCount > 0 && " trip count should not be zero" );
7797
- const uint32_t Weights[] = {1 , TripCount - 1 };
7798
- setBranchWeights (*MiddleTerm, Weights, /* IsExpected=*/ false );
7798
+ if (ExitVPBB) {
7799
+ auto *MiddleTerm =
7800
+ cast<BranchInst>(State.CFG .VPBB2IRBB [ExitVPBB]->getTerminator ());
7801
+ if (MiddleTerm->isConditional () &&
7802
+ hasBranchWeightMD (*OrigLoop->getLoopLatch ()->getTerminator ())) {
7803
+ // Assume that `Count % VectorTripCount` is equally distributed.
7804
+ unsigned TripCount = BestVPlan.getUF () * State.VF .getKnownMinValue ();
7805
+ assert (TripCount > 0 && " trip count should not be zero" );
7806
+ const uint32_t Weights[] = {1 , TripCount - 1 };
7807
+ setBranchWeights (*MiddleTerm, Weights, /* IsExpected=*/ false );
7808
+ }
7799
7809
}
7800
7810
7801
7811
return State.ExpandedSCEVs ;
@@ -8180,7 +8190,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
8180
8190
// If source is an exiting block, we know the exit edge is dynamically dead
8181
8191
// in the vector loop, and thus we don't need to restrict the mask. Avoid
8182
8192
// adding uses of an otherwise potentially dead instruction.
8183
- if (OrigLoop->isLoopExiting (Src))
8193
+ if (!Legal-> hasUncountableEarlyExit () && OrigLoop->isLoopExiting (Src))
8184
8194
return EdgeMaskCache[Edge] = SrcMask;
8185
8195
8186
8196
VPValue *EdgeMask = getVPValueOrAddLiveIn (BI->getCondition ());
@@ -8863,76 +8873,78 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8863
8873
}
8864
8874
}
8865
8875
8866
- // Collect VPIRInstructions for phis in the original exit block that are modeled
8876
+ // Collect VPIRInstructions for phis in the exit blocks that are modeled
8867
8877
// in VPlan and add the exiting VPValue as operand. Some exiting values are not
8868
8878
// modeled explicitly yet and won't be included. Those are un-truncated
8869
8879
// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
8870
8880
// increments.
8871
- static SetVector<VPIRInstruction *> collectUsersInExitBlock (
8881
+ static SetVector<VPIRInstruction *> collectUsersInExitBlocks (
8872
8882
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
8873
8883
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8874
- auto *MiddleVPBB = Plan.getMiddleBlock ();
8875
- // No edge from the middle block to the unique exit block has been inserted
8876
- // and there is nothing to fix from vector loop; phis should have incoming
8877
- // from scalar loop only.
8878
- if (MiddleVPBB->getNumSuccessors () != 2 )
8879
- return {};
8880
8884
SetVector<VPIRInstruction *> ExitUsersToFix;
8881
- VPBasicBlock *ExitVPBB = cast<VPIRBasicBlock>(MiddleVPBB->getSuccessors ()[0 ]);
8882
- BasicBlock *ExitingBB = OrigLoop->getExitingBlock ();
8883
- for (VPRecipeBase &R : *ExitVPBB) {
8884
- auto *ExitIRI = dyn_cast<VPIRInstruction>(&R);
8885
- if (!ExitIRI)
8886
- continue ;
8887
- auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction ());
8888
- if (!ExitPhi)
8889
- break ;
8890
- Value *IncomingValue = ExitPhi->getIncomingValueForBlock (ExitingBB);
8891
- VPValue *V = Builder.getVPValueOrAddLiveIn (IncomingValue);
8892
- // Exit values for inductions are computed and updated outside of VPlan and
8893
- // independent of induction recipes.
8894
- // TODO: Compute induction exit values in VPlan.
8895
- if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8896
- !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst ()) ||
8897
- isa<VPWidenPointerInductionRecipe>(V) ||
8898
- (isa<Instruction>(IncomingValue) &&
8899
- OrigLoop->contains (cast<Instruction>(IncomingValue)) &&
8900
- any_of (IncomingValue->users (), [&Inductions](User *U) {
8901
- auto *P = dyn_cast<PHINode>(U);
8902
- return P && Inductions.contains (P);
8903
- })))
8904
- continue ;
8905
- ExitUsersToFix.insert (ExitIRI);
8906
- ExitIRI->addOperand (V);
8885
+ for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks ()) {
8886
+ BasicBlock *ExitBB = ExitVPBB->getIRBasicBlock ();
8887
+ for (VPRecipeBase &R : *ExitVPBB) {
8888
+ auto *ExitIRI = dyn_cast<VPIRInstruction>(&R);
8889
+ if (!ExitIRI)
8890
+ continue ;
8891
+ auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction ());
8892
+ if (!ExitPhi)
8893
+ break ;
8894
+ for (BasicBlock *ExitingBB : predecessors (ExitBB)) {
8895
+ if (!OrigLoop->contains (ExitingBB))
8896
+ continue ;
8897
+ Value *IncomingValue = ExitPhi->getIncomingValueForBlock (ExitingBB);
8898
+ VPValue *V = Builder.getVPValueOrAddLiveIn (IncomingValue);
8899
+ // Exit values for inductions are computed and updated outside of VPlan
8900
+ // and independent of induction recipes.
8901
+ // TODO: Compute induction exit values in VPlan.
8902
+ if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8903
+ !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst ()) ||
8904
+ isa<VPWidenPointerInductionRecipe>(V) ||
8905
+ (isa<Instruction>(IncomingValue) &&
8906
+ OrigLoop->contains (cast<Instruction>(IncomingValue)) &&
8907
+ any_of (IncomingValue->users (), [&Inductions](User *U) {
8908
+ auto *P = dyn_cast<PHINode>(U);
8909
+ return P && Inductions.contains (P);
8910
+ })))
8911
+ continue ;
8912
+ ExitUsersToFix.insert (ExitIRI);
8913
+ ExitIRI->addOperand (V);
8914
+ }
8915
+ }
8907
8916
}
8908
8917
return ExitUsersToFix;
8909
8918
}
8910
8919
8911
8920
// Add exit values to \p Plan. Extracts are added for each entry in \p
8912
8921
// ExitUsersToFix if needed and their operands are updated.
8913
8922
static void
8914
- addUsersInExitBlock (VPlan &Plan,
8915
- const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8923
+ addUsersInExitBlocks (VPlan &Plan,
8924
+ const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8916
8925
if (ExitUsersToFix.empty ())
8917
8926
return ;
8918
8927
8919
- auto *MiddleVPBB = Plan.getMiddleBlock ();
8920
- VPBuilder B (MiddleVPBB, MiddleVPBB->getFirstNonPhi ());
8921
-
8922
8928
// Introduce extract for exiting values and update the VPIRInstructions
8923
8929
// modeling the corresponding LCSSA phis.
8924
8930
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
8931
+
8925
8932
VPValue *V = ExitIRI->getOperand (0 );
8926
8933
// Pass live-in values used by exit phis directly through to their users in
8927
8934
// the exit block.
8928
8935
if (V->isLiveIn ())
8929
8936
continue ;
8930
8937
8931
- LLVMContext &Ctx = ExitIRI->getInstruction ().getContext ();
8932
- VPValue *Ext = B.createNaryOp (VPInstruction::ExtractFromEnd,
8933
- {V, Plan.getOrAddLiveIn (ConstantInt::get (
8934
- IntegerType::get (Ctx, 32 ), 1 ))});
8935
- ExitIRI->setOperand (0 , Ext);
8938
+ for (VPBlockBase *PredVPB : ExitIRI->getParent ()->getPredecessors ()) {
8939
+ auto *PredVPBB = cast<VPBasicBlock>(PredVPB);
8940
+ VPBuilder B (PredVPBB, PredVPBB->getFirstNonPhi ());
8941
+
8942
+ LLVMContext &Ctx = ExitIRI->getInstruction ().getContext ();
8943
+ VPValue *Ext = B.createNaryOp (VPInstruction::ExtractFromEnd,
8944
+ {V, Plan.getOrAddLiveIn (ConstantInt::get (
8945
+ IntegerType::get (Ctx, 32 ), 1 ))});
8946
+ ExitIRI->setOperand (0 , Ext);
8947
+ }
8936
8948
}
8937
8949
}
8938
8950
@@ -9204,11 +9216,32 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9204
9216
" VPBasicBlock" );
9205
9217
RecipeBuilder.fixHeaderPhis ();
9206
9218
9219
+ if (Legal->hasUncountableEarlyExit ()) {
9220
+ VPlanTransforms::handleUncountableEarlyExit (
9221
+ *Plan, *PSE.getSE (), OrigLoop, Legal->getUncountableExitingBlocks (),
9222
+ RecipeBuilder);
9223
+ }
9207
9224
addScalarResumePhis (RecipeBuilder, *Plan);
9208
- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock (
9225
+ SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks (
9209
9226
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
9210
9227
addExitUsersForFirstOrderRecurrences (*Plan, ExitUsersToFix);
9211
- addUsersInExitBlock (*Plan, ExitUsersToFix);
9228
+ addUsersInExitBlocks (*Plan, ExitUsersToFix);
9229
+
9230
+ // Currently only live-ins can be used by exit values. We also bail out if any
9231
+ // exit value isn't handled in VPlan yet, i.e. a VPIRInstruction in the exit
9232
+ // block without any operands.
9233
+ if (Legal->hasUncountableEarlyExit ()) {
9234
+ if (any_of (Plan->getExitBlocks (), [](VPIRBasicBlock *ExitBB) {
9235
+ return any_of (*ExitBB, [](VPRecipeBase &R) {
9236
+ auto VPIRI = cast<VPIRInstruction>(&R);
9237
+ return VPIRI->getNumOperands () == 0 ||
9238
+ any_of (VPIRI->operands (),
9239
+ [](VPValue *Op) { return !Op->isLiveIn (); });
9240
+ });
9241
+ }))
9242
+ return nullptr ;
9243
+ }
9244
+
9212
9245
// ---------------------------------------------------------------------------
9213
9246
// Transform initial VPlan: Apply previously taken decisions, in order, to
9214
9247
// bring the VPlan to its final state.
@@ -9968,12 +10001,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
9968
10001
}
9969
10002
9970
10003
if (LVL.hasUncountableEarlyExit ()) {
9971
- reportVectorizationFailure (" Auto-vectorization of loops with uncountable "
9972
- " early exit is not yet supported" ,
9973
- " Auto-vectorization of loops with uncountable "
9974
- " early exit is not yet supported" ,
9975
- " UncountableEarlyExitLoopsUnsupported" , ORE, L);
9976
- return false ;
10004
+ if (!EnableEarlyExitVectorization) {
10005
+ reportVectorizationFailure (" Auto-vectorization of loops with uncountable "
10006
+ " early exit is not yet supported" ,
10007
+ " Auto-vectorization of loops with uncountable "
10008
+ " early exit is not yet supported" ,
10009
+ " UncountableEarlyExitLoopsUnsupported" , ORE,
10010
+ L);
10011
+ return false ;
10012
+ }
9977
10013
}
9978
10014
9979
10015
// Entrance to the VPlan-native vectorization path. Outer loops are processed
0 commit comments