@@ -385,6 +385,11 @@ static cl::opt<bool> UseWiderVFIfCallVariantsPresent(
385
385
cl::Hidden,
386
386
cl::desc("Try wider VFs if they enable the use of vector variants"));
387
387
388
+ static cl::opt<bool> EnableEarlyExitVectorization(
389
+ "enable-early-exit-vectorization", cl::init(false), cl::Hidden,
390
+ cl::desc(
391
+ "Enable vectorization of early exit loops with uncountable exits."));
392
+
388
393
// Likelihood of bypassing the vectorized loop because assumptions about SCEV
389
394
// variables not overflowing do not hold. See `emitSCEVChecks`.
390
395
static constexpr uint32_t SCEVCheckBypassWeights[] = {1, 127};
@@ -1382,9 +1387,10 @@ class LoopVectorizationCostModel {
1382
1387
LLVM_DEBUG(dbgs() << "LV: Loop does not require scalar epilogue\n");
1383
1388
return false;
1384
1389
}
1385
- // If we might exit from anywhere but the latch, must run the exiting
1386
- // iteration in scalar form.
1387
- if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1390
+ // If we might exit from anywhere but the latch and early exit vectorization
1391
+ // is disabled, we must run the exiting iteration in scalar form.
1392
+ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch() &&
1393
+ !(EnableEarlyExitVectorization && Legal->hasUncountableEarlyExit())) {
1388
1394
LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: not exiting "
1389
1395
"from latch block\n");
1390
1396
return true;
@@ -3656,10 +3662,13 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
3656
3662
3657
3663
// Start with the conditional branches exiting the loop. If the branch
3658
3664
// condition is an instruction contained in the loop that is only used by the
3659
- // branch, it is uniform.
3665
+ // branch, it is uniform. Note conditions from uncountable early exits are not
3666
+ // uniform.
3660
3667
SmallVector<BasicBlock *> Exiting;
3661
3668
TheLoop->getExitingBlocks(Exiting);
3662
3669
for (BasicBlock *E : Exiting) {
3670
+ if (Legal->hasUncountableEarlyExit() && TheLoop->getLoopLatch() != E)
3671
+ continue;
3663
3672
auto *Cmp = dyn_cast<Instruction>(E->getTerminator()->getOperand(0));
3664
3673
if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse())
3665
3674
AddToWorklistIfAllowed(Cmp);
@@ -8239,8 +8248,11 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
8239
8248
8240
8249
// If source is an exiting block, we know the exit edge is dynamically dead
8241
8250
// in the vector loop, and thus we don't need to restrict the mask. Avoid
8242
- // adding uses of an otherwise potentially dead instruction.
8243
- if (OrigLoop->isLoopExiting(Src))
8251
+ // adding uses of an otherwise potentially dead instruction unless we are
8252
+ // vectorizing a loop with uncountable exits. In that case, we always
8253
+ // materialize the mask.
8254
+ if (OrigLoop->isLoopExiting(Src) &&
8255
+ Src != Legal->getUncountableEarlyExitingBlock())
8244
8256
return EdgeMaskCache[Edge] = SrcMask;
8245
8257
8246
8258
VPValue *EdgeMask = getVPValueOrAddLiveIn(BI->getCondition());
@@ -8931,50 +8943,58 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
8931
8943
static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
8932
8944
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
8933
8945
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
8946
+ auto *MiddleVPBB = Plan.getMiddleBlock();
8934
8947
SetVector<VPIRInstruction *> ExitUsersToFix;
8935
8948
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
8936
- BasicBlock *ExitBB = ExitVPBB->getIRBasicBlock();
8937
- BasicBlock *ExitingBB = find_singleton<BasicBlock>(
8938
- to_vector(predecessors(ExitBB)),
8939
- [OrigLoop](BasicBlock *Pred, bool AllowRepeats) {
8940
- return OrigLoop->contains(Pred) ? Pred : nullptr;
8941
- });
8942
8949
for (VPRecipeBase &R : *ExitVPBB) {
8943
8950
auto *ExitIRI = dyn_cast<VPIRInstruction>(&R);
8944
8951
if (!ExitIRI)
8945
8952
continue;
8946
8953
auto *ExitPhi = dyn_cast<PHINode>(&ExitIRI->getInstruction());
8947
8954
if (!ExitPhi)
8948
8955
break;
8949
- Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
8950
- VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
8951
- // Exit values for inductions are computed and updated outside of VPlan
8952
- // and independent of induction recipes.
8953
- // TODO: Compute induction exit values in VPlan.
8954
- if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8955
- !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
8956
- isa<VPWidenPointerInductionRecipe>(V) ||
8957
- (isa<Instruction>(IncomingValue) &&
8958
- OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
8959
- any_of(IncomingValue->users(), [&Inductions](User *U) {
8960
- auto *P = dyn_cast<PHINode>(U);
8961
- return P && Inductions.contains(P);
8962
- })))
8963
- continue;
8964
- ExitUsersToFix.insert(ExitIRI);
8965
- ExitIRI->addOperand(V);
8956
+ for (VPBlockBase *PredVPBB : ExitVPBB->getPredecessors()) {
8957
+ BasicBlock *ExitingBB = OrigLoop->getLoopLatch();
8958
+ if (PredVPBB != MiddleVPBB) {
8959
+ SmallVector<BasicBlock *> ExitingBlocks;
8960
+ OrigLoop->getExitingBlocks(ExitingBlocks);
8961
+ assert(ExitingBlocks.size() == 2 && "only support 2 exiting blocks");
8962
+ ExitingBB = ExitingBB == ExitingBlocks[0] ? ExitingBlocks[1]
8963
+ : ExitingBlocks[0];
8964
+ }
8965
+ Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
8966
+ VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
8967
+ // Exit values for inductions are computed and updated outside of VPlan
8968
+ // and independent of induction recipes.
8969
+ // TODO: Compute induction exit values in VPlan.
8970
+ if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
8971
+ !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
8972
+ isa<VPWidenPointerInductionRecipe>(V) ||
8973
+ (isa<Instruction>(IncomingValue) &&
8974
+ OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
8975
+ any_of(IncomingValue->users(), [&Inductions](User *U) {
8976
+ auto *P = dyn_cast<PHINode>(U);
8977
+ return P && Inductions.contains(P);
8978
+ }))) {
8979
+ if (ExitVPBB->getSinglePredecessor() == MiddleVPBB)
8980
+ continue;
8981
+ }
8982
+ ExitUsersToFix.insert(ExitIRI);
8983
+ ExitIRI->addOperand(V);
8984
+ }
8966
8985
}
8967
8986
}
8968
8987
return ExitUsersToFix;
8969
8988
}
8970
8989
8971
8990
// Add exit values to \p Plan. Extracts are added for each entry in \p
8972
- // ExitUsersToFix if needed and their operands are updated.
8973
- static void
8991
+ // ExitUsersToFix if needed and their operands are updated. Returns true if all
8992
+ // exit users can be handled, otherwise returns false.
8993
+ static bool
8974
8994
addUsersInExitBlocks(VPlan &Plan,
8975
8995
const SetVector<VPIRInstruction *> &ExitUsersToFix) {
8976
8996
if (ExitUsersToFix.empty())
8977
- return;
8997
+ return true ;
8978
8998
8979
8999
auto *MiddleVPBB = Plan.getMiddleBlock();
8980
9000
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
@@ -8988,14 +9008,18 @@ addUsersInExitBlocks(VPlan &Plan,
8988
9008
if (V->isLiveIn())
8989
9009
continue;
8990
9010
8991
- assert(ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
8992
- "Exit value not handled yet for this edge.");
9011
+ // Currently only live-ins can be used by exit values from blocks not
9012
+ // exiting via the vector latch through to the middle block.
9013
+ if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
9014
+ return false;
9015
+
8993
9016
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
8994
9017
VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
8995
9018
{V, Plan.getOrAddLiveIn(ConstantInt::get(
8996
9019
IntegerType::get(Ctx, 32), 1))});
8997
9020
ExitIRI->setOperand(0, Ext);
8998
9021
}
9022
+ return true;
8999
9023
}
9000
9024
9001
9025
/// Handle users in the exit block for first order reductions in the original
@@ -9268,11 +9292,23 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
9268
9292
"VPBasicBlock");
9269
9293
RecipeBuilder.fixHeaderPhis();
9270
9294
9295
+ if (auto *UncountableExitingBlock =
9296
+ Legal->getUncountableEarlyExitingBlock()) {
9297
+ VPlanTransforms::handleUncountableEarlyExit(
9298
+ *Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
9299
+ }
9271
9300
addScalarResumePhis(RecipeBuilder, *Plan);
9272
9301
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks(
9273
9302
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
9274
9303
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9275
- addUsersInExitBlocks(*Plan, ExitUsersToFix);
9304
+ if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
9305
+ reportVectorizationFailure(
9306
+ "Some exit values in loop with uncountable exit not supported yet",
9307
+ "Some exit values in loop with uncountable exit not supported yet",
9308
+ "UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
9309
+ return nullptr;
9310
+ }
9311
+
9276
9312
// ---------------------------------------------------------------------------
9277
9313
// Transform initial VPlan: Apply previously taken decisions, in order, to
9278
9314
// bring the VPlan to its final state.
@@ -10138,12 +10174,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10138
10174
return false;
10139
10175
}
10140
10176
10141
- if (LVL.hasUncountableEarlyExit()) {
10177
+ if (LVL.hasUncountableEarlyExit() && !EnableEarlyExitVectorization ) {
10142
10178
reportVectorizationFailure("Auto-vectorization of loops with uncountable "
10143
- "early exit is not yet supported ",
10179
+ "early exit is not enabled ",
10144
10180
"Auto-vectorization of loops with uncountable "
10145
- "early exit is not yet supported ",
10146
- "UncountableEarlyExitLoopsUnsupported ", ORE, L);
10181
+ "early exit is not enabled ",
10182
+ "UncountableEarlyExitLoopsDisabled ", ORE, L);
10147
10183
return false;
10148
10184
}
10149
10185
0 commit comments