Skip to content

Commit b0ff57d

Browse files
committed
[LoopVectorize] Enable vectorisation of early exit loops with live-outs
This work feeds part of PR llvm#88385, and adds support for vectorising loops with uncountable early exits and outside users of loop-defined variables. When calculating the final value from an uncountable early exit we need to calculate the vector lane that triggered the exit, and hence determine the value at the point we exited. All code for calculating the last value when exiting the loop early now lives in a new vector.early.exit block, which sits between the middle.split block and the original exit block.

Doing this required two fixes:

1. The VPlan verifier incorrectly assumed that the block containing a definition always dominates the block of the user. That's not true if you can arrive at the use block from multiple incoming blocks. This is possible for early exit loops where both the early exit and the latch jump to the same block.
2. VPBasicBlock::execute always registered a newly created block in the current parent loop. A block whose sole successor is an exit block (such as vector.early.exit) must instead be added to the same parent loop as that exit block.

I've added a new ExtractFirstActive VPInstruction that extracts the first active lane of a vector, i.e. the lane of the vector predicate that triggered the exit.

NOTE: The IR generated for dealing with live-outs from early exit loops is unoptimised, as opposed to normal loops. This inevitably leads to poor quality code, but this can be fixed up later.
1 parent 5139c90 commit b0ff57d

17 files changed

+1063
-152
lines changed

llvm/docs/Vectorizers.rst

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -405,9 +405,11 @@ Early Exit Vectorization
405405
When vectorizing a loop with a single early exit, the loop blocks following the
406406
early exit are predicated and the vector loop will always exit via the latch.
407407
If the early exit has been taken, the vector loop's successor block
408-
(``middle.split`` below) branches to the early exit block. Otherwise
409-
``middle.block`` selects between the exit block from the latch or the scalar
410-
remainder loop.
408+
(``middle.split`` below) branches to the early exit block via an intermediate
409+
block (``vector.early.exit`` below). This intermediate block is responsible for
410+
calculating any exit values of loop-defined variables that are used in the
411+
early exit block. Otherwise, ``middle.block`` selects between the exit block
412+
from the latch or the scalar remainder loop.
411413

412414
.. image:: vplan-early-exit.png
413415

llvm/docs/vplan-early-exit.dot

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,23 +19,27 @@ compound=true
1919
"middle.split"
2020
]
2121
N4 -> N5 [ label=""]
22-
N4 -> N6 [ label=""]
22+
N4 -> N7 [ label=""]
2323
N5 [label =
24-
"early.exit"
24+
"vector.early.exit"
2525
]
26+
N5 -> N6 [ label=""]
2627
N6 [label =
27-
"middle.block"
28+
"early.exit"
2829
]
29-
N6 -> N9 [ label=""]
30-
N6 -> N7 [ label=""]
3130
N7 [label =
32-
"scalar.ph"
31+
"middle.block"
3332
]
33+
N7 -> N10 [ label=""]
3434
N7 -> N8 [ label=""]
3535
N8 [label =
36-
"loop.header"
36+
"scalar.ph"
3737
]
38+
N8 -> N9 [ label=""]
3839
N9 [label =
40+
"loop.header"
41+
]
42+
N10 [label =
3943
"latch.exit"
4044
]
4145
}

llvm/docs/vplan-early-exit.png

-54.4 KB
Loading

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,11 @@ class LoopVectorizationLegality {
407407

408408
/// Returns the destination of an uncountable early exiting block.
409409
BasicBlock *getUncountableEarlyExitBlock() const {
410+
if (!HasUncountableEarlyExit) {
411+
assert(getUncountableExitBlocks().empty() &&
412+
"Expected no uncountable exiting blocks");
413+
return nullptr;
414+
}
410415
assert(getUncountableExitBlocks().size() == 1 &&
411416
"Expected only a single uncountable exit block");
412417
return getUncountableExitBlocks()[0];

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9067,15 +9067,21 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
90679067
// Add exit values to \p Plan. Extracts are added for each entry in \p
90689068
// ExitUsersToFix if needed and their operands are updated. Extracts go in the
90699069
// middle block, or in vector.early.exit for values leaving via the early exit.
9070-
static bool
9070+
static void
90719071
addUsersInExitBlocks(VPlan &Plan,
90729072
const SetVector<VPIRInstruction *> &ExitUsersToFix) {
90739073
if (ExitUsersToFix.empty())
9074-
return true;
9074+
return;
90759075

90769076
auto *MiddleVPBB = Plan.getMiddleBlock();
90779077
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
9078-
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
9078+
VPBuilder MiddleB(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
9079+
VPBuilder EarlyExitB;
9080+
VPBasicBlock *VectorEarlyExitVPBB = Plan.getEarlyExit();
9081+
VPValue *EarlyExitMask = nullptr;
9082+
if (VectorEarlyExitVPBB)
9083+
EarlyExitB.setInsertPoint(VectorEarlyExitVPBB,
9084+
VectorEarlyExitVPBB->getFirstNonPhi());
90799085

90809086
// Introduce extract for exiting values and update the VPIRInstructions
90819087
// modeling the corresponding LCSSA phis.
@@ -9086,19 +9092,38 @@ addUsersInExitBlocks(VPlan &Plan,
90869092
if (Op->isLiveIn())
90879093
continue;
90889094

9089-
// Currently only live-ins can be used by exit values from blocks not
9090-
// exiting via the vector latch through to the middle block.
9091-
if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
9092-
return false;
9093-
90949095
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
9095-
VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
9096-
{Op, Plan.getOrAddLiveIn(ConstantInt::get(
9097-
IntegerType::get(Ctx, 32), 1))});
9096+
VPValue *Ext;
9097+
VPBasicBlock *PredVPBB =
9098+
cast<VPBasicBlock>(ExitIRI->getParent()->getPredecessors()[Idx]);
9099+
if (PredVPBB != MiddleVPBB) {
9100+
assert(ExitIRI->getParent()->getNumPredecessors() <= 2);
9101+
9102+
// Lookup and cache the early exit mask.
9103+
if (!EarlyExitMask) {
9104+
VPBasicBlock *MiddleSplitVPBB =
9105+
cast<VPBasicBlock>(VectorEarlyExitVPBB->getSinglePredecessor());
9106+
VPInstruction *PredTerm =
9107+
cast<VPInstruction>(MiddleSplitVPBB->getTerminator());
9108+
assert(PredTerm->getOpcode() == VPInstruction::BranchOnCond &&
9109+
"Unexpected middle split block terminator");
9110+
VPInstruction *ScalarCond =
9111+
cast<VPInstruction>(PredTerm->getOperand(0));
9112+
assert(
9113+
ScalarCond->getOpcode() == VPInstruction::AnyOf &&
9114+
"Unexpected condition for middle split block terminator branch");
9115+
EarlyExitMask = ScalarCond->getOperand(0);
9116+
}
9117+
Ext = EarlyExitB.createNaryOp(VPInstruction::ExtractFirstActive,
9118+
{Op, EarlyExitMask});
9119+
} else {
9120+
Ext = MiddleB.createNaryOp(VPInstruction::ExtractFromEnd,
9121+
{Op, Plan.getOrAddLiveIn(ConstantInt::get(
9122+
IntegerType::get(Ctx, 32), 1))});
9123+
}
90989124
ExitIRI->setOperand(Idx, Ext);
90999125
}
91009126
}
9101-
return true;
91029127
}
91039128

91049129
/// Handle users in the exit block for first order reductions in the original
@@ -9402,12 +9427,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
94029427
SetVector<VPIRInstruction *> ExitUsersToFix =
94039428
collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
94049429
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
9405-
if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
9406-
reportVectorizationFailure(
9407-
"Some exit values in loop with uncountable exit not supported yet",
9408-
"UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
9409-
return nullptr;
9410-
}
9430+
addUsersInExitBlocks(*Plan, ExitUsersToFix);
94119431

94129432
// ---------------------------------------------------------------------------
94139433
// Transform initial VPlan: Apply previously taken decisions, in order, to

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -501,8 +501,15 @@ void VPBasicBlock::execute(VPTransformState *State) {
501501
UnreachableInst *Terminator = State->Builder.CreateUnreachable();
502502
// Register NewBB in its loop. In innermost loops it's the same for all
503503
// BB's.
504-
if (State->CurrentParentLoop)
505-
State->CurrentParentLoop->addBasicBlockToLoop(NewBB, *State->LI);
504+
Loop *ParentLoop = State->CurrentParentLoop;
505+
// If this block has a sole successor that is an exit block then it needs
506+
// adding to the same parent loop as the exit block.
507+
VPBlockBase *SuccVPBB = getSingleSuccessor();
508+
if (SuccVPBB && State->Plan->isExitBlock(SuccVPBB))
509+
ParentLoop = State->LI->getLoopFor(
510+
cast<VPIRBasicBlock>(SuccVPBB)->getIRBasicBlock());
511+
if (ParentLoop)
512+
ParentLoop->addBasicBlockToLoop(NewBB, *State->LI);
506513
State->Builder.SetInsertPoint(Terminator);
507514

508515
State->CFG.PrevBB = NewBB;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1228,6 +1228,9 @@ class VPInstruction : public VPRecipeWithIRFlags,
12281228
// Returns a scalar boolean value, which is true if any lane of its (only
12291229
// boolean) vector operand is true.
12301230
AnyOf,
1231+
// Extracts the first active lane of a vector, where the first operand is
1232+
// the vector to extract from, and the second operand is the predicate.
1233+
ExtractFirstActive,
12311234
};
12321235

12331236
private:
@@ -3938,6 +3941,22 @@ class VPlan {
39383941
VPRegionBlock *getVectorLoopRegion();
39393942
const VPRegionBlock *getVectorLoopRegion() const;
39403943

3944+
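/// Returns the vector early exit block, i.e. the first successor of the
/// block following the vector loop region. Returns nullptr if there is no
/// vector loop region or if the region's successor is the middle block.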
3945+
VPBasicBlock *getEarlyExit() {
3946+
auto LoopRegion = getVectorLoopRegion();
3947+
if (!LoopRegion)
3948+
return nullptr;
3949+
3950+
auto *SuccessorVPBB = LoopRegion->getSingleSuccessor();
3951+
auto *MiddleVPBB = getMiddleBlock();
3952+
if (SuccessorVPBB == MiddleVPBB)
3953+
return nullptr;
3954+
3955+
assert(SuccessorVPBB->getSuccessors()[1] == MiddleVPBB &&
3956+
"Expected second successor to be the middle block");
3957+
return cast<VPBasicBlock>(SuccessorVPBB->getSuccessors()[0]);
3958+
}
3959+
39413960
/// Returns the 'middle' block of the plan, that is the block that selects
39423961
/// whether to execute the scalar tail loop or the exit block from the loop
39433962
/// latch.
@@ -3962,6 +3981,9 @@ class VPlan {
39623981
/// of VPBlockShallowTraversalWrapper.
39633982
auto getExitBlocks();
39643983

3984+
/// Returns true if \p VPBB is an exit block.
3985+
bool isExitBlock(VPBlockBase *VPBB);
3986+
39653987
/// The trip count of the original loop.
39663988
VPValue *getTripCount() const {
39673989
assert(TripCount && "trip count needs to be set before accessing it");

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
7878
case VPInstruction::CanonicalIVIncrementForPart:
7979
case VPInstruction::AnyOf:
8080
return SetResultTyFromOp();
81+
case VPInstruction::ExtractFirstActive:
8182
case VPInstruction::ExtractFromEnd: {
8283
Type *BaseTy = inferScalarType(R->getOperand(0));
8384
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))

llvm/lib/Transforms/Vectorize/VPlanCFG.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,24 @@ template <> struct GraphTraits<VPlan *> {
306306
}
307307
};
308308

309+
inline bool VPlan::isExitBlock(VPBlockBase *VPBB) {
310+
if (!isa<VPIRBasicBlock>(VPBB) || VPBB->getNumSuccessors() ||
311+
VPBB == getScalarHeader())
312+
return false;
313+
314+
VPRegionBlock *RegionBlock = getVectorLoopRegion();
315+
if (!RegionBlock)
316+
return false;
317+
318+
// The block must be a successor of the region block.
319+
for (auto *OtherVPBB :
320+
vp_depth_first_shallow(RegionBlock->getSingleSuccessor()))
321+
if (OtherVPBB == VPBB)
322+
return true;
323+
324+
return false;
325+
}
326+
309327
inline auto VPlan::getExitBlocks() {
310328
VPBlockBase *ScalarHeader = getScalarHeader();
311329
return make_filter_range(

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -696,14 +696,21 @@ Value *VPInstruction::generate(VPTransformState &State) {
696696
Value *A = State.get(getOperand(0));
697697
return Builder.CreateOrReduce(A);
698698
}
699-
699+
case VPInstruction::ExtractFirstActive: {
700+
Value *Vec = State.get(getOperand(0));
701+
Value *Mask = State.get(getOperand(1));
702+
Value *Ctz =
703+
Builder.CreateCountTrailingZeroElems(Builder.getInt64Ty(), Mask);
704+
return Builder.CreateExtractElement(Vec, Ctz);
705+
}
700706
default:
701707
llvm_unreachable("Unsupported opcode for instruction");
702708
}
703709
}
704710

705711
bool VPInstruction::isVectorToScalar() const {
706712
return getOpcode() == VPInstruction::ExtractFromEnd ||
713+
getOpcode() == VPInstruction::ExtractFirstActive ||
707714
getOpcode() == VPInstruction::ComputeReductionResult ||
708715
getOpcode() == VPInstruction::AnyOf;
709716
}
@@ -768,6 +775,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
768775
case VPInstruction::CalculateTripCountMinusVF:
769776
case VPInstruction::CanonicalIVIncrementForPart:
770777
case VPInstruction::ExtractFromEnd:
778+
case VPInstruction::ExtractFirstActive:
771779
case VPInstruction::FirstOrderRecurrenceSplice:
772780
case VPInstruction::LogicalAnd:
773781
case VPInstruction::Not:
@@ -887,6 +895,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
887895
case VPInstruction::AnyOf:
888896
O << "any-of";
889897
break;
898+
case VPInstruction::ExtractFirstActive:
899+
O << "extract-first-active";
900+
break;
890901
default:
891902
O << Instruction::getOpcodeName(getOpcode());
892903
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2099,10 +2099,13 @@ void VPlanTransforms::handleUncountableEarlyExit(
20992099
Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
21002100

21012101
VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
2102+
VPBasicBlock *EarlyExitVPBB = Plan.createVPBasicBlock("vector.early.exit");
21022103
VPBlockUtils::insertOnEdge(LoopRegion, MiddleVPBB, NewMiddle);
2103-
VPBlockUtils::connectBlocks(NewMiddle, VPEarlyExitBlock);
2104+
VPBlockUtils::connectBlocks(NewMiddle, EarlyExitVPBB);
21042105
NewMiddle->swapSuccessors();
21052106

2107+
VPBlockUtils::connectBlocks(EarlyExitVPBB, VPEarlyExitBlock);
2108+
21062109
VPBuilder MiddleBuilder(NewMiddle);
21072110
MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});
21082111

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,11 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
222222
continue;
223223
}
224224

225-
if (!VPDT.dominates(VPBB, UI->getParent())) {
225+
// Now that we support vectorising loops with uncountable early exits
226+
// we can end up in situations where VPBB does not dominate the exit
227+
// block. Only do the check if the user is not in a VPIRBasicBlock.
228+
if (!isa<VPIRBasicBlock>(UI->getParent()) &&
229+
!VPDT.dominates(VPBB, UI->getParent())) {
226230
errs() << "Use before def!\n";
227231
return false;
228232
}

0 commit comments

Comments
 (0)