Skip to content

Draft: [LV] Outer-loop vectorization in the default vectorizer codepath #128202

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,10 @@ class LoopVectorizationLegality {
/// Keep track of the loop edge to an uncountable exit, comprising a pair
/// of (Exiting, Exit) blocks, if there is exactly one early exit.
std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge;

/// Contains true for a nested loop if it or any of its parents up
/// to the loop to vectorize needs an inner-loop active lane mask.
mutable DenseMap<const Loop *, bool> InnerLoopsNeedingPredication;
};

} // namespace llvm
Expand Down
69 changes: 58 additions & 11 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -792,21 +792,65 @@ class AccessAnalysis {

} // end anonymous namespace

/// Return true if \p E is invariant with regards to the Loop \p L.
/// If \p E is a recurrence around a inner loop of \p L, then the
/// start and step of that inner loop recurrence must be invariant
/// to \p L.
static bool isInvariantToTheLoop(const Loop *L, ScalarEvolution &SE,
const SCEV *E) {
if (SE.isLoopInvariant(E, L))
return true;

if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(E);
AddRec && L != AddRec->getLoop() && L->contains(AddRec->getLoop())) {
for (auto *Op : AddRec->operands())
if (!isInvariantToTheLoop(L, SE, Op))
return false;

return true;
}

return false;
}

/// Try to compute a constant stride for \p AR. Used by getPtrStride and
/// isNoWrap.
static std::optional<int64_t>
getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,
Value *Ptr, PredicatedScalarEvolution &PSE) {
// The access function must stride over the innermost loop.
// The access function must stride over the queried loop.
if (Lp != AR->getLoop()) {
LLVM_DEBUG({
dbgs() << "LAA: Bad stride - Not striding over innermost loop ";
if (Ptr)
dbgs() << *Ptr << " ";
assert(!Lp->isInnermost() && Lp->contains(AR->getLoop()) &&
"Classic SE should have detected invariance");
while (AR && Lp != AR->getLoop()) {
if (isInvariantToTheLoop(Lp, *PSE.getSE(), AR))
return {0};

const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
if (!isInvariantToTheLoop(Lp, *PSE.getSE(), Step)) {
LLVM_DEBUG({
dbgs() << "LAA: Bad stride - Depends on inner loop ";
if (Ptr)
dbgs() << *Ptr << " ";

dbgs() << "SCEV: " << *AR << "\n";
});
return std::nullopt;
}

dbgs() << "SCEV: " << *AR << "\n";
});
return std::nullopt;
AR = dyn_cast<SCEVAddRecExpr>(AR->getStart());
}

if (!AR || Lp != AR->getLoop()) {
LLVM_DEBUG({
dbgs() << "LAA: Bad stride - Strides over inner loop ";
if (Ptr)
dbgs() << *Ptr << " ";

dbgs() << "SCEV: " << *AR << "\n";
});
return std::nullopt;
}
}

// Check the step is constant.
Expand Down Expand Up @@ -2365,8 +2409,9 @@ bool LoopAccessInfo::canAnalyzeLoop() {
<< TheLoop->getHeader()->getParent()->getName() << "' from "
<< TheLoop->getLocStr() << "\n");

// We can only analyze innermost loops.
if (!TheLoop->isInnermost()) {
// We can only analyze non-innermost loops if no memory dependency checks
// are needed, i.e. if the loop is annotated parallel.
if (!TheLoop->isInnermost() && !TheLoop->isAnnotatedParallel()) {
LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
return false;
Expand Down Expand Up @@ -2587,6 +2632,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
return true;
}

assert(TheLoop->isInnermost());

for (LoadInst *LD : Loads) {
Value *Ptr = LD->getPointerOperand();
// If we did *not* see this pointer before, insert it to the
Expand Down Expand Up @@ -2812,7 +2859,7 @@ bool LoopAccessInfo::isInvariant(Value *V) const {
if (!SE->isSCEVable(V->getType()))
return false;
const SCEV *S = SE->getSCEV(V);
return SE->isLoopInvariant(S, TheLoop);
return isInvariantToTheLoop(TheLoop, *SE, S);
}

/// If \p Ptr is a GEP, which has a loop-variant operand, return that operand.
Expand Down
66 changes: 55 additions & 11 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,11 @@ bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
if (VF.isScalar())
return true;

// The SCEVAddRecForUniformityRewriter does not support accesses to addresses
// invariant w.r.t. the vectorized loop but with recurrences of inner loops.
if (!TheLoop->isInnermost())
return false;

// Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
// never considered uniform.
auto *SE = PSE.getSE();
Expand Down Expand Up @@ -1207,8 +1212,12 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
});
}

if (!LAI->canVectorizeMemory())
return canVectorizeIndirectUnsafeDependences();
if (!LAI->canVectorizeMemory()) {
if (canVectorizeIndirectUnsafeDependences())
return true;

return false;
}

if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
reportVectorizationFailure("We don't allow storing to uniform addresses",
Expand Down Expand Up @@ -1403,7 +1412,31 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
"Uncountable exiting block must be a direct predecessor of latch");
return BB == Latch;
}
return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);

if (LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT))
return true;

// Blocks in inner loops need predication if the inner loop trip-count
// is not invariant to the vectorized loop.
if (!TheLoop->isInnermost()) {
Loop *BBLoop = LI->getLoopFor(BB);
if (BBLoop != TheLoop) {
if (auto Iter = InnerLoopsNeedingPredication.find(BBLoop);
Iter != InnerLoopsNeedingPredication.end())
return Iter->second;

for (Loop *L = BBLoop; L != TheLoop; L = L->getParentLoop())
if (!isUniformLoop(L, TheLoop)) {
InnerLoopsNeedingPredication[BBLoop] = true;
return true;
}

InnerLoopsNeedingPredication[BBLoop] = false;
return false;
}
}

return false;
}

bool LoopVectorizationLegality::blockCanBePredicated(
Expand Down Expand Up @@ -1537,9 +1570,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
// Helper function to canVectorizeLoopNestCFG.
bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
bool UseVPlanNativePath) {
assert((UseVPlanNativePath || Lp->isInnermost()) &&
"VPlan-native path is not enabled.");

// TODO: ORE should be improved to show more accurate information when an
// outer loop can't be vectorized because a nested loop is not understood or
// legal. Something like: "outer_loop_location: loop not vectorized:
Expand Down Expand Up @@ -1573,6 +1603,23 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
return false;
}

if (Lp != TheLoop && !UseVPlanNativePath) {
// Inner loops must be in loop-simplify and LCSSA form, with the latch block
// also being the only exiting block, and must have a dedicated exit.
BasicBlock *Exiting = Lp->getExitingBlock();
if (!Lp->isLoopSimplifyForm() || !Exiting ||
Exiting != Lp->getLoopLatch() || !Lp->isLCSSAForm(*DT)) {
reportVectorizationFailure(
"The inner loops must exit through their latch",
"loop control flow is not understood by vectorizer",
"CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
return false;
}
}

return Result;
}

Expand Down Expand Up @@ -1775,9 +1822,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {

// Specific checks for outer loops. We skip the remaining legal checks at this
// point because they don't support outer loops.
if (!TheLoop->isInnermost()) {
assert(UseVPlanNativePath && "VPlan-native path is not enabled.");

if (!TheLoop->isInnermost() && UseVPlanNativePath) {
if (!canVectorizeOuterLoop()) {
reportVectorizationFailure("Unsupported outer loop",
"UnsupportedOuterLoop", ORE, TheLoop);
Expand All @@ -1790,7 +1835,6 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return Result;
}

assert(TheLoop->isInnermost() && "Inner loop expected.");
// Check if we can if-convert non-single-bb loops.
unsigned NumBlocks = TheLoop->getNumBlocks();
if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
Expand All @@ -1811,7 +1855,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
}

if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
if (TheLoop->getExitingBlock()) {
if (TheLoop->getExitingBlock() || !TheLoop->isInnermost()) {
reportVectorizationFailure("Cannot vectorize uncountable loop",
"UnsupportedUncountableLoop", ORE, TheLoop);
if (DoExtraAnalysis)
Expand Down
Loading