Skip to content

Commit e762d4d

Browse files
authored
[LoopVectorize] Teach LoopVectorizationLegality about more early exits (#107004)
This patch is split off from PR #88385 and concerns only the code related to the legality of vectorising early exit loops. It is the first step in adding support for vectorisation of a simple class of loops that typically involves searching for something, i.e.

  for (int i = 0; i < n; i++) {
    if (p[i] == val)
      return i;
  }
  return n;

or

  for (int i = 0; i < n; i++) {
    if (p1[i] != p2[i])
      return i;
  }
  return n;

In this initial commit LoopVectorizationLegality will only consider early exit loops legal for vectorising if they follow these criteria:

1. There are no stores in the loop.
2. The loop must have only one early exit like those shown in the above example. I have referred to such exits as speculative early exits, to distinguish from existing support for early exits where the exit-not-taken count is known exactly at compile time.
3. The early exit block dominates the latch block.
4. The latch block must have an exact exit count.
5. There are no loads after the early exit block.
6. The loop must not contain reductions or recurrences. I don't see anything fundamental blocking vectorisation of such loops, but I just haven't done the work to support them yet.
7. We must be able to prove at compile-time that loops will not contain faulting loads.

Tests have been added here: Transforms/LoopVectorize/AArch64/simple_early_exit.ll
1 parent 60a8b2b commit e762d4d

File tree

7 files changed

+2168
-17
lines changed

7 files changed

+2168
-17
lines changed

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,24 @@ class LoopVectorizationLegality {
377377
return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
378378
}
379379

380+
/// Returns true if the loop has a speculative early exit, i.e. an
381+
/// uncountable exit that isn't the latch block.
382+
bool hasSpeculativeEarlyExit() const { return HasSpeculativeEarlyExit; }
383+
384+
/// Returns the speculative early exiting block.
385+
BasicBlock *getSpeculativeEarlyExitingBlock() const {
386+
assert(getUncountableExitingBlocks().size() == 1 &&
387+
"Expected only a single uncountable exiting block");
388+
return getUncountableExitingBlocks()[0];
389+
}
390+
391+
/// Returns the destination of a speculative early exiting block.
392+
BasicBlock *getSpeculativeEarlyExitBlock() const {
393+
assert(getUncountableExitBlocks().size() == 1 &&
394+
"Expected only a single uncountable exit block");
395+
return getUncountableExitBlocks()[0];
396+
}
397+
380398
/// Returns true if vector representation of the instruction \p I
381399
/// requires mask.
382400
bool isMaskRequired(const Instruction *I) const {
@@ -404,6 +422,22 @@ class LoopVectorizationLegality {
404422

405423
DominatorTree *getDominatorTree() const { return DT; }
406424

425+
/// Returns all exiting blocks with a countable exit, i.e. the
426+
/// exit-not-taken count is known exactly at compile time.
427+
const SmallVector<BasicBlock *, 4> &getCountableExitingBlocks() const {
428+
return CountableExitingBlocks;
429+
}
430+
431+
/// Returns all the exiting blocks with an uncountable exit.
432+
const SmallVector<BasicBlock *, 4> &getUncountableExitingBlocks() const {
433+
return UncountableExitingBlocks;
434+
}
435+
436+
/// Returns all the exit blocks from uncountable exiting blocks.
437+
SmallVector<BasicBlock *, 4> getUncountableExitBlocks() const {
438+
return UncountableExitBlocks;
439+
}
440+
407441
private:
408442
/// Return true if the pre-header, exiting and latch blocks of \p Lp and all
409443
/// its nested loops are considered legal for vectorization. These legal
@@ -446,6 +480,23 @@ class LoopVectorizationLegality {
446480
/// specific checks for outer loop vectorization.
447481
bool canVectorizeOuterLoop();
448482

483+
/// Returns true if this is an early exit loop that can be vectorized.
484+
/// Currently, a loop with an uncountable early exit is considered
485+
/// vectorizable if:
486+
/// 1. There are no writes to memory in the loop.
487+
/// 2. The loop has only one early uncountable exit
488+
/// 3. The early exiting block dominates the latch block (currently it must
/// be the unique predecessor of the latch).
489+
/// 4. The latch block has an exact exit count.
490+
/// 5. The loop does not contain reductions or recurrences.
491+
/// 6. We can prove at compile-time that loops will not contain faulting
492+
/// loads.
493+
/// 7. It is safe to speculatively execute instructions such as divide or
494+
/// call instructions.
495+
/// The list above is not based on theoretical limitations of vectorization,
496+
/// but simply a statement that more work is needed to support these
497+
/// additional cases safely.
498+
bool isVectorizableEarlyExitLoop();
499+
449500
/// Return true if all of the instructions in the block can be speculatively
450501
/// executed, and record the loads/stores that require masking.
451502
/// \p SafePtrs is a list of addresses that are known to be legal and we know
@@ -551,6 +602,17 @@ class LoopVectorizationLegality {
551602
/// (potentially) make a better decision on the maximum VF and enable
552603
/// the use of those function variants.
553604
bool VecCallVariantsFound = false;
605+
606+
/// Indicates whether this loop has a speculative early exit, i.e. an
607+
/// uncountable exiting block that is not the latch.
608+
bool HasSpeculativeEarlyExit = false;
609+
610+
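/// Keep track of all the loop exiting blocks, split into those whose exit
/// count can be computed (countable) and those where it cannot (uncountable).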
611+
SmallVector<BasicBlock *, 4> CountableExitingBlocks;
612+
SmallVector<BasicBlock *, 4> UncountableExitingBlocks;
613+
614+
/// Keep track of the destinations of all uncountable exits.
615+
SmallVector<BasicBlock *, 4> UncountableExitBlocks;
554616
};
555617

556618
} // namespace llvm

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 150 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1445,6 +1445,145 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
14451445
return Result;
14461446
}
14471447

1448+
bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
1449+
BasicBlock *LatchBB = TheLoop->getLoopLatch();
1450+
if (!LatchBB) {
1451+
reportVectorizationFailure("Loop does not have a latch",
1452+
"Cannot vectorize early exit loop",
1453+
"NoLatchEarlyExit", ORE, TheLoop);
1454+
return false;
1455+
}
1456+
1457+
if (Reductions.size() || FixedOrderRecurrences.size()) {
1458+
reportVectorizationFailure(
1459+
"Found reductions or recurrences in early-exit loop",
1460+
"Cannot vectorize early exit loop with reductions or recurrences",
1461+
"RecurrencesInEarlyExitLoop", ORE, TheLoop);
1462+
return false;
1463+
}
1464+
1465+
SmallVector<BasicBlock *, 8> ExitingBlocks;
1466+
TheLoop->getExitingBlocks(ExitingBlocks);
1467+
1468+
// Keep a record of all the exiting blocks.
1469+
SmallVector<const SCEVPredicate *, 4> Predicates;
1470+
for (BasicBlock *BB1 : ExitingBlocks) {
1471+
const SCEV *EC =
1472+
PSE.getSE()->getPredicatedExitCount(TheLoop, BB1, &Predicates);
1473+
if (isa<SCEVCouldNotCompute>(EC)) {
1474+
UncountableExitingBlocks.push_back(BB1);
1475+
1476+
SmallVector<BasicBlock *, 2> Succs(successors(BB1));
1477+
if (Succs.size() != 2) {
1478+
reportVectorizationFailure(
1479+
"Early exiting block does not have exactly two successors",
1480+
"Incorrect number of successors from early exiting block",
1481+
"EarlyExitTooManySuccessors", ORE, TheLoop);
1482+
return false;
1483+
}
1484+
1485+
BasicBlock *BB2;
1486+
if (!TheLoop->contains(Succs[0]))
1487+
BB2 = Succs[0];
1488+
else {
1489+
assert(!TheLoop->contains(Succs[1]));
1490+
BB2 = Succs[1];
1491+
}
1492+
UncountableExitBlocks.push_back(BB2);
1493+
} else
1494+
CountableExitingBlocks.push_back(BB1);
1495+
}
1496+
Predicates.clear();
1497+
1498+
// We only support one uncountable early exit.
1499+
if (getUncountableExitingBlocks().size() != 1) {
1500+
reportVectorizationFailure(
1501+
"Loop has too many uncountable exits",
1502+
"Cannot vectorize early exit loop with more than one early exit",
1503+
"TooManyUncountableEarlyExits", ORE, TheLoop);
1504+
return false;
1505+
}
1506+
1507+
// The only supported early exit loops so far are ones where the early
1508+
// exiting block is a unique predecessor of the latch block.
1509+
BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor();
1510+
if (LatchPredBB != getSpeculativeEarlyExitingBlock()) {
1511+
reportVectorizationFailure("Early exit is not the latch predecessor",
1512+
"Cannot vectorize early exit loop",
1513+
"EarlyExitNotLatchPredecessor", ORE, TheLoop);
1514+
return false;
1515+
}
1516+
1517+
// Check to see if there are instructions that could potentially generate
1518+
// exceptions or have side-effects.
1519+
auto IsSafeOperation = [](Instruction *I) -> bool {
1520+
switch (I->getOpcode()) {
1521+
case Instruction::Load:
1522+
case Instruction::Store:
1523+
case Instruction::PHI:
1524+
case Instruction::Br:
1525+
// These are checked separately.
1526+
return true;
1527+
default:
1528+
return isSafeToSpeculativelyExecute(I);
1529+
}
1530+
};
1531+
1532+
for (auto *BB : TheLoop->blocks())
1533+
for (auto &I : *BB) {
1534+
if (I.mayWriteToMemory()) {
1535+
// We don't support writes to memory.
1536+
reportVectorizationFailure(
1537+
"Writes to memory unsupported in early exit loops",
1538+
"Cannot vectorize early exit loop with writes to memory",
1539+
"WritesInEarlyExitLoop", ORE, TheLoop);
1540+
return false;
1541+
} else if (!IsSafeOperation(&I)) {
1542+
reportVectorizationFailure("Early exit loop contains operations that "
1543+
"cannot be speculatively executed",
1544+
"Early exit loop contains operations that "
1545+
"cannot be speculatively executed",
1546+
"UnsafeOperationsEarlyExitLoop", ORE,
1547+
TheLoop);
1548+
return false;
1549+
}
1550+
}
1551+
1552+
// The latch block must have a countable exit.
1553+
if (isa<SCEVCouldNotCompute>(
1554+
PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1555+
reportVectorizationFailure(
1556+
"Cannot determine exact exit count for latch block",
1557+
"Cannot vectorize early exit loop",
1558+
"UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1559+
return false;
1560+
}
1561+
1562+
// The vectoriser cannot handle loads that occur after the early exit block.
1563+
assert(LatchBB->getUniquePredecessor() == getSpeculativeEarlyExitingBlock() &&
1564+
"Expected latch predecessor to be the early exiting block");
1565+
1566+
// TODO: Handle loops that may fault.
1567+
if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC)) {
1568+
reportVectorizationFailure(
1569+
"Loop may fault",
1570+
"Cannot vectorize potentially faulting early exit loop",
1571+
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1572+
return false;
1573+
}
1574+
1575+
LLVM_DEBUG(
1576+
dbgs()
1577+
<< "LV: Found an early exit. Retrying with speculative exit count.\n");
1578+
const SCEV *SpecExitCount = PSE.getSymbolicMaxBackedgeTakenCount();
1579+
assert(!isa<SCEVCouldNotCompute>(SpecExitCount) &&
1580+
"Failed to get symbolic expression for backedge taken count");
1581+
1582+
LLVM_DEBUG(dbgs() << "LV: Found speculative backedge taken count: "
1583+
<< *SpecExitCount << '\n');
1584+
return true;
1585+
}
1586+
14481587
bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
14491588
// Store the result and return it at the end instead of exiting early, in case
14501589
// allowExtraAnalysis is used to report multiple reasons for not vectorizing.
@@ -1505,6 +1644,17 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
15051644
return false;
15061645
}
15071646

1647+
HasSpeculativeEarlyExit = false;
1648+
if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1649+
if (!isVectorizableEarlyExitLoop()) {
1650+
if (DoExtraAnalysis)
1651+
Result = false;
1652+
else
1653+
return false;
1654+
} else
1655+
HasSpeculativeEarlyExit = true;
1656+
}
1657+
15081658
// Go over each instruction and look at memory deps.
15091659
if (!canVectorizeMemory()) {
15101660
LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
@@ -1514,16 +1664,6 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
15141664
return false;
15151665
}
15161666

1517-
if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1518-
reportVectorizationFailure("could not determine number of loop iterations",
1519-
"could not determine number of loop iterations",
1520-
"CantComputeNumberOfIterations", ORE, TheLoop);
1521-
if (DoExtraAnalysis)
1522-
Result = false;
1523-
else
1524-
return false;
1525-
}
1526-
15271667
if (Result) {
15281668
LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
15291669
<< (LAI->getRuntimePointerChecking()->Need

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9807,6 +9807,14 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98079807
return false;
98089808
}
98099809

9810+
if (LVL.hasSpeculativeEarlyExit()) {
9811+
reportVectorizationFailure(
9812+
"Auto-vectorization of early exit loops is not yet supported.",
9813+
"Auto-vectorization of early exit loops is not yet supported.",
9814+
"EarlyExitLoopsUnsupported", ORE, L);
9815+
return false;
9816+
}
9817+
98109818
// Entrance to the VPlan-native vectorization path. Outer loops are processed
98119819
// here. They may require CFG and instruction level transformations before
98129820
// even evaluating whether vectorization is profitable. Since we cannot modify

llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
; }
1313
; }
1414
; File, line, and column should match those specified in the metadata
15-
; CHECK: remark: source.cpp:5:9: loop not vectorized: could not determine number of loop iterations
15+
; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop
1616
; CHECK: remark: source.cpp:5:9: loop not vectorized
1717

1818
; void test_disabled(int *A, int Length) {
@@ -46,12 +46,12 @@
4646

4747
; YAML: --- !Analysis
4848
; YAML-NEXT: Pass: loop-vectorize
49-
; YAML-NEXT: Name: CantComputeNumberOfIterations
49+
; YAML-NEXT: Name: EarlyExitNotLatchPredecessor
5050
; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 5, Column: 9 }
5151
; YAML-NEXT: Function: _Z4testPii
5252
; YAML-NEXT: Args:
5353
; YAML-NEXT: - String: 'loop not vectorized: '
54-
; YAML-NEXT: - String: could not determine number of loop iterations
54+
; YAML-NEXT: - String: Cannot vectorize early exit loop
5555
; YAML-NEXT: ...
5656
; YAML-NEXT: --- !Missed
5757
; YAML-NEXT: Pass: loop-vectorize
@@ -117,12 +117,12 @@
117117
; YAML-NEXT: ...
118118
; YAML-NEXT: --- !Analysis
119119
; YAML-NEXT: Pass: loop-vectorize
120-
; YAML-NEXT: Name: CantComputeNumberOfIterations
120+
; YAML-NEXT: Name: EarlyExitNotLatchPredecessor
121121
; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
122122
; YAML-NEXT: Function: test_multiple_failures
123123
; YAML-NEXT: Args:
124124
; YAML-NEXT: - String: 'loop not vectorized: '
125-
; YAML-NEXT: - String: could not determine number of loop iterations
125+
; YAML-NEXT: - String: Cannot vectorize early exit loop
126126
; YAML-NEXT: ...
127127
; YAML: --- !Missed
128128
; YAML-NEXT: Pass: loop-vectorize

llvm/test/Transforms/LoopVectorize/control-flow.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
; return 0;
1111
; }
1212

13-
; CHECK: remark: source.cpp:5:9: loop not vectorized: could not determine number of loop iterations
13+
; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with writes to memory
1414
; CHECK: remark: source.cpp:5:9: loop not vectorized
1515

1616
; CHECK: _Z4testPii

llvm/test/Transforms/LoopVectorize/remarks-multi-exit-loops.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
; Make sure LV does not crash when generating remarks for loops with non-unique
44
; exit blocks.
55
define i32 @test_non_unique_exit_blocks(ptr nocapture readonly align 4 dereferenceable(1024) %data, i32 %x) {
6-
; CHECK: loop not vectorized: could not determine number of loop iterations
6+
; CHECK: loop not vectorized: Cannot vectorize early exit loop
77
;
88
entry:
99
br label %for.header

0 commit comments

Comments
 (0)