Skip to content

Commit 21c1c41

Browse files
committed
[LoopVectorize] Add support for vectorisation of more early exit loops
This patch follows on from PR llvm#107004 by adding support for vectorisation of a simple class of loops that typically involve searching for something, i.e. for (int i = 0; i < n; i++) { if (p[i] == val) return i; } return n; or for (int i = 0; i < n; i++) { if (p1[i] != p2[i]) return i; } return n; In this initial commit we will only consider early exit loops legal to vectorise if they follow these criteria: 1. There are no stores in the loop. 2. The loop must have only one early uncountable exit like those shown in the above example. 3. The early exit block dominates the latch block. 4. The latch block must have an exact exit count. 5. The loop must not contain reductions or recurrences. 6. We must be able to prove at compile-time that loops will not contain faulting loads. For point 6, once this patch lands I intend to follow up by supporting some limited cases of faulting loops where we can version the loop based on pointer alignment. For example, it turns out in the SPEC2017 benchmark (xalancbmk) there is a std::find loop that we can vectorise provided we add SCEV checks for the initial pointer being aligned to a multiple of the VF. In practice, the pointer is regularly aligned to at least 32/64 bytes and since the VF is a power of 2, any vector loads <= 32/64 bytes in size will always fault on the first lane, following the same behaviour as the scalar loop. Given we already do such speculative versioning for loops with unknown strides, alignment-based versioning doesn't seem to be any worse, at least for loops with only one load. This patch makes use of the existing experimental_cttz_elts intrinsic that's required in the vectorised early exit block to determine the first lane that triggered the exit. This intrinsic has generic lowering support so it's guaranteed to work for all targets. Tests have been updated here: Transforms/LoopVectorize/simple_early_exit.ll
1 parent 9e73159 commit 21c1c41

File tree

13 files changed

+2064
-427
lines changed

13 files changed

+2064
-427
lines changed

llvm/include/llvm/Support/GenericLoopInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,10 @@ template <class BlockT, class LoopT> class LoopBase {
294294
/// Otherwise return null.
295295
BlockT *getUniqueExitBlock() const;
296296

297+
/// Return the unique exit block for the latch, or null if there are multiple
298+
/// different exit blocks (or none at all).
299+
BlockT *getUniqueLatchExitBlock() const;
300+
297301
/// Return true if this loop does not have any exit blocks.
298302
bool hasNoExitBlocks() const;
299303

llvm/include/llvm/Support/GenericLoopInfoImpl.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,16 @@ BlockT *LoopBase<BlockT, LoopT>::getUniqueExitBlock() const {
159159
return getExitBlockHelper(this, true).first;
160160
}
161161

162+
template <class BlockT, class LoopT>
163+
BlockT *LoopBase<BlockT, LoopT>::getUniqueLatchExitBlock() const {
164+
const BlockT *Latch = getLoopLatch();
165+
assert(Latch && "Latch block must exists");
166+
SmallVector<BlockT *, 4> ExitBlocks;
167+
getUniqueExitBlocksHelper(this, ExitBlocks,
168+
[Latch](const BlockT *BB) { return BB == Latch; });
169+
return ExitBlocks.size() == 1 ? ExitBlocks[0] : nullptr;
170+
}
171+
162172
/// getExitEdges - Return all pairs of (_inside_block_,_outside_block_).
163173
template <class BlockT, class LoopT>
164174
void LoopBase<BlockT, LoopT>::getExitEdges(

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -377,19 +377,19 @@ class LoopVectorizationLegality {
377377
return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
378378
}
379379

380-
/// Returns true if the loop has a speculative early exit, i.e. an
380+
/// Returns true if the loop has an uncountable early exit, i.e. an
381381
/// uncountable exit that isn't the latch block.
382-
bool hasSpeculativeEarlyExit() const { return HasSpeculativeEarlyExit; }
382+
bool hasUncountableEarlyExit() const { return HasUncountableEarlyExit; }
383383

384-
/// Returns the speculative early exiting block.
385-
BasicBlock *getSpeculativeEarlyExitingBlock() const {
384+
/// Returns the uncountable early exiting block.
385+
BasicBlock *getUncountableEarlyExitingBlock() const {
386386
assert(getUncountableExitingBlocks().size() == 1 &&
387387
"Expected only a single uncountable exiting block");
388388
return getUncountableExitingBlocks()[0];
389389
}
390390

391-
/// Returns the destination of a speculative early exiting block.
392-
BasicBlock *getSpeculativeEarlyExitBlock() const {
391+
/// Returns the destination of an uncountable early exiting block.
392+
BasicBlock *getUncountableEarlyExitBlock() const {
393393
assert(getUncountableExitBlocks().size() == 1 &&
394394
"Expected only a single uncountable exit block");
395395
return getUncountableExitBlocks()[0];
@@ -603,15 +603,17 @@ class LoopVectorizationLegality {
603603
/// the use of those function variants.
604604
bool VecCallVariantsFound = false;
605605

606-
/// Indicates whether this loop has a speculative early exit, i.e. an
606+
/// Indicates whether this loop has an uncountable early exit, i.e. an
607607
/// uncountable exiting block that is not the latch.
608-
bool HasSpeculativeEarlyExit = false;
608+
bool HasUncountableEarlyExit = false;
609609

610-
/// Keep track of all the loop exiting blocks.
610+
/// Keep track of all the countable and uncountable exiting blocks if
611+
/// the exact backedge taken count is not computable.
611612
SmallVector<BasicBlock *, 4> CountableExitingBlocks;
612613
SmallVector<BasicBlock *, 4> UncountableExitingBlocks;
613614

614-
/// Keep track of the destinations of all uncountable exits.
615+
/// Keep track of the destinations of all uncountable exits if the
616+
/// exact backedge taken count is not computable.
615617
SmallVector<BasicBlock *, 4> UncountableExitBlocks;
616618
};
617619

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 51 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,12 @@ static cl::opt<LoopVectorizeHints::ScalableForceKind>
7979
"Scalable vectorization is available and favored when the "
8080
"cost is inconclusive.")));
8181

82+
static cl::opt<bool> AssumeNoMemFault(
83+
"vectorizer-no-mem-fault", cl::init(false), cl::Hidden,
84+
cl::desc("Assume vectorized loops will not have memory faults, which is "
85+
"potentially unsafe but can be useful for testing vectorization "
86+
"of early exit loops."));
87+
8288
/// Maximum vectorization interleave count.
8389
static const unsigned MaxInterleaveFactor = 16;
8490

@@ -1467,13 +1473,13 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
14671473

14681474
// Keep a record of all the exiting blocks.
14691475
SmallVector<const SCEVPredicate *, 4> Predicates;
1470-
for (BasicBlock *BB1 : ExitingBlocks) {
1476+
for (BasicBlock *BB : ExitingBlocks) {
14711477
const SCEV *EC =
1472-
PSE.getSE()->getPredicatedExitCount(TheLoop, BB1, &Predicates);
1478+
PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
14731479
if (isa<SCEVCouldNotCompute>(EC)) {
1474-
UncountableExitingBlocks.push_back(BB1);
1480+
UncountableExitingBlocks.push_back(BB);
14751481

1476-
SmallVector<BasicBlock *, 2> Succs(successors(BB1));
1482+
SmallVector<BasicBlock *, 2> Succs(successors(BB));
14771483
if (Succs.size() != 2) {
14781484
reportVectorizationFailure(
14791485
"Early exiting block does not have exactly two successors",
@@ -1482,17 +1488,21 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
14821488
return false;
14831489
}
14841490

1485-
BasicBlock *BB2;
1491+
BasicBlock *ExitBlock;
14861492
if (!TheLoop->contains(Succs[0]))
1487-
BB2 = Succs[0];
1493+
ExitBlock = Succs[0];
14881494
else {
14891495
assert(!TheLoop->contains(Succs[1]));
1490-
BB2 = Succs[1];
1496+
ExitBlock = Succs[1];
14911497
}
1492-
UncountableExitBlocks.push_back(BB2);
1498+
UncountableExitBlocks.push_back(ExitBlock);
14931499
} else
1494-
CountableExitingBlocks.push_back(BB1);
1500+
CountableExitingBlocks.push_back(BB);
14951501
}
1502+
// We can safely ignore the predicates here because when vectorizing the loop
1503+
// the PredicatedScalarEvolution class will keep track of all predicates
1504+
// for each exiting block anyway. This happens when calling
1505+
// PSE.getSymbolicMaxBackedgeTakenCount() below.
14961506
Predicates.clear();
14971507

14981508
// We only support one uncountable early exit.
@@ -1507,13 +1517,25 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
15071517
// The only supported early exit loops so far are ones where the early
15081518
// exiting block is a unique predecessor of the latch block.
15091519
BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor();
1510-
if (LatchPredBB != getSpeculativeEarlyExitingBlock()) {
1520+
if (LatchPredBB != getUncountableEarlyExitingBlock()) {
15111521
reportVectorizationFailure("Early exit is not the latch predecessor",
15121522
"Cannot vectorize early exit loop",
15131523
"EarlyExitNotLatchPredecessor", ORE, TheLoop);
15141524
return false;
15151525
}
15161526

1527+
// The latch block must have a countable exit.
1528+
if (isa<SCEVCouldNotCompute>(
1529+
PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1530+
reportVectorizationFailure(
1531+
"Cannot determine exact exit count for latch block",
1532+
"Cannot vectorize early exit loop",
1533+
"UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1534+
return false;
1535+
}
1536+
assert(llvm::is_contained(CountableExitingBlocks, LatchBB) &&
1537+
"Latch block not found in list of countable exits!");
1538+
15171539
// Check to see if there are instructions that could potentially generate
15181540
// exceptions or have side-effects.
15191541
auto IsSafeOperation = [](Instruction *I) -> bool {
@@ -1549,39 +1571,32 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
15491571
}
15501572
}
15511573

1552-
// The latch block must have a countable exit.
1553-
if (isa<SCEVCouldNotCompute>(
1554-
PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
1555-
reportVectorizationFailure(
1556-
"Cannot determine exact exit count for latch block",
1557-
"Cannot vectorize early exit loop",
1558-
"UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
1559-
return false;
1560-
}
1561-
15621574
// The vectoriser cannot handle loads that occur after the early exit block.
1563-
assert(LatchBB->getUniquePredecessor() == getSpeculativeEarlyExitingBlock() &&
1575+
assert(LatchBB->getUniquePredecessor() == getUncountableEarlyExitingBlock() &&
15641576
"Expected latch predecessor to be the early exiting block");
15651577

15661578
// TODO: Handle loops that may fault.
15671579
if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC)) {
1568-
reportVectorizationFailure(
1569-
"Loop may fault",
1570-
"Cannot vectorize potentially faulting early exit loop",
1571-
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1572-
return false;
1580+
if (!AssumeNoMemFault) {
1581+
reportVectorizationFailure(
1582+
"Loop may fault",
1583+
"Cannot vectorize potentially faulting early exit loop",
1584+
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
1585+
return false;
1586+
} else
1587+
LLVM_DEBUG(dbgs() << "LV: Assuming early exit vector loop will not "
1588+
<< "fault\n");
15731589
}
15741590

1575-
LLVM_DEBUG(
1576-
dbgs()
1577-
<< "LV: Found an early exit. Retrying with speculative exit count.\n");
1578-
[[maybe_unused]] const SCEV *SpecExitCount =
1591+
[[maybe_unused]] const SCEV *SymbolicMaxBTC =
15791592
PSE.getSymbolicMaxBackedgeTakenCount();
1580-
assert(!isa<SCEVCouldNotCompute>(SpecExitCount) &&
1593+
// Since we have an exact exit count for the latch and the early exit
1594+
// dominates the latch, then this should guarantee a computed SCEV value.
1595+
assert(!isa<SCEVCouldNotCompute>(SymbolicMaxBTC) &&
15811596
"Failed to get symbolic expression for backedge taken count");
1582-
1583-
LLVM_DEBUG(dbgs() << "LV: Found speculative backedge taken count: "
1584-
<< *SpecExitCount << '\n');
1597+
LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max "
1598+
"backedge taken count: "
1599+
<< *SymbolicMaxBTC << '\n');
15851600
return true;
15861601
}
15871602

@@ -1645,15 +1660,15 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
16451660
return false;
16461661
}
16471662

1648-
HasSpeculativeEarlyExit = false;
1663+
HasUncountableEarlyExit = false;
16491664
if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
16501665
if (!isVectorizableEarlyExitLoop()) {
16511666
if (DoExtraAnalysis)
16521667
Result = false;
16531668
else
16541669
return false;
16551670
} else
1656-
HasSpeculativeEarlyExit = true;
1671+
HasUncountableEarlyExit = true;
16571672
}
16581673

16591674
// Go over each instruction and look at memory deps.

0 commit comments

Comments
 (0)