[LoopVectorize] Use new getUniqueLatchExitBlock routine

david-arm · david-arm · commit 36c23112b9c0 · 2024-09-11T14:04:18.000Z
With PR #88385 I am introducing support for vectorising more loops with early exits that don't require a scalar epilogue. As such, if a loop doesn't have a unique exit block it will not automatically imply we require a scalar epilogue. Also, in all places in the code today where we use the variable LoopExitBlock we actually mean the exit block from the latch. Therefore, it seemed reasonable to add a new getUniqueLatchExitBlock that allows the caller to determine the exit block taken from the latch and use this instead of getUniqueExitBlock. I also renamed LoopExitBlock to be LatchExitBlock. I feel this not only better reflects how the variable is used today, but also prepares the code for PR #88385. While doing this I also noticed that one of the comments in requiresScalarEpilogue is wrong when we require a scalar epilogue, i.e. when we're not exiting from the latch block. This doesn't always imply we have multiple exits, e.g. see the test in Transforms/LoopVectorize/unroll_nonlatch.ll where the latch unconditionally branches back to the only exiting block.
diff --git a/llvm/include/llvm/Support/GenericLoopInfo.h b/llvm/include/llvm/Support/GenericLoopInfo.h
@@ -294,6 +294,10 @@ template <class BlockT, class LoopT> class LoopBase {
   /// Otherwise return null.
   BlockT *getUniqueExitBlock() const;
 
+  /// Return the unique exit block for the latch, or null if there are multiple
+  /// different exit blocks.
+  BlockT *getUniqueLatchExitBlock() const;
+
   /// Return true if this loop does not have any exit blocks.
   bool hasNoExitBlocks() const;
 
diff --git a/llvm/include/llvm/Support/GenericLoopInfoImpl.h b/llvm/include/llvm/Support/GenericLoopInfoImpl.h
@@ -159,6 +159,16 @@ BlockT *LoopBase<BlockT, LoopT>::getUniqueExitBlock() const {
   return getExitBlockHelper(this, true).first;
 }
 
+template <class BlockT, class LoopT>
+BlockT *LoopBase<BlockT, LoopT>::getUniqueLatchExitBlock() const {
+  const BlockT *Latch = getLoopLatch();
+  assert(Latch && "Latch block must exists");
+  SmallVector<BlockT *, 4> ExitBlocks;
+  getUniqueExitBlocksHelper(this, ExitBlocks,
+                            [Latch](const BlockT *BB) { return BB == Latch; });
+  return ExitBlocks.size() == 1 ? ExitBlocks[0] : nullptr;
+}
+
 /// getExitEdges - Return all pairs of (_inside_block_,_outside_block_).
 template <class BlockT, class LoopT>
 void LoopBase<BlockT, LoopT>::getExitEdges(
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -652,9 +652,8 @@ class InnerLoopVectorizer {
   /// Middle Block between the vector and the scalar.
   BasicBlock *LoopMiddleBlock;
 
-  /// The unique ExitBlock of the scalar loop if one exists.  Note that
-  /// there can be multiple exiting edges reaching this block.
-  BasicBlock *LoopExitBlock;
+  /// The exit block from the loop latch, if one exists.
+  BasicBlock *LatchExitBlock;
 
   /// The scalar loop body.
   BasicBlock *LoopScalarBody;
@@ -1390,6 +1389,8 @@ class LoopVectorizationCostModel {
 
   /// Returns true if we're required to use a scalar epilogue for at least
   /// the final iteration of the original loop.
+  /// TODO: It would be good to cache the result and avoid recalculating,
+  /// which will also avoid reprinting the same debug message many times.
   bool requiresScalarEpilogue(bool IsVectorizing) const {
     if (!isScalarEpilogueAllowed()) {
       LLVM_DEBUG(dbgs() << "LV: Loop does not require scalar epilogue\n");
@@ -1398,8 +1399,8 @@ class LoopVectorizationCostModel {
     // If we might exit from anywhere but the latch, must run the exiting
     // iteration in scalar form.
     if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
-      LLVM_DEBUG(
-          dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n");
+      LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: not exiting "
+                           "from latch block\n");
       return true;
     }
     if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue()) {
@@ -2043,8 +2044,7 @@ class GeneratedRTChecks {
   /// adjusts the branches to branch to the vector preheader or \p Bypass,
   /// depending on the generated condition.
   BasicBlock *emitSCEVChecks(BasicBlock *Bypass,
-                             BasicBlock *LoopVectorPreHeader,
-                             BasicBlock *LoopExitBlock) {
+                             BasicBlock *LoopVectorPreHeader) {
     if (!SCEVCheckCond)
       return nullptr;
 
@@ -2516,7 +2516,7 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
 
 BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
   BasicBlock *const SCEVCheckBlock =
-      RTChecks.emitSCEVChecks(Bypass, LoopVectorPreHeader, LoopExitBlock);
+      RTChecks.emitSCEVChecks(Bypass, LoopVectorPreHeader);
   if (!SCEVCheckBlock)
     return nullptr;
 
@@ -2533,7 +2533,7 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
       // If there is an epilogue which must run, there's no edge from the
       // middle block to exit blocks  and thus no need to update the immediate
       // dominator of the exit blocks.
-      DT->changeImmediateDominator(LoopExitBlock, SCEVCheckBlock);
+      DT->changeImmediateDominator(LatchExitBlock, SCEVCheckBlock);
   }
 
   LoopBypassBlocks.push_back(SCEVCheckBlock);
@@ -2581,9 +2581,9 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
   LoopScalarBody = OrigLoop->getHeader();
   LoopVectorPreHeader = OrigLoop->getLoopPreheader();
   assert(LoopVectorPreHeader && "Invalid loop structure");
-  LoopExitBlock = OrigLoop->getUniqueExitBlock(); // may be nullptr
-  assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector())) &&
-         "multiple exit loop without required epilogue?");
+  LatchExitBlock = OrigLoop->getUniqueLatchExitBlock();
+  assert((LatchExitBlock || Cost->requiresScalarEpilogue(VF.isVector())) &&
+         "Expected loop without exit from latch to require an epilogue");
 
   LoopMiddleBlock =
       SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
@@ -7788,7 +7788,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
     // If there is an epilogue which must run, there's no edge from the
     // middle block to exit blocks  and thus no need to update the immediate
     // dominator of the exit blocks.
-    DT->changeImmediateDominator(LoopExitBlock,
+    DT->changeImmediateDominator(LatchExitBlock,
                                  EPI.EpilogueIterationCountCheck);
 
   // Keep track of bypass blocks, as they feed start values to the induction and