Skip to content

Commit 92bfbbc

Browse files
authored
[VPlan] Invert condition if needed when creating inner regions. (#132292)
As pointed out by @iamlouk in #129402, the current code doesn't handle latches with different successor orders correctly. Introduce a `NOT`, if needed. Depends on #129402 PR: #132292
1 parent ec6b619 commit 92bfbbc

File tree

2 files changed

+39
-18
lines changed

2 files changed

+39
-18
lines changed

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -386,33 +386,54 @@ std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(
386386
/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
387387
/// has exactly 2 predecessors (preheader and latch), where the block
388388
/// dominates the latch and the preheader dominates the block. If it is a
389-
/// header block return true, making sure the preheader appears first and
390-
/// the latch second. Otherwise return false.
391-
static bool canonicalHeader(VPBlockBase *HeaderVPB,
392-
const VPDominatorTree &VPDT) {
389+
/// header block return true and canonicalize the predecessors of the header
390+
/// (making sure the preheader appears first and the latch second) and the
391+
/// successors of the latch (making sure the loop exit comes first). Otherwise
392+
/// return false.
393+
static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB,
394+
const VPDominatorTree &VPDT) {
393395
ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
394396
if (Preds.size() != 2)
395397
return false;
396398

397399
auto *PreheaderVPBB = Preds[0];
398400
auto *LatchVPBB = Preds[1];
399-
if (VPDT.dominates(PreheaderVPBB, HeaderVPB) &&
400-
VPDT.dominates(HeaderVPB, LatchVPBB))
401-
return true;
401+
if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
402+
!VPDT.dominates(HeaderVPB, LatchVPBB)) {
403+
std::swap(PreheaderVPBB, LatchVPBB);
402404

403-
std::swap(PreheaderVPBB, LatchVPBB);
405+
if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
406+
!VPDT.dominates(HeaderVPB, LatchVPBB))
407+
return false;
404408

405-
if (VPDT.dominates(PreheaderVPBB, HeaderVPB) &&
406-
VPDT.dominates(HeaderVPB, LatchVPBB)) {
407-
// Canonicalize predecessors of header so that preheader is first and latch
408-
// second.
409+
// Canonicalize predecessors of header so that preheader is first and
410+
// latch second.
409411
HeaderVPB->swapPredecessors();
410412
for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
411413
R.swapOperands();
412-
return true;
413414
}
414415

415-
return false;
416+
// The two successors of conditional branch match the condition, with the
417+
// first successor corresponding to true and the second to false. We
418+
// canonicalize the successors of the latch when introducing the region, such
419+
// that the latch exits the region when its condition is true; invert the
420+
// original condition if the original CFG branches to the header on true.
421+
// Note that the exit edge is not yet connected for top-level loops.
422+
if (LatchVPBB->getSingleSuccessor() ||
423+
LatchVPBB->getSuccessors()[0] != HeaderVPB)
424+
return true;
425+
426+
assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
427+
auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
428+
assert(cast<VPInstruction>(Term)->getOpcode() ==
429+
VPInstruction::BranchOnCond &&
430+
"terminator must be a BranchOnCond");
431+
auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
432+
Not->insertBefore(Term);
433+
Term->setOperand(0, Not);
434+
LatchVPBB->swapSuccessors();
435+
436+
return true;
416437
}
417438

418439
/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
@@ -447,7 +468,7 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
447468
VPDominatorTree VPDT;
448469
VPDT.recalculate(Plan);
449470
for (VPBlockBase *HeaderVPB : vp_depth_first_shallow(Plan.getEntry()))
450-
if (canonicalHeader(HeaderVPB, VPDT))
471+
if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
451472
createLoopRegion(Plan, HeaderVPB);
452473

453474
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();

llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
@A = common global [1024 x i64] zeroinitializer, align 16
55
@B = common global [1024 x i64] zeroinitializer, align 16
66

7-
; FIXME: The exit condition of the inner loop is incorrect when vectorizing.
87
define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) {
98
; CHECK-LABEL: define void @inner_latch_header_first_successor(
109
; CHECK-SAME: i64 [[N:%.*]], i32 [[C:%.*]], i64 [[M:%.*]]) {
@@ -35,8 +34,9 @@ define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) {
3534
; CHECK-NEXT: [[TMP3]] = add nsw <4 x i64> [[TMP2]], [[VEC_PHI4]]
3635
; CHECK-NEXT: [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1)
3736
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[TMP4]], [[BROADCAST_SPLAT2]]
38-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
39-
; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[INNER3]]
37+
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
38+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
39+
; CHECK-NEXT: br i1 [[TMP9]], label %[[VECTOR_LATCH]], label %[[INNER3]]
4040
; CHECK: [[VECTOR_LATCH]]:
4141
; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER3]] ]
4242
; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[VEC_PHI6]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true))

0 commit comments

Comments
 (0)