Skip to content

Commit 7eb410e

Browse files
committed
Address review comments + rebase issues
* Reverted some unnecessary test changes. * Added some CHECK-NOT lines back in for the legality tests. * Amended single_early_exit_with_outer_loop.ll test to include new vector.early.exit block. * Fixed a bug in VPBasicBlock::execute that incorrectly assumed all VPBasicBlocks should be added to the same parent loop. This may not be true when the VPBasicBlock has a single successor that happens to be the exit block. In this case it seems like ScalarEvolution and other analysis passes expect the block to have the same parent loop as the exit block.
1 parent 62e1786 commit 7eb410e

File tree

8 files changed

+39
-24
lines changed

8 files changed

+39
-24
lines changed

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,11 @@ class LoopVectorizationLegality {
407407

408408
/// Returns the destination of an uncountable early exiting block.
409409
BasicBlock *getUncountableEarlyExitBlock() const {
410+
if (!HasUncountableEarlyExit) {
411+
assert(getUncountableExitBlocks().empty() &&
412+
"Expected no uncountable exiting blocks");
413+
return nullptr;
414+
}
410415
assert(getUncountableExitBlocks().size() == 1 &&
411416
"Expected only a single uncountable exit block");
412417
return getUncountableExitBlocks()[0];

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3094,18 +3094,21 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
30943094
PSE.getSE()->forgetLoop(OrigLoop);
30953095
PSE.getSE()->forgetBlockAndLoopDispositions();
30963096

3097-
// When dealing with uncountable early exits we create middle.split blocks
3098-
// between the vector loop region and the exit block. These blocks need
3099-
// adding to any outer loop.
3097+
// When dealing with uncountable early exits we create a vector.early.exit
3098+
// block predecessor of the exit block. This block needs to have the same
3099+
// parent loop as the exit block itself.
31003100
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
3101-
Loop *OuterLoop = OrigLoop->getParentLoop();
3102-
if (Legal->hasUncountableEarlyExit() && OuterLoop) {
3103-
BasicBlock *OrigEarlyExitBB = Legal->getUncountableEarlyExitBlock();
3104-
if (Loop *EEL = LI->getLoopFor(OrigEarlyExitBB)) {
3105-
BasicBlock *VectorEarlyExitBB =
3106-
State.CFG.VPBB2IRBB[VectorRegion->getEarlyExit()];
3101+
if (BasicBlock *OrigEarlyExitBB = Legal->getUncountableEarlyExitBlock()) {
3102+
// This has possibly been added to the wrong loop - we need to add this
3103+
// to the same loop as the original exit block.
3104+
BasicBlock *VectorEarlyExitBB =
3105+
State.CFG.VPBB2IRBB[VectorRegion->getEarlyExit()];
3106+
if (LI->getLoopFor(VectorEarlyExitBB))
3107+
LI->removeBlock(VectorEarlyExitBB);
3108+
3109+
Loop *EEL = LI->getLoopFor(OrigEarlyExitBB);
3110+
if (EEL)
31073111
EEL->addBasicBlockToLoop(VectorEarlyExitBB, *LI);
3108-
}
31093112
}
31103113

31113114
// After vectorization, the exit blocks of the original loop will have

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,11 @@ void VPBasicBlock::execute(VPTransformState *State) {
504504
UnreachableInst *Terminator = State->Builder.CreateUnreachable();
505505
// Register NewBB in its loop. In innermost loops its the same for all
506506
// BB's.
507+
// TODO: This is not always correct, for example if this block has a single
508+
// successor that is an exit block in the original loop and the exit block
509+
// is in a different loop to CurrentParentLoop. Or the exit block may not
510+
// be in a loop at all! See fixVectorizedLoop where we have to patch up
511+
// cases like this for loops with uncountable early exits.
507512
if (State->CurrentParentLoop)
508513
State->CurrentParentLoop->addBasicBlockToLoop(NewBB, *State->LI);
509514
State->Builder.SetInsertPoint(Terminator);

llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
2222
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16
2323
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]]
2424
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
25-
; CHECK-NEXT: [[INDEX_NEXT1:%.*]] = add i64 3, [[N_VEC]]
2625
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
2726
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16
27+
; CHECK-NEXT: [[INDEX_NEXT1:%.*]] = add i64 3, [[N_VEC]]
2828
; CHECK-NEXT: br label [[LOOP:%.*]]
2929
; CHECK: vector.body:
3030
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
@@ -117,9 +117,9 @@ define i64 @same_exit_block_pre_inc_use4() #1 {
117117
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP14]], 2
118118
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]]
119119
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
120-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 3, [[N_VEC]]
121120
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
122121
; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP4]], 2
122+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 3, [[N_VEC]]
123123
; CHECK-NEXT: [[TMP16:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
124124
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP16]], splat (i64 1)
125125
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> splat (i64 3), [[TMP17]]
@@ -473,9 +473,9 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) #1 {
473473
; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 4
474474
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], [[TMP24]]
475475
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
476-
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i8
477476
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
478477
; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP13]], 4
478+
; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i8
479479
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
480480
; CHECK: vector.body:
481481
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/early_exit_legality.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ define i64 @same_exit_block_pre_inc_use1() {
4949
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1'
5050
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
5151
; CHECK-NEXT: LV: We can vectorize this loop!
52+
; CHECK-NOT: LV: Not vectorizing
5253
entry:
5354
%p1 = alloca [1024 x i8]
5455
%p2 = alloca [1024 x i8]
@@ -140,6 +141,7 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
140141
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit'
141142
; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63
142143
; CHECK-NEXT: LV: We can vectorize this loop!
144+
; CHECK-NOT: LV: Not vectorizing
143145
entry:
144146
%p1 = alloca [1024 x i8]
145147
call void @init_mem(ptr %p1, i64 1024)

llvm/test/Transforms/LoopVectorize/multi_early_exit.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,18 @@ define i64 @one_uncountable_two_countable_same_exit_phi_of_consts() {
1414
; CHECK: loop:
1515
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
1616
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64
17-
; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END1:%.*]]
17+
; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]]
1818
; CHECK: search:
1919
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
2020
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
2121
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
2222
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
2323
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
24-
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END1]], label [[LOOP_INC]]
24+
; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END]], label [[LOOP_INC]]
2525
; CHECK: loop.inc:
2626
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
2727
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128
28-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END1]]
28+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
2929
; CHECK: loop.end:
3030
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[SEARCH]] ], [ 0, [[LOOP_INC]] ]
3131
; CHECK-NEXT: ret i64 [[RETVAL]]

llvm/test/Transforms/LoopVectorize/multi_early_exit_live_outs.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ define i64 @one_uncountable_two_countable_same_exit() {
1212
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
1313
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
1414
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
15-
; CHECK-NEXT: br label [[LOOP1:%.*]]
15+
; CHECK-NEXT: br label [[LOOP:%.*]]
1616
; CHECK: loop:
1717
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
1818
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64
@@ -27,9 +27,9 @@ define i64 @one_uncountable_two_countable_same_exit() {
2727
; CHECK: loop.inc:
2828
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
2929
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128
30-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]]
30+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
3131
; CHECK: loop.end:
32-
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 64, [[LOOP1]] ], [ [[INDEX]], [[SEARCH]] ], [ 128, [[LOOP_INC]] ]
32+
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 64, [[LOOP]] ], [ [[INDEX]], [[SEARCH]] ], [ 128, [[LOOP_INC]] ]
3333
; CHECK-NEXT: ret i64 [[RETVAL]]
3434
;
3535
entry:
@@ -70,7 +70,7 @@ define i64 @one_uncountable_two_countable_diff_exit() {
7070
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
7171
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
7272
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
73-
; CHECK-NEXT: br label [[LOOP1:%.*]]
73+
; CHECK-NEXT: br label [[LOOP:%.*]]
7474
; CHECK: loop:
7575
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
7676
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64
@@ -85,12 +85,12 @@ define i64 @one_uncountable_two_countable_diff_exit() {
8585
; CHECK: loop.inc:
8686
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
8787
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128
88-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]]
88+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
8989
; CHECK: loop.end.early:
9090
; CHECK-NEXT: [[RET_EARLY:%.*]] = phi i64 [ [[INDEX]], [[SEARCH]] ]
9191
; CHECK-NEXT: ret i64 [[RET_EARLY]]
9292
; CHECK: loop.end:
93-
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 64, [[LOOP1]] ], [ 128, [[LOOP_INC]] ]
93+
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 64, [[LOOP]] ], [ 128, [[LOOP_INC]] ]
9494
; CHECK-NEXT: ret i64 [[RETVAL]]
9595
;
9696
entry:

llvm/test/Transforms/LoopVectorize/single_early_exit_with_outer_loop.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ declare void @init_mem(ptr, i64);
66
; uncountable early exits is correctly adding to the outer loop at depth 1.
77
define void @early_exit_in_outer_loop1() {
88
; CHECK-LABEL: Loop info for function 'early_exit_in_outer_loop1':
9-
; CHECK: Loop at depth 1 containing: {{.*}}%middle.block,%scalar.ph,%vector.ph,%vector.body,%middle.split
9+
; CHECK: Loop at depth 1 containing: {{.*}}%middle.block,%scalar.ph,%vector.ph,%vector.body,%middle.split,%vector.early.exit
1010
entry:
1111
%p1 = alloca [1024 x i8]
1212
%p2 = alloca [1024 x i8]
@@ -45,7 +45,7 @@ loop.inner.end:
4545
; loops at depths 1 and 2, respectively.
4646
define void @early_exit_in_outer_loop2() {
4747
; CHECK-LABEL: Loop info for function 'early_exit_in_outer_loop2':
48-
; CHECK: Loop at depth 1 containing: {{.*}}%middle.block,%scalar.ph,%vector.ph,%vector.body,%middle.split
48+
; CHECK: Loop at depth 1 containing: {{.*}}%middle.block,%scalar.ph,%vector.ph,%vector.body,%middle.split,%vector.early.exit
4949
; CHECK: Loop at depth 2 containing: {{.*}}%middle.block,%scalar.ph,%vector.ph,%vector.body,%middle.split<exiting>
5050
entry:
5151
%p1 = alloca [1024 x i8]

0 commit comments

Comments
 (0)