Skip to content

Commit 8cbcd2f

Browse files
committed
[IndVars] Eliminate loop exits with equivalent exit counts
We can end up with two loop exits whose exit counts are equivalent, but whose textual representation is different and non-obvious. For the sub-case where we have a series of exits which dominate one another (common), eliminate any exits which would iterate *after* a previous exit on the exiting iteration. As noted in the TODO being removed, I'd always thought this was a good idea, but I've now seen this in a real workload as well. Interestingly, in review, Nikita pointed out there's yet another opportunity to leverage SCEV's reasoning. If we kept track of the min of dominating exits so far, we could discharge exits with EC >= MDE. This is less powerful than the existing transform (since later exits aren't considered), but potentially more powerful for any case where SCEV can prove a >= b, but neither a == b nor a > b. I don't have an example to illustrate that opportunity, but won't be surprised if we find one and return to handle that case as well. Differential Revision: https://reviews.llvm.org/D69009 llvm-svn: 375379
1 parent 7015a5c commit 8cbcd2f

File tree

4 files changed

+68
-12
lines changed

4 files changed

+68
-12
lines changed

llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2717,6 +2717,24 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
27172717
if (isa<SCEVCouldNotCompute>(MaxExitCount))
27182718
return false;
27192719

2720+
// Visit our exit blocks in order of dominance. We know from the fact that
2721+
// all exits (left) are analyzeable that there must be a total dominance order
2722+
// between them as each must dominate the latch. The visit order only
2723+
// matters for the provably equal case.
2724+
llvm::sort(ExitingBlocks,
2725+
[&](BasicBlock *A, BasicBlock *B) {
2726+
// std::sort sorts in ascending order, so we want the inverse of
2727+
// the normal dominance relation.
2728+
if (DT->properlyDominates(A, B)) return true;
2729+
if (DT->properlyDominates(B, A)) return false;
2730+
llvm_unreachable("expected total dominance order!");
2731+
});
2732+
#ifdef ASSERT
2733+
for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2734+
assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2735+
}
2736+
#endif
2737+
27202738
auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
27212739
BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
27222740
bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
@@ -2729,6 +2747,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
27292747
};
27302748

27312749
bool Changed = false;
2750+
SmallSet<const SCEV*, 8> DominatingExitCounts;
27322751
for (BasicBlock *ExitingBB : ExitingBlocks) {
27332752
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
27342753
assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
@@ -2766,10 +2785,15 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
27662785
continue;
27672786
}
27682787

2769-
// TODO: If we can prove that the exiting iteration is equal to the exit
2770-
// count for this exit and that no previous exit oppurtunities exist within
2771-
// the loop, then we can discharge all other exits. (May fall out of
2772-
// previous TODO.)
2788+
// As we run, keep track of which exit counts we've encountered. If we
2789+
// find a duplicate, we've found an exit which would have exited on the
2790+
// exiting iteration, but (from the visit order) strictly follows another
2791+
// which does the same and is thus dead.
2792+
if (!DominatingExitCounts.insert(ExitCount).second) {
2793+
FoldExit(ExitingBB, false);
2794+
Changed = true;
2795+
continue;
2796+
}
27732797
}
27742798
return Changed;
27752799
}

llvm/test/Transforms/IndVarSimplify/eliminate-exit.ll

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,5 +185,39 @@ exit:
185185
ret void
186186
}
187187

188+
define void @mixed_width(i32 %len) {
189+
; CHECK-LABEL: @mixed_width(
190+
; CHECK-NEXT: entry:
191+
; CHECK-NEXT: [[LEN_ZEXT:%.*]] = zext i32 [[LEN:%.*]] to i64
192+
; CHECK-NEXT: br label [[LOOP:%.*]]
193+
; CHECK: loop:
194+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
195+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
196+
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[IV]], [[LEN_ZEXT]]
197+
; CHECK-NEXT: br i1 [[CMP1]], label [[BACKEDGE]], label [[EXIT:%.*]]
198+
; CHECK: backedge:
199+
; CHECK-NEXT: call void @side_effect()
200+
; CHECK-NEXT: br i1 true, label [[LOOP]], label [[EXIT]]
201+
; CHECK: exit:
202+
; CHECK-NEXT: ret void
203+
;
204+
entry:
205+
%len.zext = zext i32 %len to i64
206+
br label %loop
207+
loop:
208+
%iv = phi i64 [0, %entry], [%iv.next, %backedge]
209+
%iv2 = phi i32 [0, %entry], [%iv2.next, %backedge]
210+
%iv.next = add i64 %iv, 1
211+
%iv2.next = add i32 %iv2, 1
212+
%cmp1 = icmp ult i64 %iv, %len.zext
213+
br i1 %cmp1, label %backedge, label %exit
214+
215+
backedge:
216+
call void @side_effect()
217+
%cmp2 = icmp ult i32 %iv2, %len
218+
br i1 %cmp2, label %loop, label %exit
219+
exit:
220+
ret void
221+
}
188222

189223
declare void @side_effect()

llvm/test/Transforms/IndVarSimplify/loop-predication.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,6 @@ define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %l
464464
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]]
465465
; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]]
466466
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
467-
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
468467
; CHECK-NEXT: br label [[LOOP:%.*]]
469468
; CHECK: loop:
470469
; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
@@ -478,7 +477,7 @@ define i32 @duplicate_checks(i32* %array.1, i32* %array.2, i32* %array.3, i32 %l
478477
; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]]
479478
; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4
480479
; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
481-
; CHECK-NEXT: br i1 [[TMP4]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0
480+
; CHECK-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0
482481
; CHECK: deopt2:
483482
; CHECK-NEXT: call void @prevent_merging()
484483
; CHECK-NEXT: ret i32 -1
@@ -784,7 +783,7 @@ exit:
784783
; If we have a dominating exit (exit1) which can't be itself rewritten, we
785784
; can't rewrite a later exit (exit2). Doing so would cause the loop to exit
786785
; from the exit2 when it should have exited from exit1.
787-
define i32 @neg_dominating_exit(i32* %array, i32 %length, i32 %n) {
786+
define i32 @neg_dominating_exit(i32* %array, i32 %length, i32 %length2, i32 %n) {
788787
; CHECK-LABEL: @neg_dominating_exit(
789788
; CHECK-NEXT: loop.preheader:
790789
; CHECK-NEXT: br label [[LOOP:%.*]]
@@ -798,7 +797,7 @@ define i32 @neg_dominating_exit(i32* %array, i32 %length, i32 %n) {
798797
; CHECK-NEXT: call void @prevent_merging()
799798
; CHECK-NEXT: ret i32 [[RESULT]]
800799
; CHECK: guarded:
801-
; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH]]
800+
; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH2:%.*]]
802801
; CHECK-NEXT: br i1 [[WITHIN_BOUNDS2]], label [[GUARDED2]], label [[DEOPT2:%.*]], !prof !0
803802
; CHECK: deopt2:
804803
; CHECK-NEXT: call void @prevent_merging()
@@ -830,7 +829,7 @@ deopt: ; preds = %loop
830829
ret i32 %result
831830

832831
guarded: ; preds = %loop
833-
%within.bounds2 = icmp ult i32 %i, %length
832+
%within.bounds2 = icmp ult i32 %i, %length2
834833
br i1 %within.bounds2, label %guarded2, label %deopt2, !prof !0
835834

836835
deopt2: ; preds = %loop

llvm/test/Transforms/IndVarSimplify/pr38674.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,9 @@ define i32 @test_01() {
1414
; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 1 to i32
1515
; CHECK-NEXT: br label [[FOR_BODY6:%.*]]
1616
; CHECK: for.cond4:
17-
; CHECK-NEXT: [[CMP5:%.*]] = icmp ult i32 [[INC:%.*]], 2
18-
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY6]], label [[FOR_END:%.*]]
17+
; CHECK-NEXT: br i1 true, label [[FOR_BODY6]], label [[FOR_END:%.*]]
1918
; CHECK: for.body6:
20-
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_COND4_PREHEADER]] ], [ [[INC]], [[FOR_COND4:%.*]] ]
19+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_COND4_PREHEADER]] ], [ [[INC:%.*]], [[FOR_COND4:%.*]] ]
2120
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], [[ZEXT]]
2221
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
2322
; CHECK-NEXT: br i1 [[TMP0]], label [[RETURN_LOOPEXIT:%.*]], label [[FOR_COND4]]

0 commit comments

Comments
 (0)