Skip to content

Commit 1ce660b

Browse files
committed
[LAA] Use getBackedgeTakenCountForCountableExits.
Update LAA to use getBackedgeTakenCountForCountableExits, which returns the minimum of the countable exits. When analyzing dependences and computing runtime checks, we need the smallest upper bound on the number of iterations. In terms of memory safety, it shouldn't matter if any uncomputable exits leave the loop, as long as we prove that there are no dependences given the minimum of the countable exits. The same should also apply to generating runtime checks. Note that this shifts the responsibility of checking whether all exit counts are computable, or of handling early exits, to the users of LAA.
1 parent afe475d commit 1ce660b

File tree

6 files changed

+216
-16
lines changed

6 files changed

+216
-16
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ getStartAndEndForAccess(const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy,
214214
if (SE->isLoopInvariant(PtrExpr, Lp)) {
215215
ScStart = ScEnd = PtrExpr;
216216
} else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
217-
const SCEV *Ex = PSE.getBackedgeTakenCount();
217+
const SCEV *Ex = PSE.getSymbolicMaxBackedgeTakenCount();
218218

219219
ScStart = AR->getStart();
220220
ScEnd = AR->evaluateAtIteration(Ex, *SE);
@@ -2055,9 +2055,9 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
20552055
// stride multiplied by the backedge taken count, the accesses are independent,
20562056
// i.e. they are far enough apart that accesses won't access the same
20572057
// location across all loop iterations.
2058-
if (HasSameSize &&
2059-
isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist,
2060-
MaxStride, TypeByteSize))
2058+
if (HasSameSize && isSafeDependenceDistance(
2059+
DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()),
2060+
*Dist, MaxStride, TypeByteSize))
20612061
return Dependence::NoDep;
20622062

20632063
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
@@ -2395,7 +2395,7 @@ bool LoopAccessInfo::canAnalyzeLoop() {
23952395
}
23962396

23972397
// ScalarEvolution needs to be able to find the exit count.
2398-
const SCEV *ExitCount = PSE->getBackedgeTakenCount();
2398+
const SCEV *ExitCount = PSE->getSymbolicMaxBackedgeTakenCount();
23992399
if (isa<SCEVCouldNotCompute>(ExitCount)) {
24002400
recordAnalysis("CantComputeNumberOfIterations")
24012401
<< "could not determine number of loop iterations";
@@ -3004,7 +3004,7 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
30043004
// of various possible stride specializations, considering the alternatives
30053005
// of using gather/scatters (if available).
30063006

3007-
const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
3007+
const SCEV *BETakenCount = PSE->getSymbolicMaxBackedgeTakenCount();
30083008

30093009
// Match the types so we can compare the stride and the BETakenCount.
30103010
// The Stride can be positive/negative, so we sign extend Stride;

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,6 +1506,16 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
15061506
return false;
15071507
}
15081508

1509+
if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1510+
reportVectorizationFailure("could not determine number of loop iterations",
1511+
"could not determine number of loop iterations",
1512+
"CantComputeNumberOfIterations", ORE, TheLoop);
1513+
if (DoExtraAnalysis)
1514+
Result = false;
1515+
else
1516+
return false;
1517+
}
1518+
15091519
LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
15101520
<< (LAI->getRuntimePointerChecking()->Need
15111521
? " (with a runtime bound check)"

llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,21 @@
44
define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations(ptr %A, ptr %B) {
55
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations'
66
; CHECK-NEXT: loop.header:
7-
; CHECK-NEXT: Report: could not determine number of loop iterations
7+
; CHECK-NEXT: Memory dependences are safe with run-time checks
88
; CHECK-NEXT: Dependences:
99
; CHECK-NEXT: Run-time memory checks:
10+
; CHECK-NEXT: Check 0:
11+
; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]):
12+
; CHECK-NEXT: %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
13+
; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]):
14+
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
1015
; CHECK-NEXT: Grouped accesses:
16+
; CHECK-NEXT: Group [[GRP1]]:
17+
; CHECK-NEXT: (Low: %B High: (2000 + %B))
18+
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
19+
; CHECK-NEXT: Group [[GRP2]]:
20+
; CHECK-NEXT: (Low: %A High: (2000 + %A))
21+
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
1122
; CHECK-EMPTY:
1223
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
1324
; CHECK-NEXT: SCEV assumptions:
@@ -53,10 +64,21 @@ e.2:
5364
define i32 @all_exits_dominate_latch_countable_exits_at_most_1000_iterations(ptr %A, ptr %B) {
5465
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_1000_iterations'
5566
; CHECK-NEXT: loop.header:
56-
; CHECK-NEXT: Report: could not determine number of loop iterations
67+
; CHECK-NEXT: Memory dependences are safe with run-time checks
5768
; CHECK-NEXT: Dependences:
5869
; CHECK-NEXT: Run-time memory checks:
70+
; CHECK-NEXT: Check 0:
71+
; CHECK-NEXT: Comparing group ([[GRP3:0x[0-9a-f]+]]):
72+
; CHECK-NEXT: %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
73+
; CHECK-NEXT: Against group ([[GRP4:0x[0-9a-f]+]]):
74+
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
5975
; CHECK-NEXT: Grouped accesses:
76+
; CHECK-NEXT: Group [[GRP3]]:
77+
; CHECK-NEXT: (Low: %B High: (4004 + %B))
78+
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
79+
; CHECK-NEXT: Group [[GRP4]]:
80+
; CHECK-NEXT: (Low: %A High: (4004 + %A))
81+
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
6082
; CHECK-EMPTY:
6183
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
6284
; CHECK-NEXT: SCEV assumptions:
@@ -145,10 +167,21 @@ e.2:
145167
define i32 @b3_does_not_dominate_latch(ptr %A, ptr %B) {
146168
; CHECK-LABEL: 'b3_does_not_dominate_latch'
147169
; CHECK-NEXT: loop.header:
148-
; CHECK-NEXT: Report: could not determine number of loop iterations
170+
; CHECK-NEXT: Memory dependences are safe with run-time checks
149171
; CHECK-NEXT: Dependences:
150172
; CHECK-NEXT: Run-time memory checks:
173+
; CHECK-NEXT: Check 0:
174+
; CHECK-NEXT: Comparing group ([[GRP5:0x[0-9a-f]+]]):
175+
; CHECK-NEXT: %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
176+
; CHECK-NEXT: Against group ([[GRP6:0x[0-9a-f]+]]):
177+
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
151178
; CHECK-NEXT: Grouped accesses:
179+
; CHECK-NEXT: Group [[GRP5]]:
180+
; CHECK-NEXT: (Low: %B High: (4004 + %B))
181+
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
182+
; CHECK-NEXT: Group [[GRP6]]:
183+
; CHECK-NEXT: (Low: %A High: (4004 + %A))
184+
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
152185
; CHECK-EMPTY:
153186
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
154187
; CHECK-NEXT: SCEV assumptions:

llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,20 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
2929
; CHECK-NEXT: Run-time memory checks:
3030
; CHECK-NEXT: Check 0:
3131
; CHECK-NEXT: Comparing group
32-
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
33-
; CHECK-NEXT: Against group
3432
; CHECK-NEXT: %arrayidx4 = getelementptr inbounds i32, ptr %b, i64 %conv11
33+
; CHECK-NEXT: Against group
34+
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
3535
; CHECK-NEXT: Grouped accesses:
3636
; CHECK-NEXT: Group
37-
; CHECK-NEXT: (Low: (4 + %a) High: (4 + (4 * (1 umax %x)) + %a))
38-
; CHECK-NEXT: Member: {(4 + %a),+,4}<%for.body>
39-
; CHECK-NEXT: Group
4037
; CHECK-NEXT: (Low: %b High: ((4 * (1 umax %x)) + %b))
4138
; CHECK-NEXT: Member: {%b,+,4}<%for.body>
39+
; CHECK-NEXT: Group
40+
; CHECK-NEXT: (Low: (4 + %a) High: (4 + (4 * (1 umax %x)) + %a))
41+
; CHECK-NEXT: Member: {(4 + %a),+,4}<%for.body>
4242
; CHECK: Non vectorizable stores to invariant address were not found in loop.
4343
; CHECK-NEXT: SCEV assumptions:
44-
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
4544
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
45+
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
4646
; CHECK: Expressions re-written:
4747
; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom:
4848
; CHECK-NEXT: ((4 * (zext i32 {1,+,1}<%for.body> to i64))<nuw><nsw> + %a)<nuw>
@@ -84,8 +84,8 @@ exit:
8484
; CHECK-LABEL: test2
8585
; CHECK: Memory dependences are safe
8686
; CHECK: SCEV assumptions:
87-
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
8887
; CHECK-NEXT: {0,+,1}<%for.body> Added Flags: <nusw>
88+
; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: <nusw>
8989
define void @test2(i64 %x, ptr %a) {
9090
entry:
9191
br label %for.body
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; REQUIRES: x86-registered-target
3+
; RUN: opt -aa-pipeline=basic-aa -passes=loop-distribute -enable-loop-distribute -verify-loop-info -verify-dom-info -S %s | FileCheck %s
4+
5+
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6+
target triple = "x86_64-apple-macosx10.10.0"
7+
8+
@B = common global ptr null, align 8
9+
@A = common global ptr null, align 8
10+
@C = common global ptr null, align 8
11+
@D = common global ptr null, align 8
12+
@E = common global ptr null, align 8
13+
14+
define void @f() {
15+
; CHECK-LABEL: define void @f() {
16+
; CHECK-NEXT: [[ENTRY:.*]]:
17+
; CHECK-NEXT: [[A:%.*]] = load ptr, ptr @A, align 8
18+
; CHECK-NEXT: [[B:%.*]] = load ptr, ptr @B, align 8
19+
; CHECK-NEXT: [[C:%.*]] = load ptr, ptr @C, align 8
20+
; CHECK-NEXT: [[D:%.*]] = load ptr, ptr @D, align 8
21+
; CHECK-NEXT: [[E:%.*]] = load ptr, ptr @E, align 8
22+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
23+
; CHECK: [[FOR_BODY]]:
24+
; CHECK-NEXT: [[IND:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[LATCH:.*]] ]
25+
; CHECK-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IND]]
26+
; CHECK-NEXT: [[LOADA:%.*]] = load i32, ptr [[ARRAYIDXA]], align 4
27+
; CHECK-NEXT: [[ARRAYIDXB:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IND]]
28+
; CHECK-NEXT: [[LOADB:%.*]] = load i32, ptr [[ARRAYIDXB]], align 4
29+
; CHECK-NEXT: [[UNCOUNTABLE_C:%.*]] = icmp eq i32 [[LOADB]], 10
30+
; CHECK-NEXT: br i1 [[UNCOUNTABLE_C]], label %[[FOR_END:.*]], label %[[LATCH]]
31+
; CHECK: [[LATCH]]:
32+
; CHECK-NEXT: [[MULA:%.*]] = mul i32 [[LOADB]], [[LOADA]]
33+
; CHECK-NEXT: [[ADD]] = add nuw nsw i64 [[IND]], 1
34+
; CHECK-NEXT: [[ARRAYIDXA_PLUS_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD]]
35+
; CHECK-NEXT: store i32 [[MULA]], ptr [[ARRAYIDXA_PLUS_4]], align 4
36+
; CHECK-NEXT: [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[IND]]
37+
; CHECK-NEXT: [[LOADD:%.*]] = load i32, ptr [[ARRAYIDXD]], align 4
38+
; CHECK-NEXT: [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, ptr [[E]], i64 [[IND]]
39+
; CHECK-NEXT: [[LOADE:%.*]] = load i32, ptr [[ARRAYIDXE]], align 4
40+
; CHECK-NEXT: [[MULC:%.*]] = mul i32 [[LOADD]], [[LOADE]]
41+
; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IND]]
42+
; CHECK-NEXT: store i32 [[MULC]], ptr [[ARRAYIDXC]], align 4
43+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], 20
44+
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]]
45+
; CHECK: [[FOR_END]]:
46+
; CHECK-NEXT: ret void
47+
;
48+
entry:
49+
%a = load ptr, ptr @A, align 8
50+
%b = load ptr, ptr @B, align 8
51+
%c = load ptr, ptr @C, align 8
52+
%d = load ptr, ptr @D, align 8
53+
%e = load ptr, ptr @E, align 8
54+
br label %for.body
55+
56+
for.body:
57+
%ind = phi i64 [ 0, %entry ], [ %add, %latch ]
58+
59+
%arrayidxA = getelementptr inbounds i32, ptr %a, i64 %ind
60+
%loadA = load i32, ptr %arrayidxA, align 4
61+
62+
%arrayidxB = getelementptr inbounds i32, ptr %b, i64 %ind
63+
%loadB = load i32, ptr %arrayidxB, align 4
64+
%uncountable.c = icmp eq i32 %loadB, 10
65+
br i1 %uncountable.c, label %for.end, label %latch
66+
67+
latch:
68+
%mulA = mul i32 %loadB, %loadA
69+
70+
%add = add nuw nsw i64 %ind, 1
71+
%arrayidxA_plus_4 = getelementptr inbounds i32, ptr %a, i64 %add
72+
store i32 %mulA, ptr %arrayidxA_plus_4, align 4
73+
74+
%arrayidxD = getelementptr inbounds i32, ptr %d, i64 %ind
75+
%loadD = load i32, ptr %arrayidxD, align 4
76+
77+
%arrayidxE = getelementptr inbounds i32, ptr %e, i64 %ind
78+
%loadE = load i32, ptr %arrayidxE, align 4
79+
80+
%mulC = mul i32 %loadD, %loadE
81+
82+
%arrayidxC = getelementptr inbounds i32, ptr %c, i64 %ind
83+
store i32 %mulC, ptr %arrayidxC, align 4
84+
85+
%exitcond = icmp eq i64 %add, 20
86+
br i1 %exitcond, label %for.end, label %for.body
87+
88+
for.end: ; preds = %for.body
89+
ret void
90+
}
91+
92+
attributes #0 = { nounwind readnone convergent }
93+
attributes #1 = { nounwind convergent }
94+
95+
!0 = distinct !{!0, !1}
96+
!1 = !{!"llvm.loop.distribute.enable", i1 true}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=loop-load-elim -S %s | FileCheck %s
3+
4+
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
5+
6+
define void @f(ptr %A, ptr %B, ptr %C, i64 %N) {
7+
; CHECK-LABEL: define void @f(
8+
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) {
9+
; CHECK-NEXT: [[FOR_BODY_LVER_CHECK:.*]]:
10+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
11+
; CHECK: [[FOR_BODY]]:
12+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_LVER_CHECK]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LATCH:.*]] ]
13+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
14+
; CHECK-NEXT: [[AIDX_NEXT:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
15+
; CHECK-NEXT: [[BIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
16+
; CHECK-NEXT: [[CIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]]
17+
; CHECK-NEXT: [[AIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
18+
; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[BIDX]], align 4
19+
; CHECK-NEXT: [[UNCOUNTABLE_C:%.*]] = icmp eq i32 [[B]], 10
20+
; CHECK-NEXT: br i1 [[UNCOUNTABLE_C]], label %[[LATCH]], label %[[FOR_END:.*]]
21+
; CHECK: [[LATCH]]:
22+
; CHECK-NEXT: [[A_P1:%.*]] = add i32 [[B]], 2
23+
; CHECK-NEXT: store i32 [[A_P1]], ptr [[AIDX_NEXT]], align 4
24+
; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[AIDX]], align 1
25+
; CHECK-NEXT: [[C:%.*]] = mul i32 [[A]], 2
26+
; CHECK-NEXT: store i32 [[C]], ptr [[CIDX]], align 4
27+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
28+
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]]
29+
; CHECK: [[FOR_END]]:
30+
; CHECK-NEXT: ret void
31+
;
32+
entry:
33+
br label %for.body
34+
35+
for.body: ; preds = %for.body, %entry
36+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %latch ]
37+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
38+
39+
%Aidx_next = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.next
40+
%Bidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
41+
%Cidx = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
42+
%Aidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
43+
44+
%b = load i32, ptr %Bidx, align 4
45+
%uncountable.c = icmp eq i32 %b, 10
46+
br i1 %uncountable.c, label %latch, label %for.end
47+
48+
latch:
49+
%a_p1 = add i32 %b, 2
50+
store i32 %a_p1, ptr %Aidx_next, align 4
51+
52+
%a = load i32, ptr %Aidx, align 1
53+
%c = mul i32 %a, 2
54+
store i32 %c, ptr %Cidx, align 4
55+
56+
%exitcond = icmp eq i64 %indvars.iv.next, %N
57+
br i1 %exitcond, label %for.end, label %for.body
58+
59+
for.end: ; preds = %for.body
60+
ret void
61+
}

0 commit comments

Comments
 (0)