Skip to content

Commit e949b54

Browse files
authored
[LAA] Use PSE::getSymbolicMaxBackedgeTakenCount. (#93499)
Update LAA to use PSE::getSymbolicMaxBackedgeTakenCount, which returns the minimum of the countable exits. When analyzing dependences and computing runtime checks, we need the smallest upper bound on the number of iterations. In terms of memory safety, it shouldn't matter if any uncomputable exits leave the loop, as long as we prove that there are no dependences given the minimum of the countable exits. The same should apply also for generating runtime checks. Note that this shifts the responsibility of checking whether all exit counts are computable or handling early-exits to the users of LAA. Depends on #93498. PR: #93499
1 parent afe6ab7 commit e949b54

File tree

7 files changed

+238
-35
lines changed

7 files changed

+238
-35
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 33 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ getStartAndEndForAccess(const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy,
214214
if (SE->isLoopInvariant(PtrExpr, Lp)) {
215215
ScStart = ScEnd = PtrExpr;
216216
} else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
217-
const SCEV *Ex = PSE.getBackedgeTakenCount();
217+
const SCEV *Ex = PSE.getSymbolicMaxBackedgeTakenCount();
218218

219219
ScStart = AR->getStart();
220220
ScEnd = AR->evaluateAtIteration(Ex, *SE);
@@ -1796,28 +1796,28 @@ void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
17961796
/// Given a dependence-distance \p Dist between two
17971797
/// memory accesses, that have strides in the same direction whose absolute
17981798
/// value of the maximum stride is given in \p MaxStride, and that have the same
1799-
/// type size \p TypeByteSize, in a loop whose takenCount is \p
1800-
/// BackedgeTakenCount, check if it is possible to prove statically that the
1801-
/// dependence distance is larger than the range that the accesses will travel
1802-
/// through the execution of the loop. If so, return true; false otherwise. This
1803-
/// is useful for example in loops such as the following (PR31098):
1799+
/// type size \p TypeByteSize, in a loop whose maximum backedge taken count is
1800+
/// \p MaxBTC, check if it is possible to prove statically that the dependence
1801+
/// distance is larger than the range that the accesses will travel through the
1802+
/// execution of the loop. If so, return true; false otherwise. This is useful
1803+
/// for example in loops such as the following (PR31098):
18041804
/// for (i = 0; i < D; ++i) {
18051805
/// = out[i];
18061806
/// out[i+D] =
18071807
/// }
18081808
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1809-
const SCEV &BackedgeTakenCount,
1810-
const SCEV &Dist, uint64_t MaxStride,
1809+
const SCEV &MaxBTC, const SCEV &Dist,
1810+
uint64_t MaxStride,
18111811
uint64_t TypeByteSize) {
18121812

18131813
// If we can prove that
1814-
// (**) |Dist| > BackedgeTakenCount * Step
1814+
// (**) |Dist| > MaxBTC * Step
18151815
// where Step is the absolute stride of the memory accesses in bytes,
18161816
// then there is no dependence.
18171817
//
18181818
// Rationale:
18191819
// We basically want to check if the absolute distance (|Dist/Step|)
1820-
// is >= the loop iteration count (or > BackedgeTakenCount).
1820+
// is >= the loop iteration count (or > MaxBTC).
18211821
// This is equivalent to the Strong SIV Test (Practical Dependence Testing,
18221822
// Section 4.2.1); Note, that for vectorization it is sufficient to prove
18231823
// that the dependence distance is >= VF; This is checked elsewhere.
@@ -1828,8 +1828,8 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
18281828
// also guarantees that distance >= VF.
18291829
//
18301830
const uint64_t ByteStride = MaxStride * TypeByteSize;
1831-
const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
1832-
const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
1831+
const SCEV *Step = SE.getConstant(MaxBTC.getType(), ByteStride);
1832+
const SCEV *Product = SE.getMulExpr(&MaxBTC, Step);
18331833

18341834
const SCEV *CastedDist = &Dist;
18351835
const SCEV *CastedProduct = Product;
@@ -1844,13 +1844,13 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
18441844
else
18451845
CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
18461846

1847-
// Is Dist - (BackedgeTakenCount * Step) > 0 ?
1847+
// Is Dist - (MaxBTC * Step) > 0 ?
18481848
// (If so, then we have proven (**) because |Dist| >= Dist)
18491849
const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
18501850
if (SE.isKnownPositive(Minus))
18511851
return true;
18521852

1853-
// Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
1853+
// Second try: Is -Dist - (MaxBTC * Step) > 0 ?
18541854
// (If so, then we have proven (**) because |Dist| >= -1*Dist)
18551855
const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
18561856
Minus = SE.getMinusSCEV(NegDist, CastedProduct);
@@ -2034,12 +2034,13 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
20342034
uint64_t MaxStride = std::max(StrideA, StrideB);
20352035

20362036
// If the distance between the accesses is larger than their maximum absolute
2037-
// stride multiplied by the backedge taken count, the accesses are independet,
2038-
// i.e. they are far enough appart that accesses won't access the same
2039-
// location across all loop ierations.
2040-
if (HasSameSize &&
2041-
isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist,
2042-
MaxStride, TypeByteSize))
2037+
// stride multiplied by the symbolic maximum backedge taken count (which is an
2038+
// upper bound of the number of iterations), the accesses are independent, i.e.
2039+
// they are far enough apart that accesses won't access the same location
2040+
// across all loop iterations.
2041+
if (HasSameSize && isSafeDependenceDistance(
2042+
DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()),
2043+
*Dist, MaxStride, TypeByteSize))
20432044
return Dependence::NoDep;
20442045

20452046
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
@@ -2374,8 +2375,10 @@ bool LoopAccessInfo::canAnalyzeLoop() {
23742375
return false;
23752376
}
23762377

2377-
// ScalarEvolution needs to be able to find the exit count.
2378-
const SCEV *ExitCount = PSE->getBackedgeTakenCount();
2378+
// ScalarEvolution needs to be able to find the symbolic max backedge taken
2379+
// count, which is an upper bound on the number of loop iterations. The loop
2380+
// may execute fewer iterations, if it exits via an uncountable exit.
2381+
const SCEV *ExitCount = PSE->getSymbolicMaxBackedgeTakenCount();
23792382
if (isa<SCEVCouldNotCompute>(ExitCount)) {
23802383
recordAnalysis("CantComputeNumberOfIterations")
23812384
<< "could not determine number of loop iterations";
@@ -2984,25 +2987,25 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
29842987
// of various possible stride specializations, considering the alternatives
29852988
// of using gather/scatters (if available).
29862989

2987-
const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2990+
const SCEV *MaxBTC = PSE->getSymbolicMaxBackedgeTakenCount();
29882991

2989-
// Match the types so we can compare the stride and the BETakenCount.
2992+
// Match the types so we can compare the stride and the MaxBTC.
29902993
// The Stride can be positive/negative, so we sign extend Stride;
2991-
// The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
2994+
// The backedgeTakenCount is non-negative, so we zero extend MaxBTC.
29922995
const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
29932996
uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
2994-
uint64_t BETypeSizeBits = DL.getTypeSizeInBits(BETakenCount->getType());
2997+
uint64_t BETypeSizeBits = DL.getTypeSizeInBits(MaxBTC->getType());
29952998
const SCEV *CastedStride = StrideExpr;
2996-
const SCEV *CastedBECount = BETakenCount;
2999+
const SCEV *CastedBECount = MaxBTC;
29973000
ScalarEvolution *SE = PSE->getSE();
29983001
if (BETypeSizeBits >= StrideTypeSizeBits)
2999-
CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
3002+
CastedStride = SE->getNoopOrSignExtend(StrideExpr, MaxBTC->getType());
30003003
else
3001-
CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
3004+
CastedBECount = SE->getZeroExtendExpr(MaxBTC, StrideExpr->getType());
30023005
const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
30033006
// Since TripCount == BackEdgeTakenCount + 1, checking:
30043007
// "Stride >= TripCount" is equivalent to checking:
3005-
// Stride - BETakenCount > 0
3008+
// Stride - MaxBTC > 0
30063009
if (SE->isKnownPositive(StrideMinusBETaken)) {
30073010
LLVM_DEBUG(
30083011
dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2079,7 +2079,7 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
20792079
// FIXME: It is highly suspicious that we're ignoring the predicates here.
20802080
SmallVector<const SCEVPredicate *, 4> Pred;
20812081
const SCEV *ExitCount =
2082-
SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);
2082+
SE.getPredicatedSymbolicMaxBackedgeTakenCount(AR->getLoop(), Pred);
20832083

20842084
assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
20852085

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,6 +1506,16 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
15061506
return false;
15071507
}
15081508

1509+
if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1510+
reportVectorizationFailure("could not determine number of loop iterations",
1511+
"could not determine number of loop iterations",
1512+
"CantComputeNumberOfIterations", ORE, TheLoop);
1513+
if (DoExtraAnalysis)
1514+
Result = false;
1515+
else
1516+
return false;
1517+
}
1518+
15091519
LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
15101520
<< (LAI->getRuntimePointerChecking()->Need
15111521
? " (with a runtime bound check)"

llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,21 @@
44
define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations(ptr %A, ptr %B) {
55
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations'
66
; CHECK-NEXT: loop.header:
7-
; CHECK-NEXT: Report: could not determine number of loop iterations
7+
; CHECK-NEXT: Memory dependences are safe with run-time checks
88
; CHECK-NEXT: Dependences:
99
; CHECK-NEXT: Run-time memory checks:
10+
; CHECK-NEXT: Check 0:
11+
; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]):
12+
; CHECK-NEXT: %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
13+
; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]):
14+
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
1015
; CHECK-NEXT: Grouped accesses:
16+
; CHECK-NEXT: Group [[GRP1]]:
17+
; CHECK-NEXT: (Low: %B High: (2000 + %B))
18+
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
19+
; CHECK-NEXT: Group [[GRP2]]:
20+
; CHECK-NEXT: (Low: %A High: (2000 + %A))
21+
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
1122
; CHECK-EMPTY:
1223
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
1324
; CHECK-NEXT: SCEV assumptions:
@@ -53,10 +64,21 @@ e.2:
5364
define i32 @all_exits_dominate_latch_countable_exits_at_most_1000_iterations(ptr %A, ptr %B) {
5465
; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_1000_iterations'
5566
; CHECK-NEXT: loop.header:
56-
; CHECK-NEXT: Report: could not determine number of loop iterations
67+
; CHECK-NEXT: Memory dependences are safe with run-time checks
5768
; CHECK-NEXT: Dependences:
5869
; CHECK-NEXT: Run-time memory checks:
70+
; CHECK-NEXT: Check 0:
71+
; CHECK-NEXT: Comparing group ([[GRP3:0x[0-9a-f]+]]):
72+
; CHECK-NEXT: %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
73+
; CHECK-NEXT: Against group ([[GRP4:0x[0-9a-f]+]]):
74+
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
5975
; CHECK-NEXT: Grouped accesses:
76+
; CHECK-NEXT: Group [[GRP3]]:
77+
; CHECK-NEXT: (Low: %B High: (4004 + %B))
78+
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
79+
; CHECK-NEXT: Group [[GRP4]]:
80+
; CHECK-NEXT: (Low: %A High: (4004 + %A))
81+
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
6082
; CHECK-EMPTY:
6183
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
6284
; CHECK-NEXT: SCEV assumptions:
@@ -145,10 +167,21 @@ e.2:
145167
define i32 @b3_does_not_dominate_latch(ptr %A, ptr %B) {
146168
; CHECK-LABEL: 'b3_does_not_dominate_latch'
147169
; CHECK-NEXT: loop.header:
148-
; CHECK-NEXT: Report: could not determine number of loop iterations
170+
; CHECK-NEXT: Memory dependences are safe with run-time checks
149171
; CHECK-NEXT: Dependences:
150172
; CHECK-NEXT: Run-time memory checks:
173+
; CHECK-NEXT: Check 0:
174+
; CHECK-NEXT: Comparing group ([[GRP5:0x[0-9a-f]+]]):
175+
; CHECK-NEXT: %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
176+
; CHECK-NEXT: Against group ([[GRP6:0x[0-9a-f]+]]):
177+
; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
151178
; CHECK-NEXT: Grouped accesses:
179+
; CHECK-NEXT: Group [[GRP5]]:
180+
; CHECK-NEXT: (Low: %B High: (4004 + %B))
181+
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header>
182+
; CHECK-NEXT: Group [[GRP6]]:
183+
; CHECK-NEXT: (Low: %A High: (4004 + %A))
184+
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header>
152185
; CHECK-EMPTY:
153186
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
154187
; CHECK-NEXT: SCEV assumptions:
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; REQUIRES: x86-registered-target
3+
; RUN: opt -aa-pipeline=basic-aa -passes=loop-distribute -enable-loop-distribute -verify-loop-info -verify-dom-info -S %s | FileCheck %s
4+
5+
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6+
target triple = "x86_64-apple-macosx10.10.0"
7+
8+
@B = common global ptr null, align 8
9+
@A = common global ptr null, align 8
10+
@C = common global ptr null, align 8
11+
@D = common global ptr null, align 8
12+
@E = common global ptr null, align 8
13+
14+
define void @f() {
15+
; CHECK-LABEL: define void @f() {
16+
; CHECK-NEXT: [[ENTRY:.*]]:
17+
; CHECK-NEXT: [[A:%.*]] = load ptr, ptr @A, align 8
18+
; CHECK-NEXT: [[B:%.*]] = load ptr, ptr @B, align 8
19+
; CHECK-NEXT: [[C:%.*]] = load ptr, ptr @C, align 8
20+
; CHECK-NEXT: [[D:%.*]] = load ptr, ptr @D, align 8
21+
; CHECK-NEXT: [[E:%.*]] = load ptr, ptr @E, align 8
22+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
23+
; CHECK: [[FOR_BODY]]:
24+
; CHECK-NEXT: [[IND:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[LATCH:.*]] ]
25+
; CHECK-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IND]]
26+
; CHECK-NEXT: [[LOADA:%.*]] = load i32, ptr [[ARRAYIDXA]], align 4
27+
; CHECK-NEXT: [[ARRAYIDXB:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IND]]
28+
; CHECK-NEXT: [[LOADB:%.*]] = load i32, ptr [[ARRAYIDXB]], align 4
29+
; CHECK-NEXT: [[UNCOUNTABLE_C:%.*]] = icmp eq i32 [[LOADB]], 10
30+
; CHECK-NEXT: br i1 [[UNCOUNTABLE_C]], label %[[FOR_END:.*]], label %[[LATCH]]
31+
; CHECK: [[LATCH]]:
32+
; CHECK-NEXT: [[MULA:%.*]] = mul i32 [[LOADB]], [[LOADA]]
33+
; CHECK-NEXT: [[ADD]] = add nuw nsw i64 [[IND]], 1
34+
; CHECK-NEXT: [[ARRAYIDXA_PLUS_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD]]
35+
; CHECK-NEXT: store i32 [[MULA]], ptr [[ARRAYIDXA_PLUS_4]], align 4
36+
; CHECK-NEXT: [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[IND]]
37+
; CHECK-NEXT: [[LOADD:%.*]] = load i32, ptr [[ARRAYIDXD]], align 4
38+
; CHECK-NEXT: [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, ptr [[E]], i64 [[IND]]
39+
; CHECK-NEXT: [[LOADE:%.*]] = load i32, ptr [[ARRAYIDXE]], align 4
40+
; CHECK-NEXT: [[MULC:%.*]] = mul i32 [[LOADD]], [[LOADE]]
41+
; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IND]]
42+
; CHECK-NEXT: store i32 [[MULC]], ptr [[ARRAYIDXC]], align 4
43+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], 20
44+
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]]
45+
; CHECK: [[FOR_END]]:
46+
; CHECK-NEXT: ret void
47+
;
48+
entry:
49+
%a = load ptr, ptr @A, align 8
50+
%b = load ptr, ptr @B, align 8
51+
%c = load ptr, ptr @C, align 8
52+
%d = load ptr, ptr @D, align 8
53+
%e = load ptr, ptr @E, align 8
54+
br label %for.body
55+
56+
for.body:
57+
%ind = phi i64 [ 0, %entry ], [ %add, %latch ]
58+
59+
%arrayidxA = getelementptr inbounds i32, ptr %a, i64 %ind
60+
%loadA = load i32, ptr %arrayidxA, align 4
61+
62+
%arrayidxB = getelementptr inbounds i32, ptr %b, i64 %ind
63+
%loadB = load i32, ptr %arrayidxB, align 4
64+
%uncountable.c = icmp eq i32 %loadB, 10
65+
br i1 %uncountable.c, label %for.end, label %latch
66+
67+
latch:
68+
%mulA = mul i32 %loadB, %loadA
69+
70+
%add = add nuw nsw i64 %ind, 1
71+
%arrayidxA_plus_4 = getelementptr inbounds i32, ptr %a, i64 %add
72+
store i32 %mulA, ptr %arrayidxA_plus_4, align 4
73+
74+
%arrayidxD = getelementptr inbounds i32, ptr %d, i64 %ind
75+
%loadD = load i32, ptr %arrayidxD, align 4
76+
77+
%arrayidxE = getelementptr inbounds i32, ptr %e, i64 %ind
78+
%loadE = load i32, ptr %arrayidxE, align 4
79+
80+
%mulC = mul i32 %loadD, %loadE
81+
82+
%arrayidxC = getelementptr inbounds i32, ptr %c, i64 %ind
83+
store i32 %mulC, ptr %arrayidxC, align 4
84+
85+
%exitcond = icmp eq i64 %add, 20
86+
br i1 %exitcond, label %for.end, label %for.body
87+
88+
for.end: ; preds = %for.body
89+
ret void
90+
}
91+
92+
attributes #0 = { nounwind readnone convergent }
93+
attributes #1 = { nounwind convergent }
94+
95+
!0 = distinct !{!0, !1}
96+
!1 = !{!"llvm.loop.distribute.enable", i1 true}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=loop-load-elim -S %s | FileCheck %s
3+
4+
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
5+
6+
define void @f(ptr %A, ptr %B, ptr %C, i64 %N) {
7+
; CHECK-LABEL: define void @f(
8+
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) {
9+
; CHECK-NEXT: [[FOR_BODY_LVER_CHECK:.*]]:
10+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
11+
; CHECK: [[FOR_BODY]]:
12+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_LVER_CHECK]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LATCH:.*]] ]
13+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
14+
; CHECK-NEXT: [[AIDX_NEXT:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
15+
; CHECK-NEXT: [[BIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
16+
; CHECK-NEXT: [[CIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]]
17+
; CHECK-NEXT: [[AIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
18+
; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[BIDX]], align 4
19+
; CHECK-NEXT: [[UNCOUNTABLE_C:%.*]] = icmp eq i32 [[B]], 10
20+
; CHECK-NEXT: br i1 [[UNCOUNTABLE_C]], label %[[LATCH]], label %[[FOR_END:.*]]
21+
; CHECK: [[LATCH]]:
22+
; CHECK-NEXT: [[A_P1:%.*]] = add i32 [[B]], 2
23+
; CHECK-NEXT: store i32 [[A_P1]], ptr [[AIDX_NEXT]], align 4
24+
; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[AIDX]], align 1
25+
; CHECK-NEXT: [[C:%.*]] = mul i32 [[A]], 2
26+
; CHECK-NEXT: store i32 [[C]], ptr [[CIDX]], align 4
27+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
28+
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]]
29+
; CHECK: [[FOR_END]]:
30+
; CHECK-NEXT: ret void
31+
;
32+
entry:
33+
br label %for.body
34+
35+
for.body: ; preds = %for.body, %entry
36+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %latch ]
37+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
38+
39+
%Aidx_next = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.next
40+
%Bidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
41+
%Cidx = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
42+
%Aidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
43+
44+
%b = load i32, ptr %Bidx, align 4
45+
%uncountable.c = icmp eq i32 %b, 10
46+
br i1 %uncountable.c, label %latch, label %for.end
47+
48+
latch:
49+
%a_p1 = add i32 %b, 2
50+
store i32 %a_p1, ptr %Aidx_next, align 4
51+
52+
%a = load i32, ptr %Aidx, align 1
53+
%c = mul i32 %a, 2
54+
store i32 %c, ptr %Cidx, align 4
55+
56+
%exitcond = icmp eq i64 %indvars.iv.next, %N
57+
br i1 %exitcond, label %for.end, label %for.body
58+
59+
for.end: ; preds = %for.body
60+
ret void
61+
}

0 commit comments

Comments
 (0)