Skip to content

Commit fe182dd

Browse files
committed
[LoopUnrollAnalyzer] Use constant folding API for loads
Use ConstantFoldLoadFromConst() instead of a partial re-implementation. This makes the code slightly more generic by not depending on the exact structure of the constant.
1 parent dfde1a7 commit fe182dd

File tree

2 files changed

+45
-30
lines changed

2 files changed

+45
-30
lines changed

llvm/lib/Analysis/LoopUnrollAnalyzer.cpp

Lines changed: 6 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
//===----------------------------------------------------------------------===//
1414

1515
#include "llvm/Analysis/LoopUnrollAnalyzer.h"
16+
#include "llvm/Analysis/ConstantFolding.h"
1617
#include "llvm/Analysis/InstructionSimplify.h"
1718
#include "llvm/Analysis/LoopInfo.h"
1819
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -105,45 +106,20 @@ bool UnrolledInstAnalyzer::visitLoad(LoadInst &I) {
105106
auto AddressIt = SimplifiedAddresses.find(AddrOp);
106107
if (AddressIt == SimplifiedAddresses.end())
107108
return false;
108-
const APInt &SimplifiedAddrOp = AddressIt->second.Offset;
109109

110110
auto *GV = dyn_cast<GlobalVariable>(AddressIt->second.Base);
111111
// We're only interested in loads that can be completely folded to a
112112
// constant.
113113
if (!GV || !GV->hasDefinitiveInitializer() || !GV->isConstant())
114114
return false;
115115

116-
ConstantDataSequential *CDS =
117-
dyn_cast<ConstantDataSequential>(GV->getInitializer());
118-
if (!CDS)
116+
Constant *Res =
117+
ConstantFoldLoadFromConst(GV->getInitializer(), I.getType(),
118+
AddressIt->second.Offset, I.getDataLayout());
119+
if (!Res)
119120
return false;
120121

121-
// We might have a vector load from an array. FIXME: for now we just bail
122-
// out in this case, but we should be able to resolve and simplify such
123-
// loads.
124-
if (CDS->getElementType() != I.getType())
125-
return false;
126-
127-
unsigned ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
128-
if (SimplifiedAddrOp.getActiveBits() > 64)
129-
return false;
130-
int64_t SimplifiedAddrOpV = SimplifiedAddrOp.getSExtValue();
131-
if (SimplifiedAddrOpV < 0) {
132-
// FIXME: For now we conservatively ignore out of bound accesses, but
133-
// we're allowed to perform the optimization in this case.
134-
return false;
135-
}
136-
uint64_t Index = static_cast<uint64_t>(SimplifiedAddrOpV) / ElemSize;
137-
if (Index >= CDS->getNumElements()) {
138-
// FIXME: For now we conservatively ignore out of bound accesses, but
139-
// we're allowed to perform the optimization in this case.
140-
return false;
141-
}
142-
143-
Constant *CV = CDS->getElementAsConstant(Index);
144-
assert(CV && "Constant expected.");
145-
SimplifiedValues[&I] = CV;
146-
122+
SimplifiedValues[&I] = Res;
147123
return true;
148124
}
149125

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
; RUN: opt < %s -S -passes=loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST1
2+
; RUN: opt < %s -S -passes=loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=200 | FileCheck %s -check-prefix=TEST2
3+
; RUN: opt < %s -S -passes=loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST3
4+
5+
; This test is a copy of full-unroll-heuristics.ll but with the constant
6+
; wrapped in an extra struct. This should not hinder the analysis.
7+
8+
; If the absolute threshold is too low, we should not unroll:
9+
; TEST1: %array_const_idx = getelementptr inbounds { [9 x i32] }, ptr @known_constant, i64 0, i32 0, i64 %iv
10+
11+
; Otherwise, we should:
12+
; TEST2-NOT: %array_const_idx = getelementptr inbounds { [9 x i32] }, ptr @known_constant, i64 0, i32 0, i64 %iv
13+
14+
; If we do not boost threshold, the unroll will not happen:
15+
; TEST3: %array_const_idx = getelementptr inbounds { [9 x i32] }, ptr @known_constant, i64 0, i32 0, i64 %iv
16+
17+
@known_constant = internal unnamed_addr constant { [9 x i32] } { [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0] }, align 16
18+
19+
define i32 @foo(ptr noalias nocapture readonly %src) {
20+
entry:
21+
br label %loop
22+
23+
loop: ; preds = %loop, %entry
24+
%iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
25+
%r = phi i32 [ 0, %entry ], [ %add, %loop ]
26+
%arrayidx = getelementptr inbounds i32, ptr %src, i64 %iv
27+
%src_element = load i32, ptr %arrayidx, align 4
28+
%array_const_idx = getelementptr inbounds { [9 x i32] }, ptr @known_constant, i64 0, i32 0, i64 %iv
29+
%const_array_element = load i32, ptr %array_const_idx, align 4
30+
%mul = mul nsw i32 %src_element, %const_array_element
31+
%add = add nsw i32 %mul, %r
32+
%inc = add nuw nsw i64 %iv, 1
33+
%exitcond86.i = icmp eq i64 %inc, 9
34+
br i1 %exitcond86.i, label %loop.end, label %loop
35+
36+
loop.end: ; preds = %loop
37+
%r.lcssa = phi i32 [ %r, %loop ]
38+
ret i32 %r.lcssa
39+
}

0 commit comments

Comments
 (0)