Skip to content

Commit 000adcf

Browse files
committed
[LoopInterchange] Bail out early if minimum loop nest is not met
This patch bails out early if the minimum loop-nest depth is not met. As it stands today, the pass computes CacheCost before it attempts the transform, which is unnecessary when the minimum depth is not met. This handles the basic cases where the depth is typically 1. Because the patch avoids unnecessary computation, it is intended to improve compile time.
1 parent 1715549 commit 000adcf

File tree

2 files changed

+83
-4
lines changed

2 files changed

+83
-4
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,14 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) {
234234
LoopList.push_back(CurrentLoop);
235235
}
236236

237+
/// Checks whether \p LoopList holds enough loops to attempt an interchange.
///
/// \param LoopList the loops collected for the nest under consideration.
/// \returns true if \p LoopList contains at least two loops; otherwise emits
/// a debug message and returns false.
static bool hasMinimumLoopDepth(SmallVectorImpl<Loop *> &LoopList) {
238+
unsigned LoopNestDepth = LoopList.size();
239+
if (LoopNestDepth < 2) { // Fewer than two loops: nothing to interchange.
240+
LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
241+
return false;
242+
}
243+
return true;
244+
}
237245
namespace {
238246

239247
/// LoopInterchangeLegality checks if it is legal to interchange the loop.
@@ -416,11 +424,11 @@ struct LoopInterchange {
416424

417425
bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
418426
bool Changed = false;
419-
unsigned LoopNestDepth = LoopList.size();
420-
if (LoopNestDepth < 2) {
421-
LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
427+
428+
if (!hasMinimumLoopDepth(LoopList))
422429
return false;
423-
}
430+
431+
unsigned LoopNestDepth = LoopList.size();
424432
if (LoopNestDepth > MaxLoopNestDepth) {
425433
LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
426434
<< MaxLoopNestDepth << "\n");
@@ -1713,6 +1721,12 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
17131721
LPMUpdater &U) {
17141722
Function &F = *LN.getParent();
17151723

1724+
SmallVector<Loop *, 8> LoopList(LN.getLoops());
1725+
1726+
// Ensure minimum depth of the loop nest to do the interchange.
1727+
if (!hasMinimumLoopDepth(LoopList))
1728+
return PreservedAnalyses::all();
1729+
17161730
DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
17171731
std::unique_ptr<CacheCost> CC =
17181732
CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; REQUIRES: asserts
2+
3+
; RUN: opt < %s -passes=loop-interchange -debug -disable-output 2>&1 | FileCheck %s
4+
5+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
6+
7+
@N = dso_local global i32 0, align 4
8+
@a = dso_local global ptr null, align 8
9+
@b = dso_local global ptr null, align 8
10+
@c = dso_local global ptr null, align 8
11+
12+
; Loop interchange should not run delinearization
13+
; for one loop case and should bail out early.
14+
15+
; CHECK-NOT: Delinearizing
16+
; CHECK-NOT: Strides:
17+
; CHECK-NOT: Terms:
18+
; CHECK: Loop doesn't contain minimum nesting level.
19+
20+
; Test input: a single, non-nested loop. The induction variable %i lives in
; an alloca, starts at 0, and is incremented by 1 while %i < @N (unsigned
; compare). Each iteration stores b[i] + c[i] into a[i], where the array
; base pointers are loaded from the globals @a, @b and @c.
define void @foo() {
21+
entry:
22+
%retval = alloca i32, align 4
23+
%i = alloca i32, align 4
24+
store i32 0, ptr %retval, align 4
25+
store i32 0, ptr %i, align 4
26+
br label %for.cond
27+
28+
; Loop header: exit once %i is no longer below @N.
for.cond: ; preds = %for.inc, %entry
29+
%0 = load i32, ptr %i, align 4
30+
%1 = load i32, ptr @N, align 4
31+
%cmp = icmp ult i32 %0, %1
32+
br i1 %cmp, label %for.body, label %for.cond.cleanup
33+
34+
for.cond.cleanup: ; preds = %for.cond
35+
br label %for.end
36+
37+
; Loop body: a[i] = b[i] + c[i].
for.body: ; preds = %for.cond
38+
%2 = load ptr, ptr @b, align 8
39+
%3 = load i32, ptr %i, align 4
40+
%idxprom = zext i32 %3 to i64
41+
%arrayidx = getelementptr inbounds nuw i32, ptr %2, i64 %idxprom
42+
%4 = load i32, ptr %arrayidx, align 4
43+
%5 = load ptr, ptr @c, align 8
44+
%6 = load i32, ptr %i, align 4
45+
%idxprom1 = zext i32 %6 to i64
46+
%arrayidx2 = getelementptr inbounds nuw i32, ptr %5, i64 %idxprom1
47+
%7 = load i32, ptr %arrayidx2, align 4
48+
%add = add nsw i32 %4, %7
49+
%8 = load ptr, ptr @a, align 8
50+
%9 = load i32, ptr %i, align 4
51+
%idxprom3 = zext i32 %9 to i64
52+
%arrayidx4 = getelementptr inbounds nuw i32, ptr %8, i64 %idxprom3
53+
store i32 %add, ptr %arrayidx4, align 4
54+
br label %for.inc
55+
56+
; Latch: increment %i and branch back to the header.
for.inc: ; preds = %for.body
57+
%10 = load i32, ptr %i, align 4
58+
%inc = add i32 %10, 1
59+
store i32 %inc, ptr %i, align 4
60+
br label %for.cond
61+
62+
for.end: ; preds = %for.cond.cleanup
63+
ret void
64+
}
65+

0 commit comments

Comments
 (0)