Skip to content

Commit 41c24f3

Browse files
committed
Simplify the logic and the test
1 parent bc145d6 commit 41c24f3

File tree

3 files changed

+200
-3
lines changed

3 files changed

+200
-3
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,10 +269,10 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
269269

270270
// Check if the direction vector is lexicographically positive (or zero)
271271
// both before and after the interchange.
272-
if (isLexicographicallyPositive(Cur, 0, Cur.size()) == false)
272+
if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size()) == false)
273273
return false;
274274
std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
275-
if (isLexicographicallyPositive(Cur, 0, Cur.size()) == false)
275+
if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size()) == false)
276276
return false;
277277
}
278278
return true;

llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ for.end8: ; preds = %for.cond1.for.inc6_
7474

7575
; CHECK: --- !Missed
7676
; CHECK-NEXT: Pass: loop-interchange
77-
; CHECK-NEXT: Name: Dependence
77+
; CHECK-NEXT: Name: UnsupportedPHIOuter
7878
; CHECK-NEXT: Function: reduction_03
7979

8080
; IR-LABEL: @reduction_03(
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
; REQUIRES: asserts
2+
; RUN: opt < %s -passes=loop-interchange -verify-dom-info -verify-loop-info \
3+
; RUN: -disable-output -debug 2>&1 | FileCheck %s
4+
5+
@a = dso_local global [256 x [256 x float]] zeroinitializer, align 4
6+
@b = dso_local global [20 x [20 x [20 x i32]]] zeroinitializer, align 4
7+
8+
;; for (int n = 0; n < 100; ++n)
9+
;; for (int i = 0; i < 256; ++i)
10+
;; for (int j = 1; j < 256; ++j)
11+
;; a[j - 1][i] += a[j][i];
12+
;;
13+
;; The direction vector of `a` is [* = <]. We can interchange the innermost
14+
;; two loops. The direction vector after interchanging will be [* < =].
15+
16+
; CHECK: Dependency matrix before interchange:
17+
; CHECK-NEXT: * = <
18+
; CHECK-NEXT: * = =
19+
; CHECK-NEXT: Processing InnerLoopId = 2 and OuterLoopId = 1
20+
; CHECK-NEXT: Checking if loops are tightly nested
21+
; CHECK-NEXT: Checking instructions in Loop header and Loop latch
22+
; CHECK-NEXT: Loops are perfectly nested
23+
; CHECK-NEXT: Loops are legal to interchange
24+
25+
define void @all_eq_lt() {
26+
entry:
27+
br label %for.n.header
28+
29+
for.n.header:
30+
%n = phi i32 [ 0, %entry ], [ %n.inc, %for.n.latch ]
31+
br label %for.i.header
32+
33+
for.i.header:
34+
%i = phi i32 [ 0, %for.n.header ], [ %i.inc, %for.i.latch ]
35+
br label %for.j
36+
37+
for.j:
38+
%j = phi i32 [ 1, %for.i.header ], [ %j.inc, %for.j ]
39+
%j.dec = sub nsw i32 %j, 1
40+
%idx.store = getelementptr inbounds [256 x [256 x float]], ptr @a, i32 0, i32 %j.dec, i32 %i
41+
%idx.load = getelementptr inbounds [256 x [256 x float]], ptr @a, i32 0, i32 %j, i32 %i
42+
%0 = load float, ptr %idx.load, align 4
43+
%1 = load float, ptr %idx.store, align 4
44+
%add = fadd fast float %0, %1
45+
store float %add, ptr %idx.store, align 4
46+
%j.inc = add nuw nsw i32 %j, 1
47+
%cmp.j = icmp slt i32 %j.inc, 256
48+
br i1 %cmp.j, label %for.j, label %for.i.latch
49+
50+
for.i.latch:
51+
%i.inc = add nuw nsw i32 %i, 1
52+
%cmp.i = icmp slt i32 %i.inc, 256
53+
br i1 %cmp.i, label %for.i.header, label %for.n.latch
54+
55+
for.n.latch:
56+
%n.inc = add nuw nsw i32 %n, 1
57+
%cmp.n = icmp slt i32 %n.inc, 100
58+
br i1 %cmp.n, label %for.n.header, label %exit
59+
60+
exit:
61+
ret void
62+
}
63+
64+
;; for (int i = 0; i < 256; ++i)
65+
;; for (int j = 1; j < 256; ++j)
66+
;; a[j - 1][i] = a[j][255 - i];
67+
;;
68+
;; The direction vector of `a` is [* <]. We cannot interchange the loops
69+
;; because we must handle a `*` dependence conservatively.
70+
71+
; CHECK: Dependency matrix before interchange:
72+
; CHECK-NEXT: * <
73+
; CHECK-NEXT: Processing InnerLoopId = 1 and OuterLoopId = 0
74+
; CHECK-NEXT: Failed interchange InnerLoopId = 1 and OuterLoopId = 0 due to dependence
75+
; CHECK-NEXT: Not interchanging loops. Cannot prove legality.
76+
77+
define void @all_lt() {
78+
entry:
79+
br label %for.i.header
80+
81+
for.i.header:
82+
%i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
83+
%i.rev = sub nsw i32 255, %i
84+
br label %for.j
85+
86+
for.j:
87+
%j = phi i32 [ 1, %for.i.header ], [ %j.inc, %for.j ]
88+
%j.dec = sub nsw i32 %j, 1
89+
%idx.store = getelementptr inbounds [256 x [256 x float]], ptr @a, i32 0, i32 %j.dec, i32 %i
90+
%idx.load = getelementptr inbounds [256 x [256 x float]], ptr @a, i32 0, i32 %j, i32 %i.rev
91+
%0 = load float, ptr %idx.load, align 4
92+
store float %0, ptr %idx.store, align 4
93+
%j.inc = add nuw nsw i32 %j, 1
94+
%cmp.j = icmp slt i32 %j.inc, 256
95+
br i1 %cmp.j, label %for.j, label %for.i.latch
96+
97+
for.i.latch:
98+
%i.inc = add nuw nsw i32 %i, 1
99+
%cmp.i = icmp slt i32 %i.inc, 256
100+
br i1 %cmp.i, label %for.i.header, label %exit
101+
102+
exit:
103+
ret void
104+
}
105+
106+
;; for (int i = 0; i < 255; ++i)
107+
;; for (int j = 1; j < 256; ++j)
108+
;; a[j][i] = a[j - 1][i + 1];
109+
;;
110+
;; The direction vector of `a` is [< >]. We cannot interchange the loops
111+
;; because the read/write order for `a` cannot be changed.
112+
113+
; CHECK: Dependency matrix before interchange:
114+
; CHECK-NEXT: < >
115+
; CHECK-NEXT: Processing InnerLoopId = 1 and OuterLoopId = 0
116+
; CHECK-NEXT: Failed interchange InnerLoopId = 1 and OuterLoopId = 0 due to dependence
117+
; CHECK-NEXT: Not interchanging loops. Cannot prove legality.
118+
119+
define void @lt_gt() {
120+
entry:
121+
br label %for.i.header
122+
123+
for.i.header:
124+
%i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
125+
%i.inc = add nuw nsw i32 %i, 1
126+
br label %for.j
127+
128+
for.j:
129+
%j = phi i32 [ 1, %for.i.header ], [ %j.inc, %for.j ]
130+
%j.dec = sub nsw i32 %j, 1
131+
%idx.store = getelementptr inbounds [256 x [256 x float]], ptr @a, i32 0, i32 %j, i32 %i
132+
%idx.load = getelementptr inbounds [256 x [256 x float]], ptr @a, i32 0, i32 %j.dec, i32 %i.inc
133+
%0 = load float, ptr %idx.load, align 4
134+
store float %0, ptr %idx.store, align 4
135+
%j.inc = add nuw nsw i32 %j, 1
136+
%cmp.j = icmp slt i32 %j.inc, 256
137+
br i1 %cmp.j, label %for.j, label %for.i.latch
138+
139+
for.i.latch:
140+
%cmp.i = icmp slt i32 %i.inc, 255
141+
br i1 %cmp.i, label %for.i.header, label %exit
142+
143+
exit:
144+
ret void
145+
}
146+
147+
;; for (int i = 0; i < 20; i++)
148+
;; for (int j = 0; j < 20; j++)
149+
;; for (int k = 0; k < 19; k++)
150+
;; b[i][j][k] = b[i][5][k + 1];
151+
;;
152+
;; The direction vector of `b` is [= * <]. We cannot interchange any pair of adjacent loops.
153+
154+
; CHECK: Dependency matrix before interchange:
155+
; CHECK-NEXT: = * <
156+
; CHECK-NEXT: Processing InnerLoopId = 2 and OuterLoopId = 1
157+
; CHECK-NEXT: Failed interchange InnerLoopId = 2 and OuterLoopId = 1 due to dependence
158+
; CHECK-NEXT: Not interchanging loops. Cannot prove legality.
159+
; CHECK-NEXT: Processing InnerLoopId = 1 and OuterLoopId = 0
160+
; CHECK-NEXT: Failed interchange InnerLoopId = 1 and OuterLoopId = 0 due to dependence
161+
; CHECK-NEXT: Not interchanging loops. Cannot prove legality.
162+
163+
define void @eq_all_lt() {
164+
entry:
165+
br label %for.i.header
166+
167+
for.i.header:
168+
%i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
169+
br label %for.j.header
170+
171+
for.j.header:
172+
%j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
173+
br label %for.k
174+
175+
for.k:
176+
%k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
177+
%k.inc = add nuw nsw i32 %k, 1
178+
%idx.store = getelementptr inbounds [20 x [20 x [20 x i32]]], ptr @b, i32 %i, i32 %j, i32 %k
179+
%idx.load = getelementptr inbounds [20 x [20 x [20 x i32]]], ptr @b, i32 %i, i32 5, i32 %k.inc
180+
%0 = load i32, ptr %idx.load, align 4
181+
store i32 %0, ptr %idx.store, align 4
182+
%cmp.k = icmp slt i32 %k.inc, 19
183+
br i1 %cmp.k, label %for.k, label %for.j.latch
184+
185+
for.j.latch:
186+
%j.inc = add nuw nsw i32 %j, 1
187+
%cmp.j = icmp slt i32 %j.inc, 20
188+
br i1 %cmp.j, label %for.j.header, label %for.i.latch
189+
190+
for.i.latch:
191+
%i.inc = add nuw nsw i32 %i, 1
192+
%cmp.i = icmp slt i32 %i.inc, 20
193+
br i1 %cmp.i, label %for.i.header, label %exit
194+
195+
exit:
196+
ret void
197+
}

0 commit comments

Comments
 (0)