Skip to content

Commit ea50e94

Browse files
committed
Add tests showing runtime checks cost with low trip counts
1 parent 984086d commit ea50e94

File tree

1 file changed

+212
-0
lines changed

1 file changed

+212
-0
lines changed
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
; REQUIRES: asserts
2+
; RUN: opt -p loop-vectorize -debug-only=loop-vectorize -S -disable-output < %s 2>&1 | FileCheck %s
3+
4+
; Module target: AArch64 Linux.
; NOTE(review): the cost values pinned by the FileCheck directives in this file
; presumably come from this target's cost model -- confirm before retargeting.
target triple = "aarch64-unknown-linux-gnu"
5+
6+
; Single loop with no enclosing outer loop: a[i + off] += b[i + off] over i8
; elements for i in [0, n). %a and %b carry no noalias information, so the
; vectorizer needs runtime memory checks. The expectations below pin the
; reported check cost (4) and minimum profitable trip count (16), and require
; that the outer-loop hoisting message is NOT printed for this function.
define void @no_outer_loop(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %off, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'no_outer_loop'
; CHECK: Calculating cost of runtime checks:
; CHECK-NOT: We expect runtime memory checks to be hoisted out of the outer loop.
; CHECK: Total cost of runtime checks: 4
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
entry:
  br label %inner.loop

inner.loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %inner.loop ]
  ; Both arrays are accessed at the same offset: i + off.
  %idx = add nuw nsw i64 %i, %off
  %b.addr = getelementptr inbounds i8, ptr %b, i64 %idx
  %b.val = load i8, ptr %b.addr, align 1
  %a.addr = getelementptr inbounds i8, ptr %a, i64 %idx
  %a.val = load i8, ptr %a.addr, align 1
  %sum = add i8 %a.val, %b.val
  store i8 %sum, ptr %a.addr, align 1
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, %n
  br i1 %done, label %inner.exit, label %inner.loop

inner.exit:
  ret void
}
31+
32+
; Two-level loop nest computing a[row * n + col] += b[row * n + col] over i8
; elements. The outer trip count is the runtime value %m and the outer latch
; branch has no profile metadata, so nothing is known about how often the
; inner loop is entered. The expectations below pin the reported runtime
; check cost (6) and minimum profitable trip count (16).
define void @outer_no_tc(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %m, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'outer_no_tc'
; CHECK: Calculating cost of runtime checks:
; CHECK: Total cost of runtime checks: 6
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
entry:
  br label %outer.loop

outer.loop:
  %row = phi i64 [ %row.next, %inner.exit ], [ 0, %entry ]
  ; Start offset of the current row: row * n.
  %row.base = mul nsw i64 %row, %n
  br label %inner.loop

inner.loop:
  %col = phi i64 [ 0, %outer.loop ], [ %col.next, %inner.loop ]
  %idx = add nuw nsw i64 %col, %row.base
  %b.addr = getelementptr inbounds i8, ptr %b, i64 %idx
  %b.val = load i8, ptr %b.addr, align 1
  %a.addr = getelementptr inbounds i8, ptr %a, i64 %idx
  %a.val = load i8, ptr %a.addr, align 1
  %sum = add i8 %a.val, %b.val
  store i8 %sum, ptr %a.addr, align 1
  %col.next = add nuw nsw i64 %col, 1
  %inner.done = icmp eq i64 %col.next, %n
  br i1 %inner.done, label %inner.exit, label %inner.loop

inner.exit:
  %row.next = add nuw nsw i64 %row, 1
  %outer.done = icmp eq i64 %row.next, %m
  br i1 %outer.done, label %outer.exit, label %outer.loop

outer.exit:
  ret void
}
66+
67+
68+
; Two-level loop nest computing a[row * n + col] += b[row * n + col] over i8
; elements, where the outer loop exits once its incremented counter equals the
; constant 3 (a compile-time-known, low outer trip count). The expectations
; below pin the reported runtime check cost (6) and minimum profitable trip
; count (16).
define void @outer_known_tc3(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc3'
; CHECK: Calculating cost of runtime checks:
; CHECK: Total cost of runtime checks: 6
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
entry:
  br label %outer.loop

outer.loop:
  %row = phi i64 [ %row.next, %inner.exit ], [ 0, %entry ]
  ; Start offset of the current row: row * n.
  %row.base = mul nsw i64 %row, %n
  br label %inner.loop

inner.loop:
  %col = phi i64 [ 0, %outer.loop ], [ %col.next, %inner.loop ]
  %idx = add nuw nsw i64 %col, %row.base
  %b.addr = getelementptr inbounds i8, ptr %b, i64 %idx
  %b.val = load i8, ptr %b.addr, align 1
  %a.addr = getelementptr inbounds i8, ptr %a, i64 %idx
  %a.val = load i8, ptr %a.addr, align 1
  %sum = add i8 %a.val, %b.val
  store i8 %sum, ptr %a.addr, align 1
  %col.next = add nuw nsw i64 %col, 1
  %inner.done = icmp eq i64 %col.next, %n
  br i1 %inner.done, label %inner.exit, label %inner.loop

inner.exit:
  %row.next = add nuw nsw i64 %row, 1
  ; Constant outer bound: three outer iterations.
  %outer.done = icmp eq i64 %row.next, 3
  br i1 %outer.done, label %outer.exit, label %outer.loop

outer.exit:
  ret void
}
102+
103+
104+
; Two-level loop nest computing a[row * n + col] += b[row * n + col] over i8
; elements, where the outer loop exits once its incremented counter equals the
; constant 64 (a compile-time-known, larger outer trip count). The
; expectations below pin the reported runtime check cost (6) and minimum
; profitable trip count (16).
define void @outer_known_tc64(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc64'
; CHECK: Calculating cost of runtime checks:
; CHECK: Total cost of runtime checks: 6
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
entry:
  br label %outer.loop

outer.loop:
  %row = phi i64 [ %row.next, %inner.exit ], [ 0, %entry ]
  ; Start offset of the current row: row * n.
  %row.base = mul nsw i64 %row, %n
  br label %inner.loop

inner.loop:
  %col = phi i64 [ 0, %outer.loop ], [ %col.next, %inner.loop ]
  %idx = add nuw nsw i64 %col, %row.base
  %b.addr = getelementptr inbounds i8, ptr %b, i64 %idx
  %b.val = load i8, ptr %b.addr, align 1
  %a.addr = getelementptr inbounds i8, ptr %a, i64 %idx
  %a.val = load i8, ptr %a.addr, align 1
  %sum = add i8 %a.val, %b.val
  store i8 %sum, ptr %a.addr, align 1
  %col.next = add nuw nsw i64 %col, 1
  %inner.done = icmp eq i64 %col.next, %n
  br i1 %inner.done, label %inner.exit, label %inner.loop

inner.exit:
  %row.next = add nuw nsw i64 %row, 1
  ; Constant outer bound: sixty-four outer iterations.
  %outer.done = icmp eq i64 %row.next, 64
  br i1 %outer.done, label %outer.exit, label %outer.loop

outer.exit:
  ret void
}
138+
139+
140+
; Two-level loop nest computing a[row * n + col] += b[row * n + col] over i8
; elements. The outer trip count is the runtime value %m, but the outer latch
; branch carries profile metadata (!prof !0, defined at the end of this file),
; so only a profile-based estimate of the outer trip count is available. The
; expectations below pin the reported runtime check cost (6) and minimum
; profitable trip count (16).
define void @outer_pgo_3(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %m, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'outer_pgo_3'
; CHECK: Calculating cost of runtime checks:
; CHECK: Total cost of runtime checks: 6
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
entry:
  br label %outer.loop

outer.loop:
  %row = phi i64 [ %row.next, %inner.exit ], [ 0, %entry ]
  ; Start offset of the current row: row * n.
  %row.base = mul nsw i64 %row, %n
  br label %inner.loop

inner.loop:
  %col = phi i64 [ 0, %outer.loop ], [ %col.next, %inner.loop ]
  %idx = add nuw nsw i64 %col, %row.base
  %b.addr = getelementptr inbounds i8, ptr %b, i64 %idx
  %b.val = load i8, ptr %b.addr, align 1
  %a.addr = getelementptr inbounds i8, ptr %a, i64 %idx
  %a.val = load i8, ptr %a.addr, align 1
  %sum = add i8 %a.val, %b.val
  store i8 %sum, ptr %a.addr, align 1
  %col.next = add nuw nsw i64 %col, 1
  %inner.done = icmp eq i64 %col.next, %n
  br i1 %inner.done, label %inner.exit, label %inner.loop

inner.exit:
  %row.next = add nuw nsw i64 %row, 1
  %outer.done = icmp eq i64 %row.next, %m
  ; The profile weights on this branch are the only trip-count hint for the outer loop.
  br i1 %outer.done, label %outer.exit, label %outer.loop, !prof !0

outer.exit:
  ret void
}
174+
175+
176+
; Two-level loop nest computing dst[row * n + col] += src[row * n + col] over
; i32 elements (4-byte aligned accesses), where the outer loop exits once its
; incremented counter equals the constant 3. The expectations below pin the
; reported runtime check cost (6) and a minimum profitable trip count of 8.
; NOTE(review): "full_range_checks" in the name presumably refers to the form
; of the runtime checks generated for this nest -- confirm against the
; vectorizer's debug output.
define void @outer_known_tc3_full_range_checks(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc3_full_range_checks'
; CHECK: Calculating cost of runtime checks:
; CHECK: Total cost of runtime checks: 6
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:8
entry:
  br label %outer.loop

outer.loop:
  %row = phi i64 [ 0, %entry ], [ %row.next, %inner.exit ]
  ; Start offset (in elements) of the current row: row * n.
  %row.base = mul nsw i64 %row, %n
  br label %inner.loop

inner.loop:
  %col = phi i64 [ 0, %outer.loop ], [ %col.next, %inner.loop ]
  %idx = add nuw nsw i64 %col, %row.base
  %src.addr = getelementptr inbounds i32, ptr %src, i64 %idx
  %src.val = load i32, ptr %src.addr, align 4
  %dst.addr = getelementptr inbounds i32, ptr %dst, i64 %idx
  %dst.val = load i32, ptr %dst.addr, align 4
  %sum = add nsw i32 %dst.val, %src.val
  store i32 %sum, ptr %dst.addr, align 4
  %col.next = add nuw nsw i64 %col, 1
  %inner.done = icmp eq i64 %col.next, %n
  br i1 %inner.done, label %inner.exit, label %inner.loop

inner.exit:
  %row.next = add nuw nsw i64 %row, 1
  ; Constant outer bound: three outer iterations.
  %outer.done = icmp eq i64 %row.next, 3
  br i1 %outer.done, label %outer.exit, label %outer.loop

outer.exit:
  ret void
}
210+
211+
212+
; Branch weights attached via !prof !0 to the outer-loop latch branch in
; @outer_pgo_3: weight 10 for its first successor (%outer.exit) and weight 20
; for its second successor (%outer.loop).
; NOTE(review): presumably chosen so the profile-estimated outer trip count is
; 3, matching the function name -- confirm.
!0 = !{!"branch_weights", i32 10, i32 20}

0 commit comments

Comments
 (0)