Skip to content

Commit 2454c68

Browse files
committed
[AMDGPU][SIPreEmitPeephole] pre-commit tests: mustRetainExeczBranch: use a cost model
1 parent f7493ba commit 2454c68

File tree

1 file changed

+256
-0
lines changed

1 file changed

+256
-0
lines changed
Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX1010 %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX1030 %s
4+
5+
; Baseline for the scalar-branch case: %flag is passed inreg and the
; conditional branch in %entry carries no !prof metadata. The expected output
; (shared by both llc invocations) compares the flag with s_cmp_lt_i32 and
; skips the %if.then block with s_cbranch_scc1.
define void @uniform_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) {
6+
; GFX10-LABEL: uniform_br_no_metadata:
7+
; GFX10: ; %bb.0: ; %entry
8+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9+
; GFX10-NEXT: s_cmp_lt_i32 s21, 1
10+
; GFX10-NEXT: s_cbranch_scc1 .LBB0_2
11+
; GFX10-NEXT: ; %bb.1: ; %if.then
12+
; GFX10-NEXT: v_mov_b32_e32 v0, s6
13+
; GFX10-NEXT: v_mov_b32_e32 v1, s19
14+
; GFX10-NEXT: s_mov_b32 s11, s18
15+
; GFX10-NEXT: s_mov_b32 s10, s17
16+
; GFX10-NEXT: s_mov_b32 s9, s16
17+
; GFX10-NEXT: s_mov_b32 s8, s7
18+
; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
19+
; GFX10-NEXT: .LBB0_2: ; %if.end
20+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21+
; GFX10-NEXT: s_setpc_b64 s[30:31]
22+
entry:
23+
%cmp = icmp sgt i32 %flag, 0
24+
; Take %if.then only when %flag > 0; no branch weights are attached here.
br i1 %cmp, label %if.then, label %if.end
25+
26+
if.then:
27+
; The conditionally executed block contains a single buffer store of %value
; through %res, with %v_offset as the third operand.
tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
28+
br label %if.end
29+
30+
if.end:
31+
call void @llvm.amdgcn.s.waitcnt(i32 0)
32+
ret void
33+
}
34+
35+
; Same IR as @uniform_br_no_metadata, except that the conditional branch is
; annotated with !prof !0, which is defined at the end of this file as
; branch_weights 1000, 1000 (both successors weighted equally).
; The expected output is identical to the no-metadata variant apart from the
; label number.
; NOTE(review): "unprofitable" presumably refers to the cost model named in
; the commit title; the checks here do not show a difference yet - confirm.
define void @uniform_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) {
36+
; GFX10-LABEL: uniform_br_unprofitable:
37+
; GFX10: ; %bb.0: ; %entry
38+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39+
; GFX10-NEXT: s_cmp_lt_i32 s21, 1
40+
; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
41+
; GFX10-NEXT: ; %bb.1: ; %if.then
42+
; GFX10-NEXT: v_mov_b32_e32 v0, s6
43+
; GFX10-NEXT: v_mov_b32_e32 v1, s19
44+
; GFX10-NEXT: s_mov_b32 s11, s18
45+
; GFX10-NEXT: s_mov_b32 s10, s17
46+
; GFX10-NEXT: s_mov_b32 s9, s16
47+
; GFX10-NEXT: s_mov_b32 s8, s7
48+
; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
49+
; GFX10-NEXT: .LBB1_2: ; %if.end
50+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51+
; GFX10-NEXT: s_setpc_b64 s[30:31]
52+
entry:
53+
%cmp = icmp sgt i32 %flag, 0
54+
; Equal branch weights (!0) for %if.then and %if.end.
br i1 %cmp, label %if.then, label %if.end, !prof !0
55+
56+
if.then:
57+
; The conditionally executed block contains a single buffer store.
tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
58+
br label %if.end
59+
60+
if.end:
61+
call void @llvm.amdgcn.s.waitcnt(i32 0)
62+
ret void
63+
}
64+
65+
; Same IR as @uniform_br_no_metadata, except that the conditional branch is
; annotated with !prof !1, which is defined at the end of this file as
; branch_weights 2000, 1 (the true successor %if.then is weighted far more
; heavily than %if.end).
; The expected output is identical to the no-metadata variant apart from the
; label number.
; NOTE(review): "profitable" presumably refers to the cost model named in the
; commit title; the checks here do not show a difference yet - confirm.
define void @uniform_br_profitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) {
66+
; GFX10-LABEL: uniform_br_profitable:
67+
; GFX10: ; %bb.0: ; %entry
68+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69+
; GFX10-NEXT: s_cmp_lt_i32 s21, 1
70+
; GFX10-NEXT: s_cbranch_scc1 .LBB2_2
71+
; GFX10-NEXT: ; %bb.1: ; %if.then
72+
; GFX10-NEXT: v_mov_b32_e32 v0, s6
73+
; GFX10-NEXT: v_mov_b32_e32 v1, s19
74+
; GFX10-NEXT: s_mov_b32 s11, s18
75+
; GFX10-NEXT: s_mov_b32 s10, s17
76+
; GFX10-NEXT: s_mov_b32 s9, s16
77+
; GFX10-NEXT: s_mov_b32 s8, s7
78+
; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
79+
; GFX10-NEXT: .LBB2_2: ; %if.end
80+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81+
; GFX10-NEXT: s_setpc_b64 s[30:31]
82+
entry:
83+
%cmp = icmp sgt i32 %flag, 0
84+
; Heavily skewed branch weights (!1) in favour of %if.then.
br i1 %cmp, label %if.then, label %if.end, !prof !1
85+
86+
if.then:
87+
; The conditionally executed block contains a single buffer store.
tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
88+
br label %if.end
89+
90+
if.end:
91+
call void @llvm.amdgcn.s.waitcnt(i32 0)
92+
ret void
93+
}
94+
95+
; Baseline for the exec-mask branch case: unlike the uniform_* tests, %flag is
; not marked inreg, and the conditional branch carries no !prof metadata.
; The expected output differs per target:
;   - gfx1010: v_cmp_lt_i32 into vcc_lo, then s_and_saveexec_b32, and an
;     s_waitcnt_depctr before exec_lo is restored in %if.end;
;   - gfx1030: exec_lo saved with s_mov_b32, then v_cmpx_lt_i32.
; Both skip the %if.then block with s_cbranch_execz and restore exec_lo with
; s_or_b32 in %if.end.
define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef %flag) {
96+
; GFX1010-LABEL: divergent_br_no_metadata:
97+
; GFX1010: ; %bb.0: ; %entry
98+
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99+
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0
100+
; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo
101+
; GFX1010-NEXT: s_cbranch_execz .LBB3_2
102+
; GFX1010-NEXT: ; %bb.1: ; %if.then
103+
; GFX1010-NEXT: v_mov_b32_e32 v0, s6
104+
; GFX1010-NEXT: v_mov_b32_e32 v1, s19
105+
; GFX1010-NEXT: s_mov_b32 s11, s18
106+
; GFX1010-NEXT: s_mov_b32 s10, s17
107+
; GFX1010-NEXT: s_mov_b32 s9, s16
108+
; GFX1010-NEXT: s_mov_b32 s8, s7
109+
; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
110+
; GFX1010-NEXT: .LBB3_2: ; %if.end
111+
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
112+
; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4
113+
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114+
; GFX1010-NEXT: s_setpc_b64 s[30:31]
115+
;
116+
; GFX1030-LABEL: divergent_br_no_metadata:
117+
; GFX1030: ; %bb.0: ; %entry
118+
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119+
; GFX1030-NEXT: s_mov_b32 s4, exec_lo
120+
; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0
121+
; GFX1030-NEXT: s_cbranch_execz .LBB3_2
122+
; GFX1030-NEXT: ; %bb.1: ; %if.then
123+
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
124+
; GFX1030-NEXT: v_mov_b32_e32 v1, s19
125+
; GFX1030-NEXT: s_mov_b32 s11, s18
126+
; GFX1030-NEXT: s_mov_b32 s10, s17
127+
; GFX1030-NEXT: s_mov_b32 s9, s16
128+
; GFX1030-NEXT: s_mov_b32 s8, s7
129+
; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
130+
; GFX1030-NEXT: .LBB3_2: ; %if.end
131+
; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4
132+
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133+
; GFX1030-NEXT: s_setpc_b64 s[30:31]
134+
entry:
135+
%cmp = icmp sgt i32 %flag, 0
136+
; Take %if.then only when %flag > 0; no branch weights are attached here.
br i1 %cmp, label %if.then, label %if.end
137+
138+
if.then:
139+
; The conditionally executed block contains a single buffer store of %value
; through %res, with %v_offset as the third operand.
tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
140+
br label %if.end
141+
142+
if.end:
143+
call void @llvm.amdgcn.s.waitcnt(i32 0)
144+
ret void
145+
}
146+
147+
; Same IR as @divergent_br_no_metadata (%flag is not inreg), except that the
; conditional branch is annotated with !prof !0, which is defined at the end
; of this file as branch_weights 1000, 1000 (both successors weighted
; equally).
; For both targets the expected output is identical to the no-metadata variant
; apart from the label number: s_cbranch_execz is still present.
; NOTE(review): "unprofitable" presumably refers to the cost model named in
; the commit title; the checks here do not show a difference yet - confirm.
define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef %flag) {
148+
; GFX1010-LABEL: divergent_br_unprofitable:
149+
; GFX1010: ; %bb.0: ; %entry
150+
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151+
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0
152+
; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo
153+
; GFX1010-NEXT: s_cbranch_execz .LBB4_2
154+
; GFX1010-NEXT: ; %bb.1: ; %if.then
155+
; GFX1010-NEXT: v_mov_b32_e32 v0, s6
156+
; GFX1010-NEXT: v_mov_b32_e32 v1, s19
157+
; GFX1010-NEXT: s_mov_b32 s11, s18
158+
; GFX1010-NEXT: s_mov_b32 s10, s17
159+
; GFX1010-NEXT: s_mov_b32 s9, s16
160+
; GFX1010-NEXT: s_mov_b32 s8, s7
161+
; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
162+
; GFX1010-NEXT: .LBB4_2: ; %if.end
163+
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
164+
; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4
165+
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166+
; GFX1010-NEXT: s_setpc_b64 s[30:31]
167+
;
168+
; GFX1030-LABEL: divergent_br_unprofitable:
169+
; GFX1030: ; %bb.0: ; %entry
170+
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171+
; GFX1030-NEXT: s_mov_b32 s4, exec_lo
172+
; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0
173+
; GFX1030-NEXT: s_cbranch_execz .LBB4_2
174+
; GFX1030-NEXT: ; %bb.1: ; %if.then
175+
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
176+
; GFX1030-NEXT: v_mov_b32_e32 v1, s19
177+
; GFX1030-NEXT: s_mov_b32 s11, s18
178+
; GFX1030-NEXT: s_mov_b32 s10, s17
179+
; GFX1030-NEXT: s_mov_b32 s9, s16
180+
; GFX1030-NEXT: s_mov_b32 s8, s7
181+
; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
182+
; GFX1030-NEXT: .LBB4_2: ; %if.end
183+
; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4
184+
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185+
; GFX1030-NEXT: s_setpc_b64 s[30:31]
186+
entry:
187+
%cmp = icmp sgt i32 %flag, 0
188+
; Equal branch weights (!0) for %if.then and %if.end.
br i1 %cmp, label %if.then, label %if.end, !prof !0
189+
190+
if.then:
191+
; The conditionally executed block contains a single buffer store.
tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
192+
br label %if.end
193+
194+
if.end:
195+
call void @llvm.amdgcn.s.waitcnt(i32 0)
196+
ret void
197+
}
198+
199+
; Same IR as @divergent_br_no_metadata (%flag is not inreg), except that the
; conditional branch is annotated with !prof !1, which is defined at the end
; of this file as branch_weights 2000, 1 (the true successor %if.then is
; weighted far more heavily than %if.end).
; For both targets the expected output is identical to the no-metadata variant
; apart from the label number: s_cbranch_execz is still present.
; NOTE(review): "profitable" presumably refers to the cost model named in the
; commit title; the checks here do not show a difference yet - confirm.
define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef %flag) {
200+
; GFX1010-LABEL: divergent_br_profitable:
201+
; GFX1010: ; %bb.0: ; %entry
202+
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203+
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0
204+
; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo
205+
; GFX1010-NEXT: s_cbranch_execz .LBB5_2
206+
; GFX1010-NEXT: ; %bb.1: ; %if.then
207+
; GFX1010-NEXT: v_mov_b32_e32 v0, s6
208+
; GFX1010-NEXT: v_mov_b32_e32 v1, s19
209+
; GFX1010-NEXT: s_mov_b32 s11, s18
210+
; GFX1010-NEXT: s_mov_b32 s10, s17
211+
; GFX1010-NEXT: s_mov_b32 s9, s16
212+
; GFX1010-NEXT: s_mov_b32 s8, s7
213+
; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
214+
; GFX1010-NEXT: .LBB5_2: ; %if.end
215+
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
216+
; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4
217+
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218+
; GFX1010-NEXT: s_setpc_b64 s[30:31]
219+
;
220+
; GFX1030-LABEL: divergent_br_profitable:
221+
; GFX1030: ; %bb.0: ; %entry
222+
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223+
; GFX1030-NEXT: s_mov_b32 s4, exec_lo
224+
; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0
225+
; GFX1030-NEXT: s_cbranch_execz .LBB5_2
226+
; GFX1030-NEXT: ; %bb.1: ; %if.then
227+
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
228+
; GFX1030-NEXT: v_mov_b32_e32 v1, s19
229+
; GFX1030-NEXT: s_mov_b32 s11, s18
230+
; GFX1030-NEXT: s_mov_b32 s10, s17
231+
; GFX1030-NEXT: s_mov_b32 s9, s16
232+
; GFX1030-NEXT: s_mov_b32 s8, s7
233+
; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
234+
; GFX1030-NEXT: .LBB5_2: ; %if.end
235+
; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4
236+
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237+
; GFX1030-NEXT: s_setpc_b64 s[30:31]
238+
entry:
239+
%cmp = icmp sgt i32 %flag, 0
240+
; Heavily skewed branch weights (!1) in favour of %if.then.
br i1 %cmp, label %if.then, label %if.end, !prof !1
241+
242+
if.then:
243+
; The conditionally executed block contains a single buffer store.
tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
244+
br label %if.end
245+
246+
if.end:
247+
call void @llvm.amdgcn.s.waitcnt(i32 0)
248+
ret void
249+
}
250+
251+
; Intrinsic called from the %if.then block of every test function in this file.
declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture writeonly, i32, i32, i32 immarg)
252+
; Intrinsic called from the %if.end block of every test function in this file.
declare void @llvm.amdgcn.s.waitcnt(i32)
253+
; NOTE(review): no function visible in this file calls this intrinsic; the
; declaration looks unused - confirm before removing.
declare i32 @llvm.amdgcn.workitem.id.x()
254+
255+
; !prof operand of the *_unprofitable tests: both successors weighted equally.
!0 = !{!"branch_weights", i32 1000, i32 1000}
256+
; !prof operand of the *_profitable tests: the true successor (%if.then) is
; weighted 2000 against 1 for the false successor (%if.end).
!1 = !{!"branch_weights", i32 2000, i32 1}

0 commit comments

Comments
 (0)