Skip to content

Commit 88945db

Browse files
authored
[AMDGPU][SIPreEmitPeephole] pre-commit tests: mustRetainExeczBranch: use a cost model (#109816)
1 parent 11c423f commit 88945db

File tree

1 file changed

+365
-0
lines changed

1 file changed

+365
-0
lines changed
Lines changed: 365 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,365 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX1010 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX1030 %s
5+
6+
; Branch on an `inreg` %flag with no !prof metadata attached.
; The expected code evaluates the condition with s_cmp_lt_i32 on an SGPR and
; skips the buffer store with s_cbranch_scc1 on every checked target.
define void @uniform_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) {
; GFX9-LABEL: uniform_br_no_metadata:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_lt_i32 s21, 1
; GFX9-NEXT: s_cbranch_scc1 .LBB0_2
; GFX9-NEXT: ; %bb.1: ; %if.then
; GFX9-NEXT: s_mov_b32 s11, s18
; GFX9-NEXT: s_mov_b32 s10, s17
; GFX9-NEXT: s_mov_b32 s9, s16
; GFX9-NEXT: s_mov_b32 s8, s7
; GFX9-NEXT: v_mov_b32_e32 v0, s6
; GFX9-NEXT: v_mov_b32_e32 v1, s19
; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; GFX9-NEXT: .LBB0_2: ; %if.end
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: uniform_br_no_metadata:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_lt_i32 s21, 1
; GFX10-NEXT: s_cbranch_scc1 .LBB0_2
; GFX10-NEXT: ; %bb.1: ; %if.then
; GFX10-NEXT: v_mov_b32_e32 v0, s6
; GFX10-NEXT: v_mov_b32_e32 v1, s19
; GFX10-NEXT: s_mov_b32 s11, s18
; GFX10-NEXT: s_mov_b32 s10, s17
; GFX10-NEXT: s_mov_b32 s9, s16
; GFX10-NEXT: s_mov_b32 s8, s7
; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; GFX10-NEXT: .LBB0_2: ; %if.end
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
  %cmp = icmp sgt i32 %flag, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:
  tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
  br label %if.end

if.end:
  call void @llvm.amdgcn.s.waitcnt(i32 0)
  ret void
}
52+
53+
; Branch on an `inreg` %flag annotated with !prof !0, which gives both
; successors the same weight (1000 : 1000).
; The expected code is an s_cmp_lt_i32 / s_cbranch_scc1 sequence around the
; buffer store on every checked target.
define void @uniform_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) {
; GFX9-LABEL: uniform_br_unprofitable:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_lt_i32 s21, 1
; GFX9-NEXT: s_cbranch_scc1 .LBB1_2
; GFX9-NEXT: ; %bb.1: ; %if.then
; GFX9-NEXT: s_mov_b32 s11, s18
; GFX9-NEXT: s_mov_b32 s10, s17
; GFX9-NEXT: s_mov_b32 s9, s16
; GFX9-NEXT: s_mov_b32 s8, s7
; GFX9-NEXT: v_mov_b32_e32 v0, s6
; GFX9-NEXT: v_mov_b32_e32 v1, s19
; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; GFX9-NEXT: .LBB1_2: ; %if.end
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: uniform_br_unprofitable:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_lt_i32 s21, 1
; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
; GFX10-NEXT: ; %bb.1: ; %if.then
; GFX10-NEXT: v_mov_b32_e32 v0, s6
; GFX10-NEXT: v_mov_b32_e32 v1, s19
; GFX10-NEXT: s_mov_b32 s11, s18
; GFX10-NEXT: s_mov_b32 s10, s17
; GFX10-NEXT: s_mov_b32 s9, s16
; GFX10-NEXT: s_mov_b32 s8, s7
; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; GFX10-NEXT: .LBB1_2: ; %if.end
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
  %cmp = icmp sgt i32 %flag, 0
  br i1 %cmp, label %if.then, label %if.end, !prof !0

if.then:
  tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
  br label %if.end

if.end:
  call void @llvm.amdgcn.s.waitcnt(i32 0)
  ret void
}
99+
100+
; Branch on an `inreg` %flag annotated with !prof !1, which weights the
; %if.then successor 2000 against 1 for %if.end.
; The expected code is an s_cmp_lt_i32 / s_cbranch_scc1 sequence around the
; buffer store on every checked target.
define void @uniform_br_profitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) {
; GFX9-LABEL: uniform_br_profitable:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_lt_i32 s21, 1
; GFX9-NEXT: s_cbranch_scc1 .LBB2_2
; GFX9-NEXT: ; %bb.1: ; %if.then
; GFX9-NEXT: s_mov_b32 s11, s18
; GFX9-NEXT: s_mov_b32 s10, s17
; GFX9-NEXT: s_mov_b32 s9, s16
; GFX9-NEXT: s_mov_b32 s8, s7
; GFX9-NEXT: v_mov_b32_e32 v0, s6
; GFX9-NEXT: v_mov_b32_e32 v1, s19
; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; GFX9-NEXT: .LBB2_2: ; %if.end
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: uniform_br_profitable:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_lt_i32 s21, 1
; GFX10-NEXT: s_cbranch_scc1 .LBB2_2
; GFX10-NEXT: ; %bb.1: ; %if.then
; GFX10-NEXT: v_mov_b32_e32 v0, s6
; GFX10-NEXT: v_mov_b32_e32 v1, s19
; GFX10-NEXT: s_mov_b32 s11, s18
; GFX10-NEXT: s_mov_b32 s10, s17
; GFX10-NEXT: s_mov_b32 s9, s16
; GFX10-NEXT: s_mov_b32 s8, s7
; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; GFX10-NEXT: .LBB2_2: ; %if.end
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
  %cmp = icmp sgt i32 %flag, 0
  br i1 %cmp, label %if.then, label %if.end, !prof !1

if.then:
  tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
  br label %if.end

if.end:
  call void @llvm.amdgcn.s.waitcnt(i32 0)
  ret void
}
146+
147+
; Branch on a %flag that is NOT marked `inreg`, with no !prof metadata.
; The expected code compares v0 with a vector compare, narrows exec
; (s_and_saveexec on gfx90a/gfx1010, s_mov_b32 + v_cmpx on gfx1030) and keeps
; an s_cbranch_execz around the buffer store on every checked target.
; gfx1010 additionally expects s_waitcnt_depctr 0xffe3 before exec is restored.
define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef %flag) {
; GFX9-LABEL: divergent_br_no_metadata:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_cbranch_execz .LBB3_2
; GFX9-NEXT: ; %bb.1: ; %if.then
; GFX9-NEXT: s_mov_b32 s11, s18
; GFX9-NEXT: s_mov_b32 s10, s17
; GFX9-NEXT: s_mov_b32 s9, s16
; GFX9-NEXT: s_mov_b32 s8, s7
; GFX9-NEXT: v_mov_b32_e32 v0, s6
; GFX9-NEXT: v_mov_b32_e32 v1, s19
; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; GFX9-NEXT: .LBB3_2: ; %if.end
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: divergent_br_no_metadata:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0
; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX1010-NEXT: s_cbranch_execz .LBB3_2
; GFX1010-NEXT: ; %bb.1: ; %if.then
; GFX1010-NEXT: v_mov_b32_e32 v0, s6
; GFX1010-NEXT: v_mov_b32_e32 v1, s19
; GFX1010-NEXT: s_mov_b32 s11, s18
; GFX1010-NEXT: s_mov_b32 s10, s17
; GFX1010-NEXT: s_mov_b32 s9, s16
; GFX1010-NEXT: s_mov_b32 s8, s7
; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; GFX1010-NEXT: .LBB3_2: ; %if.end
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: divergent_br_no_metadata:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: s_mov_b32 s4, exec_lo
; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0
; GFX1030-NEXT: s_cbranch_execz .LBB3_2
; GFX1030-NEXT: ; %bb.1: ; %if.then
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
; GFX1030-NEXT: v_mov_b32_e32 v1, s19
; GFX1030-NEXT: s_mov_b32 s11, s18
; GFX1030-NEXT: s_mov_b32 s10, s17
; GFX1030-NEXT: s_mov_b32 s9, s16
; GFX1030-NEXT: s_mov_b32 s8, s7
; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; GFX1030-NEXT: .LBB3_2: ; %if.end
; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
  %cmp = icmp sgt i32 %flag, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:
  tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
  br label %if.end

if.end:
  call void @llvm.amdgcn.s.waitcnt(i32 0)
  ret void
}
217+
218+
; Branch on a %flag that is NOT marked `inreg`, annotated with !prof !0, which
; gives both successors the same weight (1000 : 1000).
; The expected code narrows exec from a vector compare on v0 and keeps an
; s_cbranch_execz around the buffer store on every checked target.
; gfx1010 additionally expects s_waitcnt_depctr 0xffe3 before exec is restored.
define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef %flag) {
; GFX9-LABEL: divergent_br_unprofitable:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_cbranch_execz .LBB4_2
; GFX9-NEXT: ; %bb.1: ; %if.then
; GFX9-NEXT: s_mov_b32 s11, s18
; GFX9-NEXT: s_mov_b32 s10, s17
; GFX9-NEXT: s_mov_b32 s9, s16
; GFX9-NEXT: s_mov_b32 s8, s7
; GFX9-NEXT: v_mov_b32_e32 v0, s6
; GFX9-NEXT: v_mov_b32_e32 v1, s19
; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; GFX9-NEXT: .LBB4_2: ; %if.end
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: divergent_br_unprofitable:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0
; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX1010-NEXT: s_cbranch_execz .LBB4_2
; GFX1010-NEXT: ; %bb.1: ; %if.then
; GFX1010-NEXT: v_mov_b32_e32 v0, s6
; GFX1010-NEXT: v_mov_b32_e32 v1, s19
; GFX1010-NEXT: s_mov_b32 s11, s18
; GFX1010-NEXT: s_mov_b32 s10, s17
; GFX1010-NEXT: s_mov_b32 s9, s16
; GFX1010-NEXT: s_mov_b32 s8, s7
; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; GFX1010-NEXT: .LBB4_2: ; %if.end
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: divergent_br_unprofitable:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: s_mov_b32 s4, exec_lo
; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0
; GFX1030-NEXT: s_cbranch_execz .LBB4_2
; GFX1030-NEXT: ; %bb.1: ; %if.then
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
; GFX1030-NEXT: v_mov_b32_e32 v1, s19
; GFX1030-NEXT: s_mov_b32 s11, s18
; GFX1030-NEXT: s_mov_b32 s10, s17
; GFX1030-NEXT: s_mov_b32 s9, s16
; GFX1030-NEXT: s_mov_b32 s8, s7
; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; GFX1030-NEXT: .LBB4_2: ; %if.end
; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
  %cmp = icmp sgt i32 %flag, 0
  br i1 %cmp, label %if.then, label %if.end, !prof !0

if.then:
  tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
  br label %if.end

if.end:
  call void @llvm.amdgcn.s.waitcnt(i32 0)
  ret void
}
288+
289+
; Branch on a %flag that is NOT marked `inreg`, annotated with !prof !1, which
; weights the %if.then successor 2000 against 1 for %if.end.
; The expected code narrows exec from a vector compare on v0 and keeps an
; s_cbranch_execz around the buffer store on every checked target.
; gfx1010 additionally expects s_waitcnt_depctr 0xffe3 before exec is restored.
define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef %flag) {
; GFX9-LABEL: divergent_br_profitable:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_cbranch_execz .LBB5_2
; GFX9-NEXT: ; %bb.1: ; %if.then
; GFX9-NEXT: s_mov_b32 s11, s18
; GFX9-NEXT: s_mov_b32 s10, s17
; GFX9-NEXT: s_mov_b32 s9, s16
; GFX9-NEXT: s_mov_b32 s8, s7
; GFX9-NEXT: v_mov_b32_e32 v0, s6
; GFX9-NEXT: v_mov_b32_e32 v1, s19
; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; GFX9-NEXT: .LBB5_2: ; %if.end
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-LABEL: divergent_br_profitable:
; GFX1010: ; %bb.0: ; %entry
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0
; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX1010-NEXT: s_cbranch_execz .LBB5_2
; GFX1010-NEXT: ; %bb.1: ; %if.then
; GFX1010-NEXT: v_mov_b32_e32 v0, s6
; GFX1010-NEXT: v_mov_b32_e32 v1, s19
; GFX1010-NEXT: s_mov_b32 s11, s18
; GFX1010-NEXT: s_mov_b32 s10, s17
; GFX1010-NEXT: s_mov_b32 s9, s16
; GFX1010-NEXT: s_mov_b32 s8, s7
; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; GFX1010-NEXT: .LBB5_2: ; %if.end
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: s_setpc_b64 s[30:31]
;
; GFX1030-LABEL: divergent_br_profitable:
; GFX1030: ; %bb.0: ; %entry
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: s_mov_b32 s4, exec_lo
; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0
; GFX1030-NEXT: s_cbranch_execz .LBB5_2
; GFX1030-NEXT: ; %bb.1: ; %if.then
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
; GFX1030-NEXT: v_mov_b32_e32 v1, s19
; GFX1030-NEXT: s_mov_b32 s11, s18
; GFX1030-NEXT: s_mov_b32 s10, s17
; GFX1030-NEXT: s_mov_b32 s9, s16
; GFX1030-NEXT: s_mov_b32 s8, s7
; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
; GFX1030-NEXT: .LBB5_2: ; %if.end
; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: s_setpc_b64 s[30:31]
entry:
  %cmp = icmp sgt i32 %flag, 0
  br i1 %cmp, label %if.then, label %if.end, !prof !1

if.then:
  tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
  br label %if.end

if.end:
  call void @llvm.amdgcn.s.waitcnt(i32 0)
  ret void
}
359+
360+
; Intrinsic declarations. The buffer store and s.waitcnt intrinsics are called
; by every test function in this file.
declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture writeonly, i32, i32, i32 immarg)
declare void @llvm.amdgcn.s.waitcnt(i32)
; NOTE(review): no function visible in this file calls workitem.id.x; the
; declaration is kept as-is.
declare i32 @llvm.amdgcn.workitem.id.x()
363+
364+
; Branch-weight profiles attached via !prof. Weights are listed in successor
; order, so for `br i1 %cmp, label %if.then, label %if.end` the first weight
; belongs to %if.then and the second to %if.end.
; !0: both successors equally weighted (used by the *_unprofitable tests).
!0 = !{!"branch_weights", i32 1000, i32 1000}
; !1: %if.then weighted 2000 to 1 against %if.end (used by the *_profitable tests).
!1 = !{!"branch_weights", i32 2000, i32 1}

0 commit comments

Comments
 (0)