Skip to content

Commit 3422bab

Browse files
committed
[AMDGPU][SIPreEmitPeephole] pre-commit tests: mustRetainExeczBranch: use a cost model
1 parent e903c5a commit 3422bab

File tree

1 file changed: 265 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,265 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX1010 %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX1030 %s
4+
5+
; Uniform branch, no profile data: %cmp compares only the inreg argument %flag
; with the constant 0, and the conditional `br` carries no !prof metadata.
; The checks below expect a scalar compare (s_cmp_lt_i32) followed by
; s_cbranch_scc1 that jumps over the buffer store in %if.then.
define void @convergent_cmp_no_metadata(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) {
6+
; GFX10-LABEL: convergent_cmp_no_metadata:
7+
; GFX10: ; %bb.0: ; %entry
8+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9+
; GFX10-NEXT: s_cmp_lt_i32 s21, 1
10+
; GFX10-NEXT: s_cbranch_scc1 .LBB0_2
11+
; GFX10-NEXT: ; %bb.1: ; %if.then
12+
; GFX10-NEXT: v_mov_b32_e32 v0, s6
13+
; GFX10-NEXT: v_mov_b32_e32 v1, s19
14+
; GFX10-NEXT: s_mov_b32 s11, s18
15+
; GFX10-NEXT: s_mov_b32 s10, s17
16+
; GFX10-NEXT: s_mov_b32 s9, s16
17+
; GFX10-NEXT: s_mov_b32 s8, s7
18+
; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
19+
; GFX10-NEXT: .LBB0_2: ; %if.end
20+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21+
; GFX10-NEXT: s_setpc_b64 s[30:31]
22+
entry:
23+
%cmp = icmp sgt i32 %flag, 0
24+
br i1 %cmp, label %if.then, label %if.end
25+
26+
if.then:
27+
tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
28+
br label %if.end
29+
30+
if.end:
31+
call void @llvm.amdgcn.s.waitcnt(i32 0)
32+
ret void
33+
}
34+
35+
; Uniform branch annotated with !prof !0 (equal weights for both successors).
; The IR is otherwise the same as @convergent_cmp_no_metadata, and the checks
; below expect the same scalar compare + s_cbranch_scc1 sequence; only the
; label number differs.
define void @convergent_cmp_unprofitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) {
36+
; GFX10-LABEL: convergent_cmp_unprofitable:
37+
; GFX10: ; %bb.0: ; %entry
38+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39+
; GFX10-NEXT: s_cmp_lt_i32 s21, 1
40+
; GFX10-NEXT: s_cbranch_scc1 .LBB1_2
41+
; GFX10-NEXT: ; %bb.1: ; %if.then
42+
; GFX10-NEXT: v_mov_b32_e32 v0, s6
43+
; GFX10-NEXT: v_mov_b32_e32 v1, s19
44+
; GFX10-NEXT: s_mov_b32 s11, s18
45+
; GFX10-NEXT: s_mov_b32 s10, s17
46+
; GFX10-NEXT: s_mov_b32 s9, s16
47+
; GFX10-NEXT: s_mov_b32 s8, s7
48+
; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
49+
; GFX10-NEXT: .LBB1_2: ; %if.end
50+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51+
; GFX10-NEXT: s_setpc_b64 s[30:31]
52+
entry:
53+
%cmp = icmp sgt i32 %flag, 0
54+
br i1 %cmp, label %if.then, label %if.end, !prof !0
55+
56+
if.then:
57+
tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
58+
br label %if.end
59+
60+
if.end:
61+
call void @llvm.amdgcn.s.waitcnt(i32 0)
62+
ret void
63+
}
64+
65+
; Uniform branch annotated with !prof !1 (weights 2000 : 1 in favour of
; %if.then). The IR is otherwise the same as @convergent_cmp_no_metadata, and
; the checks below still expect the scalar compare + s_cbranch_scc1 sequence.
define void @convergent_cmp_profitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) {
66+
; GFX10-LABEL: convergent_cmp_profitable:
67+
; GFX10: ; %bb.0: ; %entry
68+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69+
; GFX10-NEXT: s_cmp_lt_i32 s21, 1
70+
; GFX10-NEXT: s_cbranch_scc1 .LBB2_2
71+
; GFX10-NEXT: ; %bb.1: ; %if.then
72+
; GFX10-NEXT: v_mov_b32_e32 v0, s6
73+
; GFX10-NEXT: v_mov_b32_e32 v1, s19
74+
; GFX10-NEXT: s_mov_b32 s11, s18
75+
; GFX10-NEXT: s_mov_b32 s10, s17
76+
; GFX10-NEXT: s_mov_b32 s9, s16
77+
; GFX10-NEXT: s_mov_b32 s8, s7
78+
; GFX10-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
79+
; GFX10-NEXT: .LBB2_2: ; %if.end
80+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81+
; GFX10-NEXT: s_setpc_b64 s[30:31]
82+
entry:
83+
%cmp = icmp sgt i32 %flag, 0
84+
br i1 %cmp, label %if.then, label %if.end, !prof !1
85+
86+
if.then:
87+
tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
88+
br label %if.end
89+
90+
if.end:
91+
call void @llvm.amdgcn.s.waitcnt(i32 0)
92+
ret void
93+
}
94+
95+
; Divergent branch, no profile data: %cmp compares %flag with the value of
; llvm.amdgcn.workitem.id.x, and the conditional `br` carries no !prof metadata.
; The checks below expect the branch to be lowered through the exec mask
; (v_cmp + s_and_saveexec_b32 on gfx1010, s_mov_b32 of exec_lo + v_cmpx on
; gfx1030), with an s_cbranch_execz that jumps over %if.then and an s_or_b32
; that restores exec_lo in %if.end.
define void @divergent_cmp_no_metadata(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) {
96+
; GFX1010-LABEL: divergent_cmp_no_metadata:
97+
; GFX1010: ; %bb.0: ; %entry
98+
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99+
; GFX1010-NEXT: v_and_b32_e32 v0, 0x3ff, v31
100+
; GFX1010-NEXT: v_cmp_gt_i32_e32 vcc_lo, s21, v0
101+
; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo
102+
; GFX1010-NEXT: s_cbranch_execz .LBB3_2
103+
; GFX1010-NEXT: ; %bb.1: ; %if.then
104+
; GFX1010-NEXT: v_mov_b32_e32 v0, s6
105+
; GFX1010-NEXT: v_mov_b32_e32 v1, s19
106+
; GFX1010-NEXT: s_mov_b32 s11, s18
107+
; GFX1010-NEXT: s_mov_b32 s10, s17
108+
; GFX1010-NEXT: s_mov_b32 s9, s16
109+
; GFX1010-NEXT: s_mov_b32 s8, s7
110+
; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
111+
; GFX1010-NEXT: .LBB3_2: ; %if.end
112+
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
113+
; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4
114+
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115+
; GFX1010-NEXT: s_setpc_b64 s[30:31]
116+
;
117+
; GFX1030-LABEL: divergent_cmp_no_metadata:
118+
; GFX1030: ; %bb.0: ; %entry
119+
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120+
; GFX1030-NEXT: v_and_b32_e32 v0, 0x3ff, v31
121+
; GFX1030-NEXT: s_mov_b32 s4, exec_lo
122+
; GFX1030-NEXT: v_cmpx_gt_i32_e64 s21, v0
123+
; GFX1030-NEXT: s_cbranch_execz .LBB3_2
124+
; GFX1030-NEXT: ; %bb.1: ; %if.then
125+
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
126+
; GFX1030-NEXT: v_mov_b32_e32 v1, s19
127+
; GFX1030-NEXT: s_mov_b32 s11, s18
128+
; GFX1030-NEXT: s_mov_b32 s10, s17
129+
; GFX1030-NEXT: s_mov_b32 s9, s16
130+
; GFX1030-NEXT: s_mov_b32 s8, s7
131+
; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
132+
; GFX1030-NEXT: .LBB3_2: ; %if.end
133+
; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4
134+
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
135+
; GFX1030-NEXT: s_setpc_b64 s[30:31]
136+
entry:
137+
%id = call i32 @llvm.amdgcn.workitem.id.x()
138+
%cmp = icmp sgt i32 %flag, %id
139+
br i1 %cmp, label %if.then, label %if.end
140+
141+
if.then:
142+
tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
143+
br label %if.end
144+
145+
if.end:
146+
call void @llvm.amdgcn.s.waitcnt(i32 0)
147+
ret void
148+
}
149+
150+
; Divergent branch annotated with !prof !0 (equal weights for both
; successors). The IR is otherwise the same as @divergent_cmp_no_metadata.
; The checks below expect the exec-mask lowering with s_cbranch_execz kept in
; place for both targets; only the label number differs from the no-metadata
; variant.
define void @divergent_cmp_unprofitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) {
151+
; GFX1010-LABEL: divergent_cmp_unprofitable:
152+
; GFX1010: ; %bb.0: ; %entry
153+
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154+
; GFX1010-NEXT: v_and_b32_e32 v0, 0x3ff, v31
155+
; GFX1010-NEXT: v_cmp_gt_i32_e32 vcc_lo, s21, v0
156+
; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo
157+
; GFX1010-NEXT: s_cbranch_execz .LBB4_2
158+
; GFX1010-NEXT: ; %bb.1: ; %if.then
159+
; GFX1010-NEXT: v_mov_b32_e32 v0, s6
160+
; GFX1010-NEXT: v_mov_b32_e32 v1, s19
161+
; GFX1010-NEXT: s_mov_b32 s11, s18
162+
; GFX1010-NEXT: s_mov_b32 s10, s17
163+
; GFX1010-NEXT: s_mov_b32 s9, s16
164+
; GFX1010-NEXT: s_mov_b32 s8, s7
165+
; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
166+
; GFX1010-NEXT: .LBB4_2: ; %if.end
167+
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
168+
; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4
169+
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170+
; GFX1010-NEXT: s_setpc_b64 s[30:31]
171+
;
172+
; GFX1030-LABEL: divergent_cmp_unprofitable:
173+
; GFX1030: ; %bb.0: ; %entry
174+
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175+
; GFX1030-NEXT: v_and_b32_e32 v0, 0x3ff, v31
176+
; GFX1030-NEXT: s_mov_b32 s4, exec_lo
177+
; GFX1030-NEXT: v_cmpx_gt_i32_e64 s21, v0
178+
; GFX1030-NEXT: s_cbranch_execz .LBB4_2
179+
; GFX1030-NEXT: ; %bb.1: ; %if.then
180+
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
181+
; GFX1030-NEXT: v_mov_b32_e32 v1, s19
182+
; GFX1030-NEXT: s_mov_b32 s11, s18
183+
; GFX1030-NEXT: s_mov_b32 s10, s17
184+
; GFX1030-NEXT: s_mov_b32 s9, s16
185+
; GFX1030-NEXT: s_mov_b32 s8, s7
186+
; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
187+
; GFX1030-NEXT: .LBB4_2: ; %if.end
188+
; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4
189+
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190+
; GFX1030-NEXT: s_setpc_b64 s[30:31]
191+
entry:
192+
%id = call i32 @llvm.amdgcn.workitem.id.x()
193+
%cmp = icmp sgt i32 %flag, %id
194+
br i1 %cmp, label %if.then, label %if.end, !prof !0
195+
196+
if.then:
197+
tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
198+
br label %if.end
199+
200+
if.end:
201+
call void @llvm.amdgcn.s.waitcnt(i32 0)
202+
ret void
203+
}
204+
205+
; Divergent branch annotated with !prof !1 (weights 2000 : 1 in favour of
; %if.then). The IR is otherwise the same as @divergent_cmp_no_metadata.
; The checks below still expect s_cbranch_execz to be emitted on both
; targets, i.e. the recorded output does not differ from the other divergent
; variants except for the label number.
define void @divergent_cmp_profitable(i32 noundef inreg %value, ptr addrspace(8) nocapture writeonly inreg %res, i32 noundef inreg %v_offset, i32 noundef inreg %0, i32 noundef inreg %flag) {
206+
; GFX1010-LABEL: divergent_cmp_profitable:
207+
; GFX1010: ; %bb.0: ; %entry
208+
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
209+
; GFX1010-NEXT: v_and_b32_e32 v0, 0x3ff, v31
210+
; GFX1010-NEXT: v_cmp_gt_i32_e32 vcc_lo, s21, v0
211+
; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo
212+
; GFX1010-NEXT: s_cbranch_execz .LBB5_2
213+
; GFX1010-NEXT: ; %bb.1: ; %if.then
214+
; GFX1010-NEXT: v_mov_b32_e32 v0, s6
215+
; GFX1010-NEXT: v_mov_b32_e32 v1, s19
216+
; GFX1010-NEXT: s_mov_b32 s11, s18
217+
; GFX1010-NEXT: s_mov_b32 s10, s17
218+
; GFX1010-NEXT: s_mov_b32 s9, s16
219+
; GFX1010-NEXT: s_mov_b32 s8, s7
220+
; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
221+
; GFX1010-NEXT: .LBB5_2: ; %if.end
222+
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
223+
; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4
224+
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225+
; GFX1010-NEXT: s_setpc_b64 s[30:31]
226+
;
227+
; GFX1030-LABEL: divergent_cmp_profitable:
228+
; GFX1030: ; %bb.0: ; %entry
229+
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230+
; GFX1030-NEXT: v_and_b32_e32 v0, 0x3ff, v31
231+
; GFX1030-NEXT: s_mov_b32 s4, exec_lo
232+
; GFX1030-NEXT: v_cmpx_gt_i32_e64 s21, v0
233+
; GFX1030-NEXT: s_cbranch_execz .LBB5_2
234+
; GFX1030-NEXT: ; %bb.1: ; %if.then
235+
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
236+
; GFX1030-NEXT: v_mov_b32_e32 v1, s19
237+
; GFX1030-NEXT: s_mov_b32 s11, s18
238+
; GFX1030-NEXT: s_mov_b32 s10, s17
239+
; GFX1030-NEXT: s_mov_b32 s9, s16
240+
; GFX1030-NEXT: s_mov_b32 s8, s7
241+
; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
242+
; GFX1030-NEXT: .LBB5_2: ; %if.end
243+
; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4
244+
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245+
; GFX1030-NEXT: s_setpc_b64 s[30:31]
246+
entry:
247+
%id = call i32 @llvm.amdgcn.workitem.id.x()
248+
%cmp = icmp sgt i32 %flag, %id
249+
br i1 %cmp, label %if.then, label %if.end, !prof !1
250+
251+
if.then:
252+
tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0)
253+
br label %if.end
254+
255+
if.end:
256+
call void @llvm.amdgcn.s.waitcnt(i32 0)
257+
ret void
258+
}
259+
260+
; Intrinsics used by the test functions above: the raw buffer store issued in
; %if.then, the explicit s_waitcnt issued in %if.end, and the work-item id
; that the divergent_* tests compare %flag against.
declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture writeonly, i32, i32, i32 immarg)
261+
declare void @llvm.amdgcn.s.waitcnt(i32)
262+
declare i32 @llvm.amdgcn.workitem.id.x()
263+
264+
; Branch-weight profiles referenced through !prof on the conditional branches
; above. For a two-way `br` the operands are the weights of the true and the
; false successor, in that order (here: %if.then, then %if.end).
;   !0 - both successors equally weighted; used by the *_unprofitable tests.
;   !1 - %if.then weighted 2000 against 1; used by the *_profitable tests.
!0 = !{!"branch_weights", i32 1000, i32 1000}
265+
!1 = !{!"branch_weights", i32 2000, i32 1}

0 commit comments

Comments
 (0)