|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
2 | 2 | ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
|
3 | 3 | ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
|
4 |
| -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s |
| 4 | +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s |
| 5 | +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s |
5 | 6 |
|
6 | 7 | define amdgpu_kernel void @br_cc_f16(
|
7 | 8 | ; SI-LABEL: br_cc_f16:
|
@@ -60,32 +61,62 @@ define amdgpu_kernel void @br_cc_f16(
|
60 | 61 | ; VI-NEXT: buffer_store_short v1, off, s[0:3], 0
|
61 | 62 | ; VI-NEXT: s_endpgm
|
62 | 63 | ;
|
63 |
| -; GFX11-LABEL: br_cc_f16: |
64 |
| -; GFX11: ; %bb.0: ; %entry |
65 |
| -; GFX11-NEXT: s_clause 0x1 |
66 |
| -; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
67 |
| -; GFX11-NEXT: s_load_b64 s[8:9], s[4:5], 0x34 |
68 |
| -; GFX11-NEXT: s_mov_b32 s6, -1 |
69 |
| -; GFX11-NEXT: s_mov_b32 s7, 0x31016000 |
70 |
| -; GFX11-NEXT: s_mov_b32 s10, s6 |
71 |
| -; GFX11-NEXT: s_mov_b32 s11, s7 |
72 |
| -; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
73 |
| -; GFX11-NEXT: s_mov_b32 s4, s2 |
74 |
| -; GFX11-NEXT: s_mov_b32 s5, s3 |
75 |
| -; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc |
76 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) |
77 |
| -; GFX11-NEXT: buffer_load_u16 v1, off, s[8:11], 0 glc dlc |
78 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) |
79 |
| -; GFX11-NEXT: s_mov_b32 s2, s6 |
80 |
| -; GFX11-NEXT: s_mov_b32 s3, s7 |
81 |
| -; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 |
82 |
| -; GFX11-NEXT: s_cbranch_vccnz .LBB0_2 |
83 |
| -; GFX11-NEXT: ; %bb.1: ; %one |
84 |
| -; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 |
85 |
| -; GFX11-NEXT: s_endpgm |
86 |
| -; GFX11-NEXT: .LBB0_2: ; %two |
87 |
| -; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0 |
88 |
| -; GFX11-NEXT: s_endpgm |
| 64 | +; GFX11-TRUE16-LABEL: br_cc_f16: |
| 65 | +; GFX11-TRUE16: ; %bb.0: ; %entry |
| 66 | +; GFX11-TRUE16-NEXT: s_clause 0x1 |
| 67 | +; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 68 | +; GFX11-TRUE16-NEXT: s_load_b64 s[8:9], s[4:5], 0x34 |
| 69 | +; GFX11-TRUE16-NEXT: s_mov_b32 s6, -1 |
| 70 | +; GFX11-TRUE16-NEXT: s_mov_b32 s7, 0x31016000 |
| 71 | +; GFX11-TRUE16-NEXT: s_mov_b32 s10, s6 |
| 72 | +; GFX11-TRUE16-NEXT: s_mov_b32 s11, s7 |
| 73 | +; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 74 | +; GFX11-TRUE16-NEXT: s_mov_b32 s4, s2 |
| 75 | +; GFX11-TRUE16-NEXT: s_mov_b32 s5, s3 |
| 76 | +; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc |
| 77 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 78 | +; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[8:11], 0 glc dlc |
| 79 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 80 | +; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6 |
| 81 | +; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7 |
| 82 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l |
| 83 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l |
| 84 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 85 | +; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v2.l, v2.h |
| 86 | +; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB0_2 |
| 87 | +; GFX11-TRUE16-NEXT: ; %bb.1: ; %one |
| 88 | +; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 |
| 89 | +; GFX11-TRUE16-NEXT: s_endpgm |
| 90 | +; GFX11-TRUE16-NEXT: .LBB0_2: ; %two |
| 91 | +; GFX11-TRUE16-NEXT: buffer_store_b16 v1, off, s[0:3], 0 |
| 92 | +; GFX11-TRUE16-NEXT: s_endpgm |
| 93 | +; |
| 94 | +; GFX11-FAKE16-LABEL: br_cc_f16: |
| 95 | +; GFX11-FAKE16: ; %bb.0: ; %entry |
| 96 | +; GFX11-FAKE16-NEXT: s_clause 0x1 |
| 97 | +; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 98 | +; GFX11-FAKE16-NEXT: s_load_b64 s[8:9], s[4:5], 0x34 |
| 99 | +; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1 |
| 100 | +; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 |
| 101 | +; GFX11-FAKE16-NEXT: s_mov_b32 s10, s6 |
| 102 | +; GFX11-FAKE16-NEXT: s_mov_b32 s11, s7 |
| 103 | +; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 104 | +; GFX11-FAKE16-NEXT: s_mov_b32 s4, s2 |
| 105 | +; GFX11-FAKE16-NEXT: s_mov_b32 s5, s3 |
| 106 | +; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc |
| 107 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 108 | +; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[8:11], 0 glc dlc |
| 109 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 110 | +; GFX11-FAKE16-NEXT: s_mov_b32 s2, s6 |
| 111 | +; GFX11-FAKE16-NEXT: s_mov_b32 s3, s7 |
| 112 | +; GFX11-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1 |
| 113 | +; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB0_2 |
| 114 | +; GFX11-FAKE16-NEXT: ; %bb.1: ; %one |
| 115 | +; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 |
| 116 | +; GFX11-FAKE16-NEXT: s_endpgm |
| 117 | +; GFX11-FAKE16-NEXT: .LBB0_2: ; %two |
| 118 | +; GFX11-FAKE16-NEXT: buffer_store_b16 v1, off, s[0:3], 0 |
| 119 | +; GFX11-FAKE16-NEXT: s_endpgm |
89 | 120 | ptr addrspace(1) %r,
|
90 | 121 | ptr addrspace(1) %a,
|
91 | 122 | ptr addrspace(1) %b) {
|
@@ -151,25 +182,47 @@ define amdgpu_kernel void @br_cc_f16_imm_a(
|
151 | 182 | ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
|
152 | 183 | ; VI-NEXT: s_endpgm
|
153 | 184 | ;
|
154 |
| -; GFX11-LABEL: br_cc_f16_imm_a: |
155 |
| -; GFX11: ; %bb.0: ; %entry |
156 |
| -; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
157 |
| -; GFX11-NEXT: s_mov_b32 s7, 0x31016000 |
158 |
| -; GFX11-NEXT: s_mov_b32 s6, -1 |
159 |
| -; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
160 |
| -; GFX11-NEXT: s_mov_b32 s4, s2 |
161 |
| -; GFX11-NEXT: s_mov_b32 s5, s3 |
162 |
| -; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0 |
163 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) |
164 |
| -; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v0 |
165 |
| -; GFX11-NEXT: s_cbranch_vccnz .LBB1_2 |
166 |
| -; GFX11-NEXT: ; %bb.1: ; %one |
167 |
| -; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800 |
168 |
| -; GFX11-NEXT: .LBB1_2: ; %two |
169 |
| -; GFX11-NEXT: s_mov_b32 s2, s6 |
170 |
| -; GFX11-NEXT: s_mov_b32 s3, s7 |
171 |
| -; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 |
172 |
| -; GFX11-NEXT: s_endpgm |
| 185 | +; GFX11-TRUE16-LABEL: br_cc_f16_imm_a: |
| 186 | +; GFX11-TRUE16: ; %bb.0: ; %entry |
| 187 | +; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 188 | +; GFX11-TRUE16-NEXT: s_mov_b32 s7, 0x31016000 |
| 189 | +; GFX11-TRUE16-NEXT: s_mov_b32 s6, -1 |
| 190 | +; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 191 | +; GFX11-TRUE16-NEXT: s_mov_b32 s4, s2 |
| 192 | +; GFX11-TRUE16-NEXT: s_mov_b32 s5, s3 |
| 193 | +; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0 |
| 194 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 195 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l |
| 196 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 197 | +; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v1.l |
| 198 | +; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB1_2 |
| 199 | +; GFX11-TRUE16-NEXT: ; %bb.1: ; %one |
| 200 | +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0x3800 |
| 201 | +; GFX11-TRUE16-NEXT: .LBB1_2: ; %two |
| 202 | +; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6 |
| 203 | +; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7 |
| 204 | +; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 |
| 205 | +; GFX11-TRUE16-NEXT: s_endpgm |
| 206 | +; |
| 207 | +; GFX11-FAKE16-LABEL: br_cc_f16_imm_a: |
| 208 | +; GFX11-FAKE16: ; %bb.0: ; %entry |
| 209 | +; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 210 | +; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 |
| 211 | +; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1 |
| 212 | +; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 213 | +; GFX11-FAKE16-NEXT: s_mov_b32 s4, s2 |
| 214 | +; GFX11-FAKE16-NEXT: s_mov_b32 s5, s3 |
| 215 | +; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0 |
| 216 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 217 | +; GFX11-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v0 |
| 218 | +; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB1_2 |
| 219 | +; GFX11-FAKE16-NEXT: ; %bb.1: ; %one |
| 220 | +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3800 |
| 221 | +; GFX11-FAKE16-NEXT: .LBB1_2: ; %two |
| 222 | +; GFX11-FAKE16-NEXT: s_mov_b32 s2, s6 |
| 223 | +; GFX11-FAKE16-NEXT: s_mov_b32 s3, s7 |
| 224 | +; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 |
| 225 | +; GFX11-FAKE16-NEXT: s_endpgm |
173 | 226 | ptr addrspace(1) %r,
|
174 | 227 | ptr addrspace(1) %b) {
|
175 | 228 | entry:
|
@@ -235,25 +288,47 @@ define amdgpu_kernel void @br_cc_f16_imm_b(
|
235 | 288 | ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
|
236 | 289 | ; VI-NEXT: s_endpgm
|
237 | 290 | ;
|
238 |
| -; GFX11-LABEL: br_cc_f16_imm_b: |
239 |
| -; GFX11: ; %bb.0: ; %entry |
240 |
| -; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
241 |
| -; GFX11-NEXT: s_mov_b32 s7, 0x31016000 |
242 |
| -; GFX11-NEXT: s_mov_b32 s6, -1 |
243 |
| -; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
244 |
| -; GFX11-NEXT: s_mov_b32 s4, s2 |
245 |
| -; GFX11-NEXT: s_mov_b32 s5, s3 |
246 |
| -; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0 |
247 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) |
248 |
| -; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v0 |
249 |
| -; GFX11-NEXT: s_cbranch_vccz .LBB2_2 |
250 |
| -; GFX11-NEXT: ; %bb.1: ; %two |
251 |
| -; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800 |
252 |
| -; GFX11-NEXT: .LBB2_2: ; %one |
253 |
| -; GFX11-NEXT: s_mov_b32 s2, s6 |
254 |
| -; GFX11-NEXT: s_mov_b32 s3, s7 |
255 |
| -; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 |
256 |
| -; GFX11-NEXT: s_endpgm |
| 291 | +; GFX11-TRUE16-LABEL: br_cc_f16_imm_b: |
| 292 | +; GFX11-TRUE16: ; %bb.0: ; %entry |
| 293 | +; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 294 | +; GFX11-TRUE16-NEXT: s_mov_b32 s7, 0x31016000 |
| 295 | +; GFX11-TRUE16-NEXT: s_mov_b32 s6, -1 |
| 296 | +; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 297 | +; GFX11-TRUE16-NEXT: s_mov_b32 s4, s2 |
| 298 | +; GFX11-TRUE16-NEXT: s_mov_b32 s5, s3 |
| 299 | +; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0 |
| 300 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 301 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l |
| 302 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 303 | +; GFX11-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v1.l |
| 304 | +; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB2_2 |
| 305 | +; GFX11-TRUE16-NEXT: ; %bb.1: ; %two |
| 306 | +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0x3800 |
| 307 | +; GFX11-TRUE16-NEXT: .LBB2_2: ; %one |
| 308 | +; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6 |
| 309 | +; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7 |
| 310 | +; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 |
| 311 | +; GFX11-TRUE16-NEXT: s_endpgm |
| 312 | +; |
| 313 | +; GFX11-FAKE16-LABEL: br_cc_f16_imm_b: |
| 314 | +; GFX11-FAKE16: ; %bb.0: ; %entry |
| 315 | +; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 316 | +; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 |
| 317 | +; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1 |
| 318 | +; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 319 | +; GFX11-FAKE16-NEXT: s_mov_b32 s4, s2 |
| 320 | +; GFX11-FAKE16-NEXT: s_mov_b32 s5, s3 |
| 321 | +; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0 |
| 322 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 323 | +; GFX11-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v0 |
| 324 | +; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB2_2 |
| 325 | +; GFX11-FAKE16-NEXT: ; %bb.1: ; %two |
| 326 | +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3800 |
| 327 | +; GFX11-FAKE16-NEXT: .LBB2_2: ; %one |
| 328 | +; GFX11-FAKE16-NEXT: s_mov_b32 s2, s6 |
| 329 | +; GFX11-FAKE16-NEXT: s_mov_b32 s3, s7 |
| 330 | +; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 |
| 331 | +; GFX11-FAKE16-NEXT: s_endpgm |
257 | 332 | ptr addrspace(1) %r,
|
258 | 333 | ptr addrspace(1) %a) {
|
259 | 334 | entry:
|
|
269 | 344 | store half 0xH3800, ptr addrspace(1) %r
|
270 | 345 | ret void
|
271 | 346 | }
|
| 347 | +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| 348 | +; GFX11: {{.*}} |
0 commit comments