|
3 | 3 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s
|
4 | 4 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX10,GFX10_DEFAULT %s
|
5 | 5 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GFX10,FLATSCR_GFX10 %s
|
6 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX11 %s |
7 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GFX11 %s |
| 6 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode,+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s |
| 7 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode,-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s |
| 8 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch,+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s |
| 9 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch,-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s |
8 | 10 |
|
9 | 11 | define <2 x half> @chain_hi_to_lo_private() {
|
10 | 12 | ; GFX900-LABEL: chain_hi_to_lo_private:
|
@@ -156,14 +158,23 @@ define <2 x half> @chain_hi_to_lo_arithmatic(ptr addrspace(5) %base, half %in) {
|
156 | 158 | ; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v0, v1
|
157 | 159 | ; FLATSCR_GFX10-NEXT: s_setpc_b64 s[30:31]
|
158 | 160 | ;
|
159 |
| -; GFX11-LABEL: chain_hi_to_lo_arithmatic: |
160 |
| -; GFX11: ; %bb.0: ; %bb |
161 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
162 |
| -; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1 |
163 |
| -; GFX11-NEXT: scratch_load_d16_hi_b16 v1, v0, off |
164 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) |
165 |
| -; GFX11-NEXT: v_mov_b32_e32 v0, v1 |
166 |
| -; GFX11-NEXT: s_setpc_b64 s[30:31] |
| 161 | +; GFX11-TRUE16-LABEL: chain_hi_to_lo_arithmatic: |
| 162 | +; GFX11-TRUE16: ; %bb.0: ; %bb |
| 163 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 164 | +; GFX11-TRUE16-NEXT: v_add_f16_e32 v1.l, 1.0, v1.l |
| 165 | +; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v1, v0, off |
| 166 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 167 | +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v1 |
| 168 | +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| 169 | +; |
| 170 | +; GFX11-FAKE16-LABEL: chain_hi_to_lo_arithmatic: |
| 171 | +; GFX11-FAKE16: ; %bb.0: ; %bb |
| 172 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 173 | +; GFX11-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 |
| 174 | +; GFX11-FAKE16-NEXT: scratch_load_d16_hi_b16 v1, v0, off |
| 175 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 176 | +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, v1 |
| 177 | +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
167 | 178 | bb:
|
168 | 179 | %arith_lo = fadd half %in, 1.0
|
169 | 180 | %load_hi = load half, ptr addrspace(5) %base
|
@@ -361,18 +372,31 @@ define <2 x half> @chain_hi_to_lo_flat() {
|
361 | 372 | ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
362 | 373 | ; GFX10-NEXT: s_setpc_b64 s[30:31]
|
363 | 374 | ;
|
364 |
| -; GFX11-LABEL: chain_hi_to_lo_flat: |
365 |
| -; GFX11: ; %bb.0: ; %bb |
366 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
367 |
| -; GFX11-NEXT: v_mov_b32_e32 v0, 2 |
368 |
| -; GFX11-NEXT: v_mov_b32_e32 v1, 0 |
369 |
| -; GFX11-NEXT: flat_load_u16 v0, v[0:1] |
370 |
| -; GFX11-NEXT: v_mov_b32_e32 v1, 0 |
371 |
| -; GFX11-NEXT: v_mov_b32_e32 v2, 0 |
372 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
373 |
| -; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[1:2] |
374 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
375 |
| -; GFX11-NEXT: s_setpc_b64 s[30:31] |
| 375 | +; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat: |
| 376 | +; GFX11-TRUE16: ; %bb.0: ; %bb |
| 377 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 378 | +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 2 |
| 379 | +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 |
| 380 | +; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] |
| 381 | +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 |
| 382 | +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0 |
| 383 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 384 | +; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[1:2] |
| 385 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 386 | +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| 387 | +; |
| 388 | +; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat: |
| 389 | +; GFX11-FAKE16: ; %bb.0: ; %bb |
| 390 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 391 | +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 2 |
| 392 | +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
| 393 | +; GFX11-FAKE16-NEXT: flat_load_u16 v0, v[0:1] |
| 394 | +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
| 395 | +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0 |
| 396 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 397 | +; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[1:2] |
| 398 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 399 | +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
376 | 400 | bb:
|
377 | 401 | %gep_lo = getelementptr inbounds half, ptr null, i64 1
|
378 | 402 | %load_lo = load half, ptr %gep_lo
|
@@ -403,14 +427,23 @@ define <2 x half> @chain_hi_to_lo_flat_different_bases(ptr %base_lo, ptr %base_h
|
403 | 427 | ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
404 | 428 | ; GFX10-NEXT: s_setpc_b64 s[30:31]
|
405 | 429 | ;
|
406 |
| -; GFX11-LABEL: chain_hi_to_lo_flat_different_bases: |
407 |
| -; GFX11: ; %bb.0: ; %bb |
408 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
409 |
| -; GFX11-NEXT: flat_load_u16 v0, v[0:1] |
410 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
411 |
| -; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[2:3] |
412 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
413 |
| -; GFX11-NEXT: s_setpc_b64 s[30:31] |
| 430 | +; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat_different_bases: |
| 431 | +; GFX11-TRUE16: ; %bb.0: ; %bb |
| 432 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 433 | +; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] |
| 434 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 435 | +; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[2:3] |
| 436 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 437 | +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| 438 | +; |
| 439 | +; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat_different_bases: |
| 440 | +; GFX11-FAKE16: ; %bb.0: ; %bb |
| 441 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 442 | +; GFX11-FAKE16-NEXT: flat_load_u16 v0, v[0:1] |
| 443 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 444 | +; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[2:3] |
| 445 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 446 | +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
414 | 447 | bb:
|
415 | 448 | %load_lo = load half, ptr %base_lo
|
416 | 449 | %load_hi = load half, ptr %base_hi
|
@@ -864,17 +897,31 @@ define <2 x i16> @chain_hi_to_lo_flat_other_dep(ptr addrspace(0) %ptr) {
|
864 | 897 | ; GFX10-NEXT: v_bfi_b32 v0, 0xffff, v2, v0
|
865 | 898 | ; GFX10-NEXT: s_setpc_b64 s[30:31]
|
866 | 899 | ;
|
867 |
| -; GFX11-LABEL: chain_hi_to_lo_flat_other_dep: |
868 |
| -; GFX11: ; %bb.0: ; %bb |
869 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
870 |
| -; GFX11-NEXT: flat_load_u16 v2, v[0:1] offset:2 glc dlc |
871 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) |
872 |
| -; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc |
873 |
| -; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
874 |
| -; GFX11-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0] |
875 |
| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
876 |
| -; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 |
877 |
| -; GFX11-NEXT: s_setpc_b64 s[30:31] |
| 900 | +; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat_other_dep: |
| 901 | +; GFX11-TRUE16: ; %bb.0: ; %bb |
| 902 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 903 | +; GFX11-TRUE16-NEXT: flat_load_d16_b16 v2, v[0:1] offset:2 glc dlc |
| 904 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 905 | +; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc |
| 906 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
| 907 | +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l |
| 908 | +; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 909 | +; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0] |
| 910 | +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 911 | +; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v1, v0 |
| 912 | +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| 913 | +; |
| 914 | +; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat_other_dep: |
| 915 | +; GFX11-FAKE16: ; %bb.0: ; %bb |
| 916 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 917 | +; GFX11-FAKE16-NEXT: flat_load_u16 v2, v[0:1] offset:2 glc dlc |
| 918 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 919 | +; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc |
| 920 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 921 | +; GFX11-FAKE16-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0] |
| 922 | +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 923 | +; GFX11-FAKE16-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 |
| 924 | +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
878 | 925 | bb:
|
879 | 926 | %gep_lo = getelementptr inbounds i16, ptr addrspace(0) %ptr, i64 1
|
880 | 927 | %load_lo = load volatile i16, ptr addrspace(0) %gep_lo
|
|
0 commit comments