|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s |
| 2 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s -check-prefix=GFX9 |
| 3 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck %s -check-prefix=GFX12 |
3 | 4 |
|
4 | 5 | ; We have an indirect call with a known set of callees, which are
|
5 | 6 | ; known to not need any special inputs. The ABI still needs to use the
|
|
8 | 9 | ; FIXME: Passing real values for workitem ID, and 0s that can be undef
|
9 | 10 |
|
10 | 11 | define amdgpu_kernel void @indirect_call_known_no_special_inputs() {
|
11 |
| -; CHECK-LABEL: indirect_call_known_no_special_inputs: |
12 |
| -; CHECK: ; %bb.0: ; %bb |
13 |
| -; CHECK-NEXT: s_add_u32 flat_scratch_lo, s4, s7 |
14 |
| -; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s5, 0 |
15 |
| -; CHECK-NEXT: s_add_u32 s0, s0, s7 |
16 |
| -; CHECK-NEXT: s_addc_u32 s1, s1, 0 |
17 |
| -; CHECK-NEXT: s_mov_b64 s[4:5], 0 |
18 |
| -; CHECK-NEXT: s_load_dword s7, s[4:5], 0x0 |
19 |
| -; CHECK-NEXT: s_getpc_b64 s[4:5] |
20 |
| -; CHECK-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 |
21 |
| -; CHECK-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12 |
22 |
| -; CHECK-NEXT: s_getpc_b64 s[8:9] |
23 |
| -; CHECK-NEXT: s_add_u32 s8, s8, snork@gotpcrel32@lo+4 |
24 |
| -; CHECK-NEXT: s_addc_u32 s9, s9, snork@gotpcrel32@hi+12 |
25 |
| -; CHECK-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x0 |
26 |
| -; CHECK-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x0 |
27 |
| -; CHECK-NEXT: s_mov_b64 s[8:9], 0 |
28 |
| -; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
29 |
| -; CHECK-NEXT: s_and_b32 s4, 1, s7 |
30 |
| -; CHECK-NEXT: s_cmp_eq_u32 s4, 1 |
31 |
| -; CHECK-NEXT: v_mov_b32_e32 v31, v0 |
32 |
| -; CHECK-NEXT: s_cselect_b32 s5, s13, s11 |
33 |
| -; CHECK-NEXT: s_cselect_b32 s4, s12, s10 |
34 |
| -; CHECK-NEXT: s_mov_b32 s12, s6 |
35 |
| -; CHECK-NEXT: v_mov_b32_e32 v1, 0 |
36 |
| -; CHECK-NEXT: v_mov_b32_e32 v4, 0 |
37 |
| -; CHECK-NEXT: s_mov_b32 s32, 0 |
38 |
| -; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] |
39 |
| -; CHECK-NEXT: s_endpgm |
| 12 | +; GFX9-LABEL: indirect_call_known_no_special_inputs: |
| 13 | +; GFX9: ; %bb.0: ; %bb |
| 14 | +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7 |
| 15 | +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0 |
| 16 | +; GFX9-NEXT: s_add_u32 s0, s0, s7 |
| 17 | +; GFX9-NEXT: s_addc_u32 s1, s1, 0 |
| 18 | +; GFX9-NEXT: s_mov_b64 s[4:5], 0 |
| 19 | +; GFX9-NEXT: s_load_dword s7, s[4:5], 0x0 |
| 20 | +; GFX9-NEXT: s_getpc_b64 s[4:5] |
| 21 | +; GFX9-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 |
| 22 | +; GFX9-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12 |
| 23 | +; GFX9-NEXT: s_getpc_b64 s[8:9] |
| 24 | +; GFX9-NEXT: s_add_u32 s8, s8, snork@gotpcrel32@lo+4 |
| 25 | +; GFX9-NEXT: s_addc_u32 s9, s9, snork@gotpcrel32@hi+12 |
| 26 | +; GFX9-NEXT: s_load_dwordx2 s[10:11], s[8:9], 0x0 |
| 27 | +; GFX9-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x0 |
| 28 | +; GFX9-NEXT: s_mov_b64 s[8:9], 0 |
| 29 | +; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| 30 | +; GFX9-NEXT: s_and_b32 s4, 1, s7 |
| 31 | +; GFX9-NEXT: s_cmp_eq_u32 s4, 1 |
| 32 | +; GFX9-NEXT: v_mov_b32_e32 v31, v0 |
| 33 | +; GFX9-NEXT: s_cselect_b32 s5, s13, s11 |
| 34 | +; GFX9-NEXT: s_cselect_b32 s4, s12, s10 |
| 35 | +; GFX9-NEXT: s_mov_b32 s12, s6 |
| 36 | +; GFX9-NEXT: v_mov_b32_e32 v1, 0 |
| 37 | +; GFX9-NEXT: v_mov_b32_e32 v4, 0 |
| 38 | +; GFX9-NEXT: s_mov_b32 s32, 0 |
| 39 | +; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] |
| 40 | +; GFX9-NEXT: s_endpgm |
| 41 | +; |
| 42 | +; GFX12-LABEL: indirect_call_known_no_special_inputs: |
| 43 | +; GFX12: ; %bb.0: ; %bb |
| 44 | +; GFX12-NEXT: s_getpc_b64 s[4:5] |
| 45 | +; GFX12-NEXT: s_sext_i32_i16 s5, s5 |
| 46 | +; GFX12-NEXT: s_add_co_u32 s4, s4, snork@gotpcrel32@lo+8 |
| 47 | +; GFX12-NEXT: s_add_co_ci_u32 s5, s5, snork@gotpcrel32@hi+16 |
| 48 | +; GFX12-NEXT: s_mov_b64 s[2:3], 0 |
| 49 | +; GFX12-NEXT: s_getpc_b64 s[6:7] |
| 50 | +; GFX12-NEXT: s_sext_i32_i16 s7, s7 |
| 51 | +; GFX12-NEXT: s_add_co_u32 s6, s6, wobble@gotpcrel32@lo+8 |
| 52 | +; GFX12-NEXT: s_add_co_ci_u32 s7, s7, wobble@gotpcrel32@hi+16 |
| 53 | +; GFX12-NEXT: s_load_u8 s1, s[2:3], 0x0 |
| 54 | +; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 |
| 55 | +; GFX12-NEXT: s_load_b64 s[4:5], s[6:7], 0x0 |
| 56 | +; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 0 |
| 57 | +; GFX12-NEXT: v_mov_b32_e32 v31, v0 |
| 58 | +; GFX12-NEXT: s_mov_b64 s[8:9], 0 |
| 59 | +; GFX12-NEXT: s_mov_b32 s12, s0 |
| 60 | +; GFX12-NEXT: s_mov_b32 s32, 0 |
| 61 | +; GFX12-NEXT: s_wait_kmcnt 0x0 |
| 62 | +; GFX12-NEXT: s_and_b32 s1, 1, s1 |
| 63 | +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) |
| 64 | +; GFX12-NEXT: s_cmp_eq_u32 s1, 1 |
| 65 | +; GFX12-NEXT: s_cselect_b32 s3, s5, s3 |
| 66 | +; GFX12-NEXT: s_cselect_b32 s2, s4, s2 |
| 67 | +; GFX12-NEXT: s_swappc_b64 s[30:31], s[2:3] |
| 68 | +; GFX12-NEXT: s_endpgm |
40 | 69 |
|
41 | 70 | bb:
|
42 | 71 | %cond = load i1, ptr addrspace(4) null
|
|
46 | 75 | }
|
47 | 76 |
|
48 | 77 | define void @wobble() {
|
49 |
| -; CHECK-LABEL: wobble: |
50 |
| -; CHECK: ; %bb.0: ; %bb |
51 |
| -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
52 |
| -; CHECK-NEXT: s_setpc_b64 s[30:31] |
| 78 | +; GFX9-LABEL: wobble: |
| 79 | +; GFX9: ; %bb.0: ; %bb |
| 80 | +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 81 | +; GFX9-NEXT: s_setpc_b64 s[30:31] |
| 82 | +; |
| 83 | +; GFX12-LABEL: wobble: |
| 84 | +; GFX12: ; %bb.0: ; %bb |
| 85 | +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 86 | +; GFX12-NEXT: s_wait_expcnt 0x0 |
| 87 | +; GFX12-NEXT: s_wait_samplecnt 0x0 |
| 88 | +; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| 89 | +; GFX12-NEXT: s_wait_kmcnt 0x0 |
| 90 | +; GFX12-NEXT: s_setpc_b64 s[30:31] |
53 | 91 | bb:
|
54 | 92 | ret void
|
55 | 93 | }
|
56 | 94 |
|
57 | 95 | define void @snork() {
|
58 |
| -; CHECK-LABEL: snork: |
59 |
| -; CHECK: ; %bb.0: ; %bb |
60 |
| -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
61 |
| -; CHECK-NEXT: s_setpc_b64 s[30:31] |
| 96 | +; GFX9-LABEL: snork: |
| 97 | +; GFX9: ; %bb.0: ; %bb |
| 98 | +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 99 | +; GFX9-NEXT: s_setpc_b64 s[30:31] |
| 100 | +; |
| 101 | +; GFX12-LABEL: snork: |
| 102 | +; GFX12: ; %bb.0: ; %bb |
| 103 | +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 104 | +; GFX12-NEXT: s_wait_expcnt 0x0 |
| 105 | +; GFX12-NEXT: s_wait_samplecnt 0x0 |
| 106 | +; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| 107 | +; GFX12-NEXT: s_wait_kmcnt 0x0 |
| 108 | +; GFX12-NEXT: s_setpc_b64 s[30:31] |
62 | 109 | bb:
|
63 | 110 | ret void
|
64 | 111 | }
|
0 commit comments