|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 |
1 | 2 | ; RUN: llc -mtriple amdgcn-amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
2 | 3 |
|
3 | 4 | declare i32 @llvm.amdgcn.workitem.id.x()
|
4 | 5 | declare i32 @llvm.amdgcn.readfirstlane(i32)
|
5 | 6 |
|
6 |
| -; GCN-LABEL: readfirstlane_uniform |
7 |
| -; GCN: s_load_dwordx4 s[[[IN_ADDR:[0-9]+]]:3], s[4:5], 0x0 |
8 |
| -; GCN: v_readfirstlane_b32 s[[SCALAR:[0-9]+]], v0 |
9 |
| -; GCN: s_add_u32 s[[LOAD_ADDR:[0-9]+]], s[[IN_ADDR]], s[[SCALAR]] |
10 |
| -; GCN: s_load_dword s{{[0-9]+}}, s[[[LOAD_ADDR]] |
11 |
| - |
12 | 7 | define amdgpu_kernel void @readfirstlane_uniform(ptr addrspace(1) noalias nocapture readonly, ptr addrspace(1) noalias nocapture readonly) {
|
| 8 | +; GCN-LABEL: readfirstlane_uniform: |
| 9 | +; GCN: ; %bb.0: |
| 10 | +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 |
| 11 | +; GCN-NEXT: v_readfirstlane_b32 s4, v0 |
| 12 | +; GCN-NEXT: s_mov_b32 s5, 0 |
| 13 | +; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 2 |
| 14 | +; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| 15 | +; GCN-NEXT: s_add_u32 s0, s0, s4 |
| 16 | +; GCN-NEXT: s_addc_u32 s1, s1, s5 |
| 17 | +; GCN-NEXT: s_load_dword s4, s[0:1], 0x0 |
| 18 | +; GCN-NEXT: s_add_u32 s0, s2, 40 |
| 19 | +; GCN-NEXT: s_addc_u32 s1, s3, 0 |
| 20 | +; GCN-NEXT: v_mov_b32_e32 v0, s0 |
| 21 | +; GCN-NEXT: v_mov_b32_e32 v1, s1 |
| 22 | +; GCN-NEXT: s_waitcnt lgkmcnt(0) |
| 23 | +; GCN-NEXT: v_mov_b32_e32 v2, s4 |
| 24 | +; GCN-NEXT: flat_store_dword v[0:1], v2 |
| 25 | +; GCN-NEXT: s_endpgm |
13 | 26 | %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
|
14 | 27 | %scalar = tail call i32 @llvm.amdgcn.readfirstlane(i32 %tid)
|
15 | 28 | %idx = zext i32 %scalar to i64
|
|
0 commit comments