-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[NFC][AMDGPU] Update tests to use autogened CHECKs #140311
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This is in preparation for updates relevant to enabling CopyConstrain DAG mutation in the default maxOccupancy scheduler. Change-Id: I4c6928197c7cfc8d1d51db90d111c3ffc8e09e32
@llvm/pr-subscribers-backend-amdgpu Author: Chinmay Deshpande (chinmaydd) ChangesThis is in preparation for updates relevant to enabling CopyConstrain DAG mutation in the default maxOccupancy scheduler. Change-Id: I4c6928197c7cfc8d1d51db90d111c3ffc8e09e32 Patch is 626.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140311.diff 9 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll b/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll
index 2c2855c860ebb..81a646f8b7055 100644
--- a/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll
+++ b/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,FUNC,GFX7 %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,FUNC,GFX8 %s
@@ -10,21 +11,70 @@
; Instructions with B32, U32, and I32 in their name take 32-bit operands, while
; instructions with B64, U64, and I64 take 64-bit operands.
-; FUNC-LABEL: {{^}}local_address_load:
-; SI: v_mov_b32_e{{32|64}} [[PTR:v[0-9]]]
-; SI: ds_read_b32 v{{[0-9]+}}, [[PTR]]
define amdgpu_kernel void @local_address_load(ptr addrspace(1) %out, ptr addrspace(3) %in) {
+; GFX7-LABEL: local_address_load:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_load_dword s2, s[4:5], 0xb
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: ds_read_b32 v0, v0
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_load:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: ds_read_b32 v0, v0
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = load i32, ptr addrspace(3) %in
store i32 %0, ptr addrspace(1) %out
ret void
}
-; FUNC-LABEL: {{^}}local_address_gep:
-; SI: s_add_i32 [[SPTR:s[0-9]]]
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_read_b32 [[VPTR]]
define amdgpu_kernel void @local_address_gep(ptr addrspace(1) %out, ptr addrspace(3) %in, i32 %offset) {
+; GFX7-LABEL: local_address_gep:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_lshl_b32 s3, s3, 2
+; GFX7-NEXT: s_add_i32 s2, s2, s3
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: ds_read_b32 v0, v0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_gep:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_lshl_b32 s3, s3, 2
+; GFX8-NEXT: s_add_i32 s2, s2, s3
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: ds_read_b32 v0, v0
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = getelementptr i32, ptr addrspace(3) %in, i32 %offset
%1 = load i32, ptr addrspace(3) %0
@@ -32,10 +82,34 @@ entry:
ret void
}
-; FUNC-LABEL: {{^}}local_address_gep_const_offset:
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
-; SI: ds_read_b32 v{{[0-9]+}}, [[VPTR]] offset:4
define amdgpu_kernel void @local_address_gep_const_offset(ptr addrspace(1) %out, ptr addrspace(3) %in) {
+; GFX7-LABEL: local_address_gep_const_offset:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_load_dword s2, s[4:5], 0xb
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: ds_read_b32 v0, v0 offset:4
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_gep_const_offset:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: ds_read_b32 v0, v0 offset:4
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = getelementptr i32, ptr addrspace(3) %in, i32 1
%1 = load i32, ptr addrspace(3) %0
@@ -44,11 +118,36 @@ entry:
}
; Offset too large, can't fold into 16-bit immediate offset.
-; FUNC-LABEL: {{^}}local_address_gep_large_const_offset:
-; SI: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_read_b32 [[VPTR]]
define amdgpu_kernel void @local_address_gep_large_const_offset(ptr addrspace(1) %out, ptr addrspace(3) %in) {
+; GFX7-LABEL: local_address_gep_large_const_offset:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_load_dword s2, s[4:5], 0xb
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_add_i32 s2, s2, 0x10004
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: ds_read_b32 v0, v0
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_gep_large_const_offset:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_add_i32 s2, s2, 0x10004
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: ds_read_b32 v0, v0
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = getelementptr i32, ptr addrspace(3) %in, i32 16385
%1 = load i32, ptr addrspace(3) %0
@@ -56,24 +155,71 @@ entry:
ret void
}
-; FUNC-LABEL: {{^}}null_32bit_lds_ptr:
-; GFX7 v_cmp_ne_u32
-; GFX7: s_cselect_b32
-; GFX8: s_cmp_lg_u32
-; GFX8-NOT: v_cmp_ne_u32
-; GFX8: s_cselect_b32
define amdgpu_kernel void @null_32bit_lds_ptr(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
+; GFX7-LABEL: null_32bit_lds_ptr:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dword s6, s[4:5], 0xb
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: s_movk_i32 s4, 0x7b
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_cmp_lg_u32 s6, 0
+; GFX7-NEXT: s_cselect_b32 s4, s4, 0x1c8
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: null_32bit_lds_ptr:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_movk_i32 s4, 0x7b
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_cmp_lg_u32 s6, 0
+; GFX8-NOT: v_cmp_ne_u32
+; GFX8-NEXT: s_cselect_b32 s4, s4, 0x1c8
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
%cmp = icmp ne ptr addrspace(3) %lds, null
%x = select i1 %cmp, i32 123, i32 456
store i32 %x, ptr addrspace(1) %out
ret void
}
-; FUNC-LABEL: {{^}}mul_32bit_ptr:
-; SI: s_mul_i32
-; SI-NEXT: s_add_i32
-; SI: ds_read_b32
define amdgpu_kernel void @mul_32bit_ptr(ptr addrspace(1) %out, ptr addrspace(3) %lds, i32 %tid) {
+; GFX7-LABEL: mul_32bit_ptr:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_mul_i32 s3, s3, 12
+; GFX7-NEXT: s_add_i32 s2, s2, s3
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: ds_read_b32 v0, v0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: mul_32bit_ptr:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_mul_i32 s3, s3, 12
+; GFX8-NEXT: s_add_i32 s2, s2, s3
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: ds_read_b32 v0, v0
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
%ptr = getelementptr [3 x float], ptr addrspace(3) %lds, i32 %tid, i32 0
%val = load float, ptr addrspace(3) %ptr
store float %val, ptr addrspace(1) %out
@@ -82,60 +228,156 @@ define amdgpu_kernel void @mul_32bit_ptr(ptr addrspace(1) %out, ptr addrspace(3)
@g_lds = addrspace(3) global float poison, align 4
-; FUNC-LABEL: {{^}}infer_ptr_alignment_global_offset:
-; SI: v_mov_b32_e32 [[PTR:v[0-9]+]], 0{{$}}
-; SI: ds_read_b32 v{{[0-9]+}}, [[PTR]]
define amdgpu_kernel void @infer_ptr_alignment_global_offset(ptr addrspace(1) %out, i32 %tid) {
+; GFX7-LABEL: infer_ptr_alignment_global_offset:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_mov_b32_e32 v0, 0
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: ds_read_b32 v0, v0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: infer_ptr_alignment_global_offset:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_mov_b32_e32 v0, 0
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: ds_read_b32 v0, v0
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
%val = load float, ptr addrspace(3) @g_lds
store float %val, ptr addrspace(1) %out
ret void
}
-
@ptr = addrspace(3) global ptr addrspace(3) poison
@dst = addrspace(3) global [16383 x i32] poison
-; FUNC-LABEL: {{^}}global_ptr:
-; SI: ds_write_b32
define amdgpu_kernel void @global_ptr() nounwind {
+; SI-LABEL: global_ptr:
+; SI: ; %bb.0:
+; SI-NEXT: v_mov_b32_e32 v0, 64
+; SI-NEXT: v_mov_b32_e32 v1, 0
+; SI-NEXT: s_mov_b32 m0, -1
+; SI-NEXT: ds_write_b32 v1, v0 offset:65532
+; SI-NEXT: s_endpgm
store ptr addrspace(3) getelementptr ([16383 x i32], ptr addrspace(3) @dst, i32 0, i32 16), ptr addrspace(3) @ptr
ret void
}
-; FUNC-LABEL: {{^}}local_address_store:
-; SI: ds_write_b32
define amdgpu_kernel void @local_address_store(ptr addrspace(3) %out, i32 %val) {
+; GFX7-LABEL: local_address_store:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: v_mov_b32_e32 v1, s1
+; GFX7-NEXT: ds_write_b32 v0, v1
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_store:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: ds_write_b32 v0, v1
+; GFX8-NEXT: s_endpgm
store i32 %val, ptr addrspace(3) %out
ret void
}
-; FUNC-LABEL: {{^}}local_address_gep_store:
-; SI: s_add_i32 [[SADDR:s[0-9]+]],
-; SI: v_mov_b32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
-; SI: ds_write_b32 [[ADDR]], v{{[0-9]+}}
define amdgpu_kernel void @local_address_gep_store(ptr addrspace(3) %out, i32, i32 %val, i32 %offset) {
+; GFX7-LABEL: local_address_gep_store:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
+; GFX7-NEXT: s_load_dword s2, s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_lshl_b32 s1, s1, 2
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: s_add_i32 s0, s2, s1
+; GFX7-NEXT: v_mov_b32_e32 v1, s0
+; GFX7-NEXT: ds_write_b32 v1, v0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_gep_store:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; GFX8-NEXT: s_load_dword s2, s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_lshl_b32 s1, s1, 2
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: s_add_i32 s0, s2, s1
+; GFX8-NEXT: v_mov_b32_e32 v1, s0
+; GFX8-NEXT: ds_write_b32 v1, v0
+; GFX8-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(3) %out, i32 %offset
store i32 %val, ptr addrspace(3) %gep, align 4
ret void
}
-; FUNC-LABEL: {{^}}local_address_gep_const_offset_store:
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
-; SI: v_mov_b32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
-; SI: ds_write_b32 [[VPTR]], [[VAL]] offset:4
define amdgpu_kernel void @local_address_gep_const_offset_store(ptr addrspace(3) %out, i32 %val) {
+; GFX7-LABEL: local_address_gep_const_offset_store:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: v_mov_b32_e32 v1, s1
+; GFX7-NEXT: ds_write_b32 v0, v1 offset:4
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_gep_const_offset_store:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: ds_write_b32 v0, v1 offset:4
+; GFX8-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(3) %out, i32 1
store i32 %val, ptr addrspace(3) %gep, align 4
ret void
}
; Offset too large, can't fold into 16-bit immediate offset.
-; FUNC-LABEL: {{^}}local_address_gep_large_const_offset_store:
-; SI: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_write_b32 [[VPTR]], v{{[0-9]+$}}
define amdgpu_kernel void @local_address_gep_large_const_offset_store(ptr addrspace(3) %out, i32 %val) {
+; GFX7-LABEL: local_address_gep_large_const_offset_store:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_add_i32 s0, s0, 0x10004
+; GFX7-NEXT: v_mov_b32_e32 v0, s1
+; GFX7-NEXT: v_mov_b32_e32 v1, s0
+; GFX7-NEXT: ds_write_b32 v1, v0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_gep_large_const_offset_store:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_add_i32 s0, s0, 0x10004
+; GFX8-NEXT: v_mov_b32_e32 v0, s1
+; GFX8-NEXT: v_mov_b32_e32 v1, s0
+; GFX8-NEXT: ds_write_b32 v1, v0
+; GFX8-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(3) %out, i32 16385
store i32 %val, ptr addrspace(3) %gep, align 4
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; FUNC: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
index f4d8ec180cf91..f27ec1e38f942 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
@@ -1,11 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; GCN-LABEL: {{^}}select_and1:
-; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
-; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
-; GCN-NOT: v_and_b32
-; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and1(ptr addrspace(1) %p, i32 %x, i32 %y) {
+; GCN-LABEL: select_and1:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_gt_i32 s2, 10
+; GCN-NEXT: s_cselect_b32 s2, s3, 0
+; GCN-NEXT: v_mov_b32_e32 v1, s2
+; GCN-NOT: v_and_b32
+; GCN-NEXT: global_store_dword v0, v1, s[0:1]
+; GCN-NEXT: s_endpgm
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 0, i32 -1
%a = and i32 %y, %s
@@ -13,12 +20,18 @@ define amdgpu_kernel void @select_and1(ptr addrspace(1) %p, i32 %x, i32 %y) {
ret void
}
-; GCN-LABEL: {{^}}select_and2:
-; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
-; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
-; GCN-NOT: v_and_b32
-; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and2(ptr addrspace(1) %p, i32 %x, i32 %y) {
+; GCN-LABEL: select_and2:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_gt_i32 s2, 10
+; GCN-NEXT: s_cselect_b32 s2, s3, 0
+; GCN-NEXT: v_mov_b32_e32 v1, s2
+; GCN-NOT: v_and_b32
+; GCN-NEXT: global_store_dword v0, v1, s[0:1]
+; GCN-NEXT: s_endpgm
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 0, i32 -1
%a = and i32 %s, %y
@@ -26,12 +39,18 @@ define amdgpu_kernel void @select_and2(ptr addrspace(1) %p, i32 %x, i32 %y) {
ret void
}
-; GCN-LABEL: {{^}}select_and3:
-; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
-; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
-; GCN-NOT: v_and_b32
-; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and3(ptr addrspace(1) %p, i32 %x, i32 %y) {
+; GCN-LABEL: select_and3:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_lt_i32 s2, 11
+; GCN-NEXT: s_cselect_b32 s2, s3, 0
+; GCN-NEXT: v_mov_b32_e32 v1, s2
+; GCN-NOT: v_and_b32
+; GCN-NEXT: global_store_dword v0, v1, s[0:1]
+; GCN-NEXT: s_endpgm
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 -1, i32 0
%a = and i32 %y, %s
@@ -39,18 +58,26 @@ define amdgpu_kernel void @select_and3(ptr addrspace(1) %p, i32 %x, i32 %y) {
ret void
}
-; GCN-LABEL: {{^}}select_and_v4:
-; GCN: s_cselect_b32 s[[SEL0:[0-9]+]], s{{[0-9]+}}, 0
-; GCN: s_cselect_b32 s[[SEL1:[0-9]+]], s{{[0-9]+}}, 0
-; GCN: s_cselect_b32 s[[SEL2:[0-9]+]], s{{[0-9]+}}, 0
-; GCN: s_cselect_b32 s[[SEL3:[0-9]+]], s{{[0-9]+}}, 0
-; GCN: v_mov_b32_e32 v[[V0:[0-9]+]], s[[SEL3]]
-; GCN: v_mov_b32_e32 v[[V1:[0-9]+]], s[[SEL2]]
-; GCN: v_mov_b32_e32 v[[V2:[0-9]+]], s[[SEL1]]
-; GCN: v_mov_b32_e32 v[[V3:[0-9]+]], s[[SEL0]]
-; GCN-NOT: v_and_b32
-; GCN: global_store_dwordx4 v{{[0-9]+}}, v[[[V0]]:[[V3]]]
define amdgpu_kernel void @select_and_v4(ptr addrspace(1) %p, i32 %x, <4 x i32> %y) {
+; GCN-LABEL: select_and_v4:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dword s8, s[4:5], 0x2c
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v4, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_gt_i32 s8, 10
+; GCN-NEXT: s_cselect_b32 s3, s3, 0
+; GCN-NEXT: s_cselect_b32 s2, s2, 0
+; GCN-NEXT: s_cselect_b32 s1, s1, 0
+; GCN-NEXT: s_csel...
[truncated]
|
Change-Id: I11e96fac101599c84000b6e66972bc402da9fee5
Change-Id: I459e7906d74d015dad32d2fd6e029cc5b94271ba
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/17901 Here is the relevant piece of the build log for the reference
|
This is in preparation for updates relevant to enabling CopyConstrain DAG mutation in the default maxOccupancy scheduler.