Skip to content

[NFC][AMDGPU] Update tests to use autogened CHECKs #140311

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 17, 2025

Conversation

chinmaydd
Copy link
Contributor

@chinmaydd chinmaydd commented May 16, 2025

This is in preparation for updates relevant to enabling CopyConstrain DAG mutation in the default maxOccupancy scheduler.

This is in preparation for updates relevant to enabling CopyConstrain
DAG mutation in the default maxOccupancy scheduler.

Change-Id: I4c6928197c7cfc8d1d51db90d111c3ffc8e09e32
@llvmbot
Copy link
Member

llvmbot commented May 16, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Chinmay Deshpande (chinmaydd)

Changes

This is in preparation for updates relevant to enabling CopyConstrain DAG mutation in the default maxOccupancy scheduler.

Change-Id: I4c6928197c7cfc8d1d51db90d111c3ffc8e09e32


Patch is 626.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140311.diff

9 Files Affected:

  • (modified) llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll (+286-44)
  • (modified) llvm/test/CodeGen/AMDGPU/dagcombine-select.ll (+331-110)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll (+460-200)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll (+4880-424)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll (+2750-286)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll (+805-250)
  • (modified) llvm/test/CodeGen/AMDGPU/load-global-f32.ll (+702-92)
  • (modified) llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll (+402-128)
  • (modified) llvm/test/CodeGen/AMDGPU/trunc.ll (+422-50)
diff --git a/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll b/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll
index 2c2855c860ebb..81a646f8b7055 100644
--- a/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll
+++ b/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,FUNC,GFX7 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,FUNC,GFX8 %s
 
@@ -10,21 +11,70 @@
 ; Instructions with B32, U32, and I32 in their name take 32-bit operands, while
 ; instructions with B64, U64, and I64 take 64-bit operands.
 
-; FUNC-LABEL: {{^}}local_address_load:
-; SI: v_mov_b32_e{{32|64}} [[PTR:v[0-9]]]
-; SI: ds_read_b32 v{{[0-9]+}}, [[PTR]]
 define amdgpu_kernel void @local_address_load(ptr addrspace(1) %out, ptr addrspace(3) %in) {
+; GFX7-LABEL: local_address_load:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_load_dword s2, s[4:5], 0xb
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT:    s_mov_b32 m0, -1
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    v_mov_b32_e32 v0, s2
+; GFX7-NEXT:    ds_read_b32 v0, v0
+; GFX7-NEXT:    s_mov_b32 s2, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: local_address_load:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT:    s_mov_b32 m0, -1
+; GFX8-NEXT:    s_mov_b32 s3, 0xf000
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    v_mov_b32_e32 v0, s2
+; GFX8-NEXT:    ds_read_b32 v0, v0
+; GFX8-NEXT:    s_mov_b32 s2, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT:    s_endpgm
 entry:
   %0 = load i32, ptr addrspace(3) %in
   store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}local_address_gep:
-; SI: s_add_i32 [[SPTR:s[0-9]]]
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_read_b32 [[VPTR]]
 define amdgpu_kernel void @local_address_gep(ptr addrspace(1) %out, ptr addrspace(3) %in, i32 %offset) {
+; GFX7-LABEL: local_address_gep:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GFX7-NEXT:    s_mov_b32 m0, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_lshl_b32 s3, s3, 2
+; GFX7-NEXT:    s_add_i32 s2, s2, s3
+; GFX7-NEXT:    v_mov_b32_e32 v0, s2
+; GFX7-NEXT:    ds_read_b32 v0, v0
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: local_address_gep:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8-NEXT:    s_mov_b32 m0, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_lshl_b32 s3, s3, 2
+; GFX8-NEXT:    s_add_i32 s2, s2, s3
+; GFX8-NEXT:    v_mov_b32_e32 v0, s2
+; GFX8-NEXT:    ds_read_b32 v0, v0
+; GFX8-NEXT:    s_mov_b32 s3, 0xf000
+; GFX8-NEXT:    s_mov_b32 s2, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT:    s_endpgm
 entry:
   %0 = getelementptr i32, ptr addrspace(3) %in, i32 %offset
   %1 = load i32, ptr addrspace(3) %0
@@ -32,10 +82,34 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: {{^}}local_address_gep_const_offset:
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
-; SI: ds_read_b32 v{{[0-9]+}}, [[VPTR]] offset:4
 define amdgpu_kernel void @local_address_gep_const_offset(ptr addrspace(1) %out, ptr addrspace(3) %in) {
+; GFX7-LABEL: local_address_gep_const_offset:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_load_dword s2, s[4:5], 0xb
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT:    s_mov_b32 m0, -1
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    v_mov_b32_e32 v0, s2
+; GFX7-NEXT:    ds_read_b32 v0, v0 offset:4
+; GFX7-NEXT:    s_mov_b32 s2, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: local_address_gep_const_offset:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT:    s_mov_b32 m0, -1
+; GFX8-NEXT:    s_mov_b32 s3, 0xf000
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    v_mov_b32_e32 v0, s2
+; GFX8-NEXT:    ds_read_b32 v0, v0 offset:4
+; GFX8-NEXT:    s_mov_b32 s2, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT:    s_endpgm
 entry:
   %0 = getelementptr i32, ptr addrspace(3) %in, i32 1
   %1 = load i32, ptr addrspace(3) %0
@@ -44,11 +118,36 @@ entry:
 }
 
 ; Offset too large, can't fold into 16-bit immediate offset.
-; FUNC-LABEL: {{^}}local_address_gep_large_const_offset:
-; SI: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_read_b32 [[VPTR]]
 define amdgpu_kernel void @local_address_gep_large_const_offset(ptr addrspace(1) %out, ptr addrspace(3) %in) {
+; GFX7-LABEL: local_address_gep_large_const_offset:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_load_dword s2, s[4:5], 0xb
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT:    s_mov_b32 m0, -1
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_add_i32 s2, s2, 0x10004
+; GFX7-NEXT:    v_mov_b32_e32 v0, s2
+; GFX7-NEXT:    ds_read_b32 v0, v0
+; GFX7-NEXT:    s_mov_b32 s2, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: local_address_gep_large_const_offset:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT:    s_mov_b32 m0, -1
+; GFX8-NEXT:    s_mov_b32 s3, 0xf000
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_add_i32 s2, s2, 0x10004
+; GFX8-NEXT:    v_mov_b32_e32 v0, s2
+; GFX8-NEXT:    ds_read_b32 v0, v0
+; GFX8-NEXT:    s_mov_b32 s2, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT:    s_endpgm
 entry:
   %0 = getelementptr i32, ptr addrspace(3) %in, i32 16385
   %1 = load i32, ptr addrspace(3) %0
@@ -56,24 +155,71 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: {{^}}null_32bit_lds_ptr:
-; GFX7 v_cmp_ne_u32
-; GFX7: s_cselect_b32
-; GFX8: s_cmp_lg_u32
-; GFX8-NOT: v_cmp_ne_u32
-; GFX8: s_cselect_b32
 define amdgpu_kernel void @null_32bit_lds_ptr(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
+; GFX7-LABEL: null_32bit_lds_ptr:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_load_dword s6, s[4:5], 0xb
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT:    s_movk_i32 s4, 0x7b
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_cmp_lg_u32 s6, 0
+; GFX7-NEXT:    s_cselect_b32 s4, s4, 0x1c8
+; GFX7-NEXT:    v_mov_b32_e32 v0, s4
+; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: null_32bit_lds_ptr:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_load_dword s6, s[4:5], 0x2c
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT:    s_movk_i32 s4, 0x7b
+; GFX8-NEXT:    s_mov_b32 s3, 0xf000
+; GFX8-NEXT:    s_mov_b32 s2, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_cmp_lg_u32 s6, 0
+; GFX8-NOT:     v_cmp_ne_u32
+; GFX8-NEXT:    s_cselect_b32 s4, s4, 0x1c8
+; GFX8-NEXT:    v_mov_b32_e32 v0, s4
+; GFX8-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT:    s_endpgm
   %cmp = icmp ne ptr addrspace(3) %lds, null
   %x = select i1 %cmp, i32 123, i32 456
   store i32 %x, ptr addrspace(1) %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}mul_32bit_ptr:
-; SI: s_mul_i32
-; SI-NEXT: s_add_i32
-; SI: ds_read_b32
 define amdgpu_kernel void @mul_32bit_ptr(ptr addrspace(1) %out, ptr addrspace(3) %lds, i32 %tid) {
+; GFX7-LABEL: mul_32bit_ptr:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GFX7-NEXT:    s_mov_b32 m0, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_mul_i32 s3, s3, 12
+; GFX7-NEXT:    s_add_i32 s2, s2, s3
+; GFX7-NEXT:    v_mov_b32_e32 v0, s2
+; GFX7-NEXT:    ds_read_b32 v0, v0
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: mul_32bit_ptr:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8-NEXT:    s_mov_b32 m0, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_mul_i32 s3, s3, 12
+; GFX8-NEXT:    s_add_i32 s2, s2, s3
+; GFX8-NEXT:    v_mov_b32_e32 v0, s2
+; GFX8-NEXT:    ds_read_b32 v0, v0
+; GFX8-NEXT:    s_mov_b32 s3, 0xf000
+; GFX8-NEXT:    s_mov_b32 s2, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT:    s_endpgm
   %ptr = getelementptr [3 x float], ptr addrspace(3) %lds, i32 %tid, i32 0
   %val = load float, ptr addrspace(3) %ptr
   store float %val, ptr addrspace(1) %out
@@ -82,60 +228,156 @@ define amdgpu_kernel void @mul_32bit_ptr(ptr addrspace(1) %out, ptr addrspace(3)
 
 @g_lds = addrspace(3) global float poison, align 4
 
-; FUNC-LABEL: {{^}}infer_ptr_alignment_global_offset:
-; SI: v_mov_b32_e32 [[PTR:v[0-9]+]], 0{{$}}
-; SI: ds_read_b32 v{{[0-9]+}}, [[PTR]]
 define amdgpu_kernel void @infer_ptr_alignment_global_offset(ptr addrspace(1) %out, i32 %tid) {
+; GFX7-LABEL: infer_ptr_alignment_global_offset:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    v_mov_b32_e32 v0, 0
+; GFX7-NEXT:    s_mov_b32 m0, -1
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT:    ds_read_b32 v0, v0
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: infer_ptr_alignment_global_offset:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_mov_b32_e32 v0, 0
+; GFX8-NEXT:    s_mov_b32 m0, -1
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT:    ds_read_b32 v0, v0
+; GFX8-NEXT:    s_mov_b32 s3, 0xf000
+; GFX8-NEXT:    s_mov_b32 s2, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT:    s_endpgm
   %val = load float, ptr addrspace(3) @g_lds
   store float %val, ptr addrspace(1) %out
   ret void
 }
 
-
 @ptr = addrspace(3) global ptr addrspace(3) poison
 @dst = addrspace(3) global [16383 x i32] poison
 
-; FUNC-LABEL: {{^}}global_ptr:
-; SI: ds_write_b32
 define amdgpu_kernel void @global_ptr() nounwind {
+; SI-LABEL: global_ptr:
+; SI:       ; %bb.0:
+; SI-NEXT:    v_mov_b32_e32 v0, 64
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_mov_b32 m0, -1
+; SI-NEXT:    ds_write_b32 v1, v0 offset:65532
+; SI-NEXT:    s_endpgm
   store ptr addrspace(3) getelementptr ([16383 x i32], ptr addrspace(3) @dst, i32 0, i32 16), ptr addrspace(3) @ptr
   ret void
 }
 
-; FUNC-LABEL: {{^}}local_address_store:
-; SI: ds_write_b32
 define amdgpu_kernel void @local_address_store(ptr addrspace(3) %out, i32 %val) {
+; GFX7-LABEL: local_address_store:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT:    s_mov_b32 m0, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    v_mov_b32_e32 v0, s0
+; GFX7-NEXT:    v_mov_b32_e32 v1, s1
+; GFX7-NEXT:    ds_write_b32 v0, v1
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: local_address_store:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT:    s_mov_b32 m0, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-NEXT:    ds_write_b32 v0, v1
+; GFX8-NEXT:    s_endpgm
   store i32 %val, ptr addrspace(3) %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}local_address_gep_store:
-; SI: s_add_i32 [[SADDR:s[0-9]+]],
-; SI: v_mov_b32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
-; SI: ds_write_b32 [[ADDR]], v{{[0-9]+}}
 define amdgpu_kernel void @local_address_gep_store(ptr addrspace(3) %out, i32, i32 %val, i32 %offset) {
+; GFX7-LABEL: local_address_gep_store:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xb
+; GFX7-NEXT:    s_load_dword s2, s[4:5], 0x9
+; GFX7-NEXT:    s_mov_b32 m0, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_lshl_b32 s1, s1, 2
+; GFX7-NEXT:    v_mov_b32_e32 v0, s0
+; GFX7-NEXT:    s_add_i32 s0, s2, s1
+; GFX7-NEXT:    v_mov_b32_e32 v1, s0
+; GFX7-NEXT:    ds_write_b32 v1, v0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: local_address_gep_store:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; GFX8-NEXT:    s_load_dword s2, s[4:5], 0x24
+; GFX8-NEXT:    s_mov_b32 m0, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_lshl_b32 s1, s1, 2
+; GFX8-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-NEXT:    s_add_i32 s0, s2, s1
+; GFX8-NEXT:    v_mov_b32_e32 v1, s0
+; GFX8-NEXT:    ds_write_b32 v1, v0
+; GFX8-NEXT:    s_endpgm
   %gep = getelementptr i32, ptr addrspace(3) %out, i32 %offset
   store i32 %val, ptr addrspace(3) %gep, align 4
   ret void
 }
 
-; FUNC-LABEL: {{^}}local_address_gep_const_offset_store:
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
-; SI: v_mov_b32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
-; SI: ds_write_b32 [[VPTR]], [[VAL]] offset:4
 define amdgpu_kernel void @local_address_gep_const_offset_store(ptr addrspace(3) %out, i32 %val) {
+; GFX7-LABEL: local_address_gep_const_offset_store:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT:    s_mov_b32 m0, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    v_mov_b32_e32 v0, s0
+; GFX7-NEXT:    v_mov_b32_e32 v1, s1
+; GFX7-NEXT:    ds_write_b32 v0, v1 offset:4
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: local_address_gep_const_offset_store:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT:    s_mov_b32 m0, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-NEXT:    ds_write_b32 v0, v1 offset:4
+; GFX8-NEXT:    s_endpgm
   %gep = getelementptr i32, ptr addrspace(3) %out, i32 1
   store i32 %val, ptr addrspace(3) %gep, align 4
   ret void
 }
 
 ; Offset too large, can't fold into 16-bit immediate offset.
-; FUNC-LABEL: {{^}}local_address_gep_large_const_offset_store:
-; SI: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_write_b32 [[VPTR]], v{{[0-9]+$}}
 define amdgpu_kernel void @local_address_gep_large_const_offset_store(ptr addrspace(3) %out, i32 %val) {
+; GFX7-LABEL: local_address_gep_large_const_offset_store:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT:    s_mov_b32 m0, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_add_i32 s0, s0, 0x10004
+; GFX7-NEXT:    v_mov_b32_e32 v0, s1
+; GFX7-NEXT:    v_mov_b32_e32 v1, s0
+; GFX7-NEXT:    ds_write_b32 v1, v0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: local_address_gep_large_const_offset_store:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT:    s_mov_b32 m0, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_add_i32 s0, s0, 0x10004
+; GFX8-NEXT:    v_mov_b32_e32 v0, s1
+; GFX8-NEXT:    v_mov_b32_e32 v1, s0
+; GFX8-NEXT:    ds_write_b32 v1, v0
+; GFX8-NEXT:    s_endpgm
   %gep = getelementptr i32, ptr addrspace(3) %out, i32 16385
   store i32 %val, ptr addrspace(3) %gep, align 4
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; FUNC: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
index f4d8ec180cf91..f27ec1e38f942 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
@@ -1,11 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
-; GCN-LABEL: {{^}}select_and1:
-; GCN:     s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
-; GCN:     v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
-; GCN-NOT: v_and_b32
-; GCN:     store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @select_and1(ptr addrspace(1) %p, i32 %x, i32 %y) {
+; GCN-LABEL: select_and1:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_cmp_gt_i32 s2, 10
+; GCN-NEXT:    s_cselect_b32 s2, s3, 0
+; GCN-NEXT:    v_mov_b32_e32 v1, s2
+; GCN-NOT:     v_and_b32
+; GCN-NEXT:    global_store_dword v0, v1, s[0:1]
+; GCN-NEXT:    s_endpgm
   %c = icmp slt i32 %x, 11
   %s = select i1 %c, i32 0, i32 -1
   %a = and i32 %y, %s
@@ -13,12 +20,18 @@ define amdgpu_kernel void @select_and1(ptr addrspace(1) %p, i32 %x, i32 %y) {
   ret void
 }
 
-; GCN-LABEL: {{^}}select_and2:
-; GCN:     s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
-; GCN:     v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
-; GCN-NOT: v_and_b32
-; GCN:     store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @select_and2(ptr addrspace(1) %p, i32 %x, i32 %y) {
+; GCN-LABEL: select_and2:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_cmp_gt_i32 s2, 10
+; GCN-NEXT:    s_cselect_b32 s2, s3, 0
+; GCN-NEXT:    v_mov_b32_e32 v1, s2
+; GCN-NOT:     v_and_b32
+; GCN-NEXT:    global_store_dword v0, v1, s[0:1]
+; GCN-NEXT:    s_endpgm
   %c = icmp slt i32 %x, 11
   %s = select i1 %c, i32 0, i32 -1
   %a = and i32 %s, %y
@@ -26,12 +39,18 @@ define amdgpu_kernel void @select_and2(ptr addrspace(1) %p, i32 %x, i32 %y) {
   ret void
 }
 
-; GCN-LABEL: {{^}}select_and3:
-; GCN:     s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
-; GCN:     v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
-; GCN-NOT: v_and_b32
-; GCN:     store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @select_and3(ptr addrspace(1) %p, i32 %x, i32 %y) {
+; GCN-LABEL: select_and3:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_cmp_lt_i32 s2, 11
+; GCN-NEXT:    s_cselect_b32 s2, s3, 0
+; GCN-NEXT:    v_mov_b32_e32 v1, s2
+; GCN-NOT:     v_and_b32
+; GCN-NEXT:    global_store_dword v0, v1, s[0:1]
+; GCN-NEXT:    s_endpgm
   %c = icmp slt i32 %x, 11
   %s = select i1 %c, i32 -1, i32 0
   %a = and i32 %y, %s
@@ -39,18 +58,26 @@ define amdgpu_kernel void @select_and3(ptr addrspace(1) %p, i32 %x, i32 %y) {
   ret void
 }
 
-; GCN-LABEL: {{^}}select_and_v4:
-; GCN:     s_cselect_b32 s[[SEL0:[0-9]+]], s{{[0-9]+}}, 0
-; GCN:     s_cselect_b32 s[[SEL1:[0-9]+]], s{{[0-9]+}}, 0
-; GCN:     s_cselect_b32 s[[SEL2:[0-9]+]], s{{[0-9]+}}, 0
-; GCN:     s_cselect_b32 s[[SEL3:[0-9]+]], s{{[0-9]+}}, 0
-; GCN:     v_mov_b32_e32 v[[V0:[0-9]+]], s[[SEL3]]
-; GCN:     v_mov_b32_e32 v[[V1:[0-9]+]], s[[SEL2]]
-; GCN:     v_mov_b32_e32 v[[V2:[0-9]+]], s[[SEL1]]
-; GCN:     v_mov_b32_e32 v[[V3:[0-9]+]], s[[SEL0]]
-; GCN-NOT: v_and_b32
-; GCN:     global_store_dwordx4 v{{[0-9]+}}, v[[[V0]]:[[V3]]]
 define amdgpu_kernel void @select_and_v4(ptr addrspace(1) %p, i32 %x, <4 x i32> %y) {
+; GCN-LABEL: select_and_v4:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dword s8, s[4:5], 0x2c
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GCN-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v4, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_cmp_gt_i32 s8, 10
+; GCN-NEXT:    s_cselect_b32 s3, s3, 0
+; GCN-NEXT:    s_cselect_b32 s2, s2, 0
+; GCN-NEXT:    s_cselect_b32 s1, s1, 0
+; GCN-NEXT:    s_csel...
[truncated]

chinmaydd added 2 commits May 16, 2025 18:11
Change-Id: I11e96fac101599c84000b6e66972bc402da9fee5
Change-Id: I459e7906d74d015dad32d2fd6e029cc5b94271ba
@chinmaydd chinmaydd merged commit 437195e into llvm:main May 17, 2025
11 checks passed
@chinmaydd chinmaydd deleted the chinmaydd/test-update branch May 17, 2025 00:22
@llvm-ci
Copy link
Collaborator

llvm-ci commented May 17, 2025

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-gcc-ubuntu running on sie-linux-worker3 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/17901

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
...
PASS: lit :: allow-retries.py (90162 of 90171)
PASS: lit :: discovery.py (90163 of 90171)
PASS: lit :: shtest-external-shell-kill.py (90164 of 90171)
PASS: lit :: googletest-timeout.py (90165 of 90171)
PASS: lit :: selecting.py (90166 of 90171)
PASS: lit :: shtest-timeout.py (90167 of 90171)
PASS: lit :: max-time.py (90168 of 90171)
PASS: lit :: shtest-shell.py (90169 of 90171)
PASS: lit :: shtest-define.py (90170 of 90171)
TIMEOUT: AddressSanitizer-x86_64-linux-dynamic :: TestCases/asan_lsan_deadlock.cpp (90171 of 90171)
******************** TEST 'AddressSanitizer-x86_64-linux-dynamic :: TestCases/asan_lsan_deadlock.cpp' FAILED ********************
Exit Code: -9
Timeout: Reached timeout of 900 seconds

Command Output (stderr):
--
/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/./bin/clang  --driver-mode=g++ -fsanitize=address -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer -fno-optimize-sibling-calls -gline-tables-only  -m64  -shared-libasan -O0 /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/asan/TestCases/asan_lsan_deadlock.cpp -o /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Output/asan_lsan_deadlock.cpp.tmp # RUN: at line 4
+ /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/./bin/clang --driver-mode=g++ -fsanitize=address -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer -fno-optimize-sibling-calls -gline-tables-only -m64 -shared-libasan -O0 /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/asan/TestCases/asan_lsan_deadlock.cpp -o /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Output/asan_lsan_deadlock.cpp.tmp
env ASAN_OPTIONS=detect_leaks=1 not  /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Output/asan_lsan_deadlock.cpp.tmp 2>&1 | FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/asan/TestCases/asan_lsan_deadlock.cpp # RUN: at line 5
+ env ASAN_OPTIONS=detect_leaks=1 not /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Output/asan_lsan_deadlock.cpp.tmp
+ FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/asan/TestCases/asan_lsan_deadlock.cpp

--

********************
********************
Timed Out Tests (1):
  AddressSanitizer-x86_64-linux-dynamic :: TestCases/asan_lsan_deadlock.cpp


Testing Time: 1162.21s

Total Discovered Tests: 124711
  Skipped          :     38 (0.03%)
  Unsupported      :   2683 (2.15%)
  Passed           : 121699 (97.58%)
  Expectedly Failed:    290 (0.23%)
  Timed Out        :      1 (0.00%)
FAILED: CMakeFiles/check-all /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/CMakeFiles/check-all 
cd /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build && /usr/bin/python3.8 /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/./bin/llvm-lit --verbose --timeout=900 --param USE_Z3_SOLVER=0 /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/utils/mlgo-utils /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/projects/cross-project-tests /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/tools/lld/test /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/tools/clang/tools/extra/include-cleaner/test /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/tools/clang/tools/extra/test /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/tools/clang/test @/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/lit.tests /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/utils/lit /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/test
ninja: build stopped: subcommand failed.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants