Skip to content

Commit 9a23d26

Browse files
jayfoad authored and
Lukacma committed
[AMDGPU] Restore non-buffer atomic tests lost in llvm#93801 (llvm#94978)
1 parent 2ac3c9a commit 9a23d26

File tree

3 files changed

+178
-0
lines changed

3 files changed

+178
-0
lines changed
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,PREGFX12
2+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,PREGFX12
3+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX12PLUS
4+
5+
declare i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1), i32)
6+
7+
; Returning csub: this kernel carries no attribute group, and the checks
; expect the returning encoding (glc before gfx12, th:TH_ATOMIC_RETURN on
; gfx12) even though %ret is never used.
; GCN-LABEL: {{^}}global_atomic_csub_rtn:
8+
; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} glc
9+
; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
10+
define amdgpu_kernel void @global_atomic_csub_rtn(ptr addrspace(1) %ptr, i32 %data) {
11+
main_body:
12+
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %ptr, i32 %data)
13+
ret void
14+
}
15+
16+
; Same call as the returning variant, but the kernel uses attribute group #0;
; the checks expect one fewer VGPR operand and no glc / th:TH_ATOMIC_RETURN
; modifier in the matched text.
; GCN-LABEL: {{^}}global_atomic_csub_no_rtn:
17+
; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}
18+
; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v1, s[0:1]
19+
define amdgpu_kernel void @global_atomic_csub_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 {
20+
main_body:
21+
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %ptr, i32 %data)
22+
ret void
23+
}
24+
25+
; Returning csub on an address one i32 element past %ptr; the checks expect
; that displacement to appear as offset:4 on the atomic instruction itself.
; GCN-LABEL: {{^}}global_atomic_csub_off4_rtn:
26+
; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 glc
27+
; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v0, v1, s[0:1] offset:4 th:TH_ATOMIC_RETURN
28+
define amdgpu_kernel void @global_atomic_csub_off4_rtn(ptr addrspace(1) %ptr, i32 %data) {
29+
main_body:
30+
%p = getelementptr i32, ptr addrspace(1) %ptr, i64 1
31+
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data)
32+
ret void
33+
}
34+
35+
; Attribute-group-#0 variant of the offset test: address is one i32 element
; past %ptr, and the checks expect offset:4 with one fewer VGPR operand than
; the returning form.
; GCN-LABEL: {{^}}global_atomic_csub_off4_no_rtn:
36+
; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4
37+
; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v1, s[0:1] offset:4
38+
define amdgpu_kernel void @global_atomic_csub_off4_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 {
39+
main_body:
40+
%p = getelementptr i32, ptr addrspace(1) %ptr, i64 1
41+
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data)
42+
ret void
43+
}
44+
45+
; Attribute group used by the *_no_rtn kernels in this file: adds the
; atomic-csub-no-rtn-insts target feature on top of the command-line CPU.
attributes #0 = { "target-features"="+atomic-csub-no-rtn-insts" }
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s -check-prefix=GFX90A
2+
3+
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
4+
declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)
5+
6+
; f32 atomic fadd whose result is returned from the shader; the check expects
; the `off` address form with the glc modifier.
; GFX90A-LABEL: {{^}}global_atomic_add_f32:
7+
; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off glc
8+
define amdgpu_ps float @global_atomic_add_f32(ptr addrspace(1) %ptr, float %data) {
9+
main_body:
10+
%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
11+
ret float %ret
12+
}
13+
14+
; Returning f32 atomic fadd at one float element past %ptr; the check expects
; the displacement as offset:4 on the instruction.
; GFX90A-LABEL: {{^}}global_atomic_add_f32_off4:
15+
; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off offset:4 glc
16+
define amdgpu_ps float @global_atomic_add_f32_off4(ptr addrspace(1) %ptr, float %data) {
17+
main_body:
18+
%p = getelementptr float, ptr addrspace(1) %ptr, i64 1
19+
%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data)
20+
ret float %ret
21+
}
22+
23+
; Returning f32 atomic fadd at one float element before %ptr; the check
; expects a negative immediate, offset:-4, on the instruction.
; GFX90A-LABEL: {{^}}global_atomic_add_f32_offneg4:
24+
; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off offset:-4 glc
25+
define amdgpu_ps float @global_atomic_add_f32_offneg4(ptr addrspace(1) %ptr, float %data) {
26+
main_body:
27+
%p = getelementptr float, ptr addrspace(1) %ptr, i64 -1
28+
%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data)
29+
ret float %ret
30+
}
31+
32+
; <2 x half> atomic fadd whose result is returned from the shader; the check
; expects global_atomic_pk_add_f16 in the `off` address form with glc.
; GFX90A-LABEL: {{^}}global_atomic_pk_add_v2f16:
33+
; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off glc
34+
define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16(ptr addrspace(1) %ptr, <2 x half> %data) {
35+
main_body:
36+
%ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data)
37+
ret <2 x half> %ret
38+
}
39+
40+
; Returning <2 x half> atomic fadd at one <2 x half> element past %ptr; the
; check expects the displacement as offset:4 on the instruction.
; GFX90A-LABEL: {{^}}global_atomic_pk_add_v2f16_off4:
41+
; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:4 glc
42+
define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16_off4(ptr addrspace(1) %ptr, <2 x half> %data) {
43+
main_body:
44+
%p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 1
45+
%ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data)
46+
ret <2 x half> %ret
47+
}
48+
49+
; Returning <2 x half> atomic fadd at one <2 x half> element before %ptr; the
; check expects a negative immediate, offset:-4, on the instruction.
; GFX90A-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4:
50+
; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:-4 glc
51+
define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16_offneg4(ptr addrspace(1) %ptr, <2 x half> %data) {
52+
main_body:
53+
%p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 -1
54+
%ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data)
55+
ret <2 x half> %ret
56+
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -amdgpu-atomic-optimizer-strategy=DPP | FileCheck %s -check-prefix=GCN
2+
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -amdgpu-atomic-optimizer-strategy=DPP | FileCheck %s -check-prefix=GCN
3+
4+
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
5+
declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)
6+
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr, float)
7+
8+
; f32 atomic fadd in a kernel that discards the result; the check expects the
; two-VGPR form addressed through an SGPR pair.
; GCN-LABEL: {{^}}global_atomic_add_f32:
9+
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}
10+
define amdgpu_kernel void @global_atomic_add_f32(ptr addrspace(1) %ptr, float %data) {
11+
main_body:
12+
%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
13+
ret void
14+
}
15+
16+
; Result-unused f32 atomic fadd at one float element past %ptr; the check
; expects the displacement as offset:4 on the instruction.
; GCN-LABEL: {{^}}global_atomic_add_f32_off4:
17+
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4
18+
define amdgpu_kernel void @global_atomic_add_f32_off4(ptr addrspace(1) %ptr, float %data) {
19+
main_body:
20+
%p = getelementptr float, ptr addrspace(1) %ptr, i64 1
21+
%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data)
22+
ret void
23+
}
24+
25+
; Result-unused f32 atomic fadd at one float element before %ptr; the check
; expects a negative immediate, offset:-4, on the instruction.
; GCN-LABEL: {{^}}global_atomic_add_f32_offneg4:
26+
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4
27+
define amdgpu_kernel void @global_atomic_add_f32_offneg4(ptr addrspace(1) %ptr, float %data) {
28+
main_body:
29+
%p = getelementptr float, ptr addrspace(1) %ptr, i64 -1
30+
%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data)
31+
ret void
32+
}
33+
34+
; Result-unused <2 x half> atomic fadd; the trailing `$` inside the SGPR-pair
; pattern anchors the match at end of line, so no modifier may follow.
; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16:
35+
; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
36+
define amdgpu_kernel void @global_atomic_pk_add_v2f16(ptr addrspace(1) %ptr, <2 x half> %data) {
37+
main_body:
38+
%ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data)
39+
ret void
40+
}
41+
42+
; Result-unused <2 x half> atomic fadd at one <2 x half> element past %ptr;
; the check expects the displacement as offset:4 on the instruction.
; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_off4:
43+
; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4
44+
define amdgpu_kernel void @global_atomic_pk_add_v2f16_off4(ptr addrspace(1) %ptr, <2 x half> %data) {
45+
main_body:
46+
%p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 1
47+
%ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data)
48+
ret void
49+
}
50+
51+
; Result-unused <2 x half> atomic fadd at one <2 x half> element before %ptr;
; the check expects offset:-4 and, via the end-of-line anchor, nothing after it.
; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4:
52+
; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4{{$}}
53+
define amdgpu_kernel void @global_atomic_pk_add_v2f16_offneg4(ptr addrspace(1) %ptr, <2 x half> %data) {
54+
main_body:
55+
%p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 -1
56+
%ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data)
57+
ret void
58+
}
59+
60+
; Make sure this still selects when the function is given an artificially
61+
; incorrect subtarget (gfx803 via #0), as long as the feature is set.
62+
; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget:
63+
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
64+
define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(ptr addrspace(1) %ptr, float %data) #0 {
65+
%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
66+
ret void
67+
}
68+
69+
; Flat-address counterpart of the wrong-subtarget test: the kernel uses
; attribute group #1 (gfx803 plus the flat fadd feature), and the check expects
; flat_atomic_add_f32 with a VGPR-pair address and a single data VGPR.
; GCN-LABEL: {{^}}flat_atomic_fadd_f32_wrong_subtarget:
70+
; GCN: flat_atomic_add_f32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
71+
define amdgpu_kernel void @flat_atomic_fadd_f32_wrong_subtarget(ptr %ptr, float %data) #1 {
72+
%ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data)
73+
ret void
74+
}
75+
76+
; Attribute groups for the *_wrong_subtarget kernels. Both set target-cpu to
; gfx803, which differs from the -mcpu values on this file's run lines, and
; each adds a single atomic fadd target feature (global no-return for #0,
; flat f32 for #1).
attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts"}
77+
attributes #1 = { "target-cpu"="gfx803" "target-features"="+flat-atomic-fadd-f32-inst"}

0 commit comments

Comments
 (0)