Skip to content

Commit 4e67b50

Browse files
committed
AMDGPU: Add more tests for atomicrmw handling
Add agent scope copies of atomicrmw atomics tests. Expand testing for the undo identity atomicrmw case. Test 16-bit atomic expansions.
1 parent 9f9856d commit 4e67b50

9 files changed

+8869
-21
lines changed

llvm/test/CodeGen/AMDGPU/idemponent-atomics.ll

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,26 @@ entry:
7676
ret i32 %val
7777
}
7878

79+
; Test input: `atomicrmw or` with operand 0 at syncscope("agent-one-as") with
; acq_rel ordering, tagged with !amdgpu.no.fine.grained.memory.
; The OPT checks expect the atomicrmw to be left in place (it is not turned
; into an atomic load); the GFX940 checks expect a global_atomic_or preceded
; by buffer_wbl2 and followed by s_waitcnt + buffer_inv.
define i32 @global_agent_acquire_release_idempotent_or__no_fine_grained(ptr addrspace(1) %in) {
80+
; GFX940-LABEL: global_agent_acquire_release_idempotent_or__no_fine_grained:
81+
; GFX940: ; %bb.0: ; %entry
82+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83+
; GFX940-NEXT: v_mov_b32_e32 v2, 0
84+
; GFX940-NEXT: buffer_wbl2 sc1
85+
; GFX940-NEXT: global_atomic_or v0, v[0:1], v2, off sc0
86+
; GFX940-NEXT: s_waitcnt vmcnt(0)
87+
; GFX940-NEXT: buffer_inv sc1
88+
; GFX940-NEXT: s_setpc_b64 s[30:31]
89+
; OPT-LABEL: @global_agent_acquire_release_idempotent_or__no_fine_grained(
90+
; OPT-NEXT: entry:
91+
; OPT-NEXT: [[VAL:%.*]] = atomicrmw or ptr addrspace(1) [[IN:%.*]], i32 0 syncscope("agent-one-as") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
92+
; OPT-NEXT: ret i32 [[VAL]]
93+
;
94+
entry:
95+
%val = atomicrmw or ptr addrspace(1) %in, i32 0 syncscope("agent-one-as") acq_rel, align 4, !amdgpu.no.fine.grained.memory !0
96+
ret i32 %val
97+
}
98+
7999
define i32 @global_agent_seq_cst_idempotent_or(ptr addrspace(1) %in) {
80100
; GFX940-LABEL: global_agent_seq_cst_idempotent_or:
81101
; GFX940: ; %bb.0: ; %entry
@@ -113,6 +133,23 @@ entry:
113133
ret i32 %val
114134
}
115135

136+
; Test input: monotonic `atomicrmw add` of 0, tagged with
; !amdgpu.no.fine.grained.memory.
; The OPT checks expect it to be rewritten to a `load atomic` with the same
; syncscope and ordering and with the metadata kept; the GFX940 checks expect
; a single global_load_dword with the sc0 modifier.
; NOTE(review): the function name says "agent" but the atomicrmw below uses
; syncscope("workgroup") - confirm this is intentional.
define i32 @global_agent_monotonic_idempotent_add__no_fine_grained(ptr addrspace(1) %in) {
137+
; GFX940-LABEL: global_agent_monotonic_idempotent_add__no_fine_grained:
138+
; GFX940: ; %bb.0: ; %entry
139+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140+
; GFX940-NEXT: global_load_dword v0, v[0:1], off sc0
141+
; GFX940-NEXT: s_waitcnt vmcnt(0)
142+
; GFX940-NEXT: s_setpc_b64 s[30:31]
143+
; OPT-LABEL: @global_agent_monotonic_idempotent_add__no_fine_grained(
144+
; OPT-NEXT: entry:
145+
; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]]
146+
; OPT-NEXT: ret i32 [[VAL]]
147+
;
148+
entry:
149+
%val = atomicrmw add ptr addrspace(1) %in, i32 0 syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory !0
150+
ret i32 %val
151+
}
152+
116153
define i32 @global_agent_monotonic_idempotent_sub(ptr addrspace(1) %in) {
117154
; GFX940-LABEL: global_agent_monotonic_idempotent_sub:
118155
; GFX940: ; %bb.0: ; %entry
@@ -130,6 +167,23 @@ entry:
130167
ret i32 %val
131168
}
132169

170+
; Test input: monotonic `atomicrmw sub` of 0, tagged with
; !amdgpu.no.fine.grained.memory.
; The OPT checks expect it to be rewritten to a `load atomic` with the same
; syncscope and ordering and with the metadata kept; the GFX940 checks expect
; a single global_load_dword with no sc0/sc1 modifiers.
; NOTE(review): the function name says "agent" but the atomicrmw below uses
; syncscope("wavefront") - confirm this is intentional.
define i32 @global_agent_monotonic_idempotent_sub__no_fine_grained(ptr addrspace(1) %in) {
171+
; GFX940-LABEL: global_agent_monotonic_idempotent_sub__no_fine_grained:
172+
; GFX940: ; %bb.0: ; %entry
173+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174+
; GFX940-NEXT: global_load_dword v0, v[0:1], off
175+
; GFX940-NEXT: s_waitcnt vmcnt(0)
176+
; GFX940-NEXT: s_setpc_b64 s[30:31]
177+
; OPT-LABEL: @global_agent_monotonic_idempotent_sub__no_fine_grained(
178+
; OPT-NEXT: entry:
179+
; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]]
180+
; OPT-NEXT: ret i32 [[VAL]]
181+
;
182+
entry:
183+
%val = atomicrmw sub ptr addrspace(1) %in, i32 0 syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory !0
184+
ret i32 %val
185+
}
186+
133187
define i32 @global_system_monotonic_idempotent_xor(ptr addrspace(1) %in) {
134188
; GFX940-LABEL: global_system_monotonic_idempotent_xor:
135189
; GFX940: ; %bb.0: ; %entry
@@ -147,6 +201,23 @@ entry:
147201
ret i32 %val
148202
}
149203

204+
; Test input: monotonic `atomicrmw xor` of 0 with no explicit syncscope,
; tagged with !amdgpu.no.fine.grained.memory.
; The OPT checks expect it to be rewritten to a monotonic `load atomic` with
; the metadata kept; the GFX940 checks expect a single global_load_dword with
; both the sc0 and sc1 modifiers.
define i32 @global_system_monotonic_idempotent_xor__no_fine_grained(ptr addrspace(1) %in) {
205+
; GFX940-LABEL: global_system_monotonic_idempotent_xor__no_fine_grained:
206+
; GFX940: ; %bb.0: ; %entry
207+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208+
; GFX940-NEXT: global_load_dword v0, v[0:1], off sc0 sc1
209+
; GFX940-NEXT: s_waitcnt vmcnt(0)
210+
; GFX940-NEXT: s_setpc_b64 s[30:31]
211+
; OPT-LABEL: @global_system_monotonic_idempotent_xor__no_fine_grained(
212+
; OPT-NEXT: entry:
213+
; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]]
214+
; OPT-NEXT: ret i32 [[VAL]]
215+
;
216+
entry:
217+
%val = atomicrmw xor ptr addrspace(1) %in, i32 0 monotonic, align 4, !amdgpu.no.fine.grained.memory !0
218+
ret i32 %val
219+
}
220+
150221
define i32 @global_agent_monotonic_idempotent_and(ptr addrspace(1) %in) {
151222
; GFX940-LABEL: global_agent_monotonic_idempotent_and:
152223
; GFX940: ; %bb.0: ; %entry
@@ -163,3 +234,22 @@ entry:
163234
%val = atomicrmw and ptr addrspace(1) %in, i32 -1 syncscope("singlethread") monotonic, align 4
164235
ret i32 %val
165236
}
237+
238+
; Test input: monotonic `atomicrmw and` with operand -1 at
; syncscope("singlethread"), tagged with !amdgpu.no.fine.grained.memory.
; The OPT checks expect it to be rewritten to a `load atomic` with the same
; syncscope and ordering and with the metadata kept; the GFX940 checks expect
; a single global_load_dword with no sc0/sc1 modifiers.
; NOTE(review): the suffix `_no_fined_grain` does not match the
; `__no_fine_grained` suffix used by the other tests in this file; renaming
; would also require updating the LABEL check lines below.
define i32 @global_agent_monotonic_idempotent_and_no_fined_grain(ptr addrspace(1) %in) {
239+
; GFX940-LABEL: global_agent_monotonic_idempotent_and_no_fined_grain:
240+
; GFX940: ; %bb.0: ; %entry
241+
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242+
; GFX940-NEXT: global_load_dword v0, v[0:1], off
243+
; GFX940-NEXT: s_waitcnt vmcnt(0)
244+
; GFX940-NEXT: s_setpc_b64 s[30:31]
245+
; OPT-LABEL: @global_agent_monotonic_idempotent_and_no_fined_grain(
246+
; OPT-NEXT: entry:
247+
; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]]
248+
; OPT-NEXT: ret i32 [[VAL]]
249+
;
250+
entry:
251+
%val = atomicrmw and ptr addrspace(1) %in, i32 -1 syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory !0
252+
ret i32 %val
253+
}
254+
255+
; Empty metadata node used as the operand of the
; !amdgpu.no.fine.grained.memory attachments on the atomicrmw instructions
; in this file.
!0 = !{}

0 commit comments

Comments
 (0)