@@ -76,6 +76,26 @@ entry:
76
76
ret i32 %val
77
77
}
78
78
79
; Idempotent `atomicrmw or ..., i32 0` with acq_rel ordering at
; syncscope("agent-one-as"), tagged with !amdgpu.no.fine.grained.memory.
; The OPT checks expect the atomicrmw to be kept as-is (it is not rewritten to
; an atomic load); the GFX940 checks expect a global_atomic_or placed between
; buffer_wbl2 sc1 and buffer_inv sc1.
+ define i32 @global_agent_acquire_release_idempotent_or__no_fine_grained (ptr addrspace (1 ) %in ) {
80
+ ; GFX940-LABEL: global_agent_acquire_release_idempotent_or__no_fine_grained:
81
+ ; GFX940: ; %bb.0: ; %entry
82
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83
+ ; GFX940-NEXT: v_mov_b32_e32 v2, 0
84
+ ; GFX940-NEXT: buffer_wbl2 sc1
85
+ ; GFX940-NEXT: global_atomic_or v0, v[0:1], v2, off sc0
86
+ ; GFX940-NEXT: s_waitcnt vmcnt(0)
87
+ ; GFX940-NEXT: buffer_inv sc1
88
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
89
+ ; OPT-LABEL: @global_agent_acquire_release_idempotent_or__no_fine_grained(
90
+ ; OPT-NEXT: entry:
91
+ ; OPT-NEXT: [[VAL:%.*]] = atomicrmw or ptr addrspace(1) [[IN:%.*]], i32 0 syncscope("agent-one-as") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
92
+ ; OPT-NEXT: ret i32 [[VAL]]
93
+ ;
94
+ entry:
95
+ %val = atomicrmw or ptr addrspace (1 ) %in , i32 0 syncscope("agent-one-as" ) acq_rel , align 4 , !amdgpu.no.fine.grained.memory !0
96
+ ret i32 %val
97
+ }
98
+
79
99
define i32 @global_agent_seq_cst_idempotent_or (ptr addrspace (1 ) %in ) {
80
100
; GFX940-LABEL: global_agent_seq_cst_idempotent_or:
81
101
; GFX940: ; %bb.0: ; %entry
@@ -113,6 +133,23 @@ entry:
113
133
ret i32 %val
114
134
}
115
135
136
; Idempotent `atomicrmw add ..., i32 0`, monotonic, tagged with
; !amdgpu.no.fine.grained.memory. The OPT checks expect it to be rewritten to
; a monotonic `load atomic` that keeps the same syncscope and the metadata;
; the GFX940 checks expect a single global_load_dword carrying sc0.
; NOTE(review): the function name says "agent", but the IR uses
; syncscope("workgroup") -- confirm which one is intended.
+ define i32 @global_agent_monotonic_idempotent_add__no_fine_grained (ptr addrspace (1 ) %in ) {
137
+ ; GFX940-LABEL: global_agent_monotonic_idempotent_add__no_fine_grained:
138
+ ; GFX940: ; %bb.0: ; %entry
139
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140
+ ; GFX940-NEXT: global_load_dword v0, v[0:1], off sc0
141
+ ; GFX940-NEXT: s_waitcnt vmcnt(0)
142
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
143
+ ; OPT-LABEL: @global_agent_monotonic_idempotent_add__no_fine_grained(
144
+ ; OPT-NEXT: entry:
145
+ ; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]]
146
+ ; OPT-NEXT: ret i32 [[VAL]]
147
+ ;
148
+ entry:
149
+ %val = atomicrmw add ptr addrspace (1 ) %in , i32 0 syncscope("workgroup" ) monotonic , align 4 , !amdgpu.no.fine.grained.memory !0
150
+ ret i32 %val
151
+ }
152
+
116
153
define i32 @global_agent_monotonic_idempotent_sub (ptr addrspace (1 ) %in ) {
117
154
; GFX940-LABEL: global_agent_monotonic_idempotent_sub:
118
155
; GFX940: ; %bb.0: ; %entry
@@ -130,6 +167,23 @@ entry:
130
167
ret i32 %val
131
168
}
132
169
170
; Idempotent `atomicrmw sub ..., i32 0`, monotonic, tagged with
; !amdgpu.no.fine.grained.memory. The OPT checks expect it to be rewritten to
; a monotonic `load atomic` that keeps the same syncscope and the metadata;
; the GFX940 checks expect a single global_load_dword with no sc0/sc1 bits.
; NOTE(review): the function name says "agent", but the IR uses
; syncscope("wavefront") -- confirm which one is intended.
+ define i32 @global_agent_monotonic_idempotent_sub__no_fine_grained (ptr addrspace (1 ) %in ) {
171
+ ; GFX940-LABEL: global_agent_monotonic_idempotent_sub__no_fine_grained:
172
+ ; GFX940: ; %bb.0: ; %entry
173
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174
+ ; GFX940-NEXT: global_load_dword v0, v[0:1], off
175
+ ; GFX940-NEXT: s_waitcnt vmcnt(0)
176
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
177
+ ; OPT-LABEL: @global_agent_monotonic_idempotent_sub__no_fine_grained(
178
+ ; OPT-NEXT: entry:
179
+ ; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]]
180
+ ; OPT-NEXT: ret i32 [[VAL]]
181
+ ;
182
+ entry:
183
+ %val = atomicrmw sub ptr addrspace (1 ) %in , i32 0 syncscope("wavefront" ) monotonic , align 4 , !amdgpu.no.fine.grained.memory !0
184
+ ret i32 %val
185
+ }
186
+
133
187
define i32 @global_system_monotonic_idempotent_xor (ptr addrspace (1 ) %in ) {
134
188
; GFX940-LABEL: global_system_monotonic_idempotent_xor:
135
189
; GFX940: ; %bb.0: ; %entry
@@ -147,6 +201,23 @@ entry:
147
201
ret i32 %val
148
202
}
149
203
204
; Idempotent `atomicrmw xor ..., i32 0`, monotonic, with no explicit syncscope,
; tagged with !amdgpu.no.fine.grained.memory. The OPT checks expect it to be
; rewritten to a monotonic `load atomic` (also without a syncscope) that keeps
; the metadata; the GFX940 checks expect a single global_load_dword carrying
; both sc0 and sc1.
+ define i32 @global_system_monotonic_idempotent_xor__no_fine_grained (ptr addrspace (1 ) %in ) {
205
+ ; GFX940-LABEL: global_system_monotonic_idempotent_xor__no_fine_grained:
206
+ ; GFX940: ; %bb.0: ; %entry
207
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208
+ ; GFX940-NEXT: global_load_dword v0, v[0:1], off sc0 sc1
209
+ ; GFX940-NEXT: s_waitcnt vmcnt(0)
210
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
211
+ ; OPT-LABEL: @global_system_monotonic_idempotent_xor__no_fine_grained(
212
+ ; OPT-NEXT: entry:
213
+ ; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]]
214
+ ; OPT-NEXT: ret i32 [[VAL]]
215
+ ;
216
+ entry:
217
+ %val = atomicrmw xor ptr addrspace (1 ) %in , i32 0 monotonic , align 4 , !amdgpu.no.fine.grained.memory !0
218
+ ret i32 %val
219
+ }
220
+
150
221
define i32 @global_agent_monotonic_idempotent_and (ptr addrspace (1 ) %in ) {
151
222
; GFX940-LABEL: global_agent_monotonic_idempotent_and:
152
223
; GFX940: ; %bb.0: ; %entry
@@ -163,3 +234,22 @@ entry:
163
234
%val = atomicrmw and ptr addrspace (1 ) %in , i32 -1 syncscope("singlethread" ) monotonic , align 4
164
235
ret i32 %val
165
236
}
237
+
238
; Idempotent `atomicrmw and ..., i32 -1`, monotonic, tagged with
; !amdgpu.no.fine.grained.memory. The OPT checks expect it to be rewritten to
; a monotonic `load atomic` that keeps the same syncscope and the metadata;
; the GFX940 checks expect a single global_load_dword with no sc0/sc1 bits.
; The function uses the same `__no_fine_grained` suffix as the other
; metadata-tagged variants in this file; the label checks below must match it.
; NOTE(review): the name says "agent", but the IR uses
; syncscope("singlethread"), as does the untagged
; @global_agent_monotonic_idempotent_and test -- confirm that is intended.
+ define i32 @global_agent_monotonic_idempotent_and__no_fine_grained(ptr addrspace(1) %in) {
239
+ ; GFX940-LABEL: global_agent_monotonic_idempotent_and__no_fine_grained:
240
+ ; GFX940: ; %bb.0: ; %entry
241
+ ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242
+ ; GFX940-NEXT: global_load_dword v0, v[0:1], off
243
+ ; GFX940-NEXT: s_waitcnt vmcnt(0)
244
+ ; GFX940-NEXT: s_setpc_b64 s[30:31]
245
+ ; OPT-LABEL: @global_agent_monotonic_idempotent_and__no_fine_grained(
246
+ ; OPT-NEXT: entry:
247
+ ; OPT-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[IN:%.*]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0]]
248
+ ; OPT-NEXT: ret i32 [[VAL]]
249
+ ;
250
+ entry:
251
+ %val = atomicrmw and ptr addrspace(1) %in, i32 -1 syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory !0
252
+ ret i32 %val
253
+ }
254
+
255
; Empty metadata node; it is the operand of every
; !amdgpu.no.fine.grained.memory attachment on the atomicrmw instructions in
; the tests above.
+ !0 = !{}
0 commit comments