@@ -237,24 +237,10 @@ define <2 x half> @global_atomic_fadd_ret_v2f16_agent_offset(ptr addrspace(1) %p
237
237
; GFX940-LABEL: global_atomic_fadd_ret_v2f16_agent_offset:
238
238
; GFX940: ; %bb.0:
239
239
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240
- ; GFX940-NEXT: global_load_dword v3, v[0:1], off offset:1024
241
- ; GFX940-NEXT: s_mov_b64 s[0:1], 0
242
- ; GFX940-NEXT: .LBB17_1: ; %atomicrmw.start
243
- ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
244
- ; GFX940-NEXT: s_waitcnt vmcnt(0)
245
- ; GFX940-NEXT: v_mov_b32_e32 v5, v3
246
- ; GFX940-NEXT: v_pk_add_f16 v4, v5, v2
247
240
; GFX940-NEXT: buffer_wbl2 sc1
248
- ; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:1024 sc0
241
+ ; GFX940-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:1024 sc0
249
242
; GFX940-NEXT: s_waitcnt vmcnt(0)
250
243
; GFX940-NEXT: buffer_inv sc1
251
- ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
252
- ; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
253
- ; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1]
254
- ; GFX940-NEXT: s_cbranch_execnz .LBB17_1
255
- ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
256
- ; GFX940-NEXT: s_or_b64 exec, exec, s[0:1]
257
- ; GFX940-NEXT: v_mov_b32_e32 v0, v3
258
244
; GFX940-NEXT: s_setpc_b64 s[30:31]
259
245
%gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256
260
246
%result = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst
@@ -265,23 +251,10 @@ define void @global_atomic_fadd_noret_v2f16_agent_offset(ptr addrspace(1) %ptr,
265
251
; GFX940-LABEL: global_atomic_fadd_noret_v2f16_agent_offset:
266
252
; GFX940: ; %bb.0:
267
253
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268
- ; GFX940-NEXT: global_load_dword v5, v[0:1], off offset:1024
269
- ; GFX940-NEXT: s_mov_b64 s[0:1], 0
270
- ; GFX940-NEXT: .LBB18_1: ; %atomicrmw.start
271
- ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
272
- ; GFX940-NEXT: s_waitcnt vmcnt(0)
273
- ; GFX940-NEXT: v_pk_add_f16 v4, v5, v2
274
254
; GFX940-NEXT: buffer_wbl2 sc1
275
- ; GFX940-NEXT: global_atomic_cmpswap v3, v[0:1], v[4:5], off offset:1024 sc0
255
+ ; GFX940-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:1024
276
256
; GFX940-NEXT: s_waitcnt vmcnt(0)
277
257
; GFX940-NEXT: buffer_inv sc1
278
- ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
279
- ; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
280
- ; GFX940-NEXT: v_mov_b32_e32 v5, v3
281
- ; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1]
282
- ; GFX940-NEXT: s_cbranch_execnz .LBB18_1
283
- ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
284
- ; GFX940-NEXT: s_or_b64 exec, exec, s[0:1]
285
258
; GFX940-NEXT: s_setpc_b64 s[30:31]
286
259
%gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256
287
260
%unused = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst
@@ -292,24 +265,10 @@ define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half>
292
265
; GFX940-LABEL: flat_atomic_fadd_ret_v2f16_agent_offset:
293
266
; GFX940: ; %bb.0:
294
267
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295
- ; GFX940-NEXT: flat_load_dword v3, v[0:1] offset:1024
296
- ; GFX940-NEXT: s_mov_b64 s[0:1], 0
297
- ; GFX940-NEXT: .LBB19_1: ; %atomicrmw.start
298
- ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
299
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
300
- ; GFX940-NEXT: v_mov_b32_e32 v5, v3
301
- ; GFX940-NEXT: v_pk_add_f16 v4, v5, v2
302
268
; GFX940-NEXT: buffer_wbl2 sc1
303
- ; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:1024 sc0
269
+ ; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:1024 sc0
304
270
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
305
271
; GFX940-NEXT: buffer_inv sc1
306
- ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
307
- ; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
308
- ; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1]
309
- ; GFX940-NEXT: s_cbranch_execnz .LBB19_1
310
- ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
311
- ; GFX940-NEXT: s_or_b64 exec, exec, s[0:1]
312
- ; GFX940-NEXT: v_mov_b32_e32 v0, v3
313
272
; GFX940-NEXT: s_setpc_b64 s[30:31]
314
273
%gep = getelementptr <2 x half>, ptr %ptr, i32 256
315
274
%result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
@@ -320,23 +279,10 @@ define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val
320
279
; GFX940-LABEL: flat_atomic_fadd_noret_v2f16_agent_offset:
321
280
; GFX940: ; %bb.0:
322
281
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
323
- ; GFX940-NEXT: flat_load_dword v5, v[0:1] offset:1024
324
- ; GFX940-NEXT: s_mov_b64 s[0:1], 0
325
- ; GFX940-NEXT: .LBB20_1: ; %atomicrmw.start
326
- ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1
327
- ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
328
- ; GFX940-NEXT: v_pk_add_f16 v4, v5, v2
329
282
; GFX940-NEXT: buffer_wbl2 sc1
330
- ; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] offset:1024 sc0
283
+ ; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 offset:1024
331
284
; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
332
285
; GFX940-NEXT: buffer_inv sc1
333
- ; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
334
- ; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
335
- ; GFX940-NEXT: v_mov_b32_e32 v5, v3
336
- ; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1]
337
- ; GFX940-NEXT: s_cbranch_execnz .LBB20_1
338
- ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end
339
- ; GFX940-NEXT: s_or_b64 exec, exec, s[0:1]
340
286
; GFX940-NEXT: s_setpc_b64 s[30:31]
341
287
%gep = getelementptr <2 x half>, ptr %ptr, i32 256
342
288
%unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
0 commit comments