@@ -269,22 +269,23 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
269
269
; GFX11-NEXT: [[V_MOV_B32_dpp3:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY10]], [[V_ADD_F32_e64_2]], 280, 15, 15, 0, implicit $exec
270
270
; GFX11-NEXT: [[V_ADD_F32_e64_3:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_2]], 0, [[V_MOV_B32_dpp3]], 0, 0, implicit $mode, implicit $exec
271
271
; GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
272
- ; GFX11-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_2]], 0, [[S_MOV_B32_2]], [[V_ADD_F32_e64_3]], 0, implicit $exec
273
- ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
274
- ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY11]], [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
275
- ; GFX11-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
272
+ ; GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
273
+ ; GFX11-NEXT: [[V_PERMLANEX16_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERMLANEX16_B32_e64 0, [[V_ADD_F32_e64_3]], 0, [[S_MOV_B32_2]], 0, [[S_MOV_B32_2]], [[COPY11]], 0, implicit $exec
276
274
; GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
277
- ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
275
+ ; GFX11-NEXT: [[V_MOV_B32_dpp4:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY12]], [[V_PERMLANEX16_B32_e64_]], 228, 10, 15, 0, implicit $exec
276
+ ; GFX11-NEXT: [[V_ADD_F32_e64_4:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[V_ADD_F32_e64_3]], 0, [[V_MOV_B32_dpp4]], 0, 0, implicit $mode, implicit $exec
277
+ ; GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
278
+ ; GFX11-NEXT: [[V_MOV_B32_dpp5:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[COPY13]], [[V_ADD_F32_e64_4]], 273, 15, 15, 0, implicit $exec
278
279
; GFX11-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 15
279
280
; GFX11-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_3]]
280
281
; GFX11-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 16
281
282
; GFX11-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 [[V_READLANE_B32_]], [[S_MOV_B32_4]], [[V_MOV_B32_dpp5]]
282
283
; GFX11-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 31
283
284
; GFX11-NEXT: [[V_READLANE_B32_1:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[V_ADD_F32_e64_4]], [[S_MOV_B32_5]]
284
- ; GFX11-NEXT: [[COPY13 :%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_1]]
285
- ; GFX11-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY13 ]], implicit $exec
286
- ; GFX11-NEXT: [[COPY14 :%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
287
- ; GFX11-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_LO_U32_B32_e64_]], [[COPY14 ]], implicit $exec
285
+ ; GFX11-NEXT: [[COPY14 :%[0-9]+]]:vgpr_32 = COPY [[V_READLANE_B32_1]]
286
+ ; GFX11-NEXT: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY14 ]], implicit $exec
287
+ ; GFX11-NEXT: [[COPY15 :%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
288
+ ; GFX11-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_MBCNT_LO_U32_B32_e64_]], [[COPY15 ]], implicit $exec
288
289
; GFX11-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec
289
290
; GFX11-NEXT: S_BRANCH %bb.3
290
291
; GFX11-NEXT: {{ $}}
@@ -298,7 +299,7 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
298
299
; GFX11-NEXT: bb.4.Flow:
299
300
; GFX11-NEXT: successors: %bb.6(0x80000000)
300
301
; GFX11-NEXT: {{ $}}
301
- ; GFX11-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI %41 , %bb.5, [[DEF]], %bb.1
302
+ ; GFX11-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI %42 , %bb.5, [[DEF]], %bb.1
302
303
; GFX11-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec
303
304
; GFX11-NEXT: S_BRANCH %bb.6
304
305
; GFX11-NEXT: {{ $}}
@@ -309,10 +310,10 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
309
310
; GFX11-NEXT: SI_END_CF [[SI_IF1]], implicit-def $exec, implicit-def $scc, implicit $exec
310
311
; GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[PHI1]], implicit $exec
311
312
; GFX11-NEXT: [[STRICT_WWM1:%[0-9]+]]:vgpr_32 = STRICT_WWM [[V_WRITELANE_B32_]], implicit $exec
312
- ; GFX11-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
313
- ; GFX11-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY15]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec
314
313
; GFX11-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
315
- ; GFX11-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_ADD_F32_e64_5]], 0, [[COPY16]], [[V_CMP_EQ_U32_e64_]], implicit $exec
314
+ ; GFX11-NEXT: [[V_ADD_F32_e64_5:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY16]], 0, [[STRICT_WWM1]], 0, 0, implicit $mode, implicit $exec
315
+ ; GFX11-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[V_READFIRSTLANE_B32_]]
316
+ ; GFX11-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_ADD_F32_e64_5]], 0, [[COPY17]], [[V_CMP_EQ_U32_e64_]], implicit $exec
316
317
; GFX11-NEXT: S_BRANCH %bb.4
317
318
; GFX11-NEXT: {{ $}}
318
319
; GFX11-NEXT: bb.6 (%ir-block.38):
0 commit comments