@@ -18,7 +18,6 @@ define float @local_atomic_fmax_ret_f32(ptr addrspace(3) %ptr, float %val) {
18
18
; GFX12-NEXT: s_wait_samplecnt 0x0
19
19
; GFX12-NEXT: s_wait_bvhcnt 0x0
20
20
; GFX12-NEXT: s_wait_kmcnt 0x0
21
- ; GFX12-NEXT: global_wb scope:SCOPE_SE
22
21
; GFX12-NEXT: s_wait_storecnt 0x0
23
22
; GFX12-NEXT: ds_max_num_rtn_f32 v0, v0, v1
24
23
; GFX12-NEXT: s_wait_dscnt 0x0
@@ -91,7 +90,6 @@ define void @local_atomic_fmax_noret_f32(ptr addrspace(3) %ptr, float %val) {
91
90
; GFX12-NEXT: s_wait_samplecnt 0x0
92
91
; GFX12-NEXT: s_wait_bvhcnt 0x0
93
92
; GFX12-NEXT: s_wait_kmcnt 0x0
94
- ; GFX12-NEXT: global_wb scope:SCOPE_SE
95
93
; GFX12-NEXT: s_wait_storecnt 0x0
96
94
; GFX12-NEXT: ds_max_num_f32 v0, v1
97
95
; GFX12-NEXT: s_wait_dscnt 0x0
@@ -164,7 +162,6 @@ define double @local_atomic_fmax_ret_f64(ptr addrspace(3) %ptr, double %val) {
164
162
; GFX12-NEXT: s_wait_samplecnt 0x0
165
163
; GFX12-NEXT: s_wait_bvhcnt 0x0
166
164
; GFX12-NEXT: s_wait_kmcnt 0x0
167
- ; GFX12-NEXT: global_wb scope:SCOPE_SE
168
165
; GFX12-NEXT: s_wait_storecnt 0x0
169
166
; GFX12-NEXT: ds_max_num_rtn_f64 v[0:1], v0, v[1:2]
170
167
; GFX12-NEXT: s_wait_dscnt 0x0
@@ -241,7 +238,6 @@ define void @local_atomic_fmax_noret_f64(ptr addrspace(3) %ptr, double %val) {
241
238
; GFX12-NEXT: s_wait_samplecnt 0x0
242
239
; GFX12-NEXT: s_wait_bvhcnt 0x0
243
240
; GFX12-NEXT: s_wait_kmcnt 0x0
244
- ; GFX12-NEXT: global_wb scope:SCOPE_SE
245
241
; GFX12-NEXT: s_wait_storecnt 0x0
246
242
; GFX12-NEXT: ds_max_num_f64 v0, v[1:2]
247
243
; GFX12-NEXT: s_wait_dscnt 0x0
@@ -318,7 +314,6 @@ define float @global_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(pt
318
314
; GFX12-NEXT: s_wait_samplecnt 0x0
319
315
; GFX12-NEXT: s_wait_bvhcnt 0x0
320
316
; GFX12-NEXT: s_wait_kmcnt 0x0
321
- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
322
317
; GFX12-NEXT: s_wait_storecnt 0x0
323
318
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
324
319
; GFX12-NEXT: s_wait_loadcnt 0x0
@@ -465,7 +460,6 @@ define void @global_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(p
465
460
; GFX12-NEXT: s_wait_samplecnt 0x0
466
461
; GFX12-NEXT: s_wait_bvhcnt 0x0
467
462
; GFX12-NEXT: s_wait_kmcnt 0x0
468
- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
469
463
; GFX12-NEXT: s_wait_storecnt 0x0
470
464
; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off scope:SCOPE_DEV
471
465
; GFX12-NEXT: s_wait_storecnt 0x0
@@ -617,7 +611,6 @@ define double @global_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(p
617
611
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
618
612
; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[6:7], v[6:7]
619
613
; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[2:3]
620
- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
621
614
; GFX12-NEXT: s_wait_storecnt 0x0
622
615
; GFX12-NEXT: global_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
623
616
; GFX12-NEXT: s_wait_loadcnt 0x0
@@ -774,7 +767,6 @@ define void @global_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(p
774
767
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[4:5], v[4:5]
775
768
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
776
769
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[6:7]
777
- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
778
770
; GFX12-NEXT: s_wait_storecnt 0x0
779
771
; GFX12-NEXT: global_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
780
772
; GFX12-NEXT: s_wait_loadcnt 0x0
@@ -915,7 +907,6 @@ define float @flat_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory(ptr
915
907
; GFX12-NEXT: s_wait_samplecnt 0x0
916
908
; GFX12-NEXT: s_wait_bvhcnt 0x0
917
909
; GFX12-NEXT: s_wait_kmcnt 0x0
918
- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
919
910
; GFX12-NEXT: s_wait_storecnt 0x0
920
911
; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
921
912
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1058,7 +1049,6 @@ define void @flat_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory(ptr
1058
1049
; GFX12-NEXT: s_wait_samplecnt 0x0
1059
1050
; GFX12-NEXT: s_wait_bvhcnt 0x0
1060
1051
; GFX12-NEXT: s_wait_kmcnt 0x0
1061
- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
1062
1052
; GFX12-NEXT: s_wait_storecnt 0x0
1063
1053
; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2 scope:SCOPE_DEV
1064
1054
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
@@ -1209,7 +1199,6 @@ define double @flat_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_memory(ptr
1209
1199
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1210
1200
; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[6:7], v[6:7]
1211
1201
; GFX12-NEXT: v_max_num_f64_e32 v[4:5], v[4:5], v[2:3]
1212
- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
1213
1202
; GFX12-NEXT: s_wait_storecnt 0x0
1214
1203
; GFX12-NEXT: flat_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
1215
1204
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1364,7 +1353,6 @@ define void @flat_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_memory(ptr
1364
1353
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[4:5], v[4:5]
1365
1354
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
1366
1355
; GFX12-NEXT: v_max_num_f64_e32 v[2:3], v[2:3], v[6:7]
1367
- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
1368
1356
; GFX12-NEXT: s_wait_storecnt 0x0
1369
1357
; GFX12-NEXT: flat_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
1370
1358
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
@@ -1507,7 +1495,6 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m
1507
1495
; GFX12-NEXT: s_wait_bvhcnt 0x0
1508
1496
; GFX12-NEXT: s_wait_kmcnt 0x0
1509
1497
; GFX12-NEXT: v_mov_b32_e32 v1, s6
1510
- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
1511
1498
; GFX12-NEXT: s_wait_storecnt 0x0
1512
1499
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
1513
1500
; GFX12-NEXT: s_wait_loadcnt 0x0
@@ -1684,7 +1671,6 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_
1684
1671
; GFX12-NEXT: s_wait_bvhcnt 0x0
1685
1672
; GFX12-NEXT: s_wait_kmcnt 0x0
1686
1673
; GFX12-NEXT: v_mov_b32_e32 v1, s6
1687
- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
1688
1674
; GFX12-NEXT: s_wait_storecnt 0x0
1689
1675
; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen
1690
1676
; GFX12-NEXT: s_wait_storecnt 0x0
@@ -1865,11 +1851,11 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_
1865
1851
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
1866
1852
; GFX12-NEXT: s_wait_loadcnt 0x0
1867
1853
; GFX12-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0
1868
- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
1869
1854
; GFX12-NEXT: s_wait_storecnt 0x0
1870
- ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[9:10], v[9:10]
1871
1855
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1856
+ ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[9:10], v[9:10]
1872
1857
; GFX12-NEXT: v_max_num_f64_e32 v[7:8], v[0:1], v[4:5]
1858
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
1873
1859
; GFX12-NEXT: v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
1874
1860
; GFX12-NEXT: v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
1875
1861
; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
@@ -2058,11 +2044,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f64__amdgpu_no_fine_grained_
2058
2044
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
2059
2045
; GFX12-NEXT: s_wait_loadcnt 0x0
2060
2046
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[2:3], v[2:3]
2061
- ; GFX12-NEXT: global_wb scope:SCOPE_DEV
2062
2047
; GFX12-NEXT: s_wait_storecnt 0x0
2048
+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2063
2049
; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[4:5]
2064
2050
; GFX12-NEXT: v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
2065
- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
2066
2051
; GFX12-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0
2067
2052
; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
2068
2053
; GFX12-NEXT: s_wait_loadcnt 0x0
0 commit comments