@@ -1021,20 +1021,20 @@ main_body:
1021
1021
define amdgpu_kernel void @global_atomic_fadd_f64_noret (ptr addrspace (1 ) %ptr , double %data ) {
1022
1022
; GFX90A-LABEL: global_atomic_fadd_f64_noret:
1023
1023
; GFX90A: ; %bb.0: ; %main_body
1024
- ; GFX90A-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1024
+ ; GFX90A-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
1025
1025
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1026
1026
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1027
- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3 ], s[2:3 ] op_sel:[0,1]
1028
- ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1 ]
1027
+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7 ], s[6:7 ] op_sel:[0,1]
1028
+ ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ]
1029
1029
; GFX90A-NEXT: s_endpgm
1030
1030
;
1031
1031
; GFX940-LABEL: global_atomic_fadd_f64_noret:
1032
1032
; GFX940: ; %bb.0: ; %main_body
1033
- ; GFX940-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1033
+ ; GFX940-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
1034
1034
; GFX940-NEXT: v_mov_b32_e32 v2, 0
1035
1035
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1036
- ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3 ]
1037
- ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1 ]
1036
+ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7 ]
1037
+ ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ]
1038
1038
; GFX940-NEXT: s_endpgm
1039
1039
main_body:
1040
1040
%ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64 (ptr addrspace (1 ) %ptr , double %data )
@@ -1044,20 +1044,20 @@ main_body:
1044
1044
define amdgpu_kernel void @global_atomic_fmin_f64_noret (ptr addrspace (1 ) %ptr , double %data ) {
1045
1045
; GFX90A-LABEL: global_atomic_fmin_f64_noret:
1046
1046
; GFX90A: ; %bb.0: ; %main_body
1047
- ; GFX90A-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1047
+ ; GFX90A-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
1048
1048
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1049
1049
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1050
- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3 ], s[2:3 ] op_sel:[0,1]
1051
- ; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[0:1 ]
1050
+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7 ], s[6:7 ] op_sel:[0,1]
1051
+ ; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5 ]
1052
1052
; GFX90A-NEXT: s_endpgm
1053
1053
;
1054
1054
; GFX940-LABEL: global_atomic_fmin_f64_noret:
1055
1055
; GFX940: ; %bb.0: ; %main_body
1056
- ; GFX940-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1056
+ ; GFX940-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
1057
1057
; GFX940-NEXT: v_mov_b32_e32 v2, 0
1058
1058
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1059
- ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3 ]
1060
- ; GFX940-NEXT: global_atomic_min_f64 v2, v[0:1], s[0:1 ]
1059
+ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7 ]
1060
+ ; GFX940-NEXT: global_atomic_min_f64 v2, v[0:1], s[4:5 ]
1061
1061
; GFX940-NEXT: s_endpgm
1062
1062
main_body:
1063
1063
%ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64 (ptr addrspace (1 ) %ptr , double %data )
@@ -1067,20 +1067,20 @@ main_body:
1067
1067
define amdgpu_kernel void @global_atomic_fmax_f64_noret (ptr addrspace (1 ) %ptr , double %data ) {
1068
1068
; GFX90A-LABEL: global_atomic_fmax_f64_noret:
1069
1069
; GFX90A: ; %bb.0: ; %main_body
1070
- ; GFX90A-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1070
+ ; GFX90A-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
1071
1071
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1072
1072
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1073
- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3 ], s[2:3 ] op_sel:[0,1]
1074
- ; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[0:1 ]
1073
+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7 ], s[6:7 ] op_sel:[0,1]
1074
+ ; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5 ]
1075
1075
; GFX90A-NEXT: s_endpgm
1076
1076
;
1077
1077
; GFX940-LABEL: global_atomic_fmax_f64_noret:
1078
1078
; GFX940: ; %bb.0: ; %main_body
1079
- ; GFX940-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1079
+ ; GFX940-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
1080
1080
; GFX940-NEXT: v_mov_b32_e32 v2, 0
1081
1081
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1082
- ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3 ]
1083
- ; GFX940-NEXT: global_atomic_max_f64 v2, v[0:1], s[0:1 ]
1082
+ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7 ]
1083
+ ; GFX940-NEXT: global_atomic_max_f64 v2, v[0:1], s[4:5 ]
1084
1084
; GFX940-NEXT: s_endpgm
1085
1085
main_body:
1086
1086
%ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64 (ptr addrspace (1 ) %ptr , double %data )
@@ -1134,14 +1134,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %pt
1134
1134
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
1135
1135
; GFX940-NEXT: s_cbranch_execz .LBB39_2
1136
1136
; GFX940-NEXT: ; %bb.1:
1137
- ; GFX940-NEXT: s_load_dwordx2 s[2:3 ], s[2:3], 0x24
1137
+ ; GFX940-NEXT: s_load_dwordx2 s[4:5 ], s[2:3], 0x24
1138
1138
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1139
1139
; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1140
1140
; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
1141
1141
; GFX940-NEXT: v_mov_b32_e32 v2, 0
1142
1142
; GFX940-NEXT: buffer_wbl2 sc0 sc1
1143
1143
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1144
- ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3 ] sc1
1144
+ ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ] sc1
1145
1145
; GFX940-NEXT: s_waitcnt vmcnt(0)
1146
1146
; GFX940-NEXT: buffer_inv sc0 sc1
1147
1147
; GFX940-NEXT: .LBB39_2:
@@ -1162,13 +1162,13 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
1162
1162
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
1163
1163
; GFX90A-NEXT: s_cbranch_execz .LBB40_2
1164
1164
; GFX90A-NEXT: ; %bb.1:
1165
- ; GFX90A-NEXT: s_load_dwordx2 s[2:3 ], s[2:3], 0x24
1165
+ ; GFX90A-NEXT: s_load_dwordx2 s[4:5 ], s[2:3], 0x24
1166
1166
; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1167
1167
; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1168
1168
; GFX90A-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
1169
1169
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1170
1170
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1171
- ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3 ]
1171
+ ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ]
1172
1172
; GFX90A-NEXT: s_waitcnt vmcnt(0)
1173
1173
; GFX90A-NEXT: buffer_wbinvl1_vol
1174
1174
; GFX90A-NEXT: .LBB40_2:
@@ -1184,14 +1184,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(
1184
1184
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
1185
1185
; GFX940-NEXT: s_cbranch_execz .LBB40_2
1186
1186
; GFX940-NEXT: ; %bb.1:
1187
- ; GFX940-NEXT: s_load_dwordx2 s[2:3 ], s[2:3], 0x24
1187
+ ; GFX940-NEXT: s_load_dwordx2 s[4:5 ], s[2:3], 0x24
1188
1188
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1189
1189
; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1190
1190
; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
1191
1191
; GFX940-NEXT: v_mov_b32_e32 v2, 0
1192
1192
; GFX940-NEXT: buffer_wbl2 sc1
1193
1193
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1194
- ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3 ]
1194
+ ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ]
1195
1195
; GFX940-NEXT: s_waitcnt vmcnt(0)
1196
1196
; GFX940-NEXT: buffer_inv sc1
1197
1197
; GFX940-NEXT: .LBB40_2:
@@ -1248,14 +1248,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace
1248
1248
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
1249
1249
; GFX940-NEXT: s_cbranch_execz .LBB41_2
1250
1250
; GFX940-NEXT: ; %bb.1:
1251
- ; GFX940-NEXT: s_load_dwordx2 s[2:3 ], s[2:3], 0x24
1251
+ ; GFX940-NEXT: s_load_dwordx2 s[4:5 ], s[2:3], 0x24
1252
1252
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1253
1253
; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1254
1254
; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
1255
1255
; GFX940-NEXT: v_mov_b32_e32 v2, 0
1256
1256
; GFX940-NEXT: buffer_wbl2 sc0 sc1
1257
1257
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1258
- ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3 ] sc1
1258
+ ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ] sc1
1259
1259
; GFX940-NEXT: s_waitcnt vmcnt(0)
1260
1260
; GFX940-NEXT: buffer_inv sc0 sc1
1261
1261
; GFX940-NEXT: .LBB41_2:
@@ -1276,13 +1276,13 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
1276
1276
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
1277
1277
; GFX90A-NEXT: s_cbranch_execz .LBB42_2
1278
1278
; GFX90A-NEXT: ; %bb.1:
1279
- ; GFX90A-NEXT: s_load_dwordx2 s[2:3 ], s[2:3], 0x24
1279
+ ; GFX90A-NEXT: s_load_dwordx2 s[4:5 ], s[2:3], 0x24
1280
1280
; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1281
1281
; GFX90A-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1282
1282
; GFX90A-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
1283
1283
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1284
1284
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1285
- ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3 ]
1285
+ ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ]
1286
1286
; GFX90A-NEXT: s_waitcnt vmcnt(0)
1287
1287
; GFX90A-NEXT: buffer_wbinvl1_vol
1288
1288
; GFX90A-NEXT: .LBB42_2:
@@ -1298,14 +1298,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(
1298
1298
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
1299
1299
; GFX940-NEXT: s_cbranch_execz .LBB42_2
1300
1300
; GFX940-NEXT: ; %bb.1:
1301
- ; GFX940-NEXT: s_load_dwordx2 s[2:3 ], s[2:3], 0x24
1301
+ ; GFX940-NEXT: s_load_dwordx2 s[4:5 ], s[2:3], 0x24
1302
1302
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1303
1303
; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1304
1304
; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
1305
1305
; GFX940-NEXT: v_mov_b32_e32 v2, 0
1306
1306
; GFX940-NEXT: buffer_wbl2 sc1
1307
1307
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1308
- ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3 ]
1308
+ ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ]
1309
1309
; GFX940-NEXT: s_waitcnt vmcnt(0)
1310
1310
; GFX940-NEXT: buffer_inv sc1
1311
1311
; GFX940-NEXT: .LBB42_2:
@@ -1522,14 +1522,14 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrs
1522
1522
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
1523
1523
; GFX940-NEXT: s_cbranch_execz .LBB49_2
1524
1524
; GFX940-NEXT: ; %bb.1:
1525
- ; GFX940-NEXT: s_load_dwordx2 s[2:3 ], s[2:3], 0x24
1525
+ ; GFX940-NEXT: s_load_dwordx2 s[4:5 ], s[2:3], 0x24
1526
1526
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
1527
1527
; GFX940-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
1528
1528
; GFX940-NEXT: v_mul_f64 v[0:1], v[0:1], 4.0
1529
1529
; GFX940-NEXT: v_mov_b32_e32 v2, 0
1530
1530
; GFX940-NEXT: buffer_wbl2 sc1
1531
1531
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1532
- ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3 ]
1532
+ ; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5 ]
1533
1533
; GFX940-NEXT: s_waitcnt vmcnt(0)
1534
1534
; GFX940-NEXT: buffer_inv sc1
1535
1535
; GFX940-NEXT: .LBB49_2:
@@ -1761,19 +1761,19 @@ main_body:
1761
1761
define amdgpu_kernel void @flat_atomic_fadd_f64_noret (ptr %ptr , double %data ) {
1762
1762
; GFX90A-LABEL: flat_atomic_fadd_f64_noret:
1763
1763
; GFX90A: ; %bb.0: ; %main_body
1764
- ; GFX90A-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1764
+ ; GFX90A-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
1765
1765
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1766
- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1 ], s[0:1 ] op_sel:[0,1]
1767
- ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[2:3 ], s[2:3 ] op_sel:[0,1]
1766
+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5 ], s[4:5 ] op_sel:[0,1]
1767
+ ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7 ], s[6:7 ] op_sel:[0,1]
1768
1768
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
1769
1769
; GFX90A-NEXT: s_endpgm
1770
1770
;
1771
1771
; GFX940-LABEL: flat_atomic_fadd_f64_noret:
1772
1772
; GFX940: ; %bb.0: ; %main_body
1773
- ; GFX940-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1773
+ ; GFX940-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
1774
1774
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1775
- ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1 ]
1776
- ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3 ]
1775
+ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5 ]
1776
+ ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[6:7 ]
1777
1777
; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
1778
1778
; GFX940-NEXT: s_endpgm
1779
1779
main_body:
@@ -1842,19 +1842,19 @@ main_body:
1842
1842
define amdgpu_kernel void @flat_atomic_fmin_f64_noret (ptr %ptr , double %data ) {
1843
1843
; GFX90A-LABEL: flat_atomic_fmin_f64_noret:
1844
1844
; GFX90A: ; %bb.0: ; %main_body
1845
- ; GFX90A-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1845
+ ; GFX90A-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
1846
1846
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1847
- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1 ], s[0:1 ] op_sel:[0,1]
1848
- ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[2:3 ], s[2:3 ] op_sel:[0,1]
1847
+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5 ], s[4:5 ] op_sel:[0,1]
1848
+ ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7 ], s[6:7 ] op_sel:[0,1]
1849
1849
; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
1850
1850
; GFX90A-NEXT: s_endpgm
1851
1851
;
1852
1852
; GFX940-LABEL: flat_atomic_fmin_f64_noret:
1853
1853
; GFX940: ; %bb.0: ; %main_body
1854
- ; GFX940-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1854
+ ; GFX940-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
1855
1855
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1856
- ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1 ]
1857
- ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3 ]
1856
+ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5 ]
1857
+ ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[6:7 ]
1858
1858
; GFX940-NEXT: flat_atomic_min_f64 v[0:1], v[2:3]
1859
1859
; GFX940-NEXT: s_endpgm
1860
1860
main_body:
@@ -1884,19 +1884,19 @@ main_body:
1884
1884
define amdgpu_kernel void @flat_atomic_fmax_f64_noret (ptr %ptr , double %data ) {
1885
1885
; GFX90A-LABEL: flat_atomic_fmax_f64_noret:
1886
1886
; GFX90A: ; %bb.0: ; %main_body
1887
- ; GFX90A-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1887
+ ; GFX90A-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
1888
1888
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1889
- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1 ], s[0:1 ] op_sel:[0,1]
1890
- ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[2:3 ], s[2:3 ] op_sel:[0,1]
1889
+ ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[4:5 ], s[4:5 ] op_sel:[0,1]
1890
+ ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[6:7 ], s[6:7 ] op_sel:[0,1]
1891
1891
; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
1892
1892
; GFX90A-NEXT: s_endpgm
1893
1893
;
1894
1894
; GFX940-LABEL: flat_atomic_fmax_f64_noret:
1895
1895
; GFX940: ; %bb.0: ; %main_body
1896
- ; GFX940-NEXT: s_load_dwordx4 s[0:3 ], s[2:3], 0x24
1896
+ ; GFX940-NEXT: s_load_dwordx4 s[4:7 ], s[2:3], 0x24
1897
1897
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
1898
- ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1 ]
1899
- ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3 ]
1898
+ ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[4:5 ]
1899
+ ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[6:7 ]
1900
1900
; GFX940-NEXT: flat_atomic_max_f64 v[0:1], v[2:3]
1901
1901
; GFX940-NEXT: s_endpgm
1902
1902
main_body:
0 commit comments