@@ -452,7 +452,7 @@ define double @v_uitofp_i8_to_f64(i8 %arg0) nounwind {
452
452
define amdgpu_kernel void @load_i8_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
453
453
; SI-LABEL: load_i8_to_f32:
454
454
; SI: ; %bb.0:
455
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
455
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
456
456
; SI-NEXT: s_mov_b32 s6, 0
457
457
; SI-NEXT: s_mov_b32 s7, 0xf000
458
458
; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -468,7 +468,7 @@ define amdgpu_kernel void @load_i8_to_f32(ptr addrspace(1) noalias %out, ptr add
468
468
;
469
469
; VI-LABEL: load_i8_to_f32:
470
470
; VI: ; %bb.0:
471
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
471
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
472
472
; VI-NEXT: v_ashrrev_i32_e32 v3, 31, v0
473
473
; VI-NEXT: s_waitcnt lgkmcnt(0)
474
474
; VI-NEXT: v_mov_b32_e32 v1, s2
@@ -493,7 +493,7 @@ define amdgpu_kernel void @load_i8_to_f32(ptr addrspace(1) noalias %out, ptr add
493
493
define amdgpu_kernel void @load_v2i8_to_v2f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
494
494
; SI-LABEL: load_v2i8_to_v2f32:
495
495
; SI: ; %bb.0:
496
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
496
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
497
497
; SI-NEXT: v_lshlrev_b32_e32 v0, 1, v0
498
498
; SI-NEXT: v_mov_b32_e32 v1, 0
499
499
; SI-NEXT: s_mov_b32 s6, 0
@@ -513,7 +513,7 @@ define amdgpu_kernel void @load_v2i8_to_v2f32(ptr addrspace(1) noalias %out, ptr
513
513
;
514
514
; VI-LABEL: load_v2i8_to_v2f32:
515
515
; VI: ; %bb.0:
516
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
516
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
517
517
; VI-NEXT: v_lshlrev_b32_e32 v2, 1, v0
518
518
; VI-NEXT: s_waitcnt lgkmcnt(0)
519
519
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -539,7 +539,7 @@ define amdgpu_kernel void @load_v2i8_to_v2f32(ptr addrspace(1) noalias %out, ptr
539
539
define amdgpu_kernel void @load_v3i8_to_v3f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
540
540
; SI-LABEL: load_v3i8_to_v3f32:
541
541
; SI: ; %bb.0:
542
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
542
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
543
543
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
544
544
; SI-NEXT: v_mov_b32_e32 v1, 0
545
545
; SI-NEXT: s_mov_b32 s6, 0
@@ -562,7 +562,7 @@ define amdgpu_kernel void @load_v3i8_to_v3f32(ptr addrspace(1) noalias %out, ptr
562
562
;
563
563
; VI-LABEL: load_v3i8_to_v3f32:
564
564
; VI: ; %bb.0:
565
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
565
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
566
566
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
567
567
; VI-NEXT: s_waitcnt lgkmcnt(0)
568
568
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -589,7 +589,7 @@ define amdgpu_kernel void @load_v3i8_to_v3f32(ptr addrspace(1) noalias %out, ptr
589
589
define amdgpu_kernel void @load_v4i8_to_v4f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
590
590
; SI-LABEL: load_v4i8_to_v4f32:
591
591
; SI: ; %bb.0:
592
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
592
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
593
593
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
594
594
; SI-NEXT: v_mov_b32_e32 v1, 0
595
595
; SI-NEXT: s_mov_b32 s6, 0
@@ -612,7 +612,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(ptr addrspace(1) noalias %out, ptr
612
612
;
613
613
; VI-LABEL: load_v4i8_to_v4f32:
614
614
; VI: ; %bb.0:
615
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
615
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
616
616
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
617
617
; VI-NEXT: s_waitcnt lgkmcnt(0)
618
618
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -644,7 +644,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(ptr addrspace(1) noalias %out, ptr
644
644
define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
645
645
; SI-LABEL: load_v4i8_to_v4f32_unaligned:
646
646
; SI: ; %bb.0:
647
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
647
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
648
648
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
649
649
; SI-NEXT: v_mov_b32_e32 v1, 0
650
650
; SI-NEXT: s_mov_b32 s6, 0
@@ -679,7 +679,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(ptr addrspace(1) noalias
679
679
;
680
680
; VI-LABEL: load_v4i8_to_v4f32_unaligned:
681
681
; VI: ; %bb.0:
682
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
682
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
683
683
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
684
684
; VI-NEXT: s_waitcnt lgkmcnt(0)
685
685
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -725,14 +725,14 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(ptr addrspace(1) noalias
725
725
define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %out2 , ptr addrspace (1 ) noalias %in ) nounwind {
726
726
; SI-LABEL: load_v4i8_to_v4f32_2_uses:
727
727
; SI: ; %bb.0:
728
- ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1 ], 0xd
728
+ ; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3 ], 0xd
729
729
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
730
730
; SI-NEXT: v_mov_b32_e32 v1, 0
731
731
; SI-NEXT: s_mov_b32 s6, 0
732
732
; SI-NEXT: s_mov_b32 s7, 0xf000
733
733
; SI-NEXT: s_waitcnt lgkmcnt(0)
734
734
; SI-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
735
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
735
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
736
736
; SI-NEXT: s_mov_b32 s6, -1
737
737
; SI-NEXT: s_waitcnt lgkmcnt(0)
738
738
; SI-NEXT: s_mov_b64 s[4:5], s[0:1]
@@ -769,17 +769,17 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %o
769
769
;
770
770
; VI-LABEL: load_v4i8_to_v4f32_2_uses:
771
771
; VI: ; %bb.0:
772
- ; VI-NEXT: s_load_dwordx2 s[2:3 ], s[0:1 ], 0x34
772
+ ; VI-NEXT: s_load_dwordx2 s[0:1 ], s[2:3 ], 0x34
773
773
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
774
774
; VI-NEXT: v_mov_b32_e32 v6, 9
775
775
; VI-NEXT: v_mov_b32_e32 v7, 8
776
776
; VI-NEXT: s_waitcnt lgkmcnt(0)
777
- ; VI-NEXT: v_mov_b32_e32 v0, s2
778
- ; VI-NEXT: v_mov_b32_e32 v1, s3
777
+ ; VI-NEXT: v_mov_b32_e32 v0, s0
778
+ ; VI-NEXT: v_mov_b32_e32 v1, s1
779
779
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
780
780
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
781
781
; VI-NEXT: flat_load_dword v1, v[0:1]
782
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
782
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
783
783
; VI-NEXT: v_mov_b32_e32 v2, 0xff
784
784
; VI-NEXT: s_waitcnt lgkmcnt(0)
785
785
; VI-NEXT: v_mov_b32_e32 v5, s1
@@ -821,7 +821,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %o
821
821
define amdgpu_kernel void @load_v7i8_to_v7f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
822
822
; SI-LABEL: load_v7i8_to_v7f32:
823
823
; SI: ; %bb.0:
824
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
824
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
825
825
; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
826
826
; SI-NEXT: v_mov_b32_e32 v1, 0
827
827
; SI-NEXT: s_mov_b32 s6, 0
@@ -858,7 +858,7 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(ptr addrspace(1) noalias %out, ptr
858
858
;
859
859
; VI-LABEL: load_v7i8_to_v7f32:
860
860
; VI: ; %bb.0:
861
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
861
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
862
862
; VI-NEXT: v_lshlrev_b32_e32 v2, 3, v0
863
863
; VI-NEXT: s_waitcnt lgkmcnt(0)
864
864
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -918,7 +918,7 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(ptr addrspace(1) noalias %out, ptr
918
918
define amdgpu_kernel void @load_v8i8_to_v8f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
919
919
; SI-LABEL: load_v8i8_to_v8f32:
920
920
; SI: ; %bb.0:
921
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
921
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
922
922
; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
923
923
; SI-NEXT: v_mov_b32_e32 v1, 0
924
924
; SI-NEXT: s_mov_b32 s6, 0
@@ -949,7 +949,7 @@ define amdgpu_kernel void @load_v8i8_to_v8f32(ptr addrspace(1) noalias %out, ptr
949
949
;
950
950
; VI-LABEL: load_v8i8_to_v8f32:
951
951
; VI: ; %bb.0:
952
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
952
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
953
953
; VI-NEXT: v_lshlrev_b32_e32 v2, 3, v0
954
954
; VI-NEXT: s_waitcnt lgkmcnt(0)
955
955
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -986,7 +986,7 @@ define amdgpu_kernel void @load_v8i8_to_v8f32(ptr addrspace(1) noalias %out, ptr
986
986
define amdgpu_kernel void @i8_zext_inreg_i32_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
987
987
; SI-LABEL: i8_zext_inreg_i32_to_f32:
988
988
; SI: ; %bb.0:
989
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
989
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
990
990
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
991
991
; SI-NEXT: v_mov_b32_e32 v1, 0
992
992
; SI-NEXT: s_mov_b32 s6, 0
@@ -1005,7 +1005,7 @@ define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(ptr addrspace(1) noalias %ou
1005
1005
;
1006
1006
; VI-LABEL: i8_zext_inreg_i32_to_f32:
1007
1007
; VI: ; %bb.0:
1008
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1008
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
1009
1009
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1010
1010
; VI-NEXT: s_waitcnt lgkmcnt(0)
1011
1011
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1033,7 +1033,7 @@ define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(ptr addrspace(1) noalias %ou
1033
1033
define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
1034
1034
; SI-LABEL: i8_zext_inreg_hi1_to_f32:
1035
1035
; SI: ; %bb.0:
1036
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1036
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
1037
1037
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1038
1038
; SI-NEXT: v_mov_b32_e32 v1, 0
1039
1039
; SI-NEXT: s_mov_b32 s6, 0
@@ -1051,7 +1051,7 @@ define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(ptr addrspace(1) noalias %ou
1051
1051
;
1052
1052
; VI-LABEL: i8_zext_inreg_hi1_to_f32:
1053
1053
; VI: ; %bb.0:
1054
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1054
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
1055
1055
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1056
1056
; VI-NEXT: s_waitcnt lgkmcnt(0)
1057
1057
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1080,7 +1080,7 @@ define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(ptr addrspace(1) noalias %ou
1080
1080
define amdgpu_kernel void @i8_zext_i32_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
1081
1081
; SI-LABEL: i8_zext_i32_to_f32:
1082
1082
; SI: ; %bb.0:
1083
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1083
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
1084
1084
; SI-NEXT: s_mov_b32 s6, 0
1085
1085
; SI-NEXT: s_mov_b32 s7, 0xf000
1086
1086
; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -1096,7 +1096,7 @@ define amdgpu_kernel void @i8_zext_i32_to_f32(ptr addrspace(1) noalias %out, ptr
1096
1096
;
1097
1097
; VI-LABEL: i8_zext_i32_to_f32:
1098
1098
; VI: ; %bb.0:
1099
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1099
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
1100
1100
; VI-NEXT: v_ashrrev_i32_e32 v3, 31, v0
1101
1101
; VI-NEXT: s_waitcnt lgkmcnt(0)
1102
1102
; VI-NEXT: v_mov_b32_e32 v1, s2
@@ -1122,7 +1122,7 @@ define amdgpu_kernel void @i8_zext_i32_to_f32(ptr addrspace(1) noalias %out, ptr
1122
1122
define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
1123
1123
; SI-LABEL: v4i8_zext_v4i32_to_v4f32:
1124
1124
; SI: ; %bb.0:
1125
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1125
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
1126
1126
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1127
1127
; SI-NEXT: v_mov_b32_e32 v1, 0
1128
1128
; SI-NEXT: s_mov_b32 s6, 0
@@ -1157,7 +1157,7 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(ptr addrspace(1) noalias %ou
1157
1157
;
1158
1158
; VI-LABEL: v4i8_zext_v4i32_to_v4f32:
1159
1159
; VI: ; %bb.0:
1160
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1160
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
1161
1161
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1162
1162
; VI-NEXT: s_waitcnt lgkmcnt(0)
1163
1163
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1204,7 +1204,7 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(ptr addrspace(1) noalias %ou
1204
1204
define amdgpu_kernel void @extract_byte0_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
1205
1205
; SI-LABEL: extract_byte0_to_f32:
1206
1206
; SI: ; %bb.0:
1207
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1207
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
1208
1208
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1209
1209
; SI-NEXT: v_mov_b32_e32 v1, 0
1210
1210
; SI-NEXT: s_mov_b32 s6, 0
@@ -1221,7 +1221,7 @@ define amdgpu_kernel void @extract_byte0_to_f32(ptr addrspace(1) noalias %out, p
1221
1221
;
1222
1222
; VI-LABEL: extract_byte0_to_f32:
1223
1223
; VI: ; %bb.0:
1224
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1224
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
1225
1225
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1226
1226
; VI-NEXT: s_waitcnt lgkmcnt(0)
1227
1227
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1247,7 +1247,7 @@ define amdgpu_kernel void @extract_byte0_to_f32(ptr addrspace(1) noalias %out, p
1247
1247
define amdgpu_kernel void @extract_byte1_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
1248
1248
; SI-LABEL: extract_byte1_to_f32:
1249
1249
; SI: ; %bb.0:
1250
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1250
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
1251
1251
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1252
1252
; SI-NEXT: v_mov_b32_e32 v1, 0
1253
1253
; SI-NEXT: s_mov_b32 s6, 0
@@ -1265,7 +1265,7 @@ define amdgpu_kernel void @extract_byte1_to_f32(ptr addrspace(1) noalias %out, p
1265
1265
;
1266
1266
; VI-LABEL: extract_byte1_to_f32:
1267
1267
; VI: ; %bb.0:
1268
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1268
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
1269
1269
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1270
1270
; VI-NEXT: s_waitcnt lgkmcnt(0)
1271
1271
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1292,7 +1292,7 @@ define amdgpu_kernel void @extract_byte1_to_f32(ptr addrspace(1) noalias %out, p
1292
1292
define amdgpu_kernel void @extract_byte2_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
1293
1293
; SI-LABEL: extract_byte2_to_f32:
1294
1294
; SI: ; %bb.0:
1295
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1295
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
1296
1296
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1297
1297
; SI-NEXT: v_mov_b32_e32 v1, 0
1298
1298
; SI-NEXT: s_mov_b32 s6, 0
@@ -1310,7 +1310,7 @@ define amdgpu_kernel void @extract_byte2_to_f32(ptr addrspace(1) noalias %out, p
1310
1310
;
1311
1311
; VI-LABEL: extract_byte2_to_f32:
1312
1312
; VI: ; %bb.0:
1313
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1313
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
1314
1314
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1315
1315
; VI-NEXT: s_waitcnt lgkmcnt(0)
1316
1316
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1337,7 +1337,7 @@ define amdgpu_kernel void @extract_byte2_to_f32(ptr addrspace(1) noalias %out, p
1337
1337
define amdgpu_kernel void @extract_byte3_to_f32 (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in ) nounwind {
1338
1338
; SI-LABEL: extract_byte3_to_f32:
1339
1339
; SI: ; %bb.0:
1340
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1340
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
1341
1341
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1342
1342
; SI-NEXT: v_mov_b32_e32 v1, 0
1343
1343
; SI-NEXT: s_mov_b32 s6, 0
@@ -1354,7 +1354,7 @@ define amdgpu_kernel void @extract_byte3_to_f32(ptr addrspace(1) noalias %out, p
1354
1354
;
1355
1355
; VI-LABEL: extract_byte3_to_f32:
1356
1356
; VI: ; %bb.0:
1357
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1357
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
1358
1358
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1359
1359
; VI-NEXT: s_waitcnt lgkmcnt(0)
1360
1360
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1381,7 +1381,7 @@ define amdgpu_kernel void @extract_byte3_to_f32(ptr addrspace(1) noalias %out, p
1381
1381
define amdgpu_kernel void @cvt_ubyte0_or_multiuse (ptr addrspace (1 ) %in , ptr addrspace (1 ) %out ) {
1382
1382
; SI-LABEL: cvt_ubyte0_or_multiuse:
1383
1383
; SI: ; %bb.0: ; %bb
1384
- ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
1384
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
1385
1385
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1386
1386
; SI-NEXT: v_mov_b32_e32 v1, 0
1387
1387
; SI-NEXT: s_mov_b32 s6, 0
@@ -1401,7 +1401,7 @@ define amdgpu_kernel void @cvt_ubyte0_or_multiuse(ptr addrspace(1) %in, ptr addr
1401
1401
;
1402
1402
; VI-LABEL: cvt_ubyte0_or_multiuse:
1403
1403
; VI: ; %bb.0: ; %bb
1404
- ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
1404
+ ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
1405
1405
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1406
1406
; VI-NEXT: s_waitcnt lgkmcnt(0)
1407
1407
; VI-NEXT: v_mov_b32_e32 v0, s0
0 commit comments