Skip to content

Commit a8394e2

Browse files
committed
Extended merge-s-load.mir to test *_ec sload optimizations.
1 parent c8d096b commit a8394e2

File tree

112 files changed

+19657
-18910
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

112 files changed

+19657
-18910
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,7 @@ define double @v_uitofp_i8_to_f64(i8 %arg0) nounwind {
452452
define amdgpu_kernel void @load_i8_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
453453
; SI-LABEL: load_i8_to_f32:
454454
; SI: ; %bb.0:
455-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
455+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
456456
; SI-NEXT: s_mov_b32 s6, 0
457457
; SI-NEXT: s_mov_b32 s7, 0xf000
458458
; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -468,7 +468,7 @@ define amdgpu_kernel void @load_i8_to_f32(ptr addrspace(1) noalias %out, ptr add
468468
;
469469
; VI-LABEL: load_i8_to_f32:
470470
; VI: ; %bb.0:
471-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
471+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
472472
; VI-NEXT: v_ashrrev_i32_e32 v3, 31, v0
473473
; VI-NEXT: s_waitcnt lgkmcnt(0)
474474
; VI-NEXT: v_mov_b32_e32 v1, s2
@@ -493,7 +493,7 @@ define amdgpu_kernel void @load_i8_to_f32(ptr addrspace(1) noalias %out, ptr add
493493
define amdgpu_kernel void @load_v2i8_to_v2f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
494494
; SI-LABEL: load_v2i8_to_v2f32:
495495
; SI: ; %bb.0:
496-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
496+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
497497
; SI-NEXT: v_lshlrev_b32_e32 v0, 1, v0
498498
; SI-NEXT: v_mov_b32_e32 v1, 0
499499
; SI-NEXT: s_mov_b32 s6, 0
@@ -513,7 +513,7 @@ define amdgpu_kernel void @load_v2i8_to_v2f32(ptr addrspace(1) noalias %out, ptr
513513
;
514514
; VI-LABEL: load_v2i8_to_v2f32:
515515
; VI: ; %bb.0:
516-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
516+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
517517
; VI-NEXT: v_lshlrev_b32_e32 v2, 1, v0
518518
; VI-NEXT: s_waitcnt lgkmcnt(0)
519519
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -539,7 +539,7 @@ define amdgpu_kernel void @load_v2i8_to_v2f32(ptr addrspace(1) noalias %out, ptr
539539
define amdgpu_kernel void @load_v3i8_to_v3f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
540540
; SI-LABEL: load_v3i8_to_v3f32:
541541
; SI: ; %bb.0:
542-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
542+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
543543
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
544544
; SI-NEXT: v_mov_b32_e32 v1, 0
545545
; SI-NEXT: s_mov_b32 s6, 0
@@ -562,7 +562,7 @@ define amdgpu_kernel void @load_v3i8_to_v3f32(ptr addrspace(1) noalias %out, ptr
562562
;
563563
; VI-LABEL: load_v3i8_to_v3f32:
564564
; VI: ; %bb.0:
565-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
565+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
566566
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
567567
; VI-NEXT: s_waitcnt lgkmcnt(0)
568568
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -589,7 +589,7 @@ define amdgpu_kernel void @load_v3i8_to_v3f32(ptr addrspace(1) noalias %out, ptr
589589
define amdgpu_kernel void @load_v4i8_to_v4f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
590590
; SI-LABEL: load_v4i8_to_v4f32:
591591
; SI: ; %bb.0:
592-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
592+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
593593
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
594594
; SI-NEXT: v_mov_b32_e32 v1, 0
595595
; SI-NEXT: s_mov_b32 s6, 0
@@ -612,7 +612,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(ptr addrspace(1) noalias %out, ptr
612612
;
613613
; VI-LABEL: load_v4i8_to_v4f32:
614614
; VI: ; %bb.0:
615-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
615+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
616616
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
617617
; VI-NEXT: s_waitcnt lgkmcnt(0)
618618
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -644,7 +644,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(ptr addrspace(1) noalias %out, ptr
644644
define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
645645
; SI-LABEL: load_v4i8_to_v4f32_unaligned:
646646
; SI: ; %bb.0:
647-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
647+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
648648
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
649649
; SI-NEXT: v_mov_b32_e32 v1, 0
650650
; SI-NEXT: s_mov_b32 s6, 0
@@ -679,7 +679,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(ptr addrspace(1) noalias
679679
;
680680
; VI-LABEL: load_v4i8_to_v4f32_unaligned:
681681
; VI: ; %bb.0:
682-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
682+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
683683
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
684684
; VI-NEXT: s_waitcnt lgkmcnt(0)
685685
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -725,14 +725,14 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(ptr addrspace(1) noalias
725725
define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %out2, ptr addrspace(1) noalias %in) nounwind {
726726
; SI-LABEL: load_v4i8_to_v4f32_2_uses:
727727
; SI: ; %bb.0:
728-
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
728+
; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xd
729729
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
730730
; SI-NEXT: v_mov_b32_e32 v1, 0
731731
; SI-NEXT: s_mov_b32 s6, 0
732732
; SI-NEXT: s_mov_b32 s7, 0xf000
733733
; SI-NEXT: s_waitcnt lgkmcnt(0)
734734
; SI-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
735-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
735+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
736736
; SI-NEXT: s_mov_b32 s6, -1
737737
; SI-NEXT: s_waitcnt lgkmcnt(0)
738738
; SI-NEXT: s_mov_b64 s[4:5], s[0:1]
@@ -769,17 +769,17 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %o
769769
;
770770
; VI-LABEL: load_v4i8_to_v4f32_2_uses:
771771
; VI: ; %bb.0:
772-
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
772+
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
773773
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
774774
; VI-NEXT: v_mov_b32_e32 v6, 9
775775
; VI-NEXT: v_mov_b32_e32 v7, 8
776776
; VI-NEXT: s_waitcnt lgkmcnt(0)
777-
; VI-NEXT: v_mov_b32_e32 v0, s2
778-
; VI-NEXT: v_mov_b32_e32 v1, s3
777+
; VI-NEXT: v_mov_b32_e32 v0, s0
778+
; VI-NEXT: v_mov_b32_e32 v1, s1
779779
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
780780
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
781781
; VI-NEXT: flat_load_dword v1, v[0:1]
782-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
782+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
783783
; VI-NEXT: v_mov_b32_e32 v2, 0xff
784784
; VI-NEXT: s_waitcnt lgkmcnt(0)
785785
; VI-NEXT: v_mov_b32_e32 v5, s1
@@ -821,7 +821,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %o
821821
define amdgpu_kernel void @load_v7i8_to_v7f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
822822
; SI-LABEL: load_v7i8_to_v7f32:
823823
; SI: ; %bb.0:
824-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
824+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
825825
; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
826826
; SI-NEXT: v_mov_b32_e32 v1, 0
827827
; SI-NEXT: s_mov_b32 s6, 0
@@ -858,7 +858,7 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(ptr addrspace(1) noalias %out, ptr
858858
;
859859
; VI-LABEL: load_v7i8_to_v7f32:
860860
; VI: ; %bb.0:
861-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
861+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
862862
; VI-NEXT: v_lshlrev_b32_e32 v2, 3, v0
863863
; VI-NEXT: s_waitcnt lgkmcnt(0)
864864
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -918,7 +918,7 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(ptr addrspace(1) noalias %out, ptr
918918
define amdgpu_kernel void @load_v8i8_to_v8f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
919919
; SI-LABEL: load_v8i8_to_v8f32:
920920
; SI: ; %bb.0:
921-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
921+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
922922
; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
923923
; SI-NEXT: v_mov_b32_e32 v1, 0
924924
; SI-NEXT: s_mov_b32 s6, 0
@@ -949,7 +949,7 @@ define amdgpu_kernel void @load_v8i8_to_v8f32(ptr addrspace(1) noalias %out, ptr
949949
;
950950
; VI-LABEL: load_v8i8_to_v8f32:
951951
; VI: ; %bb.0:
952-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
952+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
953953
; VI-NEXT: v_lshlrev_b32_e32 v2, 3, v0
954954
; VI-NEXT: s_waitcnt lgkmcnt(0)
955955
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -986,7 +986,7 @@ define amdgpu_kernel void @load_v8i8_to_v8f32(ptr addrspace(1) noalias %out, ptr
986986
define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
987987
; SI-LABEL: i8_zext_inreg_i32_to_f32:
988988
; SI: ; %bb.0:
989-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
989+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
990990
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
991991
; SI-NEXT: v_mov_b32_e32 v1, 0
992992
; SI-NEXT: s_mov_b32 s6, 0
@@ -1005,7 +1005,7 @@ define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(ptr addrspace(1) noalias %ou
10051005
;
10061006
; VI-LABEL: i8_zext_inreg_i32_to_f32:
10071007
; VI: ; %bb.0:
1008-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1008+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
10091009
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
10101010
; VI-NEXT: s_waitcnt lgkmcnt(0)
10111011
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1033,7 +1033,7 @@ define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(ptr addrspace(1) noalias %ou
10331033
define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
10341034
; SI-LABEL: i8_zext_inreg_hi1_to_f32:
10351035
; SI: ; %bb.0:
1036-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
1036+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
10371037
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
10381038
; SI-NEXT: v_mov_b32_e32 v1, 0
10391039
; SI-NEXT: s_mov_b32 s6, 0
@@ -1051,7 +1051,7 @@ define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(ptr addrspace(1) noalias %ou
10511051
;
10521052
; VI-LABEL: i8_zext_inreg_hi1_to_f32:
10531053
; VI: ; %bb.0:
1054-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1054+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
10551055
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
10561056
; VI-NEXT: s_waitcnt lgkmcnt(0)
10571057
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1080,7 +1080,7 @@ define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(ptr addrspace(1) noalias %ou
10801080
define amdgpu_kernel void @i8_zext_i32_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
10811081
; SI-LABEL: i8_zext_i32_to_f32:
10821082
; SI: ; %bb.0:
1083-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
1083+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
10841084
; SI-NEXT: s_mov_b32 s6, 0
10851085
; SI-NEXT: s_mov_b32 s7, 0xf000
10861086
; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -1096,7 +1096,7 @@ define amdgpu_kernel void @i8_zext_i32_to_f32(ptr addrspace(1) noalias %out, ptr
10961096
;
10971097
; VI-LABEL: i8_zext_i32_to_f32:
10981098
; VI: ; %bb.0:
1099-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1099+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
11001100
; VI-NEXT: v_ashrrev_i32_e32 v3, 31, v0
11011101
; VI-NEXT: s_waitcnt lgkmcnt(0)
11021102
; VI-NEXT: v_mov_b32_e32 v1, s2
@@ -1122,7 +1122,7 @@ define amdgpu_kernel void @i8_zext_i32_to_f32(ptr addrspace(1) noalias %out, ptr
11221122
define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
11231123
; SI-LABEL: v4i8_zext_v4i32_to_v4f32:
11241124
; SI: ; %bb.0:
1125-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
1125+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
11261126
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
11271127
; SI-NEXT: v_mov_b32_e32 v1, 0
11281128
; SI-NEXT: s_mov_b32 s6, 0
@@ -1157,7 +1157,7 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(ptr addrspace(1) noalias %ou
11571157
;
11581158
; VI-LABEL: v4i8_zext_v4i32_to_v4f32:
11591159
; VI: ; %bb.0:
1160-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1160+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
11611161
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
11621162
; VI-NEXT: s_waitcnt lgkmcnt(0)
11631163
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1204,7 +1204,7 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(ptr addrspace(1) noalias %ou
12041204
define amdgpu_kernel void @extract_byte0_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
12051205
; SI-LABEL: extract_byte0_to_f32:
12061206
; SI: ; %bb.0:
1207-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
1207+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
12081208
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
12091209
; SI-NEXT: v_mov_b32_e32 v1, 0
12101210
; SI-NEXT: s_mov_b32 s6, 0
@@ -1221,7 +1221,7 @@ define amdgpu_kernel void @extract_byte0_to_f32(ptr addrspace(1) noalias %out, p
12211221
;
12221222
; VI-LABEL: extract_byte0_to_f32:
12231223
; VI: ; %bb.0:
1224-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1224+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
12251225
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
12261226
; VI-NEXT: s_waitcnt lgkmcnt(0)
12271227
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1247,7 +1247,7 @@ define amdgpu_kernel void @extract_byte0_to_f32(ptr addrspace(1) noalias %out, p
12471247
define amdgpu_kernel void @extract_byte1_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
12481248
; SI-LABEL: extract_byte1_to_f32:
12491249
; SI: ; %bb.0:
1250-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
1250+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
12511251
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
12521252
; SI-NEXT: v_mov_b32_e32 v1, 0
12531253
; SI-NEXT: s_mov_b32 s6, 0
@@ -1265,7 +1265,7 @@ define amdgpu_kernel void @extract_byte1_to_f32(ptr addrspace(1) noalias %out, p
12651265
;
12661266
; VI-LABEL: extract_byte1_to_f32:
12671267
; VI: ; %bb.0:
1268-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1268+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
12691269
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
12701270
; VI-NEXT: s_waitcnt lgkmcnt(0)
12711271
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1292,7 +1292,7 @@ define amdgpu_kernel void @extract_byte1_to_f32(ptr addrspace(1) noalias %out, p
12921292
define amdgpu_kernel void @extract_byte2_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
12931293
; SI-LABEL: extract_byte2_to_f32:
12941294
; SI: ; %bb.0:
1295-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
1295+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
12961296
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
12971297
; SI-NEXT: v_mov_b32_e32 v1, 0
12981298
; SI-NEXT: s_mov_b32 s6, 0
@@ -1310,7 +1310,7 @@ define amdgpu_kernel void @extract_byte2_to_f32(ptr addrspace(1) noalias %out, p
13101310
;
13111311
; VI-LABEL: extract_byte2_to_f32:
13121312
; VI: ; %bb.0:
1313-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1313+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
13141314
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
13151315
; VI-NEXT: s_waitcnt lgkmcnt(0)
13161316
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1337,7 +1337,7 @@ define amdgpu_kernel void @extract_byte2_to_f32(ptr addrspace(1) noalias %out, p
13371337
define amdgpu_kernel void @extract_byte3_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
13381338
; SI-LABEL: extract_byte3_to_f32:
13391339
; SI: ; %bb.0:
1340-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
1340+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
13411341
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
13421342
; SI-NEXT: v_mov_b32_e32 v1, 0
13431343
; SI-NEXT: s_mov_b32 s6, 0
@@ -1354,7 +1354,7 @@ define amdgpu_kernel void @extract_byte3_to_f32(ptr addrspace(1) noalias %out, p
13541354
;
13551355
; VI-LABEL: extract_byte3_to_f32:
13561356
; VI: ; %bb.0:
1357-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1357+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
13581358
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
13591359
; VI-NEXT: s_waitcnt lgkmcnt(0)
13601360
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1381,7 +1381,7 @@ define amdgpu_kernel void @extract_byte3_to_f32(ptr addrspace(1) noalias %out, p
13811381
define amdgpu_kernel void @cvt_ubyte0_or_multiuse(ptr addrspace(1) %in, ptr addrspace(1) %out) {
13821382
; SI-LABEL: cvt_ubyte0_or_multiuse:
13831383
; SI: ; %bb.0: ; %bb
1384-
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
1384+
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
13851385
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
13861386
; SI-NEXT: v_mov_b32_e32 v1, 0
13871387
; SI-NEXT: s_mov_b32 s6, 0
@@ -1401,7 +1401,7 @@ define amdgpu_kernel void @cvt_ubyte0_or_multiuse(ptr addrspace(1) %in, ptr addr
14011401
;
14021402
; VI-LABEL: cvt_ubyte0_or_multiuse:
14031403
; VI: ; %bb.0: ; %bb
1404-
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
1404+
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
14051405
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
14061406
; VI-NEXT: s_waitcnt lgkmcnt(0)
14071407
; VI-NEXT: v_mov_b32_e32 v0, s0

0 commit comments

Comments
 (0)