@@ -681,61 +681,72 @@ body: |
681
681
; GFX6-LABEL: name: load_global_s128
682
682
; GFX6: liveins: $vgpr0_vgpr1
683
683
; GFX6-NEXT: {{ $}}
684
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
685
- ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
686
- ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
684
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
685
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
686
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
687
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
688
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
689
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
690
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
691
+ ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
687
692
;
688
693
; GFX7-LABEL: name: load_global_s128
689
694
; GFX7: liveins: $vgpr0_vgpr1
690
695
; GFX7-NEXT: {{ $}}
691
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
692
- ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
693
- ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
696
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
697
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
698
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
699
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
700
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
701
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
702
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
703
+ ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
694
704
;
695
705
; GFX7-FLAT-LABEL: name: load_global_s128
696
706
; GFX7-FLAT: liveins: $vgpr0_vgpr1
697
707
; GFX7-FLAT-NEXT: {{ $}}
698
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
699
- ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
700
- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
708
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
709
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
710
+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
701
711
;
702
712
; GFX8-LABEL: name: load_global_s128
703
713
; GFX8: liveins: $vgpr0_vgpr1
704
714
; GFX8-NEXT: {{ $}}
705
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
706
- ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
707
- ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
715
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
716
+ ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
717
+ ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
708
718
;
709
719
; GFX9-LABEL: name: load_global_s128
710
720
; GFX9: liveins: $vgpr0_vgpr1
711
721
; GFX9-NEXT: {{ $}}
712
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
713
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
714
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
722
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
723
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
724
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
715
725
;
716
726
; GFX10-LABEL: name: load_global_s128
717
727
; GFX10: liveins: $vgpr0_vgpr1
718
728
; GFX10-NEXT: {{ $}}
719
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
720
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
721
- ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
729
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
730
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
731
+ ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
722
732
;
723
733
; GFX11-LABEL: name: load_global_s128
724
734
; GFX11: liveins: $vgpr0_vgpr1
725
735
; GFX11-NEXT: {{ $}}
726
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
727
- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
728
- ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
736
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
737
+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
738
+ ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
729
739
;
730
740
; GFX12-LABEL: name: load_global_s128
731
741
; GFX12: liveins: $vgpr0_vgpr1
732
742
; GFX12-NEXT: {{ $}}
733
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
734
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
735
- ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
743
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
744
+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
745
+ ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
736
746
%0:vgpr(p1) = COPY $vgpr0_vgpr1
737
- %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 1)
738
- $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
747
+ %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1)
748
+ %2:vgpr(s128) = G_BITCAST %1(<4 x s32>)
749
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
739
750
740
751
...
741
752
@@ -989,61 +1000,72 @@ body: |
989
1000
; GFX6-LABEL: name: load_global_v2p3
990
1001
; GFX6: liveins: $vgpr0_vgpr1
991
1002
; GFX6-NEXT: {{ $}}
992
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
993
- ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
994
- ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1003
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1004
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1005
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1006
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
1007
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1008
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
1009
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
1010
+ ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
995
1011
;
996
1012
; GFX7-LABEL: name: load_global_v2p3
997
1013
; GFX7: liveins: $vgpr0_vgpr1
998
1014
; GFX7-NEXT: {{ $}}
999
- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1000
- ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
1001
- ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1015
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1016
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1017
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1018
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
1019
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1020
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
1021
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
1022
+ ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
1002
1023
;
1003
1024
; GFX7-FLAT-LABEL: name: load_global_v2p3
1004
1025
; GFX7-FLAT: liveins: $vgpr0_vgpr1
1005
1026
; GFX7-FLAT-NEXT: {{ $}}
1006
- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1007
- ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
1008
- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1027
+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1028
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1)
1029
+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
1009
1030
;
1010
1031
; GFX8-LABEL: name: load_global_v2p3
1011
1032
; GFX8: liveins: $vgpr0_vgpr1
1012
1033
; GFX8-NEXT: {{ $}}
1013
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1014
- ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
1015
- ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1034
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1035
+ ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1)
1036
+ ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
1016
1037
;
1017
1038
; GFX9-LABEL: name: load_global_v2p3
1018
1039
; GFX9: liveins: $vgpr0_vgpr1
1019
1040
; GFX9-NEXT: {{ $}}
1020
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1021
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
1022
- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1041
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1042
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
1043
+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
1023
1044
;
1024
1045
; GFX10-LABEL: name: load_global_v2p3
1025
1046
; GFX10: liveins: $vgpr0_vgpr1
1026
1047
; GFX10-NEXT: {{ $}}
1027
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1028
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
1029
- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1048
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1049
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
1050
+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
1030
1051
;
1031
1052
; GFX11-LABEL: name: load_global_v2p3
1032
1053
; GFX11: liveins: $vgpr0_vgpr1
1033
1054
; GFX11-NEXT: {{ $}}
1034
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1035
- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
1036
- ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1055
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1056
+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
1057
+ ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
1037
1058
;
1038
1059
; GFX12-LABEL: name: load_global_v2p3
1039
1060
; GFX12: liveins: $vgpr0_vgpr1
1040
1061
; GFX12-NEXT: {{ $}}
1041
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1042
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
1043
- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1062
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1063
+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
1064
+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
1044
1065
%0:vgpr(p1) = COPY $vgpr0_vgpr1
1045
- %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 1)
1046
- $vgpr0_vgpr1 = COPY %1
1066
+ %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1)
1067
+ %2:vgpr(<2 x p3>) = G_BITCAST %1(<2 x s32>)
1068
+ $vgpr0_vgpr1 = COPY %2
1047
1069
1048
1070
...
1049
1071
@@ -1231,7 +1253,7 @@ body: |
1231
1253
; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
1232
1254
; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1233
1255
; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
1234
- ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1)
1256
+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
1235
1257
; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
1236
1258
;
1237
1259
; GFX7-LABEL: name: load_global_v8s16
@@ -1243,52 +1265,53 @@ body: |
1243
1265
; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
1244
1266
; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1245
1267
; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
1246
- ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1)
1268
+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
1247
1269
; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
1248
1270
;
1249
1271
; GFX7-FLAT-LABEL: name: load_global_v8s16
1250
1272
; GFX7-FLAT: liveins: $vgpr0_vgpr1
1251
1273
; GFX7-FLAT-NEXT: {{ $}}
1252
1274
; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1253
- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1)
1275
+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
1254
1276
; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
1255
1277
;
1256
1278
; GFX8-LABEL: name: load_global_v8s16
1257
1279
; GFX8: liveins: $vgpr0_vgpr1
1258
1280
; GFX8-NEXT: {{ $}}
1259
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1260
- ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
1261
- ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
1281
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1282
+ ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
1283
+ ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
1262
1284
;
1263
1285
; GFX9-LABEL: name: load_global_v8s16
1264
1286
; GFX9: liveins: $vgpr0_vgpr1
1265
1287
; GFX9-NEXT: {{ $}}
1266
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1267
- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
1268
- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
1288
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1289
+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
1290
+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
1269
1291
;
1270
1292
; GFX10-LABEL: name: load_global_v8s16
1271
1293
; GFX10: liveins: $vgpr0_vgpr1
1272
1294
; GFX10-NEXT: {{ $}}
1273
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1274
- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
1275
- ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
1295
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1296
+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
1297
+ ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
1276
1298
;
1277
1299
; GFX11-LABEL: name: load_global_v8s16
1278
1300
; GFX11: liveins: $vgpr0_vgpr1
1279
1301
; GFX11-NEXT: {{ $}}
1280
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1281
- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
1282
- ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
1302
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1303
+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
1304
+ ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
1283
1305
;
1284
1306
; GFX12-LABEL: name: load_global_v8s16
1285
1307
; GFX12: liveins: $vgpr0_vgpr1
1286
1308
; GFX12-NEXT: {{ $}}
1287
- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1288
- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
1289
- ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
1309
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1310
+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
1311
+ ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
1290
1312
%0:vgpr(p1) = COPY $vgpr0_vgpr1
1291
- %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 1)
1292
- $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
1313
+ %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1)
1314
+ %2:vgpr(<8 x s16>) = G_BITCAST %1(<4 x s32>)
1315
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
1293
1316
1294
1317
...
0 commit comments