@@ -864,31 +864,91 @@ define <2 x bfloat> @test_cvt_scale_bf16_fp4_byte3(i32 %src, float %scale) {
864
864
}
865
865
866
866
define <32 x float > @test_cvt_scale_pk32_f32_fp6 (<6 x i32 > %src , float %scale ) {
867
- ; GCN-LABEL: test_cvt_scale_pk32_f32_fp6:
868
- ; GCN: ; %bb.0:
869
- ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
870
- ; GCN-NEXT: v_cvt_scalef32_pk32_f32_fp6 v[0:31], v[0:5], v6
871
- ; GCN-NEXT: s_setpc_b64 s[30:31]
867
+ ; GFX950-SDAG-LABEL: test_cvt_scale_pk32_f32_fp6:
868
+ ; GFX950-SDAG: ; %bb.0:
869
+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
870
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v38, v6
871
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v37, v5
872
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v36, v4
873
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v35, v3
874
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v34, v2
875
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v33, v1
876
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v32, v0
877
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f32_fp6 v[0:31], v[32:37], v38
878
+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
879
+ ;
880
+ ; GFX950-GISEL-LABEL: test_cvt_scale_pk32_f32_fp6:
881
+ ; GFX950-GISEL: ; %bb.0:
882
+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
883
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v32, v0
884
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v33, v1
885
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v34, v2
886
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v35, v3
887
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v36, v4
888
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v37, v5
889
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v38, v6
890
+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f32_fp6 v[0:31], v[32:37], v38
891
+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
872
892
%ret = tail call <32 x float > @llvm.amdgcn.cvt.scalef32.pk32.f32.fp6 (<6 x i32 > %src , float %scale )
873
893
ret <32 x float > %ret
874
894
}
875
895
876
896
define <32 x float > @test_cvt_scale_pk32_f32_bf6 (<6 x i32 > %src , float %scale ) {
877
- ; GCN-LABEL: test_cvt_scale_pk32_f32_bf6:
878
- ; GCN: ; %bb.0:
879
- ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
880
- ; GCN-NEXT: v_cvt_scalef32_pk32_f32_bf6 v[0:31], v[0:5], v6
881
- ; GCN-NEXT: s_setpc_b64 s[30:31]
897
+ ; GFX950-SDAG-LABEL: test_cvt_scale_pk32_f32_bf6:
898
+ ; GFX950-SDAG: ; %bb.0:
899
+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
900
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v38, v6
901
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v37, v5
902
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v36, v4
903
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v35, v3
904
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v34, v2
905
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v33, v1
906
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v32, v0
907
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f32_bf6 v[0:31], v[32:37], v38
908
+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
909
+ ;
910
+ ; GFX950-GISEL-LABEL: test_cvt_scale_pk32_f32_bf6:
911
+ ; GFX950-GISEL: ; %bb.0:
912
+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v32, v0
914
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v33, v1
915
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v34, v2
916
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v35, v3
917
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v36, v4
918
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v37, v5
919
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v38, v6
920
+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f32_bf6 v[0:31], v[32:37], v38
921
+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
882
922
%ret = tail call <32 x float > @llvm.amdgcn.cvt.scalef32.pk32.f32.bf6 (<6 x i32 > %src , float %scale )
883
923
ret <32 x float > %ret
884
924
}
885
925
886
926
define <32 x half > @test_cvt_scalef32_pk32_f16_fp6_vv (<6 x i32 > %src , float %scale ) {
887
- ; GCN-LABEL: test_cvt_scalef32_pk32_f16_fp6_vv:
888
- ; GCN: ; %bb.0:
889
- ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
890
- ; GCN-NEXT: v_cvt_scalef32_pk32_f16_fp6 v[0:15], v[0:5], v6
891
- ; GCN-NEXT: s_setpc_b64 s[30:31]
927
+ ; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_f16_fp6_vv:
928
+ ; GFX950-SDAG: ; %bb.0:
929
+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
930
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v22, v6
931
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21, v5
932
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20, v4
933
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19, v3
934
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18, v2
935
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17, v1
936
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, v0
937
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f16_fp6 v[0:15], v[16:21], v22
938
+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
939
+ ;
940
+ ; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_f16_fp6_vv:
941
+ ; GFX950-GISEL: ; %bb.0:
942
+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
943
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v16, v0
944
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v17, v1
945
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v18, v2
946
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v19, v3
947
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v20, v4
948
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v21, v5
949
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v22, v6
950
+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f16_fp6 v[0:15], v[16:21], v22
951
+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
892
952
%ret = tail call <32 x half > @llvm.amdgcn.cvt.scalef32.pk32.f16.fp6 (<6 x i32 > %src , float %scale )
893
953
ret <32 x half > %ret
894
954
}
@@ -897,24 +957,24 @@ define <32 x half> @test_cvt_scalef32_pk32_f16_fp6_sl(<6 x i32> inreg %src) {
897
957
; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_f16_fp6_sl:
898
958
; GFX950-SDAG: ; %bb.0:
899
959
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
900
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v0 , s0
901
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v1 , s1
902
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v2 , s2
903
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v3 , s3
904
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v4 , s4
905
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v5 , s5
960
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16 , s0
961
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17 , s1
962
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18 , s2
963
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19 , s3
964
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20 , s4
965
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21 , s5
906
966
; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x42c80000
907
- ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f16_fp6 v[0:15], v[0:5 ], s0
967
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f16_fp6 v[0:15], v[16:21 ], s0
908
968
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
909
969
;
910
970
; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_f16_fp6_sl:
911
971
; GFX950-GISEL: ; %bb.0:
912
972
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913
- ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[0:1 ], s[0:1 ]
914
- ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3 ], s[2:3]
915
- ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[4:5 ], s[4:5 ]
916
- ; GFX950-GISEL-NEXT: v_mov_b32_e32 v6 , 0x42c80000
917
- ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f16_fp6 v[0:15], v[0:5 ], v6
973
+ ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[20:21 ], s[4:5 ]
974
+ ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[18:19 ], s[2:3]
975
+ ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[16:17 ], s[0:1 ]
976
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v22 , 0x42c80000
977
+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f16_fp6 v[0:15], v[16:21 ], v22
918
978
; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
919
979
%ret = tail call <32 x half > @llvm.amdgcn.cvt.scalef32.pk32.f16.fp6 (<6 x i32 > %src , float 100 .0 )
920
980
ret <32 x half > %ret
@@ -924,7 +984,14 @@ define <32 x bfloat> @test_cvt_scalef32_pk32_bf16_fp6_vv(<6 x i32> %src, float %
924
984
; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_bf16_fp6_vv:
925
985
; GFX950-SDAG: ; %bb.0:
926
986
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
927
- ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_fp6 v[0:15], v[0:5], v6
987
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v22, v6
988
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21, v5
989
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20, v4
990
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19, v3
991
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18, v2
992
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17, v1
993
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, v0
994
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_fp6 v[0:15], v[16:21], v22
928
995
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
929
996
;
930
997
; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_bf16_fp6_vv:
@@ -956,14 +1023,14 @@ define <32 x bfloat> @test_cvt_scalef32_pk32_bf16_fp6_sl(<6 x i32> inreg %src) {
956
1023
; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_bf16_fp6_sl:
957
1024
; GFX950-SDAG: ; %bb.0:
958
1025
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
959
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v0 , s0
960
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v1 , s1
961
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v2 , s2
962
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v3 , s3
963
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v4 , s4
964
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v5 , s5
1026
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16 , s0
1027
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17 , s1
1028
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18 , s2
1029
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19 , s3
1030
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20 , s4
1031
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21 , s5
965
1032
; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x42c80000
966
- ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_fp6 v[0:15], v[0:5 ], s0
1033
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_fp6 v[0:15], v[16:21 ], s0
967
1034
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
968
1035
;
969
1036
; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_bf16_fp6_sl:
@@ -996,11 +1063,31 @@ define <32 x bfloat> @test_cvt_scalef32_pk32_bf16_fp6_sl(<6 x i32> inreg %src) {
996
1063
}
997
1064
998
1065
define <32 x half > @test_cvt_scalef32_pk32_f16_bf6_vv (<6 x i32 > %src , float %scale ) {
999
- ; GCN-LABEL: test_cvt_scalef32_pk32_f16_bf6_vv:
1000
- ; GCN: ; %bb.0:
1001
- ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1002
- ; GCN-NEXT: v_cvt_scalef32_pk32_f16_bf6 v[0:15], v[0:5], v6
1003
- ; GCN-NEXT: s_setpc_b64 s[30:31]
1066
+ ; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_f16_bf6_vv:
1067
+ ; GFX950-SDAG: ; %bb.0:
1068
+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1069
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v22, v6
1070
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21, v5
1071
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20, v4
1072
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19, v3
1073
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18, v2
1074
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17, v1
1075
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, v0
1076
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f16_bf6 v[0:15], v[16:21], v22
1077
+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
1078
+ ;
1079
+ ; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_f16_bf6_vv:
1080
+ ; GFX950-GISEL: ; %bb.0:
1081
+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1082
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v16, v0
1083
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v17, v1
1084
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v18, v2
1085
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v19, v3
1086
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v20, v4
1087
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v21, v5
1088
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v22, v6
1089
+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f16_bf6 v[0:15], v[16:21], v22
1090
+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
1004
1091
%ret = tail call <32 x half > @llvm.amdgcn.cvt.scalef32.pk32.f16.bf6 (<6 x i32 > %src , float %scale )
1005
1092
ret <32 x half > %ret
1006
1093
}
@@ -1009,24 +1096,24 @@ define <32 x half> @test_cvt_scalef32_pk32_f16_bf6_sl(<6 x i32> inreg %src) {
1009
1096
; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_f16_bf6_sl:
1010
1097
; GFX950-SDAG: ; %bb.0:
1011
1098
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1012
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v0 , s0
1013
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v1 , s1
1014
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v2 , s2
1015
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v3 , s3
1016
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v4 , s4
1017
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v5 , s5
1099
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16 , s0
1100
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17 , s1
1101
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18 , s2
1102
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19 , s3
1103
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20 , s4
1104
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21 , s5
1018
1105
; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x42c80000
1019
- ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f16_bf6 v[0:15], v[0:5 ], s0
1106
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f16_bf6 v[0:15], v[16:21 ], s0
1020
1107
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
1021
1108
;
1022
1109
; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_f16_bf6_sl:
1023
1110
; GFX950-GISEL: ; %bb.0:
1024
1111
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1025
- ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[0:1 ], s[0:1 ]
1026
- ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3 ], s[2:3]
1027
- ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[4:5 ], s[4:5 ]
1028
- ; GFX950-GISEL-NEXT: v_mov_b32_e32 v6 , 0x42c80000
1029
- ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f16_bf6 v[0:15], v[0:5 ], v6
1112
+ ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[20:21 ], s[4:5 ]
1113
+ ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[18:19 ], s[2:3]
1114
+ ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[16:17 ], s[0:1 ]
1115
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v22 , 0x42c80000
1116
+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f16_bf6 v[0:15], v[16:21 ], v22
1030
1117
; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
1031
1118
%ret = tail call <32 x half > @llvm.amdgcn.cvt.scalef32.pk32.f16.bf6 (<6 x i32 > %src , float 100 .0 )
1032
1119
ret <32 x half > %ret
@@ -1036,7 +1123,14 @@ define <32 x bfloat> @test_cvt_scalef32_pk32_bf16_bf6_vv(<6 x i32> %src, float %
1036
1123
; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_bf16_bf6_vv:
1037
1124
; GFX950-SDAG: ; %bb.0:
1038
1125
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1039
- ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_bf6 v[0:15], v[0:5], v6
1126
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v22, v6
1127
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21, v5
1128
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20, v4
1129
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19, v3
1130
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18, v2
1131
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17, v1
1132
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, v0
1133
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_bf6 v[0:15], v[16:21], v22
1040
1134
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
1041
1135
;
1042
1136
; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_bf16_bf6_vv:
@@ -1068,14 +1162,14 @@ define <32 x bfloat> @test_cvt_scalef32_pk32_bf16_bf6_sl(<6 x i32> inreg %src) {
1068
1162
; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_bf16_bf6_sl:
1069
1163
; GFX950-SDAG: ; %bb.0:
1070
1164
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1071
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v0 , s0
1072
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v1 , s1
1073
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v2 , s2
1074
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v3 , s3
1075
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v4 , s4
1076
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v5 , s5
1165
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16 , s0
1166
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17 , s1
1167
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18 , s2
1168
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19 , s3
1169
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20 , s4
1170
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21 , s5
1077
1171
; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x42c80000
1078
- ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_bf6 v[0:15], v[0:5 ], s0
1172
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_bf6 v[0:15], v[16:21 ], s0
1079
1173
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
1080
1174
;
1081
1175
; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_bf16_bf6_sl:
0 commit comments