@@ -864,31 +864,91 @@ define <2 x bfloat> @test_cvt_scale_bf16_fp4_byte3(i32 %src, float %scale) {
864
864
}
865
865
866
866
define <32 x float > @test_cvt_scale_pk32_f32_fp6 (<6 x i32 > %src , float %scale ) {
867
- ; GCN-LABEL: test_cvt_scale_pk32_f32_fp6:
868
- ; GCN: ; %bb.0:
869
- ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
870
- ; GCN-NEXT: v_cvt_scalef32_pk32_f32_fp6 v[0:31], v[0:5], v6
871
- ; GCN-NEXT: s_setpc_b64 s[30:31]
867
+ ; GFX950-SDAG-LABEL: test_cvt_scale_pk32_f32_fp6:
868
+ ; GFX950-SDAG: ; %bb.0:
869
+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
870
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v38, v6
871
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v37, v5
872
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v36, v4
873
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v35, v3
874
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v34, v2
875
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v33, v1
876
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v32, v0
877
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f32_fp6 v[0:31], v[32:37], v38
878
+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
879
+ ;
880
+ ; GFX950-GISEL-LABEL: test_cvt_scale_pk32_f32_fp6:
881
+ ; GFX950-GISEL: ; %bb.0:
882
+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
883
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v32, v0
884
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v33, v1
885
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v34, v2
886
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v35, v3
887
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v36, v4
888
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v37, v5
889
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v38, v6
890
+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f32_fp6 v[0:31], v[32:37], v38
891
+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
872
892
%ret = tail call <32 x float > @llvm.amdgcn.cvt.scalef32.pk32.f32.fp6 (<6 x i32 > %src , float %scale )
873
893
ret <32 x float > %ret
874
894
}
875
895
876
896
define <32 x float > @test_cvt_scale_pk32_f32_bf6 (<6 x i32 > %src , float %scale ) {
877
- ; GCN-LABEL: test_cvt_scale_pk32_f32_bf6:
878
- ; GCN: ; %bb.0:
879
- ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
880
- ; GCN-NEXT: v_cvt_scalef32_pk32_f32_bf6 v[0:31], v[0:5], v6
881
- ; GCN-NEXT: s_setpc_b64 s[30:31]
897
+ ; GFX950-SDAG-LABEL: test_cvt_scale_pk32_f32_bf6:
898
+ ; GFX950-SDAG: ; %bb.0:
899
+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
900
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v38, v6
901
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v37, v5
902
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v36, v4
903
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v35, v3
904
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v34, v2
905
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v33, v1
906
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v32, v0
907
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f32_bf6 v[0:31], v[32:37], v38
908
+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
909
+ ;
910
+ ; GFX950-GISEL-LABEL: test_cvt_scale_pk32_f32_bf6:
911
+ ; GFX950-GISEL: ; %bb.0:
912
+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v32, v0
914
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v33, v1
915
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v34, v2
916
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v35, v3
917
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v36, v4
918
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v37, v5
919
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v38, v6
920
+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f32_bf6 v[0:31], v[32:37], v38
921
+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
882
922
%ret = tail call <32 x float > @llvm.amdgcn.cvt.scalef32.pk32.f32.bf6 (<6 x i32 > %src , float %scale )
883
923
ret <32 x float > %ret
884
924
}
885
925
886
926
define <32 x half > @test_cvt_scalef32_pk32_f16_fp6_vv (<6 x i32 > %src , float %scale ) {
887
- ; GCN-LABEL: test_cvt_scalef32_pk32_f16_fp6_vv:
888
- ; GCN: ; %bb.0:
889
- ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
890
- ; GCN-NEXT: v_cvt_scalef32_pk32_f16_fp6 v[0:15], v[0:5], v6
891
- ; GCN-NEXT: s_setpc_b64 s[30:31]
927
+ ; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_f16_fp6_vv:
928
+ ; GFX950-SDAG: ; %bb.0:
929
+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
930
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v22, v6
931
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21, v5
932
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20, v4
933
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19, v3
934
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18, v2
935
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17, v1
936
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, v0
937
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f16_fp6 v[0:15], v[16:21], v22
938
+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
939
+ ;
940
+ ; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_f16_fp6_vv:
941
+ ; GFX950-GISEL: ; %bb.0:
942
+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
943
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v16, v0
944
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v17, v1
945
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v18, v2
946
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v19, v3
947
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v20, v4
948
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v21, v5
949
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v22, v6
950
+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f16_fp6 v[0:15], v[16:21], v22
951
+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
892
952
%ret = tail call <32 x half > @llvm.amdgcn.cvt.scalef32.pk32.f16.fp6 (<6 x i32 > %src , float %scale )
893
953
ret <32 x half > %ret
894
954
}
@@ -897,26 +957,26 @@ define <32 x half> @test_cvt_scalef32_pk32_f16_fp6_sl(<6 x i32> inreg %src) {
897
957
; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_f16_fp6_sl:
898
958
; GFX950-SDAG: ; %bb.0:
899
959
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
900
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v0 , s0
901
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v1 , s1
902
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v2 , s2
903
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v3 , s3
904
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v4 , s16
905
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v5 , s17
960
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16 , s0
961
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17 , s1
962
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18 , s2
963
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19 , s3
964
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20 , s16
965
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21 , s17
906
966
; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x42c80000
907
- ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f16_fp6 v[0:15], v[0:5 ], s0
967
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f16_fp6 v[0:15], v[16:21 ], s0
908
968
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
909
969
;
910
970
; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_f16_fp6_sl:
911
971
; GFX950-GISEL: ; %bb.0:
912
972
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913
973
; GFX950-GISEL-NEXT: s_mov_b32 s4, s16
914
974
; GFX950-GISEL-NEXT: s_mov_b32 s5, s17
915
- ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[0:1 ], s[0:1 ]
916
- ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3 ], s[2:3]
917
- ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[4:5 ], s[4:5 ]
918
- ; GFX950-GISEL-NEXT: v_mov_b32_e32 v6 , 0x42c80000
919
- ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f16_fp6 v[0:15], v[0:5 ], v6
975
+ ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[20:21 ], s[4:5 ]
976
+ ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[18:19 ], s[2:3]
977
+ ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[16:17 ], s[0:1 ]
978
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v22 , 0x42c80000
979
+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f16_fp6 v[0:15], v[16:21 ], v22
920
980
; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
921
981
%ret = tail call <32 x half > @llvm.amdgcn.cvt.scalef32.pk32.f16.fp6 (<6 x i32 > %src , float 100 .0 )
922
982
ret <32 x half > %ret
@@ -926,7 +986,14 @@ define <32 x bfloat> @test_cvt_scalef32_pk32_bf16_fp6_vv(<6 x i32> %src, float %
926
986
; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_bf16_fp6_vv:
927
987
; GFX950-SDAG: ; %bb.0:
928
988
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
929
- ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_fp6 v[0:15], v[0:5], v6
989
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v22, v6
990
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21, v5
991
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20, v4
992
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19, v3
993
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18, v2
994
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17, v1
995
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, v0
996
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_fp6 v[0:15], v[16:21], v22
930
997
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
931
998
;
932
999
; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_bf16_fp6_vv:
@@ -958,14 +1025,14 @@ define <32 x bfloat> @test_cvt_scalef32_pk32_bf16_fp6_sl(<6 x i32> inreg %src) {
958
1025
; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_bf16_fp6_sl:
959
1026
; GFX950-SDAG: ; %bb.0:
960
1027
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
961
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v0 , s0
962
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v1 , s1
963
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v2 , s2
964
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v3 , s3
965
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v4 , s16
966
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v5 , s17
1028
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16 , s0
1029
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17 , s1
1030
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18 , s2
1031
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19 , s3
1032
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20 , s16
1033
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21 , s17
967
1034
; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x42c80000
968
- ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_fp6 v[0:15], v[0:5 ], s0
1035
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_fp6 v[0:15], v[16:21 ], s0
969
1036
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
970
1037
;
971
1038
; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_bf16_fp6_sl:
@@ -1000,11 +1067,31 @@ define <32 x bfloat> @test_cvt_scalef32_pk32_bf16_fp6_sl(<6 x i32> inreg %src) {
1000
1067
}
1001
1068
1002
1069
define <32 x half > @test_cvt_scalef32_pk32_f16_bf6_vv (<6 x i32 > %src , float %scale ) {
1003
- ; GCN-LABEL: test_cvt_scalef32_pk32_f16_bf6_vv:
1004
- ; GCN: ; %bb.0:
1005
- ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1006
- ; GCN-NEXT: v_cvt_scalef32_pk32_f16_bf6 v[0:15], v[0:5], v6
1007
- ; GCN-NEXT: s_setpc_b64 s[30:31]
1070
+ ; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_f16_bf6_vv:
1071
+ ; GFX950-SDAG: ; %bb.0:
1072
+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1073
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v22, v6
1074
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21, v5
1075
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20, v4
1076
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19, v3
1077
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18, v2
1078
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17, v1
1079
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, v0
1080
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f16_bf6 v[0:15], v[16:21], v22
1081
+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
1082
+ ;
1083
+ ; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_f16_bf6_vv:
1084
+ ; GFX950-GISEL: ; %bb.0:
1085
+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1086
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v16, v0
1087
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v17, v1
1088
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v18, v2
1089
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v19, v3
1090
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v20, v4
1091
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v21, v5
1092
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v22, v6
1093
+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f16_bf6 v[0:15], v[16:21], v22
1094
+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
1008
1095
%ret = tail call <32 x half > @llvm.amdgcn.cvt.scalef32.pk32.f16.bf6 (<6 x i32 > %src , float %scale )
1009
1096
ret <32 x half > %ret
1010
1097
}
@@ -1013,26 +1100,26 @@ define <32 x half> @test_cvt_scalef32_pk32_f16_bf6_sl(<6 x i32> inreg %src) {
1013
1100
; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_f16_bf6_sl:
1014
1101
; GFX950-SDAG: ; %bb.0:
1015
1102
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1016
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v0 , s0
1017
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v1 , s1
1018
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v2 , s2
1019
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v3 , s3
1020
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v4 , s16
1021
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v5 , s17
1103
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16 , s0
1104
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17 , s1
1105
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18 , s2
1106
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19 , s3
1107
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20 , s16
1108
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21 , s17
1022
1109
; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x42c80000
1023
- ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f16_bf6 v[0:15], v[0:5 ], s0
1110
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_f16_bf6 v[0:15], v[16:21 ], s0
1024
1111
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
1025
1112
;
1026
1113
; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_f16_bf6_sl:
1027
1114
; GFX950-GISEL: ; %bb.0:
1028
1115
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1029
1116
; GFX950-GISEL-NEXT: s_mov_b32 s4, s16
1030
1117
; GFX950-GISEL-NEXT: s_mov_b32 s5, s17
1031
- ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[0:1 ], s[0:1 ]
1032
- ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3 ], s[2:3]
1033
- ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[4:5 ], s[4:5 ]
1034
- ; GFX950-GISEL-NEXT: v_mov_b32_e32 v6 , 0x42c80000
1035
- ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f16_bf6 v[0:15], v[0:5 ], v6
1118
+ ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[20:21 ], s[4:5 ]
1119
+ ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[18:19 ], s[2:3]
1120
+ ; GFX950-GISEL-NEXT: v_mov_b64_e32 v[16:17 ], s[0:1 ]
1121
+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v22 , 0x42c80000
1122
+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk32_f16_bf6 v[0:15], v[16:21 ], v22
1036
1123
; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
1037
1124
%ret = tail call <32 x half > @llvm.amdgcn.cvt.scalef32.pk32.f16.bf6 (<6 x i32 > %src , float 100 .0 )
1038
1125
ret <32 x half > %ret
@@ -1042,7 +1129,14 @@ define <32 x bfloat> @test_cvt_scalef32_pk32_bf16_bf6_vv(<6 x i32> %src, float %
1042
1129
; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_bf16_bf6_vv:
1043
1130
; GFX950-SDAG: ; %bb.0:
1044
1131
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1045
- ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_bf6 v[0:15], v[0:5], v6
1132
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v22, v6
1133
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21, v5
1134
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20, v4
1135
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19, v3
1136
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18, v2
1137
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17, v1
1138
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16, v0
1139
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_bf6 v[0:15], v[16:21], v22
1046
1140
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
1047
1141
;
1048
1142
; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_bf16_bf6_vv:
@@ -1074,14 +1168,14 @@ define <32 x bfloat> @test_cvt_scalef32_pk32_bf16_bf6_sl(<6 x i32> inreg %src) {
1074
1168
; GFX950-SDAG-LABEL: test_cvt_scalef32_pk32_bf16_bf6_sl:
1075
1169
; GFX950-SDAG: ; %bb.0:
1076
1170
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1077
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v0 , s0
1078
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v1 , s1
1079
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v2 , s2
1080
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v3 , s3
1081
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v4 , s16
1082
- ; GFX950-SDAG-NEXT: v_mov_b32_e32 v5 , s17
1171
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v16 , s0
1172
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v17 , s1
1173
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v18 , s2
1174
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v19 , s3
1175
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v20 , s16
1176
+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v21 , s17
1083
1177
; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x42c80000
1084
- ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_bf6 v[0:15], v[0:5 ], s0
1178
+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk32_bf16_bf6 v[0:15], v[16:21 ], s0
1085
1179
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
1086
1180
;
1087
1181
; GFX950-GISEL-LABEL: test_cvt_scalef32_pk32_bf16_bf6_sl:
0 commit comments