@@ -937,6 +937,30 @@ let SubtargetPredicate = isGFX11Plus in {
937
937
defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
938
938
} // End SubtargetPredicate = isGFX11Plus
939
939
940
+ // FIXME: GlobalISel cannot distinguish f16 from bf16 and may start using the bf16 patterns
941
+ // instead of the less complex f16 ones. Disable GlobalISel for these for now.
942
+ def bf16_fpround : PatFrag <(ops node:$src0), (fpround $src0), [{ return true; }]> { // fpround wrapper; the C++ predicate accepts every node
943
+ let GISelPredicateCode = [{return false;}]; // the GlobalISel predicate rejects every node, which is what disables these patterns there
944
+ }
945
+
946
+ let SubtargetPredicate = HasBF16ConversionInsts in { // the instruction and every pattern in this scope are gated on HasBF16ConversionInsts
947
+ let ReadsModeReg = 0 in { // applies to the instruction definition only; the patterns below sit outside this inner scope
948
+ defm V_CVT_PK_BF16_F32 : VOP3Inst<"v_cvt_pk_bf16_f32", VOP3_Profile<VOP_V2BF16_F32_F32>>;
949
+ }
950
+ def : GCNPat<(v2bf16 (bf16_fpround v2f32:$src)), // v2f32 source: sub0 becomes src0, sub1 becomes src1, both modifier operands are 0
951
+ (V_CVT_PK_BF16_F32_e64 0, (EXTRACT_SUBREG VReg_64:$src, sub0), 0, (EXTRACT_SUBREG VReg_64:$src, sub1))>;
952
+ def : GCNPat<(v2bf16 (bf16_fpround v2f64:$src)), // v2f64 source: each 64-bit half goes through V_CVT_F32_F64 before being packed
953
+ (V_CVT_PK_BF16_F32_e64 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub0_sub1)), // NOTE(review): rounds twice (f64 -> f32 -> bf16) - confirm this double rounding is acceptable
954
+ 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub2_sub3)))>;
955
+ def : GCNPat<(v2bf16 (build_vector (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
956
+ (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)))))),
957
+ (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1)>; // two scalar f32 roundings built into a vector fold into one packed convert, keeping each source's modifiers
958
+ def : GCNPat<(bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))), // scalar f32 source
959
+ (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, 0, (f32 (IMPLICIT_DEF)))>; // src1 is IMPLICIT_DEF; presumably only the lane produced from src0 is consumed - confirm
960
+ def : GCNPat<(bf16 (bf16_fpround (f64 (VOP3Mods f64:$src0, i32:$src0_modifiers)))), // scalar f64 source; NOTE(review): same two-step f64 -> f32 -> bf16 rounding as the v2f64 pattern - confirm
961
+ (V_CVT_PK_BF16_F32_e64 0, (f32 (V_CVT_F32_F64_e64 $src0_modifiers, $src0)), 0, (f32 (IMPLICIT_DEF)))>; // source modifiers are applied on the f64 -> f32 step; the packed convert gets modifiers 0
962
+ }
963
+
940
964
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
941
965
defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
942
966
defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
@@ -1701,5 +1725,6 @@ defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>;
1701
1725
1702
1726
defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>;
1703
1727
defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>;
1728
+ defm V_CVT_PK_BF16_F32 : VOP3OpSel_Real_gfx9 <0x268>;
1704
1729
defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
1705
1730
defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;
0 commit comments