@@ -944,6 +944,30 @@ let SubtargetPredicate = isGFX11Plus in {
944
944
defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
945
945
} // End SubtargetPredicate = isGFX11Plus
946
946
947
+ // FIXME: GlobalISel cannot distinguish f16 from bf16 and may start selecting the bf16 patterns
948
+ // instead of the less complex f16 ones, so bf16_fpround is disabled for GlobalISel for now.
949
+ def bf16_fpround : PatFrag <(ops node:$src0), (fpround $src0), [{ return true; }]> { // wraps fpround; the inline predicate returns true unconditionally
950
+ let GISelPredicateCode = [{return false;}]; // GlobalISel-side predicate always returns false (see FIXME above)
951
+ }
952
+
953
+ let SubtargetPredicate = HasBF16ConversionInsts in { // instruction and patterns in this scope are gated on HasBF16ConversionInsts
954
+ let ReadsModeReg = 0 in { // covers the instruction definition only; the patterns below sit outside this scope
955
+ defm V_CVT_PK_BF16_F32 : VOP3Inst<"v_cvt_pk_bf16_f32", VOP3_Profile<VOP_V2BF16_F32_F32>>;
956
+ } // End ReadsModeReg = 0
957
+ def : GCNPat<(v2bf16 (bf16_fpround v2f32:$src)), // v2f32 -> v2bf16: sub0 and sub1 of the source feed the packed convert directly
958
+ (V_CVT_PK_BF16_F32_e64 0, (EXTRACT_SUBREG VReg_64:$src, sub0), 0, (EXTRACT_SUBREG VReg_64:$src, sub1))>;
959
+ def : GCNPat<(v2bf16 (bf16_fpround v2f64:$src)), // v2f64 -> v2bf16: each 64-bit half goes through V_CVT_F32_F64_e64 first. NOTE(review): two narrowing steps (f64 -> f32 -> bf16) - confirm double rounding is acceptable
960
+ (V_CVT_PK_BF16_F32_e64 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub0_sub1)),
961
+ 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub2_sub3)))>;
962
+ def : GCNPat<(v2bf16 (build_vector (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))), // two scalar f32 -> bf16 rounds joined by build_vector become one packed convert
963
+ (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)))))),
964
+ (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1)>; // both sources' modifiers are forwarded to the instruction
965
+ def : GCNPat<(bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))), // scalar f32 -> bf16: second source is IMPLICIT_DEF; presumably only the low half of the packed result is consumed - TODO confirm
966
+ (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, 0, (f32 (IMPLICIT_DEF)))>;
967
+ def : GCNPat<(bf16 (bf16_fpround (f64 (VOP3Mods f64:$src0, i32:$src0_modifiers)))), // scalar f64 -> bf16: source modifiers are applied on the f64 -> f32 convert; the packed convert gets modifier 0
968
+ (V_CVT_PK_BF16_F32_e64 0, (f32 (V_CVT_F32_F64_e64 $src0_modifiers, $src0)), 0, (f32 (IMPLICIT_DEF)))>;
969
+ } // End SubtargetPredicate = HasBF16ConversionInsts
970
+
947
971
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
948
972
defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
949
973
defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
@@ -1721,5 +1745,6 @@ defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>;
1721
1745
1722
1746
defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>;
1723
1747
defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>;
1748
+ defm V_CVT_PK_BF16_F32 : VOP3OpSel_Real_gfx9 <0x268>; // NOTE(review): 0x268 sits out of numeric order between 0x2a3 and 0x2a4 - confirm placement is intentional
1724
1749
defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
1725
1750
defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;
0 commit comments