Skip to content

Commit 2558f35

Browse files
pravinjagtap authored and arsenm committed
AMDGPU: Add V_CVT_PK_BF16_F32 for gfx950
1 parent 0c42168 commit 2558f35

File tree

6 files changed

+255
-217
lines changed

6 files changed

+255
-217
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -889,6 +889,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
889889

890890
setOperationAction(ISD::MUL, MVT::i1, Promote);
891891

892+
if (Subtarget->hasBF16ConversionInsts()) {
893+
setOperationAction(ISD::FP_ROUND, MVT::v2bf16, Legal);
894+
setOperationAction(ISD::FP_ROUND, MVT::bf16, Legal);
895+
setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal);
896+
}
897+
892898
setTargetDAGCombine({ISD::ADD,
893899
ISD::UADDO_CARRY,
894900
ISD::SUB,

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2787,6 +2787,7 @@ def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
27872787
def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>;
27882788
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
27892789
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
2790+
def VOP_V2BF16_F32_F32 : VOPProfile <[v2bf16, f32, f32, untyped]>;
27902791

27912792
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
27922793
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -944,6 +944,30 @@ let SubtargetPredicate = isGFX11Plus in {
944944
defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
945945
} // End SubtargetPredicate = isGFX11Plus
946946

947+
// FIXME: GlobalISel cannot distinguish f16 and bf16 and may start using bf16 patterns
948+
// instead of less complex f16. Disable GlobalISel for these for now.
949+
def bf16_fpround : PatFrag <(ops node:$src0), (fpround $src0), [{ return true; }]> {
950+
let GISelPredicateCode = [{return false;}];
951+
}
952+
953+
let SubtargetPredicate = HasBF16ConversionInsts in {
954+
let ReadsModeReg = 0 in {
955+
defm V_CVT_PK_BF16_F32 : VOP3Inst<"v_cvt_pk_bf16_f32", VOP3_Profile<VOP_V2BF16_F32_F32>>;
956+
}
957+
def : GCNPat<(v2bf16 (bf16_fpround v2f32:$src)),
958+
(V_CVT_PK_BF16_F32_e64 0, (EXTRACT_SUBREG VReg_64:$src, sub0), 0, (EXTRACT_SUBREG VReg_64:$src, sub1))>;
959+
def : GCNPat<(v2bf16 (bf16_fpround v2f64:$src)),
960+
(V_CVT_PK_BF16_F32_e64 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub0_sub1)),
961+
0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub2_sub3)))>;
962+
def : GCNPat<(v2bf16 (build_vector (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
963+
(bf16 (bf16_fpround (f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)))))),
964+
(V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1)>;
965+
def : GCNPat<(bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
966+
(V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, 0, (f32 (IMPLICIT_DEF)))>;
967+
def : GCNPat<(bf16 (bf16_fpround (f64 (VOP3Mods f64:$src0, i32:$src0_modifiers)))),
968+
(V_CVT_PK_BF16_F32_e64 0, (f32 (V_CVT_F32_F64_e64 $src0_modifiers, $src0)), 0, (f32 (IMPLICIT_DEF)))>;
969+
}
970+
947971
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
948972
defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
949973
defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
@@ -1721,5 +1745,6 @@ defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>;
17211745

17221746
defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>;
17231747
defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>;
1748+
defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>;
17241749
defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
17251750
defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;

0 commit comments

Comments
 (0)