Skip to content

Commit 1e633d2

Browse files
arsenm authored and pravinjagtap committed
AMDGPU: Add V_CVT_PK_BF16_F32 for gfx950 (llvm#116678)
1 parent 508ba41 commit 1e633d2

File tree

6 files changed

+255
-217
lines changed

6 files changed

+255
-217
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
889889

890890
setOperationAction(ISD::MUL, MVT::i1, Promote);
891891

892+
if (Subtarget->hasBF16ConversionInsts()) {
893+
setOperationAction(ISD::FP_ROUND, MVT::v2bf16, Legal);
894+
setOperationAction(ISD::FP_ROUND, MVT::bf16, Legal);
895+
setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal);
896+
}
897+
892898
setTargetDAGCombine({ISD::ADD,
893899
ISD::UADDO_CARRY,
894900
ISD::SUB,

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2588,6 +2588,7 @@ def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
25882588
def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>;
25892589
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
25902590
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
2591+
def VOP_V2BF16_F32_F32 : VOPProfile <[v2bf16, f32, f32, untyped]>;
25912592

25922593
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
25932594
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -937,6 +937,30 @@ let SubtargetPredicate = isGFX11Plus in {
937937
defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
938938
} // End SubtargetPredicate = isGFX11Plus
939939

940+
// FIXME: GlobalISel cannot distinguish f16 from bf16 and may start using the bf16 patterns
941+
// instead of the less complex f16 ones. Disable GlobalISel for these for now.
//
// bf16_fpround wraps fpround in a PatFrag with two predicates:
//   - the C++ predicate body is `return true`, so it accepts every fpround node;
//   - GISelPredicateCode is `return false`, so the GlobalISel predicate never
//     accepts, which is how the FIXME above is implemented.
942+
def bf16_fpround : PatFrag <(ops node:$src0), (fpround $src0), [{ return true; }]> {
943+
let GISelPredicateCode = [{return false;}];
944+
}
945+
946+
// Defines V_CVT_PK_BF16_F32 and the patterns that select it for fpround to
// bf16 / v2bf16. Everything in this block is gated on HasBF16ConversionInsts.
let SubtargetPredicate = HasBF16ConversionInsts in {
947+
// NOTE(review): ReadsModeReg = 0 declares that the instruction does not read
// the MODE register; confirm against the ISA documentation for this opcode.
let ReadsModeReg = 0 in {
948+
defm V_CVT_PK_BF16_F32 : VOP3Inst<"v_cvt_pk_bf16_f32", VOP3_Profile<VOP_V2BF16_F32_F32>>;
949+
}
950+
// v2f32 -> v2bf16: the sub0 and sub1 halves of the 64-bit source register are
// passed as src0 and src1; both source-modifier operands are 0.
def : GCNPat<(v2bf16 (bf16_fpround v2f32:$src)),
951+
(V_CVT_PK_BF16_F32_e64 0, (EXTRACT_SUBREG VReg_64:$src, sub0), 0, (EXTRACT_SUBREG VReg_64:$src, sub1))>;
952+
// v2f64 -> v2bf16: each 64-bit element (sub0_sub1, sub2_sub3) is first narrowed
// with V_CVT_F32_F64, then the two f32 results are packed.
// NOTE(review): this rounds twice (f64 -> f32, then f32 -> bf16); confirm the
// result is acceptable compared with a single rounding directly from f64.
def : GCNPat<(v2bf16 (bf16_fpround v2f64:$src)),
953+
(V_CVT_PK_BF16_F32_e64 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub0_sub1)),
954+
0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub2_sub3)))>;
955+
// build_vector of two separately rounded f32 values: a single packed convert,
// forwarding the source modifiers matched by VOP3Mods for each operand.
def : GCNPat<(v2bf16 (build_vector (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
956+
(bf16 (bf16_fpround (f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)))))),
957+
(V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1)>;
958+
// Scalar f32 -> bf16: the packed instruction is used with src1 = IMPLICIT_DEF.
// NOTE(review): presumably only the half produced from src0 is consumed — verify.
def : GCNPat<(bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
959+
(V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, 0, (f32 (IMPLICIT_DEF)))>;
960+
// Scalar f64 -> bf16: the matched source modifiers are applied on the
// V_CVT_F32_F64 step; the packed convert itself gets modifier 0 and an
// IMPLICIT_DEF src1. The two-step rounding note on the v2f64 pattern applies
// here as well.
def : GCNPat<(bf16 (bf16_fpround (f64 (VOP3Mods f64:$src0, i32:$src0_modifiers)))),
961+
(V_CVT_PK_BF16_F32_e64 0, (f32 (V_CVT_F32_F64_e64 $src0_modifiers, $src0)), 0, (f32 (IMPLICIT_DEF)))>;
962+
}
963+
940964
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
941965
defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
942966
defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
@@ -1701,5 +1725,6 @@ defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>;
17011725

17021726
defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>;
17031727
defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>;
1728+
defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>;
17041729
defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
17051730
defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;

0 commit comments

Comments
 (0)