Skip to content

Commit b889783

Browse files
arsenm authored and shiltian committed
AMDGPU: Add support for V_CVT_PK_F16_F32 instruction for gfx950 (llvm#118300)
Co-authored-by: Shilei Tian <[email protected]>
1 parent 7c8ff4b commit b889783

File tree

8 files changed

+398
-5
lines changed

8 files changed

+398
-5
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -432,6 +432,12 @@ def FeatureAshrPkInsts : SubtargetFeature<"ashr-pk-insts",
432432
"Has Arithmetic Shift Pack instructions"
433433
>;
434434

435+
def FeatureCvtPkF16F32Inst : SubtargetFeature<"cvt-pk-f16-f32-inst",
436+
"HasCvtPkF16F32Inst",
437+
"true",
438+
"Has cvt_pk_f16_f32 instruction"
439+
>;
440+
435441
def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
436442
"GFX950Insts",
437443
"true",
@@ -445,8 +451,9 @@ def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
445451
FeatureFP6BF6ConversionScaleInsts,
446452
FeatureF16BF16ToFP6BF6ConversionScaleInsts,
447453
FeatureF32ToF16BF16ConversionSRInsts,
454+
FeatureCvtPkF16F32Inst,
448455
FeatureMinimum3Maximum3F32,
449-
FeatureMinimum3Maximum3PKF16
456+
FeatureMinimum3Maximum3PKF16,
450457
]
451458
>;
452459

@@ -2515,6 +2522,9 @@ def HasFP6BF6ConversionScaleInsts : Predicate<"Subtarget->hasFP6BF6ConversionSca
25152522
def HasF16BF16ToFP6BF6ConversionScaleInsts : Predicate<"Subtarget->hasF16BF16ToFP6BF6ConversionScaleInsts()">,
25162523
AssemblerPredicate<(all_of FeatureF16BF16ToFP6BF6ConversionScaleInsts)>;
25172524

2525+
def HasCvtPkF16F32Inst : Predicate<"Subtarget->hasCvtPkF16F32Inst()">,
2526+
AssemblerPredicate<(all_of FeatureCvtPkF16F32Inst)>;
2527+
25182528
def HasF32ToF16BF16ConversionSRInsts : Predicate<"Subtarget->hasF32ToF16BF16ConversionSRInsts()">,
25192529
AssemblerPredicate<(all_of FeatureF32ToF16BF16ConversionSRInsts)>;
25202530

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1038,10 +1038,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
10381038
.lower();
10391039
}
10401040

1041-
getActionDefinitionsBuilder(G_FPTRUNC)
1042-
.legalFor({{S32, S64}, {S16, S32}})
1043-
.scalarize(0)
1044-
.lower();
1041+
auto &FPTruncActions = getActionDefinitionsBuilder(G_FPTRUNC);
1042+
if (ST.hasCvtPkF16F32Inst())
1043+
FPTruncActions.legalFor(
1044+
{{S32, S64}, {S16, S32}, {V2S16, V2S32}, {V2S16, V2S64}});
1045+
else
1046+
FPTruncActions.legalFor({{S32, S64}, {S16, S32}});
1047+
FPTruncActions.scalarize(0).lower();
10451048

10461049
getActionDefinitionsBuilder(G_FPEXT)
10471050
.legalFor({{S64, S32}, {S32, S16}})

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,7 @@ class AMDGPUSubtarget {
5555
bool HasFP4ConversionScaleInsts = false;
5656
bool HasFP6BF6ConversionScaleInsts = false;
5757
bool HasF16BF16ToFP6BF6ConversionScaleInsts = false;
58+
bool HasCvtPkF16F32Inst = false;
5859
bool HasF32ToF16BF16ConversionSRInsts = false;
5960
bool EnableRealTrue16Insts = false;
6061
bool HasBF16ConversionInsts = false;
@@ -191,6 +192,8 @@ class AMDGPUSubtarget {
191192

192193
bool hasF16BF16ToFP6BF6ConversionScaleInsts() const { return HasF16BF16ToFP6BF6ConversionScaleInsts; }
193194

195+
bool hasCvtPkF16F32Inst() const { return HasCvtPkF16F32Inst; }
196+
194197
bool hasF32ToF16BF16ConversionSRInsts() const {
195198
return HasF32ToF16BF16ConversionSRInsts;
196199
}

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -902,6 +902,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
902902
setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal);
903903
}
904904

905+
if (Subtarget->hasCvtPkF16F32Inst()) {
906+
setOperationAction(ISD::FP_ROUND, MVT::v2f16, Legal);
907+
}
908+
905909
setTargetDAGCombine({ISD::ADD,
906910
ISD::UADDO_CARRY,
907911
ISD::SUB,

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1146,6 +1146,21 @@ let SubtargetPredicate = HasGFX950Insts, mayRaiseFPException = 0 in {
11461146
defm V_CVT_SCALEF32_2XPK16_BF6_F32 : VOP3Inst<"v_cvt_scalef32_2xpk16_bf6_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V16F32_V16F32_F32>, int_amdgcn_cvt_scalef32_2xpk16_bf6_f32>;
11471147
}
11481148

1149+
let SubtargetPredicate = HasCvtPkF16F32Inst in {
1150+
let ReadsModeReg = 0 in {
1151+
defm V_CVT_PK_F16_F32 : VOP3Inst<"v_cvt_pk_f16_f32", VOP3_Profile<VOP_V2F16_F32_F32>>;
1152+
}
1153+
1154+
def : GCNPat<(v2f16 (fpround v2f32:$src)),
1155+
(V_CVT_PK_F16_F32_e64 0, (EXTRACT_SUBREG VReg_64:$src, sub0), 0, (EXTRACT_SUBREG VReg_64:$src, sub1))>;
1156+
def : GCNPat<(v2f16 (fpround v2f64:$src)),
1157+
(V_CVT_PK_F16_F32_e64 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub0_sub1)),
1158+
0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub2_sub3)))>;
1159+
def : GCNPat<(v2f16 (build_vector (f16 (fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
1160+
(f16 (fpround (f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)))))),
1161+
(V_CVT_PK_F16_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1)>;
1162+
}
1163+
11491164
class Cvt_Scale_FP4FP8BF8ToF16F32_Pat<SDPatternOperator node, VOP3_Pseudo inst, ValueType DstTy> : GCNPat<
11501165
(DstTy (node i32:$src0, f32:$src1, timm:$index)),
11511166
(inst (SrcAndDstSelToOpSelXForm_0_0 $index), $src0, (SrcAndDstSelToOpSelXForm_1_0 $index), $src1)
@@ -2250,6 +2265,9 @@ defm V_CVT_SR_BF16_F32: VOP3OpSel_Real_gfx9 <0x2a7>;
22502265

22512266
defm V_ASHR_PK_I8_I32 : VOP3OpSel_Real_gfx9 <0x265>;
22522267
defm V_ASHR_PK_U8_I32 : VOP3OpSel_Real_gfx9 <0x266>;
2268+
let OtherPredicates = [HasCvtPkF16F32Inst] in {
2269+
defm V_CVT_PK_F16_F32 : VOP3_Real_gfx9<0x267, "v_cvt_pk_f16_f32">;
2270+
}
22532271

22542272
defm V_CVT_SCALEF32_2XPK16_FP6_F32 : VOP3_Real_gfx9<0x252, "v_cvt_scalef32_2xpk16_fp6_f32">;
22552273
defm V_CVT_SCALEF32_2XPK16_BF6_F32 : VOP3_Real_gfx9<0x253, "v_cvt_scalef32_2xpk16_bf6_f32">;

0 commit comments

Comments
 (0)