Skip to content

Commit 7b4c8b3

Browse files
authored
[AMDGPU][True16][MC] VOP3 profile in True16 format (llvm#109031)
Modify the VOP3 profile and pseudo, and add encoding info for VOP3 True16, including DPP and DPP8, in true16 and fake16 formats. This patch applies the true16/fake16 changes and the asm/dasm changes to V_ADD_NC_U16, V_ADD_NC_I16, V_SUB_NC_U16, and V_SUB_NC_I16.
1 parent dcc5ba4 commit 7b4c8b3

19 files changed

+2896
-926
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2149,6 +2149,8 @@ class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
21492149
string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
21502150
}
21512151

2152+
// FIXME-TRUE16 AsmVOP3OpSel will be deprecated after all
2153+
// VOP3 16-bit instructions are converted to true16 format
21522154
class getAsmVOP3OpSel <int NumSrcArgs,
21532155
bit HasClamp,
21542156
bit HasOMod,
@@ -2237,8 +2239,9 @@ class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
22372239
string clamp = !if(HasClamp, "$clamp", "");
22382240
string omod = !if(HasOMod, "$omod", "");
22392241

2240-
string ret = dst#!if(!gt(NumSrcArgs,0),", "#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod, "");
2241-
2242+
string ret = dst#!if(!eq(NumSrcArgs,0),
2243+
"",
2244+
!if(HasDst,", ", "")#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod);
22422245
}
22432246

22442247
class getAsmVOP3DPP<string base> {
@@ -2733,6 +2736,7 @@ def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
27332736
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
27342737
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
27352738
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
2739+
def VOP_I32_I32_I32_I16 : VOPProfile <[i32, i32, i32, i16]>;
27362740
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
27372741
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
27382742
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1664,8 +1664,8 @@ multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
16641664
VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>;
16651665

16661666
multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> :
1667-
VOP3Only_Realtriple_t16<GFX11Gen, op, asmName, OpName>,
1668-
VOP3Only_Realtriple_t16<GFX12Gen, op, asmName, OpName>;
1667+
VOP3_Realtriple_t16_gfx11<op, asmName, OpName, "", /*IsSingle*/1>,
1668+
VOP3_Realtriple_t16_gfx12<op, asmName, OpName, "", /*IsSingle*/1>;
16691669

16701670
multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> {
16711671
defm OpName#"_t16": VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">;

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 53 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -569,16 +569,10 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
569569
getAsmVOP3OpSel<3, HasClamp, HasOMod,
570570
HasSrc0FloatMods, HasSrc1FloatMods,
571571
HasSrc2FloatMods>.ret);
572-
let AsmVOP3DPP16 = !subst(", $src2_modifiers", "",
573-
getAsmVOP3DPP16<getAsmVOP3Base<3, 1, HasClamp, 1,
574-
HasOMod, 0, 1, HasSrc0FloatMods,
575-
HasSrc1FloatMods,
576-
HasSrc2FloatMods>.ret>.ret);
577-
let AsmVOP3DPP8 = !subst(", $src2_modifiers", "",
578-
getAsmVOP3DPP8<getAsmVOP3Base<3, 1, HasClamp, 1,
579-
HasOMod, 0, 1, HasSrc0FloatMods,
580-
HasSrc1FloatMods,
581-
HasSrc2FloatMods>.ret>.ret);
572+
let AsmVOP3Base = !subst(", $src2_modifiers", "",
573+
getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
574+
HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, 0/*Src1Mods*/,
575+
HasModifiers, DstVT>.ret);
582576
}
583577

584578
class VOP3_CVT_SR_F8_ByteSel_Profile<ValueType SrcVT> :
@@ -636,8 +630,8 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
636630
defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
637631
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
638632

639-
defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
640-
defm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
633+
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
634+
defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
641635

642636
defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
643637
defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
@@ -752,6 +746,8 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),
752746
(i32 (EXTRACT_SUBREG $src1, sub1)),
753747
(i32 (EXTRACT_SUBREG $src2, sub1))), sub1)>;
754748

749+
} // End SubtargetPredicate = isGFX9Plus
750+
755751
// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
756752
class OpSelBinOpClampPat<SDPatternOperator node,
757753
Instruction inst> : GCNPat<
@@ -760,9 +756,14 @@ class OpSelBinOpClampPat<SDPatternOperator node,
760756
(inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE, 0)
761757
>;
762758

763-
def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
764-
def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
765-
} // End SubtargetPredicate = isGFX9Plus
759+
let SubtargetPredicate = isGFX9Plus, True16Predicate = NotHasTrue16BitInsts in {
760+
def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
761+
def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
762+
} // End SubtargetPredicate = isGFX9Plus, True16Predicate = NotHasTrue16BitInsts
763+
let True16Predicate = UseFakeTrue16Insts in {
764+
def : OpSelBinOpClampPat<saddsat, V_ADD_I16_fake16_e64>;
765+
def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_fake16_e64>;
766+
} // End True16Predicate = UseFakeTrue16Insts
766767

767768
multiclass IMAD32_Pats <VOP3_Pseudo inst> {
768769
def : GCNPat <
@@ -871,21 +872,31 @@ let SubtargetPredicate = isGFX10Plus in {
871872
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
872873
}
873874

874-
defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
875-
defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
876-
877-
def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
878-
def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
879-
880-
// Undo sub x, c -> add x, -c canonicalization since c is more likely
881-
// an inline immediate than -c.
882-
def : GCNPat<
883-
(add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
884-
(V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
885-
>;
875+
defm V_ADD_NC_U16 : VOP3Inst_t16 <"v_add_nc_u16", VOP_I16_I16_I16, add>;
876+
defm V_SUB_NC_U16 : VOP3Inst_t16 <"v_sub_nc_u16", VOP_I16_I16_I16, sub>;
886877

887878
} // End SubtargetPredicate = isGFX10Plus
888879

880+
let True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus in {
881+
def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
882+
def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
883+
// Undo sub x, c -> add x, -c canonicalization since c is more likely
884+
// an inline immediate than -c.
885+
def : GCNPat<
886+
(add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
887+
(V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
888+
>;
889+
} // End True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus
890+
891+
let True16Predicate = UseFakeTrue16Insts in {
892+
def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_fake16_e64>;
893+
def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_fake16_e64>;
894+
def : GCNPat<
895+
(add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
896+
(V_SUB_NC_U16_fake16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
897+
>;
898+
} // End True16Predicate = UseFakeTrue16Insts
899+
889900
let SubtargetPredicate = isGFX12Plus in {
890901
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
891902
defm V_PERMLANE16_VAR_B32 : VOP3Inst<"v_permlane16_var_b32", VOP3_PERMLANE_VAR_Profile>;
@@ -1104,6 +1115,17 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
11041115
multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
11051116
VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
11061117

1118+
multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
1119+
string pseudo_mnemonic = "", bit isSingle = 0> :
1120+
VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
1121+
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
1122+
1123+
multiclass VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
1124+
string pseudo_mnemonic = "", bit isSingle = 0> {
1125+
defm opName#"_t16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
1126+
defm opName#"_fake16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
1127+
}
1128+
11071129
multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
11081130
VOP3be_Real<GFX11Gen, op, opName, asmName>,
11091131
VOP3be_Real<GFX12Gen, op, opName, asmName>;
@@ -1189,17 +1211,17 @@ defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "
11891211
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
11901212
defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
11911213
defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">;
1192-
defm V_ADD_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x303>;
1193-
defm V_SUB_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x304>;
1214+
defm V_ADD_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x303, "v_add_nc_u16">;
1215+
defm V_SUB_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x304, "v_sub_nc_u16">;
11941216
defm V_MUL_LO_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x305, "v_mul_lo_u16">;
11951217
defm V_CVT_PK_I16_F32 : VOP3_Realtriple_gfx11_gfx12<0x306>;
11961218
defm V_CVT_PK_U16_F32 : VOP3_Realtriple_gfx11_gfx12<0x307>;
11971219
defm V_MAX_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x309, "v_max_u16">;
11981220
defm V_MAX_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30a, "v_max_i16">;
11991221
defm V_MIN_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30b, "v_min_u16">;
12001222
defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30c, "v_min_i16">;
1201-
defm V_ADD_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30d, "V_ADD_I16", "v_add_nc_i16">;
1202-
defm V_SUB_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
1223+
defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "v_add_nc_i16", "V_ADD_I16">;
1224+
defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "v_sub_nc_i16", "V_SUB_I16">;
12031225
defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11_gfx12<0x311>;
12041226
defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >;
12051227
defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >;

0 commit comments

Comments
 (0)