@@ -569,16 +569,10 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
569
569
getAsmVOP3OpSel<3, HasClamp, HasOMod,
570
570
HasSrc0FloatMods, HasSrc1FloatMods,
571
571
HasSrc2FloatMods>.ret);
572
- let AsmVOP3DPP16 = !subst(", $src2_modifiers", "",
573
- getAsmVOP3DPP16<getAsmVOP3Base<3, 1, HasClamp, 1,
574
- HasOMod, 0, 1, HasSrc0FloatMods,
575
- HasSrc1FloatMods,
576
- HasSrc2FloatMods>.ret>.ret);
577
- let AsmVOP3DPP8 = !subst(", $src2_modifiers", "",
578
- getAsmVOP3DPP8<getAsmVOP3Base<3, 1, HasClamp, 1,
579
- HasOMod, 0, 1, HasSrc0FloatMods,
580
- HasSrc1FloatMods,
581
- HasSrc2FloatMods>.ret>.ret);
572
+ let AsmVOP3Base = !subst(", $src2_modifiers", "",
573
+ getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
574
+ HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, 0/*Src1Mods*/,
575
+ HasModifiers, DstVT>.ret);
582
576
}
583
577
584
578
class VOP3_CVT_SR_F8_ByteSel_Profile<ValueType SrcVT> :
@@ -636,8 +630,8 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
636
630
defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
637
631
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
638
632
639
- defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile< VOP_I16_I16_I16, VOP3_OPSEL> >;
640
- defm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile< VOP_I16_I16_I16, VOP3_OPSEL> >;
633
+ defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
634
+ defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
641
635
642
636
defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
643
637
defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
@@ -752,6 +746,8 @@ def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),
752
746
(i32 (EXTRACT_SUBREG $src1, sub1)),
753
747
(i32 (EXTRACT_SUBREG $src2, sub1))), sub1)>;
754
748
749
+ } // End SubtargetPredicate = isGFX9Plus
750
+
755
751
// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
756
752
class OpSelBinOpClampPat<SDPatternOperator node,
757
753
Instruction inst> : GCNPat<
@@ -760,9 +756,14 @@ class OpSelBinOpClampPat<SDPatternOperator node,
760
756
(inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE, 0)
761
757
>;
762
758
763
- def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
764
- def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
765
- } // End SubtargetPredicate = isGFX9Plus
759
// Select saddsat / ssubsat onto V_ADD_I16_e64 / V_SUB_I16_e64 through
// OpSelBinOpClampPat, which emits the instruction with DSTCLAMP.ENABLE.
// Guarded by both isGFX9Plus and True16Predicate = NotHasTrue16BitInsts.
+ let SubtargetPredicate = isGFX9Plus, True16Predicate = NotHasTrue16BitInsts in {
760
+ def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
761
+ def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
762
+ } // End SubtargetPredicate = isGFX9Plus, True16Predicate = NotHasTrue16BitInsts
763
// Same saddsat / ssubsat selection, but targeting the _fake16 instruction
// variants when True16Predicate is UseFakeTrue16Insts.
// NOTE(review): no SubtargetPredicate is set on this block, unlike the
// NotHasTrue16BitInsts block before it — presumably the True16 predicate
// alone is sufficient; confirm.
+ let True16Predicate = UseFakeTrue16Insts in {
764
+ def : OpSelBinOpClampPat<saddsat, V_ADD_I16_fake16_e64>;
765
+ def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_fake16_e64>;
766
+ } // End True16Predicate = UseFakeTrue16Insts
766
767
767
768
multiclass IMAD32_Pats <VOP3_Pseudo inst> {
768
769
def : GCNPat <
@@ -871,21 +872,31 @@ let SubtargetPredicate = isGFX10Plus in {
871
872
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
872
873
}
873
874
874
- defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
875
- defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
876
-
877
- def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
878
- def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
879
-
880
- // Undo sub x, c -> add x, -c canonicalization since c is more likely
881
- // an inline immediate than -c.
882
- def : GCNPat<
883
- (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
884
- (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
885
- >;
875
+ defm V_ADD_NC_U16 : VOP3Inst_t16 <"v_add_nc_u16", VOP_I16_I16_I16, add>;
876
+ defm V_SUB_NC_U16 : VOP3Inst_t16 <"v_sub_nc_u16", VOP_I16_I16_I16, sub>;
886
877
887
878
} // End SubtargetPredicate = isGFX10Plus
888
879
880
// Selection patterns for V_ADD_NC_U16_e64 / V_SUB_NC_U16_e64, enabled only
// for isGFX10Plus with True16Predicate = NotHasTrue16BitInsts.
+ let True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus in {
881
// Unsigned saturating add/sub go through OpSelBinOpClampPat.
+ def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
882
+ def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
883
+ // Undo sub x, c -> add x, -c canonicalization since c is more likely
884
+ // an inline immediate than -c.
885
+ def : GCNPat<
886
+ (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
887
// The literal 0 operands sit in the modifier and trailing operand slots.
+ (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
888
+ >;
889
+ } // End True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus
890
+
891
// Selection patterns for the _fake16 variants of V_ADD_NC_U16 / V_SUB_NC_U16,
// enabled when True16Predicate is UseFakeTrue16Insts.
// NOTE(review): this block sets no SubtargetPredicate of its own — confirm
// that the True16 predicate alone is the intended guard.
+ let True16Predicate = UseFakeTrue16Insts in {
892
// Unsigned saturating add/sub go through OpSelBinOpClampPat.
+ def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_fake16_e64>;
893
+ def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_fake16_e64>;
894
// Undo sub x, c -> add x, -c canonicalization since c is more likely
// an inline immediate than -c.
+ def : GCNPat<
895
+ (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
896
+ (V_SUB_NC_U16_fake16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
897
+ >;
898
+ } // End True16Predicate = UseFakeTrue16Insts
899
+
889
900
let SubtargetPredicate = isGFX12Plus in {
890
901
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
891
902
defm V_PERMLANE16_VAR_B32 : VOP3Inst<"v_permlane16_var_b32", VOP3_PERMLANE_VAR_Profile>;
@@ -1104,6 +1115,17 @@ multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
1104
1115
multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
1105
1116
VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;
1106
1117
1118
// Instantiates VOP3_Realtriple_with_name for both GFX11Gen and GFX12Gen
// using the same opcode `op`.
//   op              - 10-bit opcode passed to both generations.
//   asmName         - assembler name; note it comes BEFORE opName here, while
//                     VOP3_Realtriple_with_name takes opName first.
//   opName          - name forwarded as opName to the base; defaults to NAME.
//   pseudo_mnemonic - forwarded unchanged; defaults to "".
//   isSingle        - forwarded unchanged; defaults to 0.
+ multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
1119
+ string pseudo_mnemonic = "", bit isSingle = 0> :
1120
+ VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
1121
+ VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
1122
+
1123
// Expands VOP3_Realtriple_t16_gfx11_gfx12 twice: once for opName#"_t16" and
// once for opName#"_fake16". Both expansions share the same opcode `op` and
// the same asmName; only the pasted opName suffix differs.
// Parameters mirror VOP3_Realtriple_t16_gfx11_gfx12 (asmName before opName).
+ multiclass VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
1123
+ string pseudo_mnemonic = "", bit isSingle = 0> {
1124
// opName is pasted directly with the suffix, so it must be an exact
// name with no padding.
+ defm opName#"_t16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
1125
+ defm opName#"_fake16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
1126
+ }
1128
+
1107
1129
multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
1108
1130
VOP3be_Real<GFX11Gen, op, opName, asmName>,
1109
1131
VOP3be_Real<GFX12Gen, op, opName, asmName>;
@@ -1189,17 +1211,17 @@ defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "
1189
1211
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
1190
1212
defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
1191
1213
defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">;
1192
- defm V_ADD_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12 <0x303>;
1193
- defm V_SUB_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12 <0x304>;
1214
+ defm V_ADD_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12 <0x303, "v_add_nc_u16" >;
1215
+ defm V_SUB_NC_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12 <0x304, "v_sub_nc_u16" >;
1194
1216
defm V_MUL_LO_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x305, "v_mul_lo_u16">;
1195
1217
defm V_CVT_PK_I16_F32 : VOP3_Realtriple_gfx11_gfx12<0x306>;
1196
1218
defm V_CVT_PK_U16_F32 : VOP3_Realtriple_gfx11_gfx12<0x307>;
1197
1219
defm V_MAX_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x309, "v_max_u16">;
1198
1220
defm V_MAX_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30a, "v_max_i16">;
1199
1221
defm V_MIN_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30b, "v_min_u16">;
1200
1222
defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30c, "v_min_i16">;
1201
// Real encodings 0x30d / 0x30e: assembler names v_add_nc_i16 / v_sub_nc_i16
// are bound to the V_ADD_I16 / V_SUB_I16 names. The name strings must carry
// no padding inside the quotes: the opName string is pasted with suffixes
// such as "_t16" / "_fake16", so a trailing blank would yield a name like
// "V_ADD_I16 _t16", and a padded asm name would put a blank in the mnemonic.
- defm V_ADD_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12 <0x30d, "V_ADD_I16", "v_add_nc_i16">;
1202
- defm V_SUB_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12 <0x30e, "V_SUB_I16", "v_sub_nc_i16">;
1223
+ defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12 <0x30d, "v_add_nc_i16", "V_ADD_I16">;
1224
+ defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12 <0x30e, "v_sub_nc_i16", "V_SUB_I16">;
1203
1225
defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11_gfx12<0x311>;
1204
1226
defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >;
1205
1227
defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >;
0 commit comments