Skip to content

Commit 78941e1

Browse files
authored
[llvm][AArch64][Assembly]: Add FP8 instructions assembly and disassembly. (#69632)
This patch adds the feature flag FP8 and the assembly/disassembly for the following instructions of NEON, SVE2 and SME2:

  * NEON Instructions:
    + Advanced SIMD two-register miscellaneous:
      - F1CVTL, F1CVTL2, F2CVTL, F2CVTL2
      - BF1CVTL, BF1CVTL2, BF2CVTL, BF2CVTL2
    + Advanced SIMD three-register extension:
      - FCVTN, FCVTN2 (FP32 to FP8)
      - FCVTN (FP16 to FP8)
    + Advanced SIMD three same:
      - FSCALE
  * SVE2 Instructions:
    + Downconvert instructions:
      - FCVTN_Z2Z_HtoB
      - FCVTNB_Z2Z_StoB
      - BFCVTN_Z2Z_HtoB
      - FCVTNT_Z2Z_StoB
    + Upconvert instructions:
      - F1CVT_ZZ, F2CVT_ZZ
      - BF1CVT_ZZ, BF2CVT_ZZ
      - F1CVTLT_ZZ, F2CVTLT_ZZ
      - BF1CVTLT_ZZ, BF2CVTLT_ZZ
  * SME2 Instructions:
    - F1CVT_2ZZ, F2CVT_2ZZ
    - BF1CVT_2ZZ, BF2CVT_2ZZ
    - F1CVTL_2ZZ, F2CVTL_2ZZ
    - BF1CVTL_2ZZ, BF2CVTL_2ZZ
    - FCVT_Z2Z_HtoB, BFCVT_Z2Z_HtoB
    - FCVT_Z4Z
    - FCVTN_Z4Z
    - FSCALE_2ZZ, FSCALE_4ZZ
    - FSCALE_2Z2Z, FSCALE_4Z4Z

That is according to this documentation: https://developer.arm.com/documentation/ddi0602/2023-09
1 parent 2d0ac85 commit 78941e1

23 files changed

+1686
-22
lines changed

llvm/include/llvm/TargetParser/AArch64TargetParser.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ enum ArchExtKind : unsigned {
160160
AEK_ITE = 56, // FEAT_ITE
161161
AEK_GCS = 57, // FEAT_GCS
162162
AEK_FPMR = 58, // FEAT_FPMR
163+
AEK_FP8 = 59, // FEAT_FP8
163164
AEK_NUM_EXTENSIONS
164165
};
165166
using ExtensionBitset = Bitset<AEK_NUM_EXTENSIONS>;
@@ -269,6 +270,7 @@ inline constexpr ExtensionInfo Extensions[] = {
269270
{"wfxt", AArch64::AEK_NONE, {}, {}, FEAT_WFXT, "+wfxt", 550},
270271
{"gcs", AArch64::AEK_GCS, "+gcs", "-gcs", FEAT_INIT, "", 0},
271272
{"fpmr", AArch64::AEK_FPMR, "+fpmr", "-fpmr", FEAT_INIT, "", 0},
273+
{"fp8", AArch64::AEK_FP8, "+fp8", "-fp8", FEAT_INIT, "+fpmr", 0},
272274
// Special cases
273275
{"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority},
274276
};

llvm/lib/Target/AArch64/AArch64.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,9 @@ def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
130130
def FeatureFPMR : SubtargetFeature<"fpmr", "HasFPMR", "true",
131131
"Enable FPMR Register (FEAT_FPMR)">;
132132

133+
def FeatureFP8 : SubtargetFeature<"fp8", "HasFP8", "true",
134+
"Enable FP8 instructions (FEAT_FP8)">;
135+
133136
// This flag is currently still labeled as Experimental, but when fully
134137
// implemented this should tell the compiler to use the zeroing pseudos to
135138
// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6056,6 +6056,49 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
60566056
}
60576057

60586058

6059+
// FP8 assembly/disassembly classes
6060+
6061+
//----------------------------------------------------------------------------
6062+
// FP8 Advanced SIMD three-register extension
6063+
//----------------------------------------------------------------------------
6064+
class BaseSIMDThreeVectors<bit Q, bit U, bits<2> size, bits<4> op,
6065+
RegisterOperand regtype1,
6066+
RegisterOperand regtype2, string asm,
6067+
string kind1, string kind2>
6068+
: I<(outs regtype1:$Rd), (ins regtype2:$Rn, regtype2:$Rm), asm,
6069+
"\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2, "", []>, Sched<[]> {
6070+
bits<5> Rd;
6071+
bits<5> Rn;
6072+
bits<5> Rm;
6073+
let Inst{31} = 0;
6074+
let Inst{30} = Q;
6075+
let Inst{29} = U;
6076+
let Inst{28-24} = 0b01110;
6077+
let Inst{23-22} = size;
6078+
let Inst{21} = 0b0;
6079+
let Inst{20-16} = Rm;
6080+
let Inst{15} = 0b1;
6081+
let Inst{14-11} = op;
6082+
let Inst{10} = 0b1;
6083+
let Inst{9-5} = Rn;
6084+
let Inst{4-0} = Rd;
6085+
}
6086+
6087+
6088+
// FCVTN (FP16 to FP8)
6089+
multiclass SIMDThreeSameSizeVectorCvt<string asm> {
6090+
def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b01, 0b1110, V64, V64, asm, ".8b",".4h">;
6091+
def v16f8 : BaseSIMDThreeVectors<0b1, 0b0, 0b01, 0b1110, V128, V128, asm, ".16b", ".8h">;
6092+
}
6093+
6094+
// TODO : Create v16f8 value type
6095+
// FCVTN, FCVTN2 (FP32 to FP8)
6096+
multiclass SIMDThreeVectorCvt<string asm> {
6097+
def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b00, 0b1110, V64, V128, asm, ".8b", ".4s">;
6098+
def 2v16f8 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b00, 0b1110, asm#2, ".16b", ".4s",
6099+
V128, v16i8, v4f32, null_frag>;
6100+
}
6101+
60596102
//----------------------------------------------------------------------------
60606103
// AdvSIMD two register vector instructions.
60616104
//----------------------------------------------------------------------------
@@ -6479,6 +6522,16 @@ multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm,
64796522
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
64806523
}
64816524

6525+
//----------------------------------------------------------------------------
6526+
// FP8 Advanced SIMD two-register miscellaneous
6527+
//----------------------------------------------------------------------------
6528+
multiclass SIMDMixedTwoVectorFP8<bits<2>sz, string asm> {
6529+
def v8f16 : BaseSIMDMixedTwoVector<0b0, 0b1, sz, 0b10111, V64, V128,
6530+
asm, ".8h", ".8b", []>;
6531+
def 2v8f16 : BaseSIMDMixedTwoVector<0b1, 0b1, sz, 0b10111, V128, V128,
6532+
asm#2, ".8h", ".16b", []>;
6533+
}
6534+
64826535
class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
64836536
bits<5> opcode, RegisterOperand regtype, string asm,
64846537
string kind, string zero, ValueType dty,

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,8 @@ def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">,
162162
AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
163163
def HasFPMR : Predicate<"Subtarget->hasFPMR()">,
164164
AssemblerPredicateWithAll<(all_of FeatureFPMR), "fpmr">;
165+
def HasFP8 : Predicate<"Subtarget->hasFP8()">,
166+
AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
165167

166168
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
167169
// they should be enabled if either has been specified.
@@ -173,6 +175,10 @@ def HasSVE2orSME
173175
: Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
174176
AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
175177
"sve2 or sme">;
178+
def HasSVE2orSME2
179+
: Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME2()">,
180+
AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
181+
"sve2 or sme2">;
176182
def HasSVE2p1_or_HasSME
177183
: Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
178184
AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
@@ -9249,6 +9255,15 @@ let Predicates = [HasD128] in {
92499255
}
92509256
}
92519257

9258+
let Predicates = [HasFP8] in {
9259+
defm F1CVTL : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">;
9260+
defm F2CVTL : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">;
9261+
defm BF1CVTL : SIMDMixedTwoVectorFP8<0b10, "bf1cvtl">;
9262+
defm BF2CVTL : SIMDMixedTwoVectorFP8<0b11, "bf2cvtl">;
9263+
defm FCVTN_F16_F8 : SIMDThreeSameSizeVectorCvt<"fcvtn">;
9264+
defm FCVTN_F32_F8 : SIMDThreeVectorCvt<"fcvtn">;
9265+
defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
9266+
} // End let Predicates = [HasFP8]
92529267

92539268
include "AArch64InstrAtomics.td"
92549269
include "AArch64SVEInstrInfo.td"

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -330,14 +330,14 @@ defm UMLSL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"umlsl", 0b11, int_aar
330330
defm UMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x2>;
331331
defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x4>;
332332

333-
defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b0000, nxv8f16, nxv4f32, int_aarch64_sve_fcvt_x2>;
334-
defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b0001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>;
335-
defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b1000, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvt_x2>;
336-
defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b1001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>;
337-
338-
defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b0110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvt_x2>;
339-
defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b0111, nxv8i16, nxv4i32, int_aarch64_sve_uqcvt_x2>;
340-
defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b1110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvtu_x2>;
333+
defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b00000, nxv8f16, nxv4f32, int_aarch64_sve_fcvt_x2>;
334+
defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b00001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>;
335+
defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b10000, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvt_x2>;
336+
defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b10001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>;
337+
338+
defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b00110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvt_x2>;
339+
defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b00111, nxv8i16, nxv4i32, int_aarch64_sve_uqcvt_x2>;
340+
defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b10110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvtu_x2>;
341341
defm SQCVT_Z4Z : sme2_int_cvt_vg4_single<"sqcvt", 0b000, int_aarch64_sve_sqcvt_x4>;
342342
defm UQCVT_Z4Z : sme2_int_cvt_vg4_single<"uqcvt", 0b001, int_aarch64_sve_uqcvt_x4>;
343343
defm SQCVTU_Z4Z : sme2_int_cvt_vg4_single<"sqcvtu", 0b100, int_aarch64_sve_sqcvtu_x4>;
@@ -855,3 +855,26 @@ defm BFCLAMP_VG4_4ZZZ: sme2p1_bfclamp_vector_vg4_multi<"bfclamp">;
855855
defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0, 0b11, ZPR16>;
856856
defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1, 0b11, ZPR16>;
857857
}
858+
859+
let Predicates = [HasSME2, HasFP8] in {
860+
defm F1CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f1cvt", 0b00, 0b0>;
861+
defm F1CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f1cvtl", 0b00, 0b1>;
862+
defm BF1CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvt", 0b01, 0b0>;
863+
defm BF1CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvtl", 0b01, 0b1>;
864+
defm F2CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f2cvt", 0b10, 0b0>;
865+
defm F2CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f2cvtl", 0b10, 0b1>;
866+
defm BF2CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvt", 0b11, 0b0>;
867+
defm BF2CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvtl", 0b11, 0b1>;
868+
869+
defm FCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"fcvt", 0b0>;
870+
defm BFCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"bfcvt", 0b1>;
871+
defm FCVT_Z4Z_StoB : sme2_fp8_cvt_vg4_single<"fcvt", 0b0>;
872+
defm FCVTN_Z4Z_StoB : sme2_fp8_cvt_vg4_single<"fcvtn", 0b1>;
873+
874+
defm FSCALE_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fscale", 0b0011000>;
875+
defm FSCALE_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fscale", 0b0011000>;
876+
defm FSCALE_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fscale", 0b0011000>;
877+
defm FSCALE_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fscale", 0b0011000>;
878+
879+
} // [HasSME2, HasFP8]
880+

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4002,3 +4002,24 @@ defm UZPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b010, "uzpq1">;
40024002
defm UZPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b011, "uzpq2">;
40034003
defm TBLQ_ZZZ : sve2p1_tblq<"tblq">;
40044004
} // End HasSVE2p1_or_HasSME2p1
4005+
4006+
//===----------------------------------------------------------------------===//
4007+
// SVE2 FP8 instructions
4008+
//===----------------------------------------------------------------------===//
4009+
let Predicates = [HasSVE2orSME2, HasFP8] in {
4010+
// FP8 upconvert
4011+
defm F1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b00, "f1cvt">;
4012+
defm F2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b01, "f2cvt">;
4013+
defm BF1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b10, "bf1cvt">;
4014+
defm BF2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b11, "bf2cvt">;
4015+
defm F1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b00, "f1cvtlt">;
4016+
defm F2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b01, "f2cvtlt">;
4017+
defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt">;
4018+
defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt">;
4019+
4020+
// FP8 downconvert
4021+
defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r>;
4022+
defm FCVTNB_Z2Z_StoB : sve2_fp8_down_cvt_single<0b01, "fcvtnb", ZZ_s_mul_r>;
4023+
defm BFCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b10, "bfcvtn", ZZ_h_mul_r>;
4024+
defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>;
4025+
} // End HasSVE2orSME2, HasFP8

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3639,6 +3639,7 @@ static const struct Extension {
36393639
{"ssbs", {AArch64::FeatureSSBS}},
36403640
{"tme", {AArch64::FeatureTME}},
36413641
{"fpmr", {AArch64::FeatureFPMR}},
3642+
{"fp8", {AArch64::FeatureFP8}},
36423643
};
36433644

36443645
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2161,15 +2161,16 @@ multiclass sme2_frint_vector_vg4_multi<string mnemonic, bits<7> op> {
21612161
mnemonic>;
21622162
}
21632163

2164-
class sme2_cvt_vg2_single<string mnemonic, bits<4> op>
2165-
: I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn),
2164+
class sme2_cvt_vg2_single<string mnemonic, bits<5> op,
2165+
RegisterOperand first_ty, RegisterOperand second_ty>
2166+
: I<(outs first_ty:$Zd), (ins second_ty:$Zn),
21662167
mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
21672168
bits<4> Zn;
21682169
bits<5> Zd;
21692170
let Inst{31-23} = 0b110000010;
2170-
let Inst{22} = op{3};
2171-
let Inst{21-18} = 0b1000;
2172-
let Inst{17-16} = op{2-1};
2171+
let Inst{22} = op{4};
2172+
let Inst{21-19} = 0b100;
2173+
let Inst{18-16} = op{3-1};
21732174
let Inst{15-10} = 0b111000;
21742175
let Inst{9-6} = Zn;
21752176
let Inst{5} = op{0};
@@ -2178,12 +2179,17 @@ class sme2_cvt_vg2_single<string mnemonic, bits<4> op>
21782179

21792180
// SME2 multi-vec FP down convert two registers
21802181
// SME2 multi-vec int down convert two registers
2181-
multiclass sme2_cvt_vg2_single<string mnemonic, bits<4> op, ValueType out_vt,
2182+
multiclass sme2_cvt_vg2_single<string mnemonic, bits<5> op, ValueType out_vt,
21822183
ValueType in_vt, SDPatternOperator intrinsic> {
2183-
def NAME : sme2_cvt_vg2_single<mnemonic, op>;
2184+
def NAME : sme2_cvt_vg2_single<mnemonic, op, ZPR16, ZZ_s_mul_r>;
21842185
def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
21852186
}
21862187

2188+
// SME2 multi-vec FP8 down convert two registers
2189+
multiclass sme2_fp8_cvt_vg2_single<string mnemonic, bit op> {
2190+
def NAME : sme2_cvt_vg2_single<mnemonic, {op, 0b1000}, ZPR8, ZZ_h_mul_r>;
2191+
}
2192+
21872193
class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u, RegisterOperand first_ty,
21882194
RegisterOperand second_ty, string mnemonic>
21892195
: I<(outs first_ty:$Zd), (ins second_ty:$Zn),
@@ -2212,7 +2218,13 @@ multiclass sme2p1_fp_cvt_vector_vg2_single<string mnemonic, bit l> {
22122218
def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b000, l, ZZ_s_mul_r, ZPR16, mnemonic>;
22132219
}
22142220

2215-
class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
2221+
// SME2 multi-vec FP8 up convert two registers
2222+
multiclass sme2p1_fp8_cvt_vector_vg2_single<string mnemonic, bits<2> opc, bit L> {
2223+
def _NAME : sme2_cvt_unpk_vector_vg2<opc, 0b110, L, ZZ_h_mul_r, ZPR8, mnemonic>;
2224+
}
2225+
2226+
2227+
class sme2_cvt_vg4_single<bit sz, bits<3> op, bits<4>op2, RegisterOperand first_ty,
22162228
RegisterOperand second_ty, string mnemonic>
22172229
: I<(outs first_ty:$Zd), (ins second_ty:$Zn),
22182230
mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
@@ -2221,21 +2233,28 @@ class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
22212233
let Inst{31-24} = 0b11000001;
22222234
let Inst{23} = sz;
22232235
let Inst{22} = op{2};
2224-
let Inst{21-10} = 0b110011111000;
2236+
let Inst{21-20} = 0b11;
2237+
let Inst{19-16} = op2;
2238+
let Inst{15-10} = 0b111000;
22252239
let Inst{9-7} = Zn;
22262240
let Inst{6-5} = op{1-0};
22272241
let Inst{4-0} = Zd;
22282242
}
22292243

22302244
// SME2 multi-vec int down convert four registers
22312245
multiclass sme2_int_cvt_vg4_single<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
2232-
def _StoB : sme2_cvt_vg4_single<0, op, ZPR8, ZZZZ_s_mul_r, mnemonic>;
2233-
def _DtoH : sme2_cvt_vg4_single<1, op, ZPR16, ZZZZ_d_mul_r, mnemonic>;
2246+
def _StoB : sme2_cvt_vg4_single<0, op, 0b0011, ZPR8, ZZZZ_s_mul_r, mnemonic>;
2247+
def _DtoH : sme2_cvt_vg4_single<1, op, 0b0011, ZPR16, ZZZZ_d_mul_r, mnemonic>;
22342248

22352249
def : SME2_Cvt_VG4_Pat<NAME # _StoB, intrinsic, nxv16i8, nxv4i32>;
22362250
def : SME2_Cvt_VG4_Pat<NAME # _DtoH, intrinsic, nxv8i16, nxv2i64>;
22372251
}
22382252

2253+
//SME2 multi-vec FP8 down convert four registers
2254+
multiclass sme2_fp8_cvt_vg4_single<string mnemonic, bit N> {
2255+
def _NAME : sme2_cvt_vg4_single<0b0, {0b00, N}, 0b0100, ZPR8, ZZZZ_s_mul_r, mnemonic>;
2256+
}
2257+
22392258
class sme2_unpk_vector_vg4<bits<2>sz, bit u, RegisterOperand first_ty,
22402259
RegisterOperand second_ty, string mnemonic>
22412260
: I<(outs first_ty:$Zd), (ins second_ty:$Zn),

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10078,3 +10078,46 @@ multiclass sve2p1_tblq<string mnemonic> {
1007810078
def _S : sve2p1_permute_vec_elems_q<0b10, 0b110, mnemonic, ZPR32, Z_s>;
1007910079
def _D : sve2p1_permute_vec_elems_q<0b11, 0b110, mnemonic, ZPR64, Z_d>;
1008010080
}
10081+
10082+
//===----------------------------------------------------------------------===//
10083+
// SVE2 FP8 Instructions
10084+
//===----------------------------------------------------------------------===//
10085+
10086+
// FP8 upconvert
10087+
class sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic,
10088+
ZPRRegOp dst_ty, ZPRRegOp src_ty>
10089+
: I<(outs dst_ty:$Zd), (ins src_ty:$Zn),
10090+
mnemonic, "\t$Zd, $Zn",
10091+
"", []>, Sched<[]>{
10092+
bits<5> Zd;
10093+
bits<5> Zn;
10094+
let Inst{31-17} = 0b011001010000100;
10095+
let Inst{16} = L;
10096+
let Inst{15-12} = 0b0011;
10097+
let Inst{11-10} = opc;
10098+
let Inst{9-5} = Zn;
10099+
let Inst{4-0} = Zd;
10100+
}
10101+
10102+
multiclass sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic> {
10103+
def _BtoH : sve2_fp8_cvt_single<L, opc, mnemonic, ZPR16, ZPR8>;
10104+
}
10105+
10106+
// FP8 downconvert
10107+
class sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic,
10108+
ZPRRegOp dst_ty, RegisterOperand src_ty>
10109+
: I<(outs dst_ty:$Zd), (ins src_ty:$Zn),
10110+
mnemonic, "\t$Zd, $Zn",
10111+
"", []>, Sched<[]>{
10112+
bits<5> Zd;
10113+
bits<4> Zn;
10114+
let Inst{31-12} = 0b01100101000010100011;
10115+
let Inst{11-10} = opc;
10116+
let Inst{9-6} = Zn;
10117+
let Inst{5} = 0b0;
10118+
let Inst{4-0} = Zd;
10119+
}
10120+
10121+
multiclass sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic, RegisterOperand src> {
10122+
def NAME : sve2_fp8_down_cvt_single<opc, mnemonic, ZPR8, src>;
10123+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
2+
3+
.arch armv9-a+fp8
4+
.arch armv9-a+nofp8
5+
bf1cvtl v0.8h, v0.8b
6+
// CHECK: error: instruction requires: fp8
7+
// CHECK: bf1cvtl v0.8h, v0.8b
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// RUN: llvm-mc -triple aarch64 -o - %s 2>&1 | FileCheck %s
2+
3+
.arch armv9-a+fp8
4+
bf1cvtl v0.8h, v0.8b
5+
// CHECK: bf1cvtl v0.8h, v0.8b
6+
7+
.arch armv9-a+nofp8

0 commit comments

Comments
 (0)