-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[llvm][AArch64][Assembly]: Add FP8 instructions assembly and disassembly. #69632
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-mc Author: None (hassnaaHamdi) Changes: This patch adds the feature flag FP8 and the assembly/disassembly for the following instructions of NEON, SVE2 and SME2:
That is according to this documentation: https://developer.arm.com/documentation/ddi0602/2023-09. Change-Id: I56008a1b74c21ad30f36d18c4895c4dd1ba48920. Patch is 80.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69632.diff 23 Files Affected:
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 616f2d79028615d..e22ab824e51e2eb 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -159,7 +159,8 @@ enum ArchExtKind : unsigned {
AEK_RASv2 = 55, // FEAT_RASv2
AEK_ITE = 56, // FEAT_ITE
AEK_GCS = 57, // FEAT_GCS
- AEK_NUM_EXTENSIONS = AEK_GCS + 1
+ AEK_FP8 = 58, // FEAT_FP8
+ AEK_NUM_EXTENSIONS
};
using ExtensionBitset = Bitset<AEK_NUM_EXTENSIONS>;
// clang-format on
@@ -267,6 +268,7 @@ inline constexpr ExtensionInfo Extensions[] = {
{"tme", AArch64::AEK_TME, "+tme", "-tme", FEAT_INIT, "", 0},
{"wfxt", AArch64::AEK_NONE, {}, {}, FEAT_WFXT, "+wfxt", 550},
{"gcs", AArch64::AEK_GCS, "+gcs", "-gcs", FEAT_INIT, "", 0},
+ {"fp8", AArch64::AEK_FP8, "+fp8", "-fp8", FEAT_INIT, "", 0},
// Special cases
{"none", AArch64::AEK_NONE, {}, {}, FEAT_INIT, "", ExtensionInfo::MaxFMVPriority},
};
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 70973c92305aa62..9254676458fb831 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -127,6 +127,9 @@ def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
"Enable Scalable Vector Extension (SVE) instructions (FEAT_SVE)", [FeatureFullFP16]>;
+def FeatureFP8 : SubtargetFeature<"fp8", "HasFP8", "true",
+ "Enable FP8 Instructions (FEAT_FP8)">;
+
// This flag is currently still labeled as Experimental, but when fully
// implemented this should tell the compiler to use the zeroing pseudos to
// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index e5dbfa404b3c6bf..129866f39edc7ce 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6056,6 +6056,53 @@ multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm,
}
+// FP8 assembly/disassembly classes
+
+//----------------------------------------------------------------------------
+// FP8 Advanced SIMD three-register extension
+//----------------------------------------------------------------------------
+class BaseSIMDThreeVectors<bit Q, bit U, bits<2> size, bits<4> op,
+ RegisterOperand regtype1,
+ RegisterOperand regtype2, string asm,
+ string kind1, string kind2>
+ : I<(outs regtype1:$Rd), (ins regtype2:$Rn, regtype2:$Rm), asm,
+ "\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2, "", []>, Sched<[]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0b1;
+ let Inst{14-11} = op;
+ let Inst{10} = 0b1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+
+// FCVTN (FP16 to FP8)
+multiclass SIMDThreeSameSizeVectorCvt<string asm> {
+ def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b01, 0b1110, V64, V64, asm, ".8b",".4h">;
+ def v16f8 : BaseSIMDThreeVectors<0b1, 0b0, 0b01, 0b1110, V128, V128, asm, ".16b", ".8h">;
+}
+
+class BaseSIMDThreeSameSizeVectorFP8Tied<bit Q, bits<2> sz, string asm, string kind1,
+ string kind2, RegisterOperand RegType> :
+ BaseSIMDThreeSameVectorTied<Q, 0b0, {sz,0b0}, 0b11110, RegType, asm, kind1, []> {
+ let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}");
+}
+
+// FCVTN, FCVTN2 (FP32 to FP8)
+multiclass SIMDThreeVectorCvt<string asm> {
+ def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b00, 0b1110, V64, V128, asm, ".8b", ".4s">;
+ def 2v16f8 : BaseSIMDThreeSameSizeVectorFP8Tied<0b1, 0b00, asm#2, ".16b", ".4s", V128>;
+}
+
//----------------------------------------------------------------------------
// AdvSIMD two register vector instructions.
//----------------------------------------------------------------------------
@@ -6479,6 +6526,16 @@ multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm,
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
}
+//----------------------------------------------------------------------------
+// FP8 Advanced SIMD two-register miscellaneous
+//----------------------------------------------------------------------------
+multiclass SIMDMixedTwoVectorFP8<bits<2>sz, string asm> {
+ def v8f16 : BaseSIMDMixedTwoVector<0b0, 0b1, sz, 0b10111, V64, V128,
+ asm, ".8h", ".8b", []>;
+ def 2v8f16 : BaseSIMDMixedTwoVector<0b1, 0b1, sz, 0b10111, V128, V128,
+ asm#2, ".8h", ".16b", []>;
+}
+
class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
bits<5> opcode, RegisterOperand regtype, string asm,
string kind, string zero, ValueType dty,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index df59dc4ad27fadb..aec57737ab49636 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -160,6 +160,8 @@ def HasSME2 : Predicate<"Subtarget->hasSME2()">,
AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">,
AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
+def HasFP8 : Predicate<"Subtarget->hasFP8()">,
+ AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
@@ -171,6 +173,10 @@ def HasSVE2orSME
: Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
"sve2 or sme">;
+def HasSVE2orSME2
+ : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME2()">,
+ AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
+ "sve2 or sme2">;
def HasSVE2p1_or_HasSME
: Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
@@ -9247,6 +9253,15 @@ let Predicates = [HasD128] in {
}
}
+let Predicates = [HasFP8] in {
+ defm F1CVTL : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">;
+ defm F2CVTL : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">;
+ defm BF1CVTL : SIMDMixedTwoVectorFP8<0b10, "bf1cvtl">;
+ defm BF2CVTL : SIMDMixedTwoVectorFP8<0b11, "bf2cvtl">;
+ defm FCVTN_F16_F8 : SIMDThreeSameSizeVectorCvt<"fcvtn">;
+ defm FCVTN_F32_F8 : SIMDThreeVectorCvt<"fcvtn">;
+ defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
+} // End let Predicates = [HasFP8]
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 2685f2e3c8108e5..cbdc38965fc5c7e 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -330,14 +330,14 @@ defm UMLSL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"umlsl", 0b11, int_aar
defm UMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x2>;
defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x4>;
-defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b0000, nxv8f16, nxv4f32, int_aarch64_sve_fcvt_x2>;
-defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b0001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>;
-defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b1000, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvt_x2>;
-defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b1001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>;
-
-defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b0110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvt_x2>;
-defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b0111, nxv8i16, nxv4i32, int_aarch64_sve_uqcvt_x2>;
-defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b1110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvtu_x2>;
+defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b00000, nxv8f16, nxv4f32, int_aarch64_sve_fcvt_x2>;
+defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b00001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>;
+defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b10000, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvt_x2>;
+defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b10001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>;
+
+defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b00110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvt_x2>;
+defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b00111, nxv8i16, nxv4i32, int_aarch64_sve_uqcvt_x2>;
+defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b10110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvtu_x2>;
defm SQCVT_Z4Z : sme2_int_cvt_vg4_single<"sqcvt", 0b000, int_aarch64_sve_sqcvt_x4>;
defm UQCVT_Z4Z : sme2_int_cvt_vg4_single<"uqcvt", 0b001, int_aarch64_sve_uqcvt_x4>;
defm SQCVTU_Z4Z : sme2_int_cvt_vg4_single<"sqcvtu", 0b100, int_aarch64_sve_sqcvtu_x4>;
@@ -855,3 +855,26 @@ defm BFCLAMP_VG4_4ZZZ: sme2p1_bfclamp_vector_vg4_multi<"bfclamp">;
defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0, 0b11, ZPR16>;
defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1, 0b11, ZPR16>;
}
+
+let Predicates = [HasSME2, HasFP8] in {
+defm F1CVT_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"f1cvt", 0b00, 0b0>;
+defm F1CVTL_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"f1cvtl", 0b00, 0b1>;
+defm BF1CVT_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvt", 0b01, 0b0>;
+defm BF1CVTL_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvtl", 0b01, 0b1>;
+defm F2CVT_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"f2cvt", 0b10, 0b0>;
+defm F2CVTL_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"f2cvtl", 0b10, 0b1>;
+defm BF2CVT_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvt", 0b11, 0b0>;
+defm BF2CVTL_2ZZ : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvtl", 0b11, 0b1>;
+
+defm FCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"fcvt", 0b0>;
+defm BFCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"bfcvt", 0b1>;
+defm FCVT_Z4Z : sme2_fp8_cvt_vg4_single<"fcvt", 0b0>;
+defm FCVTN_Z4Z : sme2_fp8_cvt_vg4_single<"fcvtn", 0b1>;
+
+defm FSCALE_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fscale", 0b0011000>;
+defm FSCALE_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fscale", 0b0011000>;
+defm FSCALE_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fscale", 0b0011000>;
+defm FSCALE_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fscale", 0b0011000>;
+
+} // [HasSME2, HasFP8]
+
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index d599ac4689e5cb3..002d5d28fcf8d53 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4002,3 +4002,24 @@ defm UZPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b010, "uzpq1">;
defm UZPQ2_ZZZ : sve2p1_permute_vec_elems_q<0b011, "uzpq2">;
defm TBLQ_ZZZ : sve2p1_tblq<"tblq">;
} // End HasSVE2p1_or_HasSME2p1
+
+//===----------------------------------------------------------------------===//
+// SVE2 FP8 instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasSVE2orSME2, HasFP8] in {
+// FP8 upconvert
+defm F1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b00, "f1cvt">;
+defm F2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b01, "f2cvt">;
+defm BF1CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b10, "bf1cvt">;
+defm BF2CVT_ZZ : sve2_fp8_cvt_single<0b0, 0b11, "bf2cvt">;
+defm F1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b00, "f1cvtlt">;
+defm F2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b01, "f2cvtlt">;
+defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt">;
+defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt">;
+
+// FP8 downconvert
+defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r>;
+defm FCVTNB_Z2Z_StoB : sve2_fp8_down_cvt_single<0b01, "fcvtnb", ZZ_s_mul_r>;
+defm BFCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b10, "bfcvtn", ZZ_h_mul_r>;
+defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>;
+} // End HasSVE2orSME2, HasFP8
\ No newline at end of file
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index cbcb49c3e2d74dc..5b00615b05c654a 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3638,6 +3638,7 @@ static const struct Extension {
{"sb", {AArch64::FeatureSB}},
{"ssbs", {AArch64::FeatureSSBS}},
{"tme", {AArch64::FeatureTME}},
+ {"fp8", {AArch64::FeatureFP8}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 823115c7d025005..b85b5ed367ca376 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -2161,15 +2161,16 @@ multiclass sme2_frint_vector_vg4_multi<string mnemonic, bits<7> op> {
mnemonic>;
}
-class sme2_cvt_vg2_single<string mnemonic, bits<4> op>
- : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn),
+class sme2_cvt_vg2_single<string mnemonic, bits<5> op,
+ RegisterOperand first_ty, RegisterOperand second_ty>
+ : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
bits<4> Zn;
bits<5> Zd;
let Inst{31-23} = 0b110000010;
- let Inst{22} = op{3};
- let Inst{21-18} = 0b1000;
- let Inst{17-16} = op{2-1};
+ let Inst{22} = op{4};
+ let Inst{21-19} = 0b100;
+ let Inst{18-16} = op{3-1};
let Inst{15-10} = 0b111000;
let Inst{9-6} = Zn;
let Inst{5} = op{0};
@@ -2178,12 +2179,17 @@ class sme2_cvt_vg2_single<string mnemonic, bits<4> op>
// SME2 multi-vec FP down convert two registers
// SME2 multi-vec int down convert two registers
-multiclass sme2_cvt_vg2_single<string mnemonic, bits<4> op, ValueType out_vt,
+multiclass sme2_cvt_vg2_single<string mnemonic, bits<5> op, ValueType out_vt,
ValueType in_vt, SDPatternOperator intrinsic> {
- def NAME : sme2_cvt_vg2_single<mnemonic, op>;
+ def NAME : sme2_cvt_vg2_single<mnemonic, op, ZPR16, ZZ_s_mul_r>;
def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
}
+// SME2 multi-vec FP8 down convert two registers
+multiclass sme2_fp8_cvt_vg2_single<string mnemonic, bit op> {
+ def NAME : sme2_cvt_vg2_single<mnemonic, {op, 0b1000}, ZPR8, ZZ_h_mul_r>;
+}
+
class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u, RegisterOperand first_ty,
RegisterOperand second_ty, string mnemonic>
: I<(outs first_ty:$Zd), (ins second_ty:$Zn),
@@ -2212,7 +2218,13 @@ multiclass sme2p1_fp_cvt_vector_vg2_single<string mnemonic, bit l> {
def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b000, l, ZZ_s_mul_r, ZPR16, mnemonic>;
}
-class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
+// SME2 multi-vec FP8 up convert two registers
+multiclass sme2p1_fp8_cvt_vector_vg2_single<string mnemonic, bits<2> opc, bit L> {
+ def _BtoH : sme2_cvt_unpk_vector_vg2<opc, 0b110, L, ZZ_h_mul_r, ZPR8, mnemonic>;
+}
+
+
+class sme2_cvt_vg4_single<bit sz, bits<3> op, bits<4>op2, RegisterOperand first_ty,
RegisterOperand second_ty, string mnemonic>
: I<(outs first_ty:$Zd), (ins second_ty:$Zn),
mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
@@ -2221,7 +2233,9 @@ class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
let Inst{31-24} = 0b11000001;
let Inst{23} = sz;
let Inst{22} = op{2};
- let Inst{21-10} = 0b110011111000;
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = op2;
+ let Inst{15-10} = 0b111000;
let Inst{9-7} = Zn;
let Inst{6-5} = op{1-0};
let Inst{4-0} = Zd;
@@ -2229,13 +2243,18 @@ class sme2_cvt_vg4_single<bit sz, bits<3> op, RegisterOperand first_ty,
// SME2 multi-vec int down convert four registers
multiclass sme2_int_cvt_vg4_single<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
- def _StoB : sme2_cvt_vg4_single<0, op, ZPR8, ZZZZ_s_mul_r, mnemonic>;
- def _DtoH : sme2_cvt_vg4_single<1, op, ZPR16, ZZZZ_d_mul_r, mnemonic>;
+ def _StoB : sme2_cvt_vg4_single<0, op, 0b0011, ZPR8, ZZZZ_s_mul_r, mnemonic>;
+ def _DtoH : sme2_cvt_vg4_single<1, op, 0b0011, ZPR16, ZZZZ_d_mul_r, mnemonic>;
def : SME2_Cvt_VG4_Pat<NAME # _StoB, intrinsic, nxv16i8, nxv4i32>;
def : SME2_Cvt_VG4_Pat<NAME # _DtoH, intrinsic, nxv8i16, nxv2i64>;
}
+//SME2 multi-vec FP8 down convert four registers
+multiclass sme2_fp8_cvt_vg4_single<string mnemonic, bit N> {
+ def _StoB : sme2_cvt_vg4_single<0b0, {0b00, N}, 0b0100, ZPR8, ZZZZ_s_mul_r, mnemonic>;
+}
+
class sme2_unpk_vector_vg4<bits<2>sz, bit u, RegisterOperand first_ty,
RegisterOperand second_ty, string mnemonic>
: I<(outs first_ty:$Zd), (ins second_ty:$Zn),
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 7bb457d9188210c..28e25f346cbdbc4 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10078,3 +10078,46 @@ multiclass sve2p1_tblq<string mnemonic> {
def _S : sve2p1_permute_vec_elems_q<0b10, 0b110, mnemonic, ZPR32, Z_s>;
def _D : sve2p1_permute_vec_elems_q<0b11, 0b110, mnemonic, ZPR64, Z_d>;
}
+
+//===----------------------------------------------------------------------===//
+// SVE2 FP8 Instructions
+//===----------------------------------------------------------------------===//
+
+// FP8 upconvert
+class sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic,
+ ZPRRegOp dst_ty, ZPRRegOp src_ty>
+ : I<(outs dst_ty:$Zd), (ins src_ty:$Zn),
+ mnemonic, "\t$Zd, $Zn",
+ "", []>, Sched<[]>{
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-17} = 0b011001010000100;
+ let Inst{16} = L;
+ let Inst{15-12} = 0b0011;
+ let Inst{11-10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_fp8_cvt_single<bit L, bits<2> opc, string mnemonic> {
+ def _BtoH : sve2_fp8_cvt_single<L, opc, mnemonic, ZPR16, ZPR8>;
+}
+
+// FP8 downconvert
+class sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic,
+ ZPRRegOp dst_ty, RegisterOperand src_ty>
+ : I<(outs dst_ty:$Zd), (ins src_ty:$Zn),
+ mnemonic, "\t$Zd, $Zn",
+ "", []>, Sched<[]>{
+ bits<5> Zd;
+ bits<4> Zn;
+ let Inst{31-12} = 0b01100101000010100011;
+ let Inst{11-10} = opc;
+ let Inst{9-6} = Zn;
+ let Inst{5} = 0b0;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic, RegisterOperand src> {
+ def NAME : sve2_fp8_down_cvt_single<opc, mnemonic, ZPR8, src>;
+}
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/FP8/directive-arch-negative.s b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
new file mode 100644
index 000000000000000..cf48416d29d8a28
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/directive-arch-negative.s
@@ -0,0 +1,7 @@
+// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+.arch armv9-a+fp8
+.arch armv9-a+nofp8
+bf1cvtl v0.8h, v0.8b
+// CHECK: error: instruction requires: fp8
+// CHECK: bf1cvtl v0.8h, v0.8b
diff --git a/llvm/test/MC/AArch64/FP8/directive-arch.s b/llvm/test/MC/AArch64/FP8/directive-arch.s
new file mode 100644
index 000000000000000..8857d4f0bfbe422
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/directive-arch.s
@@ -0,0 +1,7 @@
+// RUN: llvm-mc -triple aarch64 -o - %s 2>&1 | FileCheck %s
+
+.arch armv9-a+fp8
+bf1cvtl v0.8h, v0.8b
+// CHECK: bf1cvtl v0.8h, v0.8b
+
+.arch armv9-a+nofp8
diff --git a/llvm/test/MC/AArch64/FP8/miscellaneous-fp8-diagnosti...
[truncated]
|
This looks like a downstream Gerrit ID, I assume you meant to remove this. |
Also if we're doing the |
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for making the changes. The patch LGTM.
…bly. This patch adds the feature flag FP8 and the assembly/disassembly for the following instructions of NEON, SVE2 and SME2: * NEON Instructions: + Advanced SIMD two-register miscellaneous: - BF1CVTL, BF1CVTL2, BF2CVTL, BF2CVTL2 — BF1CVTL - BF1CVTL, BF1CVTL2, BF2CVTL, BF2CVTL2 — BF2CVTL - F1CVTL, F1CVTL2, F2CVTL, F2CVTL2 — F1CVTL - F1CVTL, F1CVTL2, F2CVTL, F2CVTL2 — F2CVTL + Advanced SIMD three-register extension: - FCVTN, FCVTN2 (FP32 to FP8) - FCVTN (FP16 to FP8) + Advanced SIMD three same - FSCALE * SVE2 Instructions: + Downconvert instructions: - FCVTN_Z2Z_HtoB - FCVTNB_Z2Z_StoB - BFCVTN_Z2Z_HtoB - FCVTNT_Z2Z_StoB + Upconvert instructions: - F1CVT_ZZ, F2CVT_ZZ - BF1CVT_ZZ, BF2CVT_ZZ - F1CVTLT_ZZ, F2CVTLT_ZZ That is according to this documentation: https://developer.arm.com/documentation/ddi0602/2023-09 Change-Id: I9d816b3760b0693ad8a33724489629b660070bad
This patch adds the feature flag FP8 and the assembly/disassembly
for the following instructions of NEON, SVE2 and SME2:
NEON Instructions:
SVE2 Instructions:
SME2 Instructions:
That is according to this documentation:
https://developer.arm.com/documentation/ddi0602/2023-09