Skip to content

[AArch64] Add intrinsics for SME FP8 FDOT LANE instructions #118492

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 13, 2024
Merged
15 changes: 15 additions & 0 deletions clang/include/clang/Basic/arm_sme.td
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,21 @@ let SMETargetGuard = "sme2" in {
def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
}

//
// SME2 FP8 instructions
//

// FDOT
let SMETargetGuard = "sme-f8f32" in {
def SVDOT_LANE_FP8_ZA32_VG1x2 : Inst<"svdot_lane_za32[_mf8]_vg1x2_fpm", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>;
def SVDOT_LANE_FP8_ZA32_VG1x4 : Inst<"svdot_lane_za32[_mf8]_vg1x4_fpm", "vm4di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za32_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>;
}

let SMETargetGuard = "sme-f8f16" in {
def SVDOT_LANE_FP8_ZA16_VG1x2 : Inst<"svdot_lane_za16[_mf8]_vg1x2_fpm", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
def SVDOT_LANE_FP8_ZA16_VG1x4 : Inst<"svdot_lane_za16[_mf8]_vg1x4_fpm", "vm4di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
}

////////////////////////////////////////////////////////////////////////////////
// SME2p1 - FMOPA, FMOPS (non-widening)
let SMETargetGuard = "sme-b16b16" in {
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Basic/arm_sve_sme_incl.td
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ include "arm_immcheck_incl.td"
// h: half-float
// d: double
// b: bfloat
// m: mfloat8

// Typespec modifiers
// ------------------
Expand Down
96 changes: 96 additions & 0 deletions clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// REQUIRES: aarch64-registered-target
#include <arm_sme.h>

// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
#include <arm_sme.h>

#ifdef SVE_OVERLOADED_FORMS
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
#else
#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
#endif

// CHECK-LABEL: define dso_local void @test_svdot_lane_za32_f8_vg1x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za32.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: define dso_local void @_Z29test_svdot_lane_za32_f8_vg1x2j13svmfloat8x2_tu13__SVMfloat8_tm(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za32.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za32_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn,
svmfloat8_t zm, fpm_t fpmr)
__arm_streaming __arm_inout("za") {
SVE_ACLE_FUNC(svdot_lane_za32,_mf8,_vg1x2_fpm)(slice, zn, zm, 3, fpmr);
}

// CHECK-LABEL: define dso_local void @test_svdot_lane_za32_f8_vg1x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: define dso_local void @_Z29test_svdot_lane_za32_f8_vg1x4j13svmfloat8x4_tu13__SVMfloat8_tm(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za32_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn,
svmfloat8_t zm, fpm_t fpmr)
__arm_streaming __arm_inout("za") {
SVE_ACLE_FUNC(svdot_lane_za32,_mf8,_vg1x4_fpm)(slice, zn, zm, 3, fpmr);
}

// CHECK-LABEL: define dso_local void @test_svdot_lane_za16_f8_vg1x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: define dso_local void @_Z29test_svdot_lane_za16_f8_vg1x2j13svmfloat8x2_tu13__SVMfloat8_tm(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za16_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn,
svmfloat8_t zm, fpm_t fpmr)
__arm_streaming __arm_inout("za") {
SVE_ACLE_FUNC(svdot_lane_za16,_mf8,_vg1x2_fpm)(slice, zn, zm, 3, fpmr);
}

// CHECK-LABEL: define dso_local void @test_svdot_lane_za16_f8_vg1x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]], i32 3)
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: define dso_local void @_Z29test_svdot_lane_za16_f8_vg1x4j13svmfloat8x4_tu13__SVMfloat8_tm(
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
// CPP-CHECK-NEXT: [[ENTRY:.*:]]
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.lane.za16.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]], i32 3)
// CPP-CHECK-NEXT: ret void
//
void test_svdot_lane_za16_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn,
svmfloat8_t zm, fpm_t fpmr)
__arm_streaming __arm_inout("za") {
SVE_ACLE_FUNC(svdot_lane_za16,_mf8,_vg1x4_fpm)(slice, zn, zm, 3, fpmr);
}
38 changes: 38 additions & 0 deletions clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fp8_fdot.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -verify -emit-llvm -o - %s

// REQUIRES: aarch64-registered-target

#include <arm_sme.h>

void test_features(uint32_t slice, svmfloat8_t f8, svmfloat8x2_t f8x2,
svmfloat8x4_t f8x4, uint64_t fpmr) __arm_streaming __arm_inout("za") {
// expected-error@+1 {{'svdot_lane_za32_mf8_vg1x2_fpm' needs target feature sme,sme-f8f32}}
svdot_lane_za32_mf8_vg1x2_fpm(slice, f8x2, f8, 3, fpmr);
// expected-error@+1 {{'svdot_lane_za32_mf8_vg1x4_fpm' needs target feature sme,sme-f8f32}}
svdot_lane_za32_mf8_vg1x4_fpm(slice, f8x4, f8, 3, fpmr);
// expected-error@+1 {{'svdot_lane_za16_mf8_vg1x2_fpm' needs target feature sme,sme-f8f16}}
svdot_lane_za16_mf8_vg1x2_fpm(slice, f8x2, f8, 3, fpmr);
// expected-error@+1 {{'svdot_lane_za16_mf8_vg1x4_fpm' needs target feature sme,sme-f8f16}}
svdot_lane_za16_mf8_vg1x4_fpm(slice, f8x4, f8, 3, fpmr);
}

void test_imm(uint32_t slice, svmfloat8_t f8, svmfloat8x2_t f8x2,
svmfloat8x4_t f8x4, uint64_t fpmr) __arm_streaming __arm_inout("za") {
// expected-error@+1{{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svdot_lane_za32_mf8_vg1x2_fpm(slice, f8x2, f8, -1, fpmr);
// expected-error@+1{{argument value 18446744073709551615 is outside the valid range [0, 3]}}
svdot_lane_za32_mf8_vg1x4_fpm(slice, f8x4, f8, -1, fpmr);
// expected-error@+1{{argument value 18446744073709551615 is outside the valid range [0, 7]}}
svdot_lane_za16_mf8_vg1x2_fpm(slice, f8x2, f8, -1, fpmr);
// expected-error@+1{{argument value 18446744073709551615 is outside the valid range [0, 7]}}
svdot_lane_za16_mf8_vg1x4_fpm(slice, f8x4, f8, -1, fpmr);

// expected-error@+1{{argument value 4 is outside the valid range [0, 3]}}
svdot_lane_za32_mf8_vg1x2_fpm(slice, f8x2, f8, 4, fpmr);
// expected-error@+1{{argument value 4 is outside the valid range [0, 3]}}
svdot_lane_za32_mf8_vg1x4_fpm(slice, f8x4, f8, 4, fpmr);
// expected-error@+1{{argument value 8 is outside the valid range [0, 7]}}
svdot_lane_za16_mf8_vg1x2_fpm(slice, f8x2, f8, 8, fpmr);
// expected-error@+1{{argument value 8 is outside the valid range [0, 7]}}
svdot_lane_za16_mf8_vg1x4_fpm(slice, f8x4, f8, 8, fpmr);
}
27 changes: 26 additions & 1 deletion llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -3855,6 +3855,31 @@ def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic;


// SME FP8 FDOT intrinsics
let TargetPrefix = "aarch64" in {

class SME2_FP8_FDOT_LANE_VG1x2 :
DefaultAttrsIntrinsic<[], [llvm_i32_ty,
llvm_nxv16i8_ty, llvm_nxv16i8_ty,
llvm_nxv16i8_ty,
llvm_i32_ty],
[IntrInaccessibleMemOnly, IntrHasSideEffects, ImmArg<ArgIndex<4>>]>;

class SME2_FP8_FDOT_LANE_VG1x4 :
DefaultAttrsIntrinsic<[], [llvm_i32_ty,
llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
llvm_nxv16i8_ty,
llvm_i32_ty],
[IntrInaccessibleMemOnly, IntrHasSideEffects, ImmArg<ArgIndex<6>>]>;

def int_aarch64_sme_fp8_fdot_lane_za16_vg1x2 : SME2_FP8_FDOT_LANE_VG1x2;
def int_aarch64_sme_fp8_fdot_lane_za16_vg1x4 : SME2_FP8_FDOT_LANE_VG1x4;

def int_aarch64_sme_fp8_fdot_lane_za32_vg1x2 : SME2_FP8_FDOT_LANE_VG1x2;
def int_aarch64_sme_fp8_fdot_lane_za32_vg1x4 : SME2_FP8_FDOT_LANE_VG1x4;
}

//
// FP8 Intrinsics
//
Expand Down Expand Up @@ -3886,4 +3911,4 @@ let TargetPrefix = "aarch64" in {
// FP8 outer product
def int_aarch64_sme_fp8_fmopa_za16 : SME_FP8_OuterProduct_Intrinsic;
def int_aarch64_sme_fp8_fmopa_za32 : SME_FP8_OuterProduct_Intrinsic;
}
}
8 changes: 4 additions & 4 deletions llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -986,8 +986,8 @@ def LUTI4_S_4ZZT2Z : sme2_luti4_vector_vg4_strided<0b00, 0b00, "luti4">;

let Predicates = [HasSMEF8F16] in {
defm FVDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_f8f16<"fvdot", 0b11, 0b110, ZZ_b_mul_r, ZPR4b8>;
defm FDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_f8f16<"fdot", 0b11, 0b010, ZZ_b_mul_r, ZPR4b8>;
defm FDOT_VG4_M4ZZI_BtoH : sme2p1_multi_vec_array_vg4_index_f8f16<"fdot", 0b100, ZZZZ_b_mul_r, ZPR4b8>;
defm FDOT_VG2_M2ZZI_BtoH : sme2_fp8_fdot_index_za16_vg1x2<"fdot", int_aarch64_sme_fp8_fdot_lane_za16_vg1x2>;
defm FDOT_VG4_M4ZZI_BtoH : sme2_fp8_fdot_index_za16_vg1x4<"fdot", int_aarch64_sme_fp8_fdot_lane_za16_vg1x4>;
defm FDOT_VG2_M2ZZ_BtoH : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010001, MatrixOp16, ZZ_b, ZPR4b8>;
defm FDOT_VG4_M4ZZ_BtoH : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110001, MatrixOp16, ZZZZ_b, ZPR4b8>;

Expand All @@ -1008,9 +1008,9 @@ defm FMOPA_MPPZZ_BtoH : sme2_fp8_fmopa_za16<"fmopa", int_aarch64_sme_fp8_fmopa_z
} //[HasSMEF8F16]

let Predicates = [HasSMEF8F32] in {
defm FDOT_VG2_M2ZZI_BtoS : sme2_fp8_fdot_index_za32_vg1x2<"fdot", int_aarch64_sme_fp8_fdot_lane_za32_vg1x2>;
defm FDOT_VG4_M4ZZI_BtoS : sme2_fp8_fdot_index_za32_vg1x4<"fdot", int_aarch64_sme_fp8_fdot_lane_za32_vg1x4>;

defm FDOT_VG2_M2ZZI_BtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b01, 0b0111, ZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
defm FDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"fdot", 0b0001, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
defm FDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010011, MatrixOp32, ZZ_b, ZPR4b8>;
defm FDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110011, MatrixOp32, ZZZZ_b, ZPR4b8>;

Expand Down
Loading
Loading