Skip to content

Commit 863a803

Browse files
committed
fixup! [AArch64] Add intrinsics for SME FP8 FDOT LANE instructions
1 parent 605ccdf commit 863a803

File tree

3 files changed

+7
-66
lines changed

3 files changed

+7
-66
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -741,10 +741,11 @@ let SMETargetGuard = "sme2" in {
741741
}
742742

743743
// FDOT
744-
let SMETargetGuard = "sme2,sme-f8f16" in {
744+
let SMETargetGuard = "sme-f8f16" in {
745745
def SVDOT_LANE_FP8_ZA16_VG1x2 : Inst<"svdot_lane_za16[_mf8]_vg1x2_fpm", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
746746
def SVDOT_LANE_FP8_ZA16_VG1x4 : Inst<"svdot_lane_za16[_mf8]_vg1x4_fpm", "vm4di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
747747
}
748+
748749
////////////////////////////////////////////////////////////////////////////////
749750
// SME2p1 - FMOPA, FMOPS (non-widening)
750751
let SMETargetGuard = "sme-b16b16" in {

clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
// REQUIRES: aarch64-registered-target
33
#include <arm_sme.h>
44

5-
// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s
6-
// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
7-
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s
8-
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
9-
// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
5+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s
6+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
7+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s
8+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
9+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
1010
#include <arm_sme.h>
1111

1212
#ifdef SVE_OVERLOADED_FORMS

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 0 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -5771,66 +5771,6 @@ multiclass sme2_fmop4a_fp8_fp16_2way<string mnemonic> {
57715771

57725772
// FP8 SME FDOT instructions
57735773

5774-
// Selection DAG patterns - map to first level of pseudo-instructions (xxx_PSEUDO)
5775-
5776-
class SME2_FP8_FDOT_Index_VG1x2_Pat<string name, SDPatternOperator intrinsic,
5777-
ComplexPattern tileslice, Operand offset_ty, Operand imm_ty,
5778-
ValueType vt = nxv16i8>
5779-
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset)),
5780-
vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i), i64:$fpmr),
5781-
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
5782-
(REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
5783-
ZPR4b8:$Zm, imm_ty:$i, GPR64:$fpmr)>;
5784-
5785-
class SME2_FP8_FDOT_Index_VG1x4_Pat<string name, SDPatternOperator intrinsic,
5786-
ComplexPattern tileslice, Operand offset_ty, Operand imm_ty,
5787-
ValueType vt = nxv16i8>
5788-
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset)),
5789-
vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4,
5790-
vt:$Zm, (i32 imm_ty:$i), i64:$fpmr),
5791-
(!cast<Instruction>(name # _PSEUDO) $base, $offset,
5792-
(REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
5793-
ZPR4b8:$Zm, imm_ty:$i, GPR64:$fpmr)>;
5794-
5795-
// First level pseudo-instructions (xxx_PSEUDO) - transformed to second level pseudo-instructions (xxx_FPMR_PSEUDO)
5796-
// during instruction selection.
5797-
class sme2_fp8_fdot_index_pseudo<string name, Operand offset_ty, RegisterOperand src1_ty, RegisterOperand src2_ty, Operand imm_ty>
5798-
: SMEPseudo2Instr<name, 0>,
5799-
Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs, src1_ty:$Zn, src2_ty:$Zm, imm_ty:$i, GPR64:$fpmr), []> {
5800-
let SMEMatrixType = SMEMatrixArray;
5801-
let usesCustomInserter = 1;
5802-
}
5803-
5804-
class sme2_fp8_fdot_pseudo<string name, Operand offset_ty, RegisterOperand src1_ty, RegisterOperand src2_ty>
5805-
: SMEPseudo2Instr<name, 0>,
5806-
Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs, src1_ty:$Zn, src2_ty:$Zm, GPR64:$fpmr), []> {
5807-
let SMEMatrixType = SMEMatrixArray;
5808-
let usesCustomInserter = 1;
5809-
}
5810-
5811-
// Second level pseudo-instruction - expanded to real instruction by the AArch64 pseudo instruction expansion pass
5812-
class sme2_fp8_fdot_index_fpmr_pseudo<string name, MatrixOperand matrix_ty, Operand offset_ty,
5813-
RegisterOperand src1_ty, RegisterOperand src2_ty,
5814-
Operand imm_ty>
5815-
: Pseudo<(outs matrix_ty:$ZAda),
5816-
(ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs,
5817-
src1_ty:$Zn, src2_ty:$Zm, imm_ty:$i, GPR64:$fpmr), []>,
5818-
SMEPseudo2Instr<name, 1> {
5819-
let hasNoSchedulingInfo = 1;
5820-
let Constraints = "$ZAda = $_ZAda";
5821-
}
5822-
5823-
class sme2_fp8_fdot_fpmr_pseudo<string name, MatrixOperand matrix_ty, Operand offset_ty,
5824-
RegisterOperand src1_ty, RegisterOperand src2_ty>
5825-
: Pseudo<(outs matrix_ty:$ZAda),
5826-
(ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, offset_ty:$offs,
5827-
src1_ty:$Zn, src2_ty:$Zm, GPR64:$fpmr), []>,
5828-
SMEPseudo2Instr<name, 1> {
5829-
let hasNoSchedulingInfo = 1;
5830-
let Constraints = "$ZAda = $_ZAda";
5831-
}
5832-
5833-
// FDOT instructions
58345774
multiclass sme2_fp8_fdot_index_za16_vg1x2<string mnemonic, bits<2> sz, bits<3> op,
58355775
RegisterOperand multi_vector_ty, SDPatternOperator intrinsic> {
58365776
def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?}, MatrixOp16,

0 commit comments

Comments
 (0)