Skip to content

Commit 1e44a96

Browse files
authored
[AArch64][SME] Add intrinsics for vector groups ZERO (#93201)
According to the specification in ARM-software/acle#309, this adds the following intrinsics:

void svzero_za64_vg1x2(uint32_t slice) __arm_streaming __arm_inout("za");
void svzero_za64_vg1x4(uint32_t slice) __arm_streaming __arm_inout("za");
void svzero_za64_vg2x1(uint32_t slice) __arm_streaming __arm_inout("za");
void svzero_za64_vg2x2(uint32_t slice) __arm_streaming __arm_inout("za");
void svzero_za64_vg2x4(uint32_t slice) __arm_streaming __arm_inout("za");
void svzero_za64_vg4x1(uint32_t slice) __arm_streaming __arm_inout("za");
void svzero_za64_vg4x2(uint32_t slice) __arm_streaming __arm_inout("za");
void svzero_za64_vg4x4(uint32_t slice) __arm_streaming __arm_inout("za");
1 parent e93799f commit 1e44a96

File tree

5 files changed

+391
-9
lines changed

5 files changed

+391
-9
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,25 @@ let TargetGuard = "sme" in {
146146
[IsOverloadNone, IsStreamingCompatible, IsOutZA]>;
147147
}
148148

149+
// Declares the eight svzero_za64_vg<A>x<B> builtins, all guarded by the
// "sme2p1" target feature. Each SInst names the LLVM intrinsic it maps to
// (aarch64_sme_zero_za64_<vg>) and carries the flags IsOverloadNone,
// IsStreaming and IsInOutZA. Every entry uses the prototype string "vm"; per
// the commit message these builtins have the C signature
// `void f(uint32_t slice) __arm_streaming __arm_inout("za")`.
let TargetGuard = "sme2p1" in {
150+
def SVZERO_ZA64_VG1x2 : SInst<"svzero_za64_vg1x2", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg1x2",
151+
[IsOverloadNone, IsStreaming, IsInOutZA]>;
152+
def SVZERO_ZA64_VG1x4 : SInst<"svzero_za64_vg1x4", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg1x4",
153+
[IsOverloadNone, IsStreaming, IsInOutZA]>;
154+
def SVZERO_ZA64_VG2x1 : SInst<"svzero_za64_vg2x1", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg2x1",
155+
[IsOverloadNone, IsStreaming, IsInOutZA]>;
156+
def SVZERO_ZA64_VG2x2 : SInst<"svzero_za64_vg2x2", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg2x2",
157+
[IsOverloadNone, IsStreaming, IsInOutZA]>;
158+
def SVZERO_ZA64_VG2x4 : SInst<"svzero_za64_vg2x4", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg2x4",
159+
[IsOverloadNone, IsStreaming, IsInOutZA]>;
160+
def SVZERO_ZA64_VG4x1 : SInst<"svzero_za64_vg4x1", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg4x1",
161+
[IsOverloadNone, IsStreaming, IsInOutZA]>;
162+
def SVZERO_ZA64_VG4x2 : SInst<"svzero_za64_vg4x2", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg4x2",
163+
[IsOverloadNone, IsStreaming, IsInOutZA]>;
164+
def SVZERO_ZA64_VG4x4 : SInst<"svzero_za64_vg4x4", "vm", "", MergeNone, "aarch64_sme_zero_za64_vg4x4",
165+
[IsOverloadNone, IsStreaming, IsInOutZA]>;
166+
}
167+
149168
////////////////////////////////////////////////////////////////////////////////
150169
// SME - Counting elements in a streaming vector
151170

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
2+
// REQUIRES: aarch64-registered-target
3+
4+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
5+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
6+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -disable-O0-optnone -Werror -Wall -o /dev/null %s
7+
8+
#include <arm_sme.h>
9+
10+
// Token-pastes its two arguments into a single identifier, e.g.
// SVE_ACLE_FUNC(svzero_za64,_vg1x2) expands to svzero_za64_vg1x2.
#define SVE_ACLE_FUNC(A1,A2) A1##A2
11+
12+
// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x2(
13+
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] {
14+
// CHECK-NEXT: entry:
15+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 [[SLICE]])
16+
// CHECK-NEXT: ret void
17+
//
18+
// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg1x2j(
19+
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] {
20+
// CPP-CHECK-NEXT: entry:
21+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 [[SLICE]])
22+
// CPP-CHECK-NEXT: ret void
23+
//
24+
// Forwards `slice` unchanged to svzero_za64_vg1x2 (spelled via SVE_ACLE_FUNC).
// The autogenerated expectations above require the body to lower to a single
// call to @llvm.aarch64.sme.zero.za64.vg1x2 followed by `ret void`.
void test_svzero_za64_vg1x2(uint32_t slice) __arm_streaming __arm_inout("za")
25+
{
26+
SVE_ACLE_FUNC(svzero_za64,_vg1x2)(slice);
27+
}
28+
29+
// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x4(
30+
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
31+
// CHECK-NEXT: entry:
32+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 [[SLICE]])
33+
// CHECK-NEXT: ret void
34+
//
35+
// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg1x4j(
36+
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
37+
// CPP-CHECK-NEXT: entry:
38+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg1x4(i32 [[SLICE]])
39+
// CPP-CHECK-NEXT: ret void
40+
//
41+
// Forwards `slice` unchanged to svzero_za64_vg1x4 (spelled via SVE_ACLE_FUNC).
// The autogenerated expectations above require the body to lower to a single
// call to @llvm.aarch64.sme.zero.za64.vg1x4 followed by `ret void`.
void test_svzero_za64_vg1x4(uint32_t slice) __arm_streaming __arm_inout("za"){
42+
SVE_ACLE_FUNC(svzero_za64,_vg1x4)(slice);
43+
}
44+
45+
// CHECK-LABEL: define dso_local void @test_svzero_za64_vg2x1(
46+
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
47+
// CHECK-NEXT: entry:
48+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 [[SLICE]])
49+
// CHECK-NEXT: ret void
50+
//
51+
// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg2x1j(
52+
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
53+
// CPP-CHECK-NEXT: entry:
54+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x1(i32 [[SLICE]])
55+
// CPP-CHECK-NEXT: ret void
56+
//
57+
// Forwards `slice` unchanged to svzero_za64_vg2x1 (spelled via SVE_ACLE_FUNC).
// The autogenerated expectations above require the body to lower to a single
// call to @llvm.aarch64.sme.zero.za64.vg2x1 followed by `ret void`.
void test_svzero_za64_vg2x1(uint32_t slice) __arm_streaming __arm_inout("za"){
58+
SVE_ACLE_FUNC(svzero_za64,_vg2x1)(slice);
59+
}
60+
61+
// CHECK-LABEL: define dso_local void @test_svzero_za64_vg2x2(
62+
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
63+
// CHECK-NEXT: entry:
64+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 [[SLICE]])
65+
// CHECK-NEXT: ret void
66+
//
67+
// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg2x2j(
68+
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
69+
// CPP-CHECK-NEXT: entry:
70+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x2(i32 [[SLICE]])
71+
// CPP-CHECK-NEXT: ret void
72+
//
73+
// Forwards `slice` unchanged to svzero_za64_vg2x2 (spelled via SVE_ACLE_FUNC).
// The autogenerated expectations above require the body to lower to a single
// call to @llvm.aarch64.sme.zero.za64.vg2x2 followed by `ret void`.
void test_svzero_za64_vg2x2(uint32_t slice) __arm_streaming __arm_inout("za"){
74+
SVE_ACLE_FUNC(svzero_za64,_vg2x2)(slice);
75+
}
76+
77+
// CHECK-LABEL: define dso_local void @test_svzero_za64_vg2x4(
78+
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
79+
// CHECK-NEXT: entry:
80+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 [[SLICE]])
81+
// CHECK-NEXT: ret void
82+
//
83+
// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg2x4j(
84+
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
85+
// CPP-CHECK-NEXT: entry:
86+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg2x4(i32 [[SLICE]])
87+
// CPP-CHECK-NEXT: ret void
88+
//
89+
// Forwards `slice` unchanged to svzero_za64_vg2x4 (spelled via SVE_ACLE_FUNC).
// The autogenerated expectations above require the body to lower to a single
// call to @llvm.aarch64.sme.zero.za64.vg2x4 followed by `ret void`.
void test_svzero_za64_vg2x4(uint32_t slice) __arm_streaming __arm_inout("za"){
90+
SVE_ACLE_FUNC(svzero_za64,_vg2x4)(slice);
91+
}
92+
93+
// CHECK-LABEL: define dso_local void @test_svzero_za64_vg4x1(
94+
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
95+
// CHECK-NEXT: entry:
96+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 [[SLICE]])
97+
// CHECK-NEXT: ret void
98+
//
99+
// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg4x1j(
100+
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
101+
// CPP-CHECK-NEXT: entry:
102+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x1(i32 [[SLICE]])
103+
// CPP-CHECK-NEXT: ret void
104+
//
105+
// Forwards `slice` unchanged to svzero_za64_vg4x1 (spelled via SVE_ACLE_FUNC).
// The autogenerated expectations above require the body to lower to a single
// call to @llvm.aarch64.sme.zero.za64.vg4x1 followed by `ret void`.
void test_svzero_za64_vg4x1(uint32_t slice) __arm_streaming __arm_inout("za"){
106+
SVE_ACLE_FUNC(svzero_za64,_vg4x1)(slice);
107+
}
108+
109+
// CHECK-LABEL: define dso_local void @test_svzero_za64_vg4x2(
110+
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
111+
// CHECK-NEXT: entry:
112+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 [[SLICE]])
113+
// CHECK-NEXT: ret void
114+
//
115+
// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg4x2j(
116+
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
117+
// CPP-CHECK-NEXT: entry:
118+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x2(i32 [[SLICE]])
119+
// CPP-CHECK-NEXT: ret void
120+
//
121+
// Forwards `slice` unchanged to svzero_za64_vg4x2 (spelled via SVE_ACLE_FUNC).
// The autogenerated expectations above require the body to lower to a single
// call to @llvm.aarch64.sme.zero.za64.vg4x2 followed by `ret void`.
void test_svzero_za64_vg4x2(uint32_t slice) __arm_streaming __arm_inout("za"){
122+
SVE_ACLE_FUNC(svzero_za64,_vg4x2)(slice);
123+
}
124+
125+
// CHECK-LABEL: define dso_local void @test_svzero_za64_vg4x4(
126+
// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
127+
// CHECK-NEXT: entry:
128+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 [[SLICE]])
129+
// CHECK-NEXT: ret void
130+
//
131+
// CPP-CHECK-LABEL: define dso_local void @_Z22test_svzero_za64_vg4x4j(
132+
// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0]] {
133+
// CPP-CHECK-NEXT: entry:
134+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.za64.vg4x4(i32 [[SLICE]])
135+
// CPP-CHECK-NEXT: ret void
136+
//
137+
// Forwards `slice` unchanged to svzero_za64_vg4x4 (spelled via SVE_ACLE_FUNC).
// The autogenerated expectations above require the body to lower to a single
// call to @llvm.aarch64.sme.zero.za64.vg4x4 followed by `ret void`.
void test_svzero_za64_vg4x4(uint32_t slice) __arm_streaming __arm_inout("za"){
138+
SVE_ACLE_FUNC(svzero_za64,_vg4x4)(slice);
139+
}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3361,6 +3361,12 @@ let TargetPrefix = "aarch64" in {
33613361
def int_aarch64_sve_bfmlslt : SME2_BFMLS_Intrinsic;
33623362
def int_aarch64_sve_bfmlslt_lane : SME2_BFMLS_Lane_Intrinsic;
33633363

3364+
// Multi-vector zeroing
// Defines one intrinsic, int_aarch64_sme_zero_za64_<vg>, for each of the eight
// vector-group suffixes listed in the foreach. Every intrinsic returns nothing,
// takes a single i32 operand, and is marked IntrNoMem together with
// IntrHasSideEffects.
3365+
3366+
foreach vg = ["vg1x2", "vg1x4", "vg2x1", "vg2x2", "vg2x4", "vg4x1", "vg4x2", "vg4x4"] in {
3367+
def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects]>;
3368+
}
3369+
33643370
// Multi-vector signed saturating doubling multiply high
33653371

33663372
def int_aarch64_sve_sqdmulh_single_vgx2 : SME2_VG2_Multi_Single_Intrinsic;

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,13 @@ class sme2_move_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, Re
104104
let usesCustomInserter = 1;
105105
}
106106

107+
// Pseudo-instruction class used for the zero-matrix forms. It has no outputs
// and two inputs: a MatrixIndexGPR32Op8_11 register ($Rs) and an immediate of
// the caller-supplied operand type `index_ty` ($imm). `name` is passed to
// SMEPseudo2Instr with a second argument of 0 (the real instructions pass 1),
// `za_flag` is stored in SMEMatrixType, and usesCustomInserter is set.
// NOTE(review): "sem2p1" looks like a transposition of "sme2p1" (compare the
// sme2p1_zero_matrix multiclass); any rename must also update every use.
class sem2p1_zero_matrix_pseudo<string name, Operand index_ty, SMEMatrixTypeEnum za_flag>
108+
: SMEPseudo2Instr<name, 0>,
109+
Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rs, index_ty:$imm), []> {
110+
let SMEMatrixType = za_flag;
111+
let usesCustomInserter = 1;
112+
}
113+
107114
//===----------------------------------------------------------------------===//
108115
// SME pattern match helpers.
109116
//===----------------------------------------------------------------------===//
@@ -189,6 +196,9 @@ class SME2_Tile_VG4_Multi_Pat<string name, SDPatternOperator intrinsic, Operand
189196
: Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
190197
(!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
191198

199+
// Selection pattern for the zero-matrix intrinsics: matches `intrinsic` applied
// to an i32 operand that the `tileslice` complex pattern splits into a
// MatrixIndexGPR32Op8_11 base ($base) and an `offset_ty` immediate ($offset),
// and emits the instruction called `name` with operands ($base, $offset).
class SME2_Zero_Matrix_Pat<string name, SDPatternOperator intrinsic, Operand offset_ty, ComplexPattern tileslice>
200+
: Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))),
201+
(!cast<Instruction>(name) $base, $offset)>;
192202
//===----------------------------------------------------------------------===//
193203
// SME pattern match helpers.
194204
//===----------------------------------------------------------------------===//
@@ -4815,39 +4825,57 @@ class sme2p1_zero_matrix<bits<6> opc, Operand index_ty, string mnemonic,
48154825
}
48164826

48174827
// Defines the eight zero-matrix instruction variants for `mnemonic`. In this
// diff each real instruction additionally inherits SMEPseudo2Instr<NAME # suffix, 1>,
// and the multiclass gains a matching <suffix>_PSEUDO definition plus an
// intrinsic selection pattern for every variant.
multiclass sme2p1_zero_matrix<string mnemonic> {
4818-
def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx2"> {
4828+
def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx2">, SMEPseudo2Instr<NAME # _VG2_Z , 1> {
48194829
bits<3> imm;
48204830
let Inst{2-0} = imm;
48214831
}
4822-
def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic> {
4832+
def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic>, SMEPseudo2Instr<NAME # _2Z, 1> {
48234833
bits<3> imm;
48244834
let Inst{2-0} = imm;
48254835
}
4826-
def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, "vgx2"> {
4836+
def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, "vgx2">, SMEPseudo2Instr<NAME # _VG2_2Z, 1> {
48274837
bits<2> imm;
48284838
let Inst{1-0} = imm;
48294839
}
4830-
def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, "vgx4"> {
4840+
def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, "vgx4">, SMEPseudo2Instr<NAME # _VG4_2Z, 1> {
48314841
bits<2> imm;
48324842
let Inst{1-0} = imm;
48334843
}
4834-
def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx4"> {
4844+
def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx4">, SMEPseudo2Instr<NAME # _VG4_Z, 1> {
48354845
bits<3> imm;
48364846
let Inst{2-0} = imm;
48374847
}
4838-
def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic> {
4848+
def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic>, SMEPseudo2Instr<NAME # _4Z, 1> {
48394849
bits<2> imm;
48404850
let Inst{1-0} = imm;
48414851
}
4842-
def _VG2_4Z :sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, "vgx2"> {
4852+
def _VG2_4Z : sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, "vgx2">, SMEPseudo2Instr<NAME # _VG2_4Z, 1> {
48434853
bits<1> imm;
48444854
let Inst{0} = imm;
48454855
}
4846-
def _VG4_4Z :sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, "vgx4"> {
4856+
def _VG4_4Z : sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, "vgx4">, SMEPseudo2Instr<NAME # _VG4_4Z, 1> {
48474857
bits<1> imm;
48484858
let Inst{0} = imm;
48494859
}
4850-
}
4860+
4861+
// One pseudo per real instruction above, all tagged SMEMatrixArray.
// NOTE(review): for the range-indexed forms the pseudo's immediate operand is
// one step narrower than the real instruction's (_2Z: uimm3s2range vs
// uimm2s2range; _VG2_2Z/_VG4_2Z: uimm2s2range vs uimm1s2range; _4Z:
// uimm2s4range vs uimm1s4range; _VG2_4Z/_VG4_4Z: uimm1s4range vs
// uimm0s4range). Confirm that this narrowing is intentional.
def NAME # _VG2_Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG2_Z, sme_elm_idx0_7, SMEMatrixArray>;
4862+
def NAME # _VG4_Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG4_Z, sme_elm_idx0_7, SMEMatrixArray>;
4863+
def NAME # _2Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _2Z, uimm2s2range, SMEMatrixArray>;
4864+
def NAME # _VG2_2Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG2_2Z, uimm1s2range, SMEMatrixArray>;
4865+
def NAME # _VG4_2Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG4_2Z, uimm1s2range, SMEMatrixArray>;
4866+
def NAME # _4Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _4Z, uimm1s4range, SMEMatrixArray>;
4867+
def NAME # _VG2_4Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG2_4Z, uimm0s4range, SMEMatrixArray>;
4868+
def NAME # _VG4_4Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG4_4Z, uimm0s4range, SMEMatrixArray>;
4869+
4870+
// Intrinsic-to-pseudo mapping: vg1x2 -> _VG2_Z, vg1x4 -> _VG4_Z,
// vg2x1 -> _2Z, vg2x2 -> _VG2_2Z, vg2x4 -> _VG4_2Z, vg4x1 -> _4Z,
// vg4x2 -> _VG2_4Z, vg4x4 -> _VG4_4Z. Each pattern passes the same operand
// type as the corresponding pseudo together with a tileslice complex pattern.
def : SME2_Zero_Matrix_Pat<NAME # _VG2_Z_PSEUDO, int_aarch64_sme_zero_za64_vg1x2, sme_elm_idx0_7, tileslice16>;
4871+
def : SME2_Zero_Matrix_Pat<NAME # _VG4_Z_PSEUDO, int_aarch64_sme_zero_za64_vg1x4, sme_elm_idx0_7, tileslice16>;
4872+
def : SME2_Zero_Matrix_Pat<NAME # _2Z_PSEUDO, int_aarch64_sme_zero_za64_vg2x1, uimm2s2range, tileslicerange2s2>;
4873+
def : SME2_Zero_Matrix_Pat<NAME # _VG2_2Z_PSEUDO, int_aarch64_sme_zero_za64_vg2x2, uimm1s2range, tileslicerange1s2>;
4874+
def : SME2_Zero_Matrix_Pat<NAME # _VG4_2Z_PSEUDO, int_aarch64_sme_zero_za64_vg2x4, uimm1s2range, tileslicerange1s2>;
4875+
def : SME2_Zero_Matrix_Pat<NAME # _4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x1, uimm1s4range, tileslicerange1s4>;
4876+
def : SME2_Zero_Matrix_Pat<NAME # _VG2_4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x2, uimm0s4range, tileslicerange0s4>;
4877+
def : SME2_Zero_Matrix_Pat<NAME # _VG4_4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x4, uimm0s4range, tileslicerange0s4>;
4878+
}
48514879

48524880
//===----------------------------------------------------------------------===//
48534881
// SME2.1 lookup table expand two non-contiguous registers

0 commit comments

Comments
 (0)