@@ -30,6 +30,15 @@ def ArmNeon_Dialect : Dialect {
30
30
// to the LLVMDialect (ops or types).
31
31
}
32
32
33
+ //===----------------------------------------------------------------------===//
34
+ // ArmNeon type definition
35
+ //===----------------------------------------------------------------------===//
36
+
37
// Type constraint used by the ArmNeon ops below: an `::mlir::VectorType` whose
// shape is exactly `[length]` and whose element type is `elementType`.
// `IsFixedVectorTypePred` is and-ed in as well (presumably to reject scalable
// vectors -- confirm against its definition in the common type constraints).
// The summary string is built by concatenating the prefix with `length`.
class NeonVectorOfLength<int length, Type elementType>
    : ShapedContainerType<
          [elementType],
          And<[IsVectorOfShape<[length]>, IsFixedVectorTypePred]>,
          "a vector with length " # length,
          "::mlir::VectorType">;
41
+
33
42
//===----------------------------------------------------------------------===//
34
43
// ArmNeon op definitions
35
44
//===----------------------------------------------------------------------===//
@@ -120,6 +129,99 @@ def SdotOp : ArmNeon_OverloadedOperandsWithOneResultIntrOp<"sdot",[1], [
120
129
"$a `,` $b `,` $c attr-dict `:` type($b) `,` type($c) `to` type($res)";
121
130
}
122
131
132
// `arm_neon.intr.smmla`: signed 8-bit matrix multiply-accumulate.
//
// The `[1]` passed to the base class is the overloaded-operand index list
// (NOTE(review): presumably selects `src1` as the operand whose type overloads
// the intrinsic -- confirm against
// ArmNeon_OverloadedOperandsWithOneResultIntrOp).
def SmmlaOp
    : ArmNeon_OverloadedOperandsWithOneResultIntrOp<
          "smmla", [1],
          [Pure,
           AllTypesMatch<["src1", "src2"]>,
           AllTypesMatch<["acc", "res"]>]> {
  let summary = "Matrix-matrix multiply and accumulate op";
  let description = [{
    SMMLA: Signed integer matrix multiply-accumulate.

    Signed 8-bit integer matrix multiply-accumulate. This instruction multiplies
    the 2x8 matrix of signed 8-bit integer values in the first source vector by
    the 8x2 matrix of signed 8-bit integer values in the second source vector.
    The resulting 2x2 32-bit integer matrix product is destructively added to
    the 32-bit integer matrix accumulator in the destination vector. This is
    equivalent to performing an 8-way dot product per destination element.

    Source:
    https://developer.arm.com/architectures/instruction-sets/intrinsics/#f:@navigationhierarchiessimdisa=[Neon]&q=smmla
  }];

  // Operands: accumulator vector<4xi32>, then two vector<16xi8> sources.
  // Result:   vector<4xi32>.
  let arguments = (ins NeonVectorOfLength<4, I32>:$acc,
                       NeonVectorOfLength<16, I8>:$src1,
                       NeonVectorOfLength<16, I8>:$src2);
  let results = (outs NeonVectorOfLength<4, I32>:$res);

  // Only the types of `src1` and `res` are spelled in the assembly; the
  // AllTypesMatch traits above tie `src2` to `src1` and `acc` to `res`.
  let assemblyFormat =
      "$acc `,` $src1 `,` $src2 attr-dict `:` type($src1) `to` type($res)";
}
161
+
162
// `arm_neon.intr.ummla`: unsigned 8-bit matrix multiply-accumulate.
//
// The `[1]` passed to the base class is the overloaded-operand index list
// (NOTE(review): presumably selects `src1` as the operand whose type overloads
// the intrinsic -- confirm against
// ArmNeon_OverloadedOperandsWithOneResultIntrOp).
def UmmlaOp : ArmNeon_OverloadedOperandsWithOneResultIntrOp<"ummla",[1], [
                Pure,
                AllTypesMatch<["src1", "src2"]>,
                AllTypesMatch<["acc", "res"]>,
              ]> {
  let summary = "Unsigned matrix-matrix multiply and accumulate op";
  let description = [{
    UMMLA: Unsigned integer matrix multiply-accumulate.

    Unsigned 8-bit integer matrix multiply-accumulate. This instruction
    multiplies the 2x8 matrix of unsigned 8-bit integer values in the first
    source vector by the 8x2 matrix of unsigned 8-bit integer values in the
    second source vector. The resulting 2x2 32-bit integer matrix product is
    destructively added to the 32-bit integer matrix accumulator in the
    destination vector. This is equivalent to performing an 8-way dot product
    per destination element.

    Source:
    https://developer.arm.com/architectures/instruction-sets/intrinsics/#f:@navigationhierarchiessimdisa=[Neon]&q=ummla
  }];
  // Supports (vector<4xi32>, vector<16xi8>, vector<16xi8>) -> (vector<4xi32>)
  let arguments = (ins
          NeonVectorOfLength<4, I32>:$acc,
          NeonVectorOfLength<16, I8>:$src1,
          NeonVectorOfLength<16, I8>:$src2
  );
  let results = (outs NeonVectorOfLength<4, I32>:$res);
  // Only the types of `src1` and `res` are spelled in the assembly; the
  // AllTypesMatch traits above tie `src2` to `src1` and `acc` to `res`.
  let assemblyFormat =
    "$acc `,` $src1 `,` $src2 attr-dict `:` type($src1) `to` type($res)";
}
192
+
193
// `arm_neon.intr.usmmla`: mixed-sign 8-bit matrix multiply-accumulate
// (unsigned first source, signed second source).
//
// The `[1]` passed to the base class is the overloaded-operand index list
// (NOTE(review): presumably selects `src1` as the operand whose type overloads
// the intrinsic -- confirm against
// ArmNeon_OverloadedOperandsWithOneResultIntrOp).
def UsmmlaOp : ArmNeon_OverloadedOperandsWithOneResultIntrOp<"usmmla",[1], [
                Pure,
                AllTypesMatch<["src1", "src2"]>,
                AllTypesMatch<["acc", "res"]>,
              ]> {
  let summary = "Unsigned and signed matrix-matrix multiply and accumulate op";
  let description = [{
    USMMLA: Unsigned and signed integer matrix multiply-accumulate.

    Unsigned and signed 8-bit integer matrix multiply-accumulate. This
    instruction multiplies the 2x8 matrix of unsigned 8-bit integer values in
    the first source vector by the 8x2 matrix of signed 8-bit integer values in
    the second source vector. The resulting 2x2 32-bit integer matrix product is
    destructively added to the 32-bit integer matrix accumulator in the
    destination vector. This is equivalent to performing an 8-way dot product
    per destination element.

    Source:
    https://developer.arm.com/architectures/instruction-sets/intrinsics/#f:@navigationhierarchiessimdisa=[Neon]&q=usmmla
  }];
  // Supports (vector<4xi32>, vector<16xi8>, vector<16xi8>) -> (vector<4xi32>)
  let arguments = (ins
          NeonVectorOfLength<4, I32>:$acc,
          NeonVectorOfLength<16, I8>:$src1,
          NeonVectorOfLength<16, I8>:$src2
  );
  let results = (outs NeonVectorOfLength<4, I32>:$res);
  // Only the types of `src1` and `res` are spelled in the assembly; the
  // AllTypesMatch traits above tie `src2` to `src1` and `acc` to `res`.
  let assemblyFormat =
    "$acc `,` $src1 `,` $src2 attr-dict `:` type($src1) `to` type($res)";
}
224
+
123
225
class ArmNeon_2dOp<string mnemonic, list<Trait> traits = []>
124
226
: Op</*dialect=*/ArmNeon_Dialect,
125
227
/*opName=*/"2d." # mnemonic,
0 commit comments