
Commit 7c36d36

[fixup] Misc NFC fixes and made arm_sve.PselOp illegal for LLVM export
1 parent c1c97e8 commit 7c36d36

4 files changed: +85 -85 lines changed


mlir/include/mlir/Dialect/ArmSVE/IR/ArmSVE.td

Lines changed: 3 additions & 0 deletions
@@ -544,6 +544,9 @@ def DupQLaneOp : ArmSVE_Op<"dupq_lane", [Pure, AllTypesMatch<["src", "dst"]>]> {
     %V = arm_sve.dupq_lane %U[1] : vector<[8]xf16>
     // %V = [A B C D E F H A B C D E F H]
     ```
+
+    Note: The semantics of the operation match those of the `svdupq_lane` intrinsics.
+    [Source](https://developer.arm.com/architectures/instruction-sets/intrinsics/#q=svdupq_lane)
   }];
 
   let arguments = (ins SVEVector:$src,
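
The added note ties the op's semantics to the ACLE `svdupq_lane` intrinsic family. For reference, the one-to-one lowering contract behind this is visible in the tests updated by this commit; a minimal before/after sketch of the i8 case, with names taken from those tests:

```mlir
// ArmSVE dialect op, before legalization:
%0 = arm_sve.dupq_lane %v16i8[0] : vector<[16]xi8>

// After legalization for LLVM export: the intrinsic op, which translates
// to a call to @llvm.aarch64.sve.dupq.lane.nxv16i8.
%0 = "arm_sve.intr.dupq_lane"(%v16i8) <{lane = 0 : i64}> : (vector<[16]xi8>) -> vector<[16]xi8>
```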

mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp

Lines changed: 38 additions & 37 deletions
@@ -190,25 +190,25 @@ void mlir::populateArmSVELegalizeForLLVMExportPatterns(
   // Populate conversion patterns
 
   // clang-format off
-  patterns.add<SdotOpLowering,
-               SmmlaOpLowering,
-               UdotOpLowering,
-               UmmlaOpLowering,
+  patterns.add<ConvertFromSvboolOpLowering,
+               ConvertToSvboolOpLowering,
                DupQLaneLowering,
-               ScalableMaskedAddIOpLowering,
+               PselOpLowering,
                ScalableMaskedAddFOpLowering,
-               ScalableMaskedSubIOpLowering,
-               ScalableMaskedSubFOpLowering,
-               ScalableMaskedMulIOpLowering,
+               ScalableMaskedAddIOpLowering,
+               ScalableMaskedDivFOpLowering,
                ScalableMaskedMulFOpLowering,
+               ScalableMaskedMulIOpLowering,
                ScalableMaskedSDivIOpLowering,
+               ScalableMaskedSubFOpLowering,
+               ScalableMaskedSubIOpLowering,
                ScalableMaskedUDivIOpLowering,
-               ScalableMaskedDivFOpLowering,
-               ConvertToSvboolOpLowering,
-               ConvertFromSvboolOpLowering,
+               SmmlaOpLowering,
+               UdotOpLowering,
+               UmmlaOpLowering,
                ZipX2OpLowering,
                ZipX4OpLowering,
-               PselOpLowering>(converter);
+               SdotOpLowering>(converter);
   // Add vector.create_mask conversion with a high benefit as it produces much
   // nicer code than the generic lowering.
   patterns.add<CreateMaskOpLowering>(converter, /*benefit=*/4096);
@@ -218,43 +218,44 @@ void mlir::populateArmSVELegalizeForLLVMExportPatterns(
 void mlir::configureArmSVELegalizeForExportTarget(
     LLVMConversionTarget &target) {
   // clang-format off
-  target.addLegalOp<SdotIntrOp,
-                    SmmlaIntrOp,
-                    UdotIntrOp,
-                    UmmlaIntrOp,
+  target.addLegalOp<ConvertFromSvboolIntrOp,
+                    ConvertToSvboolIntrOp,
                     DupQLaneIntrOp,
-                    ScalableMaskedAddIIntrOp,
+                    PselIntrOp,
                     ScalableMaskedAddFIntrOp,
-                    ScalableMaskedSubIIntrOp,
-                    ScalableMaskedSubFIntrOp,
-                    ScalableMaskedMulIIntrOp,
+                    ScalableMaskedAddIIntrOp,
+                    ScalableMaskedDivFIntrOp,
                     ScalableMaskedMulFIntrOp,
+                    ScalableMaskedMulIIntrOp,
                     ScalableMaskedSDivIIntrOp,
+                    ScalableMaskedSubFIntrOp,
+                    ScalableMaskedSubIIntrOp,
                     ScalableMaskedUDivIIntrOp,
-                    ScalableMaskedDivFIntrOp,
-                    ConvertToSvboolIntrOp,
-                    ConvertFromSvboolIntrOp,
+                    SmmlaIntrOp,
+                    UdotIntrOp,
+                    UmmlaIntrOp,
+                    WhileLTIntrOp,
                     ZipX2IntrOp,
                     ZipX4IntrOp,
-                    PselIntrOp,
-                    WhileLTIntrOp>();
-  target.addIllegalOp<SdotOp,
-                      SmmlaOp,
-                      UdotOp,
-                      UmmlaOp,
+                    SdotIntrOp>();
+  target.addIllegalOp<ConvertFromSvboolOp,
+                      ConvertToSvboolOp,
                       DupQLaneOp,
-                      ScalableMaskedAddIOp,
+                      PselOp,
                       ScalableMaskedAddFOp,
-                      ScalableMaskedSubIOp,
-                      ScalableMaskedSubFOp,
-                      ScalableMaskedMulIOp,
+                      ScalableMaskedAddIOp,
+                      ScalableMaskedDivFOp,
                       ScalableMaskedMulFOp,
+                      ScalableMaskedMulIOp,
                       ScalableMaskedSDivIOp,
+                      ScalableMaskedSubFOp,
+                      ScalableMaskedSubIOp,
                       ScalableMaskedUDivIOp,
-                      ScalableMaskedDivFOp,
-                      ConvertToSvboolOp,
-                      ConvertFromSvboolOp,
+                      SmmlaOp,
+                      UdotOp,
+                      UmmlaOp,
                       ZipX2Op,
-                      ZipX4Op>();
+                      ZipX4Op,
+                      SdotOp>();
   // clang-format on
 }
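
These two functions form the usual dialect-conversion pair: `populateArmSVELegalizeForLLVMExportPatterns` contributes the rewrite patterns, while `configureArmSVELegalizeForExportTarget` marks the `*IntrOp` forms legal and the dialect-level ops, now including `arm_sve.psel`, illegal, so the conversion driver must rewrite every remaining dialect op before translation to LLVM IR. The high-benefit `CreateMaskOpLowering` mentioned above targets an SVE while-less-than intrinsic, which is presumably why `WhileLTIntrOp` sits on the legal list; a hedged sketch of that rewrite, where the i64 operand types and the `%c0` zero constant are assumptions:

```mlir
// Before: a scalable create_mask producing an SVE-predicate-shaped mask.
%mask = vector.create_mask %n : vector<[4]xi1>

// After (sketch): a single while-less-than intrinsic computes the same
// "first %n lanes active" predicate. %c0 is an i64 zero constant.
%mask = "arm_sve.intr.whilelt"(%c0, %n) : (i64, i64) -> vector<[4]xi1>
```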

mlir/test/Dialect/ArmSVE/legalize-for-llvm.mlir

Lines changed: 16 additions & 18 deletions
@@ -275,39 +275,37 @@ func.func @arm_sve_psel_mixed_predicate_types(%p0: vector<[8]xi1>, %p1: vector<[
 // -----
 
 // CHECK-LABEL: @arm_sve_dupq_lane(
-// CHECK-SAME: %[[A0:[a-z0-9]+]]: vector<[16]xi8>
-// CHECK-SAME: %[[A1:[a-z0-9]+]]: vector<[8]xi16>
-// CHECK-SAME: %[[A2:[a-z0-9]+]]: vector<[8]xf16>
-// CHECK-SAME: %[[A3:[a-z0-9]+]]: vector<[8]xbf16>
-// CHECK-SAME: %[[A4:[a-z0-9]+]]: vector<[4]xi32>
-// CHECK-SAME: %[[A5:[a-z0-9]+]]: vector<[4]xf32>
-// CHECK-SAME: %[[A6:[a-z0-9]+]]: vector<[2]xi64>
-// CHECK-SAME: %[[A7:[a-z0-9]+]]: vector<[2]xf64>
+// CHECK-SAME:    %[[A0:[a-z0-9]+]]: vector<[16]xi8>
+// CHECK-SAME:    %[[A1:[a-z0-9]+]]: vector<[8]xi16>
+// CHECK-SAME:    %[[A2:[a-z0-9]+]]: vector<[8]xf16>
+// CHECK-SAME:    %[[A3:[a-z0-9]+]]: vector<[8]xbf16>
+// CHECK-SAME:    %[[A4:[a-z0-9]+]]: vector<[4]xi32>
+// CHECK-SAME:    %[[A5:[a-z0-9]+]]: vector<[4]xf32>
+// CHECK-SAME:    %[[A6:[a-z0-9]+]]: vector<[2]xi64>
+// CHECK-SAME:    %[[A7:[a-z0-9]+]]: vector<[2]xf64>
 // CHECK-SAME: -> !llvm.struct<(vector<[16]xi8>, vector<[8]xi16>, vector<[8]xf16>, vector<[8]xbf16>, vector<[4]xi32>, vector<[4]xf32>, vector<[2]xi64>, vector<[2]xf64>)> {
-
-// CHECK: "arm_sve.intr.dupq_lane"(%[[A0]]) <{lane = 0 : i64}> : (vector<[16]xi8>) -> vector<[16]xi8>
-// CHECK: "arm_sve.intr.dupq_lane"(%[[A1]]) <{lane = 1 : i64}> : (vector<[8]xi16>) -> vector<[8]xi16>
-// CHECK: "arm_sve.intr.dupq_lane"(%[[A2]]) <{lane = 2 : i64}> : (vector<[8]xf16>) -> vector<[8]xf16>
-// CHECK: "arm_sve.intr.dupq_lane"(%[[A3]]) <{lane = 3 : i64}> : (vector<[8]xbf16>) -> vector<[8]xbf16>
-// CHECK: "arm_sve.intr.dupq_lane"(%[[A4]]) <{lane = 4 : i64}> : (vector<[4]xi32>) -> vector<[4]xi32>
-// CHECK: "arm_sve.intr.dupq_lane"(%[[A5]]) <{lane = 5 : i64}> : (vector<[4]xf32>) -> vector<[4]xf32>
-// CHECK: "arm_sve.intr.dupq_lane"(%[[A6]]) <{lane = 6 : i64}> : (vector<[2]xi64>) -> vector<[2]xi64>
-// CHECK: "arm_sve.intr.dupq_lane"(%[[A7]]) <{lane = 7 : i64}> : (vector<[2]xf64>) -> vector<[2]xf64>
 func.func @arm_sve_dupq_lane(
     %v16i8: vector<[16]xi8>, %v8i16: vector<[8]xi16>,
     %v8f16: vector<[8]xf16>, %v8bf16: vector<[8]xbf16>,
     %v4i32: vector<[4]xi32>, %v4f32: vector<[4]xf32>,
     %v2i64: vector<[2]xi64>, %v2f64: vector<[2]xf64>)
     -> (vector<[16]xi8>, vector<[8]xi16>, vector<[8]xf16>, vector<[8]xbf16>,
         vector<[4]xi32>, vector<[4]xf32>, vector<[2]xi64>, vector<[2]xf64>) {
-
+  // CHECK: "arm_sve.intr.dupq_lane"(%[[A0]]) <{lane = 0 : i64}> : (vector<[16]xi8>) -> vector<[16]xi8>
   %0 = arm_sve.dupq_lane %v16i8[0] : vector<[16]xi8>
+  // CHECK: "arm_sve.intr.dupq_lane"(%[[A1]]) <{lane = 1 : i64}> : (vector<[8]xi16>) -> vector<[8]xi16>
   %1 = arm_sve.dupq_lane %v8i16[1] : vector<[8]xi16>
+  // CHECK: "arm_sve.intr.dupq_lane"(%[[A2]]) <{lane = 2 : i64}> : (vector<[8]xf16>) -> vector<[8]xf16>
   %2 = arm_sve.dupq_lane %v8f16[2] : vector<[8]xf16>
+  // CHECK: "arm_sve.intr.dupq_lane"(%[[A3]]) <{lane = 3 : i64}> : (vector<[8]xbf16>) -> vector<[8]xbf16>
   %3 = arm_sve.dupq_lane %v8bf16[3] : vector<[8]xbf16>
+  // CHECK: "arm_sve.intr.dupq_lane"(%[[A4]]) <{lane = 4 : i64}> : (vector<[4]xi32>) -> vector<[4]xi32>
   %4 = arm_sve.dupq_lane %v4i32[4] : vector<[4]xi32>
+  // CHECK: "arm_sve.intr.dupq_lane"(%[[A5]]) <{lane = 5 : i64}> : (vector<[4]xf32>) -> vector<[4]xf32>
   %5 = arm_sve.dupq_lane %v4f32[5] : vector<[4]xf32>
+  // CHECK: "arm_sve.intr.dupq_lane"(%[[A6]]) <{lane = 6 : i64}> : (vector<[2]xi64>) -> vector<[2]xi64>
   %6 = arm_sve.dupq_lane %v2i64[6] : vector<[2]xi64>
+  // CHECK: "arm_sve.intr.dupq_lane"(%[[A7]]) <{lane = 7 : i64}> : (vector<[2]xf64>) -> vector<[2]xf64>
   %7 = arm_sve.dupq_lane %v2f64[7] : vector<[2]xf64>
 
   return %0, %1, %2, %3, %4, %5, %6, %7
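
FileCheck matches `CHECK` directives in order against the pass output, so this rewrite moves each expectation from a block ahead of the function to the line directly above the op that produces it; the test then reads as op/expectation pairs and is easier to keep in sync. A minimal sketch of the style (the function name here is illustrative, not part of the commit):

```mlir
// CHECK-LABEL: @example
func.func @example(%v: vector<[16]xi8>) -> vector<[16]xi8> {
  // The expectation sits immediately above the op it verifies:
  // CHECK: "arm_sve.intr.dupq_lane"(%{{.*}}) <{lane = 0 : i64}> : (vector<[16]xi8>) -> vector<[16]xi8>
  %0 = arm_sve.dupq_lane %v[0] : vector<[16]xi8>
  return %0 : vector<[16]xi8>
}
```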

mlir/test/Target/LLVMIR/arm-sve.mlir

Lines changed: 28 additions & 30 deletions
@@ -392,35 +392,33 @@ llvm.func @arm_sve_psel(%pn: vector<[16]xi1>, %p1: vector<[2]xi1>, %p2: vector<[
 }
 
 // CHECK-LABEL: @arm_sve_dupq_lane
-// CHECK-SAME: <vscale x 16 x i8> %0
-// CHECK-SAME: <vscale x 8 x i16> %1
-// CHECK-SAME: <vscale x 8 x half> %2
-// CHECK-SAME: <vscale x 8 x bfloat> %3
-// CHECK-SAME: <vscale x 4 x i32> %4
-// CHECK-SAME: <vscale x 4 x float> %5
-// CHECK-SAME: <vscale x 2 x i64> %6
-// CHECK-SAME: <vscale x 2 x double> %7
-
-
-llvm.func @arm_sve_dupq_lane(%arg0: vector<[16]xi8>, %arg1: vector<[8]xi16>,
-                             %arg2: vector<[8]xf16>, %arg3: vector<[8]xbf16>,
-                             %arg4: vector<[4]xi32>,%arg5: vector<[4]xf32>,
-                             %arg6: vector<[2]xi64>, %arg7: vector<[2]xf64>) {
-  // CHECK: call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %0, i64 0)
-  %0 = "arm_sve.intr.dupq_lane"(%arg0) <{lane = 0 : i64}> : (vector<[16]xi8>) -> vector<[16]xi8>
-  // CHECK: call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %1, i64 1)
-  %1 = "arm_sve.intr.dupq_lane"(%arg1) <{lane = 1 : i64}> : (vector<[8]xi16>) -> vector<[8]xi16>
-  // CHECK: call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %2, i64 2)
-  %2 = "arm_sve.intr.dupq_lane"(%arg2) <{lane = 2 : i64}> : (vector<[8]xf16>) -> vector<[8]xf16>
-  // CHECK: call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %3, i64 3)
-  %3 = "arm_sve.intr.dupq_lane"(%arg3) <{lane = 3 : i64}> : (vector<[8]xbf16>) -> vector<[8]xbf16>
-  // CHECK: call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %4, i64 4)
-  %4 = "arm_sve.intr.dupq_lane"(%arg4) <{lane = 4 : i64}> : (vector<[4]xi32>) -> vector<[4]xi32>
-  // CHECK: call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %5, i64 5)
-  %5 = "arm_sve.intr.dupq_lane"(%arg5) <{lane = 5 : i64}> : (vector<[4]xf32>) -> vector<[4]xf32>
-  // CHECK: call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %6, i64 6)
-  %6 = "arm_sve.intr.dupq_lane"(%arg6) <{lane = 6 : i64}> : (vector<[2]xi64>) -> vector<[2]xi64>
-  // CHECK: call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %7, i64 7)
-  %7 = "arm_sve.intr.dupq_lane"(%arg7) <{lane = 7 : i64}> : (vector<[2]xf64>) -> vector<[2]xf64>
+// CHECK-SAME: <vscale x 16 x i8> %[[V0:[0-9]+]]
+// CHECK-SAME: <vscale x 8 x i16> %[[V1:[0-9]+]]
+// CHECK-SAME: <vscale x 8 x half> %[[V2:[0-9]+]]
+// CHECK-SAME: <vscale x 8 x bfloat> %[[V3:[0-9]+]]
+// CHECK-SAME: <vscale x 4 x i32> %[[V4:[0-9]+]]
+// CHECK-SAME: <vscale x 4 x float> %[[V5:[0-9]+]]
+// CHECK-SAME: <vscale x 2 x i64> %[[V6:[0-9]+]]
+// CHECK-SAME: <vscale x 2 x double> %[[V7:[0-9]+]]
+llvm.func @arm_sve_dupq_lane(%nxv16i8: vector<[16]xi8>, %nxv8i16: vector<[8]xi16>,
+                             %nxv8f16: vector<[8]xf16>, %nxv8bf16: vector<[8]xbf16>,
+                             %nxv4i32: vector<[4]xi32>, %nxv4f32: vector<[4]xf32>,
+                             %nxv2i64: vector<[2]xi64>, %nxv2f64: vector<[2]xf64>) {
+  // CHECK: call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %[[V0]], i64 0)
+  %0 = "arm_sve.intr.dupq_lane"(%nxv16i8) <{lane = 0 : i64}> : (vector<[16]xi8>) -> vector<[16]xi8>
+  // CHECK: call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %[[V1]], i64 1)
+  %1 = "arm_sve.intr.dupq_lane"(%nxv8i16) <{lane = 1 : i64}> : (vector<[8]xi16>) -> vector<[8]xi16>
+  // CHECK: call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %[[V2]], i64 2)
+  %2 = "arm_sve.intr.dupq_lane"(%nxv8f16) <{lane = 2 : i64}> : (vector<[8]xf16>) -> vector<[8]xf16>
+  // CHECK: call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %[[V3]], i64 3)
+  %3 = "arm_sve.intr.dupq_lane"(%nxv8bf16) <{lane = 3 : i64}> : (vector<[8]xbf16>) -> vector<[8]xbf16>
+  // CHECK: call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %[[V4]], i64 4)
+  %4 = "arm_sve.intr.dupq_lane"(%nxv4i32) <{lane = 4 : i64}> : (vector<[4]xi32>) -> vector<[4]xi32>
+  // CHECK: call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %[[V5]], i64 5)
+  %5 = "arm_sve.intr.dupq_lane"(%nxv4f32) <{lane = 5 : i64}> : (vector<[4]xf32>) -> vector<[4]xf32>
+  // CHECK: call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %[[V6]], i64 6)
+  %6 = "arm_sve.intr.dupq_lane"(%nxv2i64) <{lane = 6 : i64}> : (vector<[2]xi64>) -> vector<[2]xi64>
+  // CHECK: call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %[[V7]], i64 7)
+  %7 = "arm_sve.intr.dupq_lane"(%nxv2f64) <{lane = 7 : i64}> : (vector<[2]xf64>) -> vector<[2]xf64>
   llvm.return
 }
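
Beyond renaming the block arguments, the new `CHECK-SAME` lines switch from hardcoded LLVM IR value names (`%0`, `%1`, ...) to FileCheck pattern variables: `%[[V0:[0-9]+]]` captures whatever number LLVM assigns to the argument, and later `%[[V0]]` uses require that same value, so the test no longer depends on LLVM's sequential value numbering. The pattern in isolation:

```mlir
// Capture the argument's numeric name once...
// CHECK-SAME: <vscale x 16 x i8> %[[V0:[0-9]+]]
// ...and require that the same value feeds the intrinsic call.
// CHECK: call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %[[V0]], i64 0)
```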
