
Commit 8f75cce

stellaraccident authored and Groverkss committed
Revert "[mlir][tosa][linalg] Apply direct tosa -> linalg Conv2D lowering (llvm#68304)"
This reverts commit e29a253. Breaking TFLite mobilenet test. Needs triage.
1 parent 994c1a7 · commit 8f75cce

File tree: 4 files changed (+34, -196 lines)


mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml (-137)
@@ -2575,143 +2575,6 @@ structured_op: !LinalgStructuredOpConfig
                     - !ScalarExpression
                       scalar_arg: KZp
 --- !LinalgOpConfig
-metadata: !LinalgOpMetadata
-  name: conv_2d_nhwc_fhwc_q
-  cpp_class_name: Conv2DNhwcFhwcQOp
-  doc: |-
-    Performs 2-D convolution with zero point offsets.
-
-    Layout:
-      * Input: NHWC.
-      * Kernel: FHWC.
-
-    Numeric casting is performed on the operands to the inner multiply, promoting
-    them to the same data type as the accumulator/output. This includes the zero
-    point offsets common to quantized operations.
-  implements:
-  - LinalgConvolutionOpInterface
-structured_op: !LinalgStructuredOpConfig
-  args:
-  - !LinalgOperandDefConfig
-    name: I
-    kind: input_tensor
-    type_var: T1
-    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
-      s1 * s2 + s3 * s4, s5 * s6 + s7 * s8, s9)>
-  - !LinalgOperandDefConfig
-    name: K
-    kind: input_tensor
-    type_var: T2
-    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s10,
-      s3, s7, s9)>
-  - !LinalgOperandDefConfig
-    name: IZp
-    kind: scalar
-    type_var: I32
-  - !LinalgOperandDefConfig
-    name: KZp
-    kind: scalar
-    type_var: I32
-  - !LinalgOperandDefConfig
-    name: O
-    kind: output_tensor
-    type_var: U
-    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
-      s1, s5, s10)>
-  - !LinalgOperandDefConfig
-    name: strides
-    kind: index_attr
-    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
-      (s2, s6)>
-    default_indices:
-    - 1
-    - 1
-  - !LinalgOperandDefConfig
-    name: dilations
-    kind: index_attr
-    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
-      (s4, s8)>
-    default_indices:
-    - 1
-    - 1
-  indexing_maps: !LinalgIndexingMapsConfig
-    static_indexing_maps:
-    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
-      s9, s10] -> (d0, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8, d6)>
-    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
-      s9, s10] -> (d3, d4, d5, d6)>
-    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
-      s9, s10] -> ()>
-    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
-      s9, s10] -> ()>
-    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
-      s9, s10] -> (d0, d1, d2, d3)>
-  iterator_types:
-  - parallel
-  - parallel
-  - parallel
-  - parallel
-  - reduction
-  - reduction
-  - reduction
-  assignments:
-  - !ScalarAssign
-    arg: O
-    value: !ScalarExpression
-      scalar_fn:
-        kind: binary
-        fn_name: add
-        operands:
-        - !ScalarExpression
-          scalar_arg: O
-        - !ScalarExpression
-          scalar_fn:
-            kind: binary
-            fn_name: mul
-            operands:
-            - !ScalarExpression
-              scalar_fn:
-                kind: binary
-                fn_name: sub
-                operands:
-                - !ScalarExpression
-                  scalar_fn:
-                    kind: type
-                    fn_name: cast_signed
-                    type_var: U
-                    operands:
-                    - !ScalarExpression
-                      scalar_arg: I
-                - !ScalarExpression
-                  scalar_fn:
-                    kind: type
-                    fn_name: cast_signed
-                    type_var: U
-                    operands:
-                    - !ScalarExpression
-                      scalar_arg: IZp
-            - !ScalarExpression
-              scalar_fn:
-                kind: binary
-                fn_name: sub
-                operands:
-                - !ScalarExpression
-                  scalar_fn:
-                    kind: type
-                    fn_name: cast_signed
-                    type_var: U
-                    operands:
-                    - !ScalarExpression
-                      scalar_arg: K
-                - !ScalarExpression
-                  scalar_fn:
-                    kind: type
-                    fn_name: cast_signed
-                    type_var: U
-                    operands:
-                    - !ScalarExpression
-                      scalar_arg: KZp
---- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: conv_2d_nchw_fchw
   cpp_class_name: Conv2DNchwFchwOp
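Note: this YAML entry is machine-generated from the opdsl definition removed in core_named_ops.py below; its assignment tree spells out O[n, oh, ow, f] += (cast(I[...]) - cast(IZp)) * (cast(K[f, kh, kw, c]) - cast(KZp)). A minimal NumPy sketch of that computation, for orientation only (the function name and loop structure are illustrative, not part of the patch):

import numpy as np

def conv2d_nhwc_fhwc_q(I, K, izp, kzp, strides=(1, 1), dilations=(1, 1)):
    # I: [N, IH, IW, C] input, K: [F, KH, KW, C] kernel. Accumulate in int32,
    # mirroring the cast_signed promotions in the assignment tree above.
    n, ih, iw, c = I.shape
    f, kh, kw, _ = K.shape
    sh, sw = strides
    dh, dw = dilations
    oh = (ih - dh * (kh - 1) - 1) // sh + 1
    ow = (iw - dw * (kw - 1) - 1) // sw + 1
    out = np.zeros((n, oh, ow, f), dtype=np.int32)
    for y in range(oh):
        for x in range(ow):
            # Strided/dilated input patch for this output position: [N, KH, KW, C].
            patch = I[:, y * sh : y * sh + dh * kh : dh,
                      x * sw : x * sw + dw * kw : dw, :].astype(np.int32)
            # Zero-point-shifted multiply, reduced over (kh, kw, c).
            out[:, y, x, :] = np.einsum("nhwc,fhwc->nf", patch - izp,
                                        K.astype(np.int32) - kzp)
    return out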

mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp (+20, -23)
@@ -248,28 +248,25 @@ class ConvConverter : public OpConversionPattern<TosaConvOp> {
     pad.resize(pad.size() + 2, 0);
     input = applyPad(loc, input, pad, zeroAttr, rewriter);
 
-    // For Conv3D transpose the kernel to match dimension ordering of the linalg
-    // convolution operation. Conv2D has a 1-1 mapping in linalg so better to
-    // map directly and then transpose later if desired.
-    if (5 == inputTy.getRank()) {
-      // TODO(suderman): See if this can be efficiently folded - check whether
-      // the input is used anywhere else, if not fold the constant.
-      SmallVector<int64_t> weightPerm;
-      for (int i = 1; i < resultTy.getRank(); i++)
-        weightPerm.push_back(i);
-      weightPerm.push_back(0);
-
-      SmallVector<int64_t> newWeightShape;
-      for (auto dim : weightPerm)
-        newWeightShape.push_back(weightShape[dim]);
-      auto weightPermAttr = rewriter.getI64TensorAttr(weightPerm);
-      Value weightPermValue =
-          rewriter.create<arith::ConstantOp>(loc, weightPermAttr);
-      Type newWeightTy =
-          RankedTensorType::get(newWeightShape, weightTy.getElementType());
-      weight = rewriter.create<tosa::TransposeOp>(loc, newWeightTy, weight,
-                                                  weightPermValue);
-    }
+    // Transpose the kernel to match dimension ordering of the linalg
+    // convolution operation.
+    // TODO(suderman): See if this can be efficiently folded - check whether
+    // the input is used anywhere else, if not fold the constant.
+    SmallVector<int64_t> weightPerm;
+    for (int i = 1; i < resultTy.getRank(); i++)
+      weightPerm.push_back(i);
+    weightPerm.push_back(0);
+
+    SmallVector<int64_t> newWeightShape;
+    for (auto dim : weightPerm)
+      newWeightShape.push_back(weightShape[dim]);
+    auto weightPermAttr = rewriter.getI64TensorAttr(weightPerm);
+    Value weightPermValue =
+        rewriter.create<arith::ConstantOp>(loc, weightPermAttr);
+    Type newWeightTy =
+        RankedTensorType::get(newWeightShape, weightTy.getElementType());
+    weight = rewriter.create<tosa::TransposeOp>(loc, newWeightTy, weight,
+                                                weightPermValue);
 
     auto resultZeroAttr = rewriter.getZeroAttr(resultETy);
     Value emptyTensor = rewriter.create<tensor::EmptyOp>(
@@ -980,7 +977,7 @@ void mlir::tosa::populateTosaToLinalgNamedConversionPatterns(
     RewritePatternSet *patterns) {
   patterns->add<
       // clang-format off
-      ConvConverter<tosa::Conv2DOp, linalg::Conv2DNhwcFhwcOp, linalg::Conv2DNhwcFhwcQOp>,
+      ConvConverter<tosa::Conv2DOp, linalg::Conv2DNhwcHwcfOp, linalg::Conv2DNhwcHwcfQOp>,
       ConvConverter<tosa::Conv3DOp, linalg::Conv3DNdhwcDhwcfOp, linalg::Conv3DNdhwcDhwcfQOp>,
       DepthwiseConvConverter,
       MatMulConverter,
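Note: the restored code path builds the permutation [1, 2, ..., rank-1, 0] for any rank, so a rank-4 Conv2D kernel is transposed by [1, 2, 3, 0] (FHWC -> HWCF) and a rank-5 Conv3D kernel by [1, 2, 3, 4, 0]. A small sketch of that index arithmetic (weight_perm is an illustrative helper, not a name from the patch; NumPy stands in for tosa.transpose):

import numpy as np

def weight_perm(rank):
    # Mirrors the loop above: push 1..rank-1, then 0.
    return list(range(1, rank)) + [0]

assert weight_perm(4) == [1, 2, 3, 0]      # Conv2D weights
assert weight_perm(5) == [1, 2, 3, 4, 0]   # Conv3D weights, likewise

w_fhwc = np.empty((28, 3, 3, 27), dtype=np.float32)  # tensor<28x3x3x27xf32>
w_hwcf = np.transpose(w_fhwc, weight_perm(4))
assert w_hwcf.shape == (3, 3, 27, 28)                # tensor<3x3x27x28xf32>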

mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py (-30)
@@ -693,36 +693,6 @@ def conv_2d_nhwc_hwcf_q(
     ) * (TypeFn.cast_signed(U, K[D.kh, D.kw, D.c, D.f]) - TypeFn.cast_signed(U, KZp))
 
 
-@linalg_structured_op
-def conv_2d_nhwc_fhwc_q(
-    I=TensorDef(T1, S.N, S.OH * S.SH + S.KH * S.DH, S.OW * S.SW + S.KW * S.DW, S.C),
-    K=TensorDef(T2, S.F, S.KH, S.KW, S.C),
-    IZp=ScalarDef(I32),
-    KZp=ScalarDef(I32),
-    O=TensorDef(U, S.N, S.OH, S.OW, S.F, output=True),
-    strides=IndexAttrDef(S.SH, S.SW, default=[1, 1]),
-    dilations=IndexAttrDef(S.DH, S.DW, default=[1, 1]),
-):
-    """Performs 2-D convolution with zero point offsets.
-
-    Layout:
-      * Input: NHWC.
-      * Kernel: FHWC.
-
-    Numeric casting is performed on the operands to the inner multiply, promoting
-    them to the same data type as the accumulator/output. This includes the zero
-    point offsets common to quantized operations.
-    """
-    implements(ConvolutionOpInterface)
-    domain(D.n, D.oh, D.ow, D.f, D.kh, D.kw, D.c)
-    O[D.n, D.oh, D.ow, D.f] += (
-        TypeFn.cast_signed(
-            U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c]
-        )
-        - TypeFn.cast_signed(U, IZp)
-    ) * (TypeFn.cast_signed(U, K[D.f, D.kh, D.kw, D.c]) - TypeFn.cast_signed(U, KZp))
-
-
 @linalg_structured_op
 def conv_2d_nchw_fchw(
     I=TensorDef(T1, S.N, S.C, S.OH * S.SH + S.KH * S.DH, S.OW * S.SW + S.KW * S.DW),
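Note: the deleted conv_2d_nhwc_fhwc_q differs from the surviving conv_2d_nhwc_hwcf_q only in kernel indexing, K[D.f, D.kh, D.kw, D.c] versus K[D.kh, D.kw, D.c, D.f], which is why the lowering can fall back to hwcf plus a transpose. A quick NumPy consistency check (illustrative; uses a 1x1 kernel with unit stride and dilation so the convolution degenerates to a channel contraction):

import numpy as np

rng = np.random.default_rng(0)
I = rng.integers(-128, 128, (1, 5, 5, 3)).astype(np.int32)       # NHWC
K_fhwc = rng.integers(-128, 128, (4, 1, 1, 3)).astype(np.int32)  # FHWC
izp, kzp = -22, 42

# FHWC form: kernel indexed K[f, kh, kw, c].
fhwc = np.einsum("nhwc,fjkc->nhwf", I - izp, K_fhwc - kzp)
# HWCF form on the [1, 2, 3, 0]-transposed kernel: K[kh, kw, c, f].
hwcf = np.einsum("nhwc,jkcf->nhwf", I - izp,
                 np.transpose(K_fhwc, [1, 2, 3, 0]) - kzp)
assert np.array_equal(fhwc, hwcf)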

mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir (+14, -6)
@@ -363,11 +363,13 @@ func.func @avg_pool_dyn(%arg0: tensor<?x6x34x62xf32>) -> (tensor<?x5x33x62xf32>)
 
 // CHECK-LABEL: @conv2d_i8
 func.func @conv2d_i8(%input: tensor<1x49x42x27xi8>, %weights: tensor<28x1x1x27xi8>, %bias: tensor<28xi8>) -> () {
+  // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]>
+  // CHECK: %[[W:.+]] = tosa.transpose %arg1, %[[PERM]]
   // CHECK: %[[M_IN:.+]] = tensor.empty()
   // CHECK: %[[CST:.+]] = arith.constant 0
   // CHECK: %[[FILL:.+]] = linalg.fill
   // CHECK: %[[B_IN:.+]] = tensor.empty()
-  // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_fhwc_q {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, %c0_i32_0, %c0_i32_1 : tensor<1x49x42x27xi8>, tensor<28x1x1x27xi8>, i32, i32) outs(%[[FILL]] : tensor<1x45x40x28xi32>) -> tensor<1x45x40x28xi32>
+  // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf_q {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]], %c0_i32_0, %c0_i32_1 : tensor<1x49x42x27xi8>, tensor<1x1x27x28xi8>, i32, i32) outs(%[[FILL]] : tensor<1x45x40x28xi32>) -> tensor<1x45x40x28xi32>
   // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xi8>, tensor<1x45x40x28xi32>) outs(%[[B_IN]] : tensor<1x45x40x28xi32>)
   // CHECK: arith.extsi
   // CHECK: arith.addi
@@ -383,11 +385,13 @@ func.func @conv2d_i8(%input: tensor<1x49x42x27xi8>, %weights: tensor<28x1x1x27xi
 
 // CHECK-LABEL: @conv2d_f32
 func.func @conv2d_f32(%input: tensor<1x49x42x27xf32>, %weights: tensor<28x3x3x27xf32>, %bias: tensor<28xf32>) -> () {
+  // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]>
+  // CHECK: %[[W:.+]] = tosa.transpose %arg1, %[[PERM]]
   // CHECK: %[[M_IN:.+]] = tensor.empty()
   // CHECK: %[[CST:.+]] = arith.constant 0
   // CHECK: %[[FILL:.+]] = linalg.fill
   // CHECK: %[[B_IN:.+]] = tensor.empty()
-  // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_fhwc {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x49x42x27xf32>, tensor<28x3x3x27xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>)
+  // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>)
   // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>)
   // CHECK: arith.addf
   // CHECK: linalg.yield
@@ -404,11 +408,13 @@ func.func @conv2d_f32(%input: tensor<1x49x42x27xf32>, %weights: tensor<28x3x3x27
 func.func @conv2d_dyn(%input: tensor<?x49x42x27xf32>, %weights: tensor<28x3x3x27xf32>, %bias: tensor<28xf32>) -> () {
   // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
+  // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]>
+  // CHECK: %[[W:.+]] = tosa.transpose %arg1, %[[PERM]]
   // CHECK: %[[M_IN:.+]] = tensor.empty(%[[BATCH]])
   // CHECK: %[[CST:.+]] = arith.constant 0
   // CHECK: %[[FILL:.+]] = linalg.fill
   // CHECK: %[[B_IN:.+]] = tensor.empty(%[[BATCH]])
-  // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_fhwc {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<?x49x42x27xf32>, tensor<28x3x3x27xf32>) outs(%[[FILL]] : tensor<?x45x40x28xf32>)
+  // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<?x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<?x45x40x28xf32>)
   // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<?x45x40x28xf32>) outs(%[[B_IN]] : tensor<?x45x40x28xf32>)
   // CHECK: %[[ADD:.+]] = arith.addf
   // CHECK: linalg.yield %[[ADD]] : f32
@@ -462,11 +468,13 @@ func.func @conv2d_dyn_w_h(%input: tensor<1x?x?x27xf32>, %weights: tensor<28x3x3x
   // CHECK: %[[W_OUT:.+]] = arith.addi %[[DIVIDED_0]], %[[ONE_0]] : index
 
   // Running convolution
+  // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]>
+  // CHECK: %[[WEIGHT:.+]] = tosa.transpose %arg1, %[[PERM]]
   // CHECK: %[[M_IN:.+]] = tensor.empty(%[[H_OUT]], %[[W_OUT]])
   // CHECK: %[[CST:.+]] = arith.constant 0
   // CHECK: %[[FILL:.+]] = linalg.fill
   // CHECK: %[[B_IN:.+]] = tensor.empty(%[[H_OUT]], %[[W_OUT]])
-  // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_fhwc {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x?x?x27xf32>, tensor<28x3x3x27xf32>) outs(%[[FILL]] : tensor<1x?x?x28xf32>)
+  // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[WEIGHT]] : tensor<1x?x?x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x?x?x28xf32>)
   // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x?x?x28xf32>) outs(%[[B_IN]] : tensor<1x?x?x28xf32>)
   // CHECK: %[[ADD:.+]] = arith.addf
   // CHECK: linalg.yield %[[ADD]] : f32
@@ -481,7 +489,7 @@ func.func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28
   // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
   // CHECK: tensor.yield %[[C0]]
-  // CHECK: linalg.conv_2d_nhwc_fhwc
+  // CHECK: linalg.conv_2d_nhwc_hwcf
   %0 = tosa.conv2d %input, %weights, %bias {pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>, dilation = array<i64: 2, 1>} : (tensor<1x47x40x28xf32>, tensor<28x3x3x28xf32>, tensor<28xf32>) -> tensor<1x45x40x28xf32>
   return
 }
@@ -493,7 +501,7 @@ func.func @conv2d_quant(%arg0 : tensor<1x12x12x1xi8>, %arg1 : tensor<1024x3x3x1x
   // CHECK: %[[C22:.+]] = arith.constant -22
   // CHECK: tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
   // CHECK: tensor.yield %[[C22]]
-  // CHECK: linalg.conv_2d_nhwc_fhwc_q
+  // CHECK: linalg.conv_2d_nhwc_hwcf_q
   %0 = tosa.conv2d %arg0, %arg1, %arg2 {dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, quantization_info = #tosa.conv_quant<input_zp = -22, weight_zp = 42>, stride = array<i64: 1, 1>} : (tensor<1x12x12x1xi8>, tensor<1024x3x3x1xi8>, tensor<1024xi32>) -> tensor<1x12x12x1024xi32>
   return
 }
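Note: the expected result shapes in these CHECK lines follow the standard dilated-convolution size formula. For @conv2d_f32 (input 1x49x42x27, kernel 3x3, stride [1, 1], dilation [2, 1], no padding), a quick sanity check (out_dim is an illustrative helper, not from the test file):

def out_dim(in_dim, k, stride, dilation, pad=0):
    # Effective kernel extent is dilation * (k - 1) + 1.
    return (in_dim + pad - dilation * (k - 1) - 1) // stride + 1

assert out_dim(49, 3, stride=1, dilation=2) == 45  # output height
assert out_dim(42, 3, stride=1, dilation=1) == 40  # output width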
