Skip to content

Commit b1aa3ab

Browse files
committed
[mlir][linalg][nfc] Update "pack-dynamic-inner-tile.mlir"
[mlir][linalg][nfc] Update pack-dynamic-inner-tile.mlir Builds on: * llvm#117329: Extract GeneralizePadOpPattern into a standalone transformation. * llvm#116373: Update pack-dynamic-inner-tile.mlir. This update adds vectorization to the "pack-dynamic-inner-tile.mlir" pipeline. The pipeline first decomposes `tensor.pack` into `tensor.pad` and then into `linalg.fill` (llvm#117329). Next, `linalg.fill` is vectorized, with vector sizes matching the inner tile sizes of the original `tensor.pack`. **NOTE:** Depends on llvm#117329 - please only review the top commit!
1 parent 1b2c8f1 commit b1aa3ab

File tree

1 file changed

+30
-11
lines changed

1 file changed

+30
-11
lines changed

mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,6 @@
1010

1111
/// End-to-end test for tensor.pack where one of the inner tile sizes is
1212
/// dynamic.
13-
///
14-
/// Note, ATM this is a relatively simple example, with no vectorization and
15-
/// the dynamic tile size being a compile-time constant. The intention is to
16-
/// incrementally expand the config to something much more complex.
1713

1814
func.func @main() {
1915
// Allocate and initialise the inputs
@@ -89,26 +85,49 @@ module @transforms attributes { transform.with_named_sequence } {
8985
%tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [1, 1]
9086
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
9187

92-
// 2. Decompose the tiled Op into (trimmed for brevity):
88+
// 2. Decompose the tiled pack Op into (trimmed for brevity):
9389
//
9490
// %padded = tensor.pad %slice_of_A (..) :
9591
// tensor<?x?xi32> to tensor<8x1xi32>
9692
// %inserted_slice = tensor.insert_slice %padded into %slice_of_A_pack (...) :
9793
// tensor<8x1xi32> into tensor<1x1x?x1xi32>
9894
//
99-
// NOTE: no tile is transposed, hence no linalg.transpose
100-
%func_1 = transform.get_parent_op %tiled_pack_op_p {isolated_from_above} : (!transform.any_op) -> !transform.any_op
101-
transform.apply_patterns to %func_1 {
95+
// (NOTE: no tile is transposed, hence no linalg.transpose)
96+
//
97+
// This is followed by this decomposition of the pad Op:
98+
//
99+
// %c123_i32 = arith.constant 123 : i32
100+
// %slice_of_A = tensor.extract_slice %A[%3, %arg3] [%4, %5] [1, 1] :
101+
// tensor<7x16xi32> to tensor<?x?xi32>
102+
// %empty = tensor.empty() : tensor<8x1xi32>
103+
// %fill = linalg.fill ins(%c123_i32 : i32) outs(%empty :
104+
// tensor<8x1xi32>) -> tensor<8x1xi32>
105+
// %inserted_slice = tensor.insert_slice %slice_of_A into %fill[0, 0] [%4, %5] [1, 1] :
106+
// tensor<?x?xi32> into tensor<8x1xi32>
107+
//
108+
%func_op = transform.get_parent_op %tiled_pack_op_p {isolated_from_above} : (!transform.any_op) -> !transform.op<"func.func">
109+
transform.apply_patterns to %func_op {
102110
transform.apply_patterns.linalg.decompose_pack_unpack
103-
} : !transform.any_op
111+
transform.apply_patterns.linalg.decompose_pad
112+
} : !transform.op<"func.func">
113+
114+
// 3. Vectorize linalg.fill.
115+
// Vector sizes match the inner tiles in the payload IR.
116+
%fill = transform.structured.match ops{["linalg.fill"]} in %func_op : (!transform.op<"func.func">) -> !transform.any_op
117+
transform.structured.vectorize %fill vector_sizes [8, 1] : !transform.any_op
118+
119+
transform.apply_patterns to %func_op {
120+
transform.apply_patterns.tensor.fold_tensor_subset_ops
121+
transform.apply_patterns.canonicalization
122+
} : !transform.op<"func.func">
104123

105124
// 4. Bufferize before lowering to LLVM
106125
%bufferize = transform.bufferization.one_shot_bufferize %module
107126
{bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op
108127

109128
// 5. Canonicalize
110-
%func_2 = transform.structured.match ops{["func.func"]} in %bufferize : (!transform.any_op) -> !transform.op<"func.func">
111-
transform.apply_patterns to %func_2 {
129+
%func_op_bufferized = transform.structured.match ops{["func.func"]} in %bufferize : (!transform.any_op) -> !transform.op<"func.func">
130+
transform.apply_patterns to %func_op_bufferized {
112131
transform.apply_patterns.canonicalization
113132
} : !transform.op<"func.func">
114133

0 commit comments

Comments
 (0)