 /// End-to-end test for tensor.pack where one of the inner tile sizes is
 /// dynamic.
-///
-/// Note, ATM this is a relatively simple example, with no vectorization and
-/// the dynamic tile size being a compile-time constant. The intention is to
-/// incrementally expand the config to something much more complex.
 
 func.func @main() {
   // Allocate and initialise the inputs
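For context, the op under test packs a 7x16 input with a dynamic inner tile size along the first dimension. Reconstructing from the types quoted in the comments of the hunk below, the payload op looks roughly like this (the value names and exact operands are illustrative, not copied from the file):

    %A_pack = tensor.pack %A padding_value(%c123_i32 : i32)
        inner_dims_pos = [0, 1] inner_tiles = [%tile_size, 1]
        into %A_pack_empty : tensor<7x16xi32> -> tensor<?x16x?x1xi32>

With the tile size fixed to 8 at runtime (per the deleted note above, the dynamic size is still a compile-time constant in practice), each 8x1 tile of the 7x16 input is padded with 123 where it overruns the source, which is exactly the tensor<8x1xi32> pad/fill shape appearing in the comments below.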
@@ -89,26 +85,49 @@ module @transforms attributes { transform.with_named_sequence } {
     %tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [1, 1]
        : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
 
-    // 2. Decompose the tiled Op into (trimmed for brevity):
+    // 2. Decompose the tiled pack Op into (trimmed for brevity):
     //
     //  %padded = tensor.pad %slice_of_A (..) :
     //    tensor<?x?xi32> to tensor<8x1xi32>
     //  %inserted_slice = tensor.insert_slice %padded into %slice_of_A_pack (...) :
     //    tensor<8x1xi32> into tensor<1x1x?x1xi32>
     //
-    // NOTE: no tile is transposed, hence no linalg.transpose
-    %func_1 = transform.get_parent_op %tiled_pack_op_p {isolated_from_above} : (!transform.any_op) -> !transform.any_op
-    transform.apply_patterns to %func_1 {
+    // (NOTE: no tile is transposed, hence no linalg.transpose)
+    //
+    // This is followed by this decomposition of the pad Op:
+    //
+    //  %c123_i32 = arith.constant 123 : i32
+    //  %slice_of_A = tensor.extract_slice %A[%3, %arg3] [%4, %5] [1, 1] :
+    //    tensor<7x16xi32> to tensor<?x?xi32>
+    //  %empty = tensor.empty() : tensor<8x1xi32>
+    //  %fill = linalg.fill ins(%c123_i32 : i32) outs(%empty :
+    //    tensor<8x1xi32>) -> tensor<8x1xi32>
+    //  %inserted_slice = tensor.insert_slice %slice_of_A into %fill[0, 0] [%4, %5] [1, 1] :
+    //    tensor<?x?xi32> into tensor<8x1xi32>
+    //
+    %func_op = transform.get_parent_op %tiled_pack_op_p {isolated_from_above} : (!transform.any_op) -> !transform.op<"func.func">
+    transform.apply_patterns to %func_op {
       transform.apply_patterns.linalg.decompose_pack_unpack
-    } : !transform.any_op
+      transform.apply_patterns.linalg.decompose_pad
+    } : !transform.op<"func.func">
+
+    // 3. Vectorize linalg.fill.
+    // Vector sizes match the inner tiles in the payload IR.
+    %fill = transform.structured.match ops{["linalg.fill"]} in %func_op : (!transform.op<"func.func">) -> !transform.any_op
+    transform.structured.vectorize %fill vector_sizes [8, 1] : !transform.any_op
+
+    transform.apply_patterns to %func_op {
+      transform.apply_patterns.tensor.fold_tensor_subset_ops
+      transform.apply_patterns.canonicalization
+    } : !transform.op<"func.func">
 
     // 3. Bufferize before lowering to LLVM
     %bufferize = transform.bufferization.one_shot_bufferize %module
       {bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op
 
     // 4. Canonicalize
-    %func_2 = transform.structured.match ops{["func.func"]} in %bufferize : (!transform.any_op) -> !transform.op<"func.func">
-    transform.apply_patterns to %func_2 {
+    %func_op_bufferized = transform.structured.match ops{["func.func"]} in %bufferize : (!transform.any_op) -> !transform.op<"func.func">
+    transform.apply_patterns to %func_op_bufferized {
       transform.apply_patterns.canonicalization
     } : !transform.op<"func.func">
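The new step 3 turns the 8x1 linalg.fill into a vector store. Because the vector sizes [8, 1] match the static tile shape, no masking is required; the rewritten IR looks roughly like this (a sketch, not verbatim vectorizer output):

    %splat = vector.broadcast %c123_i32 : i32 to vector<8x1xi32>
    %filled = vector.transfer_write %splat, %empty[%c0, %c0] {in_bounds = [true, true]}
        : vector<8x1xi32>, tensor<8x1xi32>

The follow-up fold_tensor_subset_ops and canonicalization patterns then fold the transfer_write through the surrounding extract_slice/insert_slice pairs.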
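One-shot bufferization then replaces tensor SSA values with memrefs across function boundaries; for instance, the 8x1 scratch tensor becomes an allocation that the vector store writes into directly (again a sketch with illustrative names, not verbatim output):

    %buf = memref.alloc() {alignment = 64 : i64} : memref<8x1xi32>
    vector.transfer_write %splat, %buf[%c0, %c0] {in_bounds = [true, true]}
        : vector<8x1xi32>, memref<8x1xi32>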
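As the hunk header shows, the schedule lives in a module tagged with transform.with_named_sequence, which the transform interpreter picks up (such integration tests are typically driven by a -transform-interpreter RUN line, not shown in this hunk). The enclosing skeleton is roughly the following; the entry-point name and the consumed-argument attribute follow the usual convention and are not quoted from the file, though the %module argument name matches its use in the bufferization step above:

    module @transforms attributes { transform.with_named_sequence } {
      transform.named_sequence @__transform_main(%module: !transform.any_op {transform.consumed}) {
        // 1. Tile, 2. decompose, 3. vectorize, then bufferize and
        // canonicalize, as in the hunk above.
        transform.yield
      }
    }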