@@ -136,7 +136,9 @@ func.func @vectorize_nd_tensor_extract_transfer_read_basic(
// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[IDX1]], %[[IDX2]], %[[C0:.*]]], %[[CST_0]] {in_bounds = [true, true, true]} : tensor<3x3x3xf32>, vector<1x1x3xf32>
// CHECK: vector.transfer_write %[[READ]], %[[ARG1]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32>
- // Same as example above, but reading into a column tensor.
+ // Same as example above, but reading into a column tensor. Note that after the
+ // vectorization, the `TransferOpReduceRank` will replace
+ // `vector.transfer_read` with `tensor.extract -> scalar`.
// TODO: Currently this fails to vectorise when the indices are non-constant.
@@ -160,10 +162,9 @@ func.func @vectorize_nd_tensor_extract_transfer_read_basic_column(
// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_transfer_read_basic_column(
// CHECK-SAME: %[[INPUT:.*]]: tensor<3x3x3xf32>,
// CHECK-SAME: %[[OUTPUT:.*]]: tensor<3x1x1xf32>)
- // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
- // CHECK-DAG: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32
- // CHECK: %[[READ:.*]] = vector.transfer_read %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[CST_0]] : tensor<3x3x3xf32>, vector<f32>
- // CHECK: %[[BCAST:.*]] = vector.broadcast %[[READ]] : vector<f32> to vector<3x1x1xf32>
+ // CHECK: %[[C0:.*]] = arith.constant 0 : index
+ // CHECK: %[[EXTRACT:.*]] = tensor.extract %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] : tensor<3x3x3xf32>
+ // CHECK: %[[BCAST:.*]] = vector.broadcast %[[EXTRACT]] : f32 to vector<3x1x1xf32>
// CHECK: %[[RES:.*]] = vector.transfer_write %[[BCAST]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<3x1x1xf32>, tensor<3x1x1xf32>
// CHECK: return %[[RES]] : tensor<3x1x1xf32>
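
For reference, a minimal sketch of the vectorized IR that the updated checks above describe, once `TransferOpReduceRank` has folded the rank-reduced `vector.transfer_read` into a scalar load (SSA names are illustrative, not taken from the test):

  %c0 = arith.constant 0 : index
  // Loop-invariant scalar load instead of a single-element vector.transfer_read.
  %extracted = tensor.extract %input[%c0, %c0, %c0] : tensor<3x3x3xf32>
  // Broadcast the scalar to the full output vector shape.
  %bcast = vector.broadcast %extracted : f32 to vector<3x1x1xf32>
  %res = vector.transfer_write %bcast, %output[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<3x1x1xf32>, tensor<3x1x1xf32>
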
@@ -540,9 +541,8 @@ func.func @vectorize_nd_tensor_extract_with_tensor_extract(%input_1: tensor<1x20
// CHECK-SAME: %[[INPUT_2:.*]]: tensor<257x24xf32>,
// CHECK: %[[EXTRACTED_0_IDX_0:.*]] = arith.constant 0 : index
// CHECK: %[[EXTRACTED_0_IDX_1:.*]] = vector.extractelement %{{.*}}[%{{.*}} : i32] : vector<4xindex>
- // First `vector.transfer_read` from the generic Op - loop invariant scalar load.
- // CHECK: vector.transfer_read %[[INPUT_1]][%[[EXTRACTED_0_IDX_0]], %[[EXTRACTED_0_IDX_1]]]
- // CHECK-SAME: tensor<1x20xi32>, vector<i32>
+ // First `tensor.extract` from the generic Op - loop invariant scalar load.
+ // CHECK: tensor.extract %[[INPUT_1]][%[[EXTRACTED_0_IDX_0]], %[[EXTRACTED_0_IDX_1]]] : tensor<1x20xi32>
 // The following `tensor.extract` from the generic Op is a contiguous load (all Ops used
// for address calculation also satisfy the required conditions).
 // CHECK: vector.transfer_read %[[INPUT_2]][%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true]} : tensor<257x24xf32>, vector<1x4xf32>
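
To make the distinction above concrete, a rough sketch of the two loads produced from the generic Op (SSA names are hypothetical): the loop-invariant index turns into a scalar `tensor.extract`, while the contiguous access stays a `vector.transfer_read`:

  // Loop-invariant scalar load from the first input.
  %idx = vector.extractelement %indices[%pos : i32] : vector<4xindex>
  %scalar = tensor.extract %input_1[%c0, %idx] : tensor<1x20xi32>
  // Contiguous, in-bounds vector load from the second input.
  %row = vector.transfer_read %input_2[%i, %j], %pad {in_bounds = [true, true]} : tensor<257x24xf32>, vector<1x4xf32>
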
@@ -745,8 +745,8 @@ func.func @vectorize_0d_tensor_extract(%arg0: tensor<f32>, %arg2: tensor<1x1x3xf
// CHECK-LABEL: func.func @vectorize_0d_tensor_extract(
// CHECK-SAME: %[[ARG_0:.*]]: tensor<f32>
- // CHECK: %[[EXTRACT:.*]] = vector.transfer_read %[[ARG_0]][], %{{.+}} : tensor<f32>
- // CHECK: vector.broadcast %[[EXTRACT]] : vector<f32> to vector<1x1x3xf32>
+ // CHECK: %[[EXTRACT:.*]] = tensor.extract %[[ARG_0]][] : tensor<f32>
+ // CHECK: vector.broadcast %[[EXTRACT]] : f32 to vector<1x1x3xf32>
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {