@@ -136,9 +136,7 @@ func.func @vectorize_nd_tensor_extract_transfer_read_basic(
// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[IDX1]], %[[IDX2]], %[[C0:.*]]], %[[CST_0]] {in_bounds = [true, true, true]} : tensor<3x3x3xf32>, vector<1x1x3xf32>
// CHECK: vector.transfer_write %[[READ]], %[[ARG1]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32>
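// NOTE: Illustrative sketch only, not the test's actual input (which appears
// earlier in this file): the general shape of a `linalg.generic` carrying a
// `tensor.extract` that the CHECK lines above vectorize into a contiguous
// `vector.transfer_read`. Shapes, names and index handling are assumptions
// for illustration.
func.func @contiguous_extract_sketch(%src: tensor<3x3x3xf32>,
                                     %init: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> {
  %res = linalg.generic {
    indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
    iterator_types = ["parallel", "parallel", "parallel"]
  } outs(%init : tensor<1x1x3xf32>) {
  ^bb0(%out: f32):
    %i = linalg.index 0 : index
    %j = linalg.index 1 : index
    %k = linalg.index 2 : index
    // The fastest-varying index comes straight from `linalg.index 2`, so the
    // vectorizer can emit a single in-bounds 1x1x3 transfer_read instead of a gather.
    %v = tensor.extract %src[%i, %j, %k] : tensor<3x3x3xf32>
    linalg.yield %v : f32
  } -> tensor<1x1x3xf32>
  return %res : tensor<1x1x3xf32>
}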
- // Same as example above, but reading into a column tensor. Note that after the
- // vectorizatoin, the `TransferOpReduceRank` will replace
- // `vector.transfer_read` with `tensor.extract -> scalar`.
+ // Same as example above, but reading into a column tensor.
// TODO: Currently this fails to vectorise when the indices are non-constant.
@@ -162,9 +160,10 @@ func.func @vectorize_nd_tensor_extract_transfer_read_basic_column(
// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_transfer_read_basic_column(
// CHECK-SAME: %[[INPUT:.*]]: tensor<3x3x3xf32>,
// CHECK-SAME: %[[OUTPUT:.*]]: tensor<3x1x1xf32>)
- // CHECK: %[[C0:.*]] = arith.constant 0 : index
- // CHECK: %[[EXTRACT:.*]] = tensor.extract %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] : tensor<3x3x3xf32>
- // CHECK: %[[BCAST:.*]] = vector.broadcast %[[EXTRACT]] : f32 to vector<3x1x1xf32>
+ // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+ // CHECK-DAG: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32
+ // CHECK: %[[READ:.*]] = vector.transfer_read %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[CST_0]] : tensor<3x3x3xf32>, vector<f32>
+ // CHECK: %[[BCAST:.*]] = vector.broadcast %[[READ]] : vector<f32> to vector<3x1x1xf32>
// CHECK: %[[RES:.*]] = vector.transfer_write %[[BCAST]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<3x1x1xf32>, tensor<3x1x1xf32>
// CHECK: return %[[RES]] : tensor<3x1x1xf32>
@@ -541,8 +540,9 @@ func.func @vectorize_nd_tensor_extract_with_tensor_extract(%input_1: tensor<1x20
// CHECK-SAME: %[[INPUT_2:.*]]: tensor<257x24xf32>,
// CHECK: %[[EXTRACTED_0_IDX_0:.*]] = arith.constant 0 : index
// CHECK: %[[EXTRACTED_0_IDX_1:.*]] = vector.extractelement %{{.*}}[%{{.*}} : i32] : vector<4xindex>
- // First `tensor.extract` from the generic Op - loop invariant scalar load.
- // CHECK: tensor.extract %[[INPUT_1]][%[[EXTRACTED_0_IDX_0]], %[[EXTRACTED_0_IDX_1]]] : tensor<1x20xi32>
+ // First `vector.transfer_read` from the generic Op - loop invariant scalar load.
+ // CHECK: vector.transfer_read %[[INPUT_1]][%[[EXTRACTED_0_IDX_0]], %[[EXTRACTED_0_IDX_1]]]
+ // CHECK-SAME: tensor<1x20xi32>, vector<i32>
// The following `tensor.extract` from the generic Op is a contiguous load (all Ops used
// for address calculation also satisfy the required conditions).
// CHECK: vector.transfer_read %[[INPUT_2]][%{{.*}}, %{{.*}}], %{{.*}} {in_bounds = [true, true]} : tensor<257x24xf32>, vector<1x4xf32>
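// NOTE: A hand-written sketch of the two load shapes the CHECK lines above
// distinguish; the constant indices are illustrative placeholders, not the
// test's actual address computation.
func.func @two_load_kinds_sketch(%in1: tensor<1x20xi32>,
                                 %in2: tensor<257x24xf32>) -> (vector<i32>, vector<1x4xf32>) {
  %c0 = arith.constant 0 : index
  %pad_i32 = arith.constant 0 : i32
  %pad_f32 = arith.constant 0.000000e+00 : f32
  // Loop-invariant scalar load: a 0-D transfer_read of a single i32 element.
  // (In the test the column index comes from a vector.extractelement.)
  %scalar = vector.transfer_read %in1[%c0, %c0], %pad_i32 : tensor<1x20xi32>, vector<i32>
  // Contiguous load: a 1x4 slice read along the trailing dimension.
  %row = vector.transfer_read %in2[%c0, %c0], %pad_f32 {in_bounds = [true, true]}
           : tensor<257x24xf32>, vector<1x4xf32>
  return %scalar, %row : vector<i32>, vector<1x4xf32>
}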
@@ -745,8 +745,8 @@ func.func @vectorize_0d_tensor_extract(%arg0: tensor<f32>, %arg2: tensor<1x1x3xf
// CHECK-LABEL: func.func @vectorize_0d_tensor_extract(
// CHECK-SAME: %[[ARG_0:.*]]: tensor<f32>
- // CHECK: %[[EXTRACT:.*]] = tensor.extract %[[ARG_0]][] : tensor<f32>
- // CHECK: vector.broadcast %[[EXTRACT]] : f32 to vector<1x1x3xf32>
+ // CHECK: %[[EXTRACT:.*]] = vector.transfer_read %[[ARG_0]][], %{{.+}} : tensor<f32>
+ // CHECK: vector.broadcast %[[EXTRACT]] : vector<f32> to vector<1x1x3xf32>
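// NOTE: Sketch of the 0-D case encoded by the CHECK lines above (names are
// illustrative): extracting from a rank-0 tensor becomes a transfer_read
// with an empty index list that yields a 0-D vector, which is then broadcast.
func.func @zero_d_read_sketch(%src: tensor<f32>) -> vector<1x1x3xf32> {
  %pad = arith.constant 0.000000e+00 : f32
  // Rank-0 source: no indices; the result is a 0-D vector.
  %read = vector.transfer_read %src[], %pad : tensor<f32>, vector<f32>
  %bcast = vector.broadcast %read : vector<f32> to vector<1x1x3xf32>
  return %bcast : vector<1x1x3xf32>
}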
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {