Skip to content

Commit 22f96ab

Browse files
authored
[mlir][vector] Refine vector.transfer_read hoisting/forwarding (#65770)
Make sure that when analysing a `vector.transfer_read` that's a candidate for either hoisting or store-to-load forwarding, `memref.collapse_shape` Ops are correctly included in the alias analysis. This is done by either * making sure that relevant users are taken into account, or * making sure that source Ops are correctly identified.
1 parent eaf737a commit 22f96ab

File tree

4 files changed

+123
-1
lines changed

4 files changed

+123
-1
lines changed

mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,11 @@ void mlir::linalg::hoistRedundantVectorTransfersOnTensor(func::FuncOp func) {
5555
static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
5656
LoopLikeOpInterface loop) {
5757
Value source = transferRead.getSource();
58+
// Skip subview and collapse_shape Ops
5859
while (auto subView = source.getDefiningOp<memref::SubViewOp>())
5960
source = subView.getSource();
61+
while (auto collapsed = source.getDefiningOp<memref::CollapseShapeOp>())
62+
source = collapsed->getOperand(0);
6063
llvm::SmallVector<Operation *, 32> users(source.getUsers().begin(),
6164
source.getUsers().end());
6265
llvm::SmallDenseSet<Operation *, 32> processed;
@@ -69,6 +72,10 @@ static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
6972
users.append(subView->getUsers().begin(), subView->getUsers().end());
7073
continue;
7174
}
75+
if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
76+
users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
77+
continue;
78+
}
7279
if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
7380
continue;
7481
if (!loop->isAncestor(user))

mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,10 @@ void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) {
206206
users.append(subView->getUsers().begin(), subView->getUsers().end());
207207
continue;
208208
}
209+
if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
210+
users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
211+
continue;
212+
}
209213
if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
210214
continue;
211215
if (auto write = dyn_cast<vector::TransferWriteOp>(user)) {

mlir/test/Dialect/Linalg/hoisting.mlir

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,3 +756,74 @@ transform.sequence failures(propagate) {
756756
transform.structured.hoist_redundant_vector_transfers %0
757757
: (!transform.any_op) -> !transform.any_op
758758
}
759+
760+
// -----
761+
762+
// Regression test - `vector.transfer_read` below should not be hoisted.
763+
// Indeed, %collapse_shape (written to by `vector.transfer_write`) and %alloca
764+
// (read by `vector.transfer_read`) alias.
765+
766+
// CHECK-LABEL: func.func @no_hoisting_collapse_shape
767+
// CHECK: scf.for {{.*}} {
768+
// CHECK: vector.transfer_write
769+
// CHECK: vector.transfer_read
770+
// CHECK: vector.transfer_write
771+
// CHECK: }
772+
773+
func.func @no_hoisting_collapse_shape(%in_0: memref<1x20x1xi32>, %1: memref<9x1xi32>, %vec: vector<4xi32>) {
774+
%c0_i32 = arith.constant 0 : i32
775+
%c0 = arith.constant 0 : index
776+
%c4 = arith.constant 4 : index
777+
%c20 = arith.constant 20 : index
778+
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
779+
scf.for %arg0 = %c0 to %c20 step %c4 {
780+
%subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
781+
%collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
782+
vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
783+
%read = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
784+
vector.transfer_write %read, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
785+
}
786+
return
787+
}
788+
789+
transform.sequence failures(propagate) {
790+
^bb1(%arg1: !transform.any_op):
791+
%0 = transform.structured.match ops{["func.func"]} in %arg1
792+
: (!transform.any_op) -> !transform.any_op
793+
transform.structured.hoist_redundant_vector_transfers %0
794+
: (!transform.any_op) -> !transform.any_op
795+
}
796+
797+
// -----
798+
799+
// Regression test - `vector.transfer_read` below should not be hoisted.
800+
// Indeed, %collapse_shape (read by `vector.transfer_read`) and %alloca
801+
// (written to by `vector.transfer_write`) alias.
802+
803+
// CHECK-LABEL: func.func @no_hoisting_collapse_shape_2
804+
// CHECK: scf.for {{.*}} {
805+
// CHECK: vector.transfer_write
806+
// CHECK: vector.transfer_read
807+
808+
func.func @no_hoisting_collapse_shape_2(%vec: vector<1x12x1xi32>) {
809+
%c0_i32 = arith.constant 0 : i32
810+
%c0 = arith.constant 0 : index
811+
%c4 = arith.constant 4 : index
812+
%c20 = arith.constant 20 : index
813+
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x12x1xi32>
814+
scf.for %arg0 = %c0 to %c20 step %c4 {
815+
%collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x12x1xi32> into memref<12xi32>
816+
vector.transfer_write %vec, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x12x1xi32>, memref<1x12x1xi32>
817+
%read = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<12xi32>, vector<12xi32>
818+
"prevent.dce"(%read) : (vector<12xi32>) ->()
819+
}
820+
return
821+
}
822+
823+
transform.sequence failures(propagate) {
824+
^bb1(%arg1: !transform.any_op):
825+
%0 = transform.structured.match ops{["func.func"]} in %arg1
826+
: (!transform.any_op) -> !transform.any_op
827+
transform.structured.hoist_redundant_vector_transfers %0
828+
: (!transform.any_op) -> !transform.any_op
829+
}

mlir/test/Dialect/Vector/vector-transferop-opt.mlir

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,4 +215,44 @@ func.func @forward_dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
215215
vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = [true, true]} :
216216
vector<1x4xf32>, memref<4x4xf32>
217217
return %0 : vector<1x4xf32>
218-
}
218+
}
219+
220+
221+
// Regression test - the following _potential forwarding_ of %1 to the final
222+
// `vector.transfer_write` would not be safe:
223+
// %1 = vector.transfer_read %subview
224+
// vector.transfer_write %1, %alloca
225+
// vector.transfer_write %vec, %collapse_shape
226+
// %2 = vector.transfer_read %alloca
227+
// vector.transfer_write %1, %subview
228+
// Indeed, %alloca and %collapse_shape alias and hence %2 != %1. Instead, the
229+
// final `vector.transfer_write` should be preserved as:
230+
// vector.transfer_write %2, %subview
231+
232+
// CHECK-LABEL: func.func @collapse_shape
233+
// CHECK: scf.for {{.*}} {
234+
// CHECK: vector.transfer_read
235+
// CHECK: vector.transfer_write
236+
// CHECK: vector.transfer_write
237+
// CHECK: vector.transfer_read
238+
// CHECK: vector.transfer_write
239+
240+
func.func @collapse_shape(%in_0: memref<1x20x1xi32>, %vec: vector<4xi32>) {
241+
%c0_i32 = arith.constant 0 : i32
242+
%c0 = arith.constant 0 : index
243+
%c4 = arith.constant 4 : index
244+
%c20 = arith.constant 20 : index
245+
246+
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
247+
%collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
248+
scf.for %arg0 = %c0 to %c20 step %c4 {
249+
%subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
250+
%1 = vector.transfer_read %subview[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>, vector<1x4x1xi32>
251+
// %alloca and %collapse_shape alias
252+
vector.transfer_write %1, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32>
253+
vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
254+
%2 = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
255+
vector.transfer_write %2, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
256+
}
257+
return
258+
}

0 commit comments

Comments
 (0)