Skip to content

Commit 90d2f8c

Browse files
authored
[mlir][vector] Teach TransferOptimization to look through trivial aliases (llvm#87805)
This allows `TransferOptimization` to eliminate and forward stores that are to trivial aliases (rather than just to identical memref values). A trivial alias is (currently) defined as: 1. A `memref.cast` 2. A `memref.subview` with a zero offset and unit strides 3. A chain of 1 and 2
1 parent cfcbe3a commit 90d2f8c

File tree

4 files changed

+92
-18
lines changed

4 files changed

+92
-18
lines changed

mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ namespace mlir {
2222

2323
class MemRefType;
2424

25+
/// A value with a memref type.
26+
using MemrefValue = TypedValue<BaseMemRefType>;
27+
2528
namespace memref {
2629

2730
/// Returns true, if the memref type has static shapes and represents a
@@ -93,6 +96,20 @@ computeStridesIRBlock(Location loc, OpBuilder &builder,
9396
return computeSuffixProductIRBlock(loc, builder, sizes);
9497
}
9598

99+
/// Walk up the source chain until an operation that changes/defines the view of
100+
/// memory is found (i.e. skip operations that alias the entire view).
101+
MemrefValue skipFullyAliasingOperations(MemrefValue source);
102+
103+
/// Checks if two (memref) values are the same or are statically known to alias
104+
/// the same region of memory.
105+
inline bool isSameViewOrTrivialAlias(MemrefValue a, MemrefValue b) {
106+
return skipFullyAliasingOperations(a) == skipFullyAliasingOperations(b);
107+
}
108+
109+
/// Walk up the source chain until an op other than a `memref.subview`
110+
/// or `memref.cast` is found.
111+
MemrefValue skipSubViewsAndCasts(MemrefValue source);
112+
96113
} // namespace memref
97114
} // namespace mlir
98115

mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,5 +178,35 @@ computeSuffixProductIRBlock(Location loc, OpBuilder &builder,
178178
return computeSuffixProductIRBlockImpl(loc, builder, sizes, unit);
179179
}
180180

181+
/// Follows the definition chain of `source` upwards for as long as the
/// defining operation is either a `memref.cast` or a `memref.subview` with
/// zero offset and unit strides, and returns the first value that is not
/// produced by such an operation (including values with no defining op).
MemrefValue skipFullyAliasingOperations(MemrefValue source) {
  for (;;) {
    auto *definingOp = source.getDefiningOp();
    // Values without a defining operation terminate the walk.
    if (!definingOp)
      return source;

    // A `memref.cast` still points to the same memory.
    if (auto castOp = dyn_cast<memref::CastOp>(definingOp)) {
      source = castOp.getSource();
      continue;
    }

    // Only a `memref.subview` with an all zero offset, and all unit strides,
    // still points to the same memory; anything else ends the walk.
    auto subViewOp = dyn_cast<memref::SubViewOp>(definingOp);
    if (!subViewOp || !subViewOp.hasZeroOffset() || !subViewOp.hasUnitStride())
      return source;
    source = cast<MemrefValue>(subViewOp.getSource());
  }
}
197+
198+
/// Follows the definition chain of `source` upwards, stepping over every
/// `memref.subview` and `memref.cast` regardless of offsets or strides, and
/// returns the first value that is not produced by one of those two ops
/// (including values with no defining op).
MemrefValue skipSubViewsAndCasts(MemrefValue source) {
  while (auto *definingOp = source.getDefiningOp()) {
    if (auto subViewOp = dyn_cast<memref::SubViewOp>(definingOp)) {
      source = cast<MemrefValue>(subViewOp.getSource());
      continue;
    }
    // Named `castOp` (not `cast`) so the local does not shadow the `cast<>`
    // casting template used for the subview source above.
    if (auto castOp = dyn_cast<memref::CastOp>(definingOp)) {
      source = castOp.getSource();
      continue;
    }
    // Any other defining operation ends the walk.
    break;
  }
  return source;
}
210+
181211
} // namespace memref
182212
} // namespace mlir

mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "mlir/Dialect/Affine/IR/AffineOps.h"
1515
#include "mlir/Dialect/Arith/IR/Arith.h"
1616
#include "mlir/Dialect/MemRef/IR/MemRef.h"
17+
#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
1718
#include "mlir/Dialect/Tensor/IR/Tensor.h"
1819
#include "mlir/Dialect/Utils/IndexingUtils.h"
1920
#include "mlir/Dialect/Vector/IR/VectorOps.h"
@@ -104,10 +105,8 @@ void TransferOptimization::deadStoreOp(vector::TransferWriteOp write) {
104105
<< "\n");
105106
llvm::SmallVector<Operation *, 8> blockingAccesses;
106107
Operation *firstOverwriteCandidate = nullptr;
107-
Value source = write.getSource();
108-
// Skip subview ops.
109-
while (auto subView = source.getDefiningOp<memref::SubViewOp>())
110-
source = subView.getSource();
108+
Value source =
109+
memref::skipSubViewsAndCasts(cast<MemrefValue>(write.getSource()));
111110
llvm::SmallVector<Operation *, 32> users(source.getUsers().begin(),
112111
source.getUsers().end());
113112
llvm::SmallDenseSet<Operation *, 32> processed;
@@ -116,8 +115,8 @@ void TransferOptimization::deadStoreOp(vector::TransferWriteOp write) {
116115
// If the user has already been processed skip.
117116
if (!processed.insert(user).second)
118117
continue;
119-
if (auto subView = dyn_cast<memref::SubViewOp>(user)) {
120-
users.append(subView->getUsers().begin(), subView->getUsers().end());
118+
if (isa<memref::SubViewOp, memref::CastOp>(user)) {
119+
users.append(user->getUsers().begin(), user->getUsers().end());
121120
continue;
122121
}
123122
if (isMemoryEffectFree(user))
@@ -126,7 +125,9 @@ void TransferOptimization::deadStoreOp(vector::TransferWriteOp write) {
126125
continue;
127126
if (auto nextWrite = dyn_cast<vector::TransferWriteOp>(user)) {
128127
// Check candidate that can override the store.
129-
if (write.getSource() == nextWrite.getSource() &&
128+
if (memref::isSameViewOrTrivialAlias(
129+
cast<MemrefValue>(nextWrite.getSource()),
130+
cast<MemrefValue>(write.getSource())) &&
130131
checkSameValueWAW(nextWrite, write) &&
131132
postDominators.postDominates(nextWrite, write)) {
132133
if (firstOverwriteCandidate == nullptr ||
@@ -191,10 +192,8 @@ void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) {
191192
<< "\n");
192193
SmallVector<Operation *, 8> blockingWrites;
193194
vector::TransferWriteOp lastwrite = nullptr;
194-
Value source = read.getSource();
195-
// Skip subview ops.
196-
while (auto subView = source.getDefiningOp<memref::SubViewOp>())
197-
source = subView.getSource();
195+
Value source =
196+
memref::skipSubViewsAndCasts(cast<MemrefValue>(read.getSource()));
198197
llvm::SmallVector<Operation *, 32> users(source.getUsers().begin(),
199198
source.getUsers().end());
200199
llvm::SmallDenseSet<Operation *, 32> processed;
@@ -203,12 +202,8 @@ void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) {
203202
// If the user has already been processed skip.
204203
if (!processed.insert(user).second)
205204
continue;
206-
if (auto subView = dyn_cast<memref::SubViewOp>(user)) {
207-
users.append(subView->getUsers().begin(), subView->getUsers().end());
208-
continue;
209-
}
210-
if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
211-
users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
205+
if (isa<memref::SubViewOp, memref::CollapseShapeOp, memref::CastOp>(user)) {
206+
users.append(user->getUsers().begin(), user->getUsers().end());
212207
continue;
213208
}
214209
if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
@@ -221,7 +216,9 @@ void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) {
221216
cast<VectorTransferOpInterface>(read.getOperation()),
222217
/*testDynamicValueUsingBounds=*/true))
223218
continue;
224-
if (write.getSource() == read.getSource() &&
219+
if (memref::isSameViewOrTrivialAlias(
220+
cast<MemrefValue>(read.getSource()),
221+
cast<MemrefValue>(write.getSource())) &&
225222
dominators.dominates(write, read) && checkSameValueRAW(write, read)) {
226223
if (lastwrite == nullptr || dominators.dominates(lastwrite, write))
227224
lastwrite = write;

mlir/test/Dialect/Vector/vector-transferop-opt.mlir

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,3 +485,33 @@ func.func @forward_dead_constant_splat_store_with_masking_negative_3(%buffer : m
485485
vector.transfer_write %x, %buffer[%c0, %c0], %mask {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
486486
return
487487
}
488+
489+
// Here each read/write is to a different subview, but they all point to the exact
490+
// same bit of memory (just through casts and subviews with unit strides and
491+
// zero offsets).
492+
// CHECK-LABEL: func @forward_and_eliminate_stores_through_trivial_aliases
493+
// CHECK-NOT: vector.transfer_write
494+
// CHECK-NOT: vector.transfer_read
495+
// CHECK: scf.for
496+
// CHECK: }
497+
// CHECK: vector.transfer_write
498+
// CHECK: return
499+
func.func @forward_and_eliminate_stores_through_trivial_aliases(
500+
%buffer : memref<?x?xf32>, %vec: vector<[8]x[8]xf32>, %size: index, %a_size: index, %another_size: index
501+
) {
502+
%c0 = arith.constant 0 : index
503+
%c1 = arith.constant 1 : index
504+
%c32 = arith.constant 32 : index
505+
%cst = arith.constant 0.0 : f32
506+
vector.transfer_write %vec, %buffer[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32>
507+
%direct_subview = memref.subview %buffer[0, 0] [%a_size, %a_size] [1, 1] : memref<?x?xf32> to memref<?x?xf32, strided<[?, 1], offset: ?>>
508+
%cast = memref.cast %direct_subview : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32>
509+
%subview_of_cast = memref.subview %cast[0, 0] [%another_size, %another_size] [1, 1] : memref<?x?xf32> to memref<?x?xf32, strided<[?, 1], offset: ?>>
510+
%21 = vector.transfer_read %direct_subview[%c0, %c0], %cst {in_bounds = [true, true]} : memref<?x?xf32, strided<[?, 1], offset: ?>>, vector<[8]x[8]xf32>
511+
%23 = scf.for %arg2 = %c0 to %c32 step %c1 iter_args(%arg3 = %21) -> (vector<[8]x[8]xf32>) {
512+
%24 = arith.addf %arg3, %arg3 : vector<[8]x[8]xf32>
513+
scf.yield %24 : vector<[8]x[8]xf32>
514+
}
515+
vector.transfer_write %23, %subview_of_cast[%c0, %c0] {in_bounds = [true, true]} : vector<[8]x[8]xf32>, memref<?x?xf32, strided<[?, 1], offset: ?>>
516+
return
517+
}

0 commit comments

Comments
 (0)