
[MLIR][Vector] Implement transferXXPermutationLowering as MaskableOpRewritePattern #91987


Merged: 8 commits into llvm:main, May 20, 2024

Conversation

@nujaa (Contributor) commented May 13, 2024:

  • Implements TransferWritePermutationLowering, TransferReadPermutationLowering and TransferWriteNonPermutationLowering as MaskableOpRewritePatterns, allowing the patterns to exit gracefully when such an xferOp is used inside a vector::MaskOp.
  • Updates MaskableOpRewritePattern to handle MemRefs and buffer semantics: returning an empty Value() from matchAndRewriteMaskableOp now represents a successful rewrite with no value to replace the original op with.

Split of #90835
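
For context, a simplified sketch (not the verbatim upstream code) of the driver logic this builds on: MaskableOpRewritePattern checks whether the matched op sits inside a vector.mask, hands the masking op to the hook, and, with this patch, treats an empty Value() as "rewrite succeeded, nothing to replace" (the memref / buffer-semantics case).

```cpp
#include "mlir/Dialect/Vector/Interfaces/MaskableOpInterface.h"
#include "mlir/Dialect/Vector/Interfaces/MaskingOpInterface.h"
#include "mlir/IR/PatternMatch.h"

using namespace mlir;
using vector::MaskableOpInterface;
using vector::MaskingOpInterface;

template <class SourceOp>
struct MaskableOpRewritePattern : OpRewritePattern<SourceOp> {
  using OpRewritePattern<SourceOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(SourceOp sourceOp,
                                PatternRewriter &rewriter) const final {
    // If the op is masked, the surrounding vector.mask becomes the root op
    // that gets erased or replaced.
    auto maskableOp = dyn_cast<MaskableOpInterface>(sourceOp.getOperation());
    MaskingOpInterface maskOp;
    Operation *rootOp = sourceOp;
    if (maskableOp && maskableOp.isMasked()) {
      maskOp = maskableOp.getMaskingOp();
      rootOp = maskOp;
    }

    FailureOr<Value> newValue =
        matchAndRewriteMaskableOp(sourceOp, maskOp, rewriter);
    if (failed(newValue))
      return failure();

    // An empty Value() signals a successful rewrite with no result to
    // substitute, e.g. transfer_write on a memref.
    if (rootOp->getNumResults() == 0) {
      rewriter.eraseOp(rootOp);
    } else {
      assert(*newValue != Value() && "can't replace an op use with Value()");
      rewriter.replaceOp(rootOp, *newValue);
    }
    return success();
  }

  // Patterns return the replacement value, or Value() when the rewritten op
  // produces no result.
  virtual FailureOr<Value>
  matchAndRewriteMaskableOp(SourceOp op, MaskingOpInterface maskOp,
                            PatternRewriter &rewriter) const = 0;
};
```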

@nujaa (Contributor, Author) commented May 13, 2024:

@banach-space

@llvmbot (Member) commented May 13, 2024:

@llvm/pr-subscribers-mlir

@llvm/pr-subscribers-mlir-vector

Author: Hugo Trachino (nujaa)

Changes

Implements TransferWritePermutationLowering, TransferReadPermutationLowering and TransferWriteNonPermutationLowering as MaskableOpRewritePatterns, allowing the patterns to exit gracefully when such an xferOp is used inside a vector::MaskOp.

Split of #90835


Full diff: https://github.com/llvm/llvm-project/pull/91987.diff

2 Files Affected:

  • (modified) mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp (+40-24)
  • (modified) mlir/test/Dialect/Vector/vector-transfer-permutation-lowering.mlir (+77)
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp
index b30b43d70bf0f..7f5703b635068 100644
--- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp
@@ -90,14 +90,19 @@ namespace {
 /// Note that an alternative is to transform it to linalg.transpose +
 /// vector.transfer_read to do the transpose in memory instead.
 struct TransferReadPermutationLowering
-    : public OpRewritePattern<vector::TransferReadOp> {
-  using OpRewritePattern::OpRewritePattern;
+    : public MaskableOpRewritePattern<vector::TransferReadOp> {
+  using MaskableOpRewritePattern::MaskableOpRewritePattern;
 
-  LogicalResult matchAndRewrite(vector::TransferReadOp op,
-                                PatternRewriter &rewriter) const override {
+  FailureOr<mlir::Value>
+  matchAndRewriteMaskableOp(vector::TransferReadOp op,
+                            MaskingOpInterface maskOp,
+                            PatternRewriter &rewriter) const override {
     // TODO: support 0-d corner case.
     if (op.getTransferRank() == 0)
       return rewriter.notifyMatchFailure(op, "0-d corner case not supported");
+    // TODO: Support transfer_read inside MaskOp case.
+    if (maskOp)
+      return rewriter.notifyMatchFailure(op, "Masked case not supported");
 
     SmallVector<unsigned> permutation;
     AffineMap map = op.getPermutationMap();
@@ -142,9 +147,9 @@ struct TransferReadPermutationLowering
 
     // Transpose result of transfer_read.
     SmallVector<int64_t> transposePerm(permutation.begin(), permutation.end());
-    rewriter.replaceOpWithNewOp<vector::TransposeOp>(op, newRead,
-                                                     transposePerm);
-    return success();
+    return rewriter
+        .create<vector::TransposeOp>(op.getLoc(), newRead, transposePerm)
+        .getResult();
   }
 };
 
@@ -165,14 +170,19 @@ struct TransferReadPermutationLowering
 ///     %v = vector.transfer_write %tmp ...
 ///         permutation_map: (d0, d1, d2, d3) -> (d2, d3)
 struct TransferWritePermutationLowering
-    : public OpRewritePattern<vector::TransferWriteOp> {
-  using OpRewritePattern::OpRewritePattern;
+    : public MaskableOpRewritePattern<vector::TransferWriteOp> {
+  using MaskableOpRewritePattern::MaskableOpRewritePattern;
 
-  LogicalResult matchAndRewrite(vector::TransferWriteOp op,
-                                PatternRewriter &rewriter) const override {
+  FailureOr<mlir::Value>
+  matchAndRewriteMaskableOp(vector::TransferWriteOp op,
+                            MaskingOpInterface maskOp,
+                            PatternRewriter &rewriter) const override {
     // TODO: support 0-d corner case.
     if (op.getTransferRank() == 0)
       return rewriter.notifyMatchFailure(op, "0-d corner case not supported");
+    // TODO: Support transfer_write inside MaskOp case.
+    if (maskOp)
+      return rewriter.notifyMatchFailure(op, "Masked case not supported");
 
     SmallVector<unsigned> permutation;
     AffineMap map = op.getPermutationMap();
@@ -207,11 +217,11 @@ struct TransferWritePermutationLowering
         op.getLoc(), op.getVector(), indices);
     auto newMap = AffineMap::getMinorIdentityMap(
         map.getNumDims(), map.getNumResults(), rewriter.getContext());
-    rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
-        op, newVec, op.getSource(), op.getIndices(), AffineMapAttr::get(newMap),
-        op.getMask(), newInBoundsAttr);
-
-    return success();
+    return rewriter
+        .create<vector::TransferWriteOp>(
+            op.getLoc(), newVec, op.getSource(), op.getIndices(),
+            AffineMapAttr::get(newMap), op.getMask(), newInBoundsAttr)
+        .getResult();
   }
 };
 
@@ -231,14 +241,19 @@ struct TransferWritePermutationLowering
 ///     vector<1x8x16xf32>
 /// ```
 struct TransferWriteNonPermutationLowering
-    : public OpRewritePattern<vector::TransferWriteOp> {
-  using OpRewritePattern::OpRewritePattern;
+    : public MaskableOpRewritePattern<vector::TransferWriteOp> {
+  using MaskableOpRewritePattern::MaskableOpRewritePattern;
 
-  LogicalResult matchAndRewrite(vector::TransferWriteOp op,
-                                PatternRewriter &rewriter) const override {
+  FailureOr<mlir::Value>
+  matchAndRewriteMaskableOp(vector::TransferWriteOp op,
+                            MaskingOpInterface maskOp,
+                            PatternRewriter &rewriter) const override {
     // TODO: support 0-d corner case.
     if (op.getTransferRank() == 0)
       return rewriter.notifyMatchFailure(op, "0-d corner case not supported");
+    // TODO: Support transfer_write inside MaskOp case.
+    if (maskOp)
+      return rewriter.notifyMatchFailure(op, "Masked case not supported");
 
     SmallVector<unsigned> permutation;
     AffineMap map = op.getPermutationMap();
@@ -285,10 +300,11 @@ struct TransferWriteNonPermutationLowering
       newInBoundsValues.push_back(op.isDimInBounds(i));
     }
     ArrayAttr newInBoundsAttr = rewriter.getBoolArrayAttr(newInBoundsValues);
-    rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
-        op, newVec, op.getSource(), op.getIndices(), AffineMapAttr::get(newMap),
-        newMask, newInBoundsAttr);
-    return success();
+    return rewriter
+        .create<vector::TransferWriteOp>(
+            op.getLoc(), newVec, op.getSource(), op.getIndices(),
+            AffineMapAttr::get(newMap), newMask, newInBoundsAttr)
+        .getResult();
   }
 };
 
diff --git a/mlir/test/Dialect/Vector/vector-transfer-permutation-lowering.mlir b/mlir/test/Dialect/Vector/vector-transfer-permutation-lowering.mlir
index e48af3cd7aace..a53e2a9e50ba2 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-permutation-lowering.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-permutation-lowering.mlir
@@ -46,6 +46,52 @@ func.func @permutation_with_mask_xfer_write_scalable(%arg0: vector<4x[8]xi16>, %
     return
 }
 
+// transfer_write in MaskOp case not supported.
+// CHECK-LABEL: func @masked_permutation_xfer_write_fixed_width
+//  CHECK-SAME:        %[[ARG_0:.*]]: tensor<?x?xf32>,
+//  CHECK-SAME:        %[[ARG_1:.*]]: vector<16xf32>,
+//  CHECK-SAME:        %[[IDX:.*]]: index,
+//  CHECK-SAME:        %[[MASK:.*]]: vector<16xi1>
+//       CHECK:   %[[RES:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[ARG_1]], %[[ARG_0]][%[[IDX]], %[[IDX]]] {{.*}} vector<16xf32>, tensor<?x?xf32> } : vector<16xi1> -> tensor<?x?xf32>
+//       CHECK:   return %[[RES]]
+func.func @masked_permutation_xfer_write_fixed_width(%t: tensor<?x?xf32>, %val: vector<16xf32>, %idx: index, %mask: vector<16xi1>) -> tensor<?x?xf32> {
+  %r = vector.mask %mask { vector.transfer_write %val, %t[%idx, %idx] {permutation_map = affine_map<(d0, d1) -> (d0)>} : vector<16xf32>, tensor<?x?xf32> } : vector<16xi1> -> tensor<?x?xf32>
+  return %r : tensor<?x?xf32>
+}
+
+// CHECK-LABEL:           func.func @masked_permutation_xfer_write_scalable(
+//  CHECK-SAME:        %[[ARG_0:.*]]: vector<4x[8]xi16>,
+//  CHECK-SAME:        %[[ARG_1:.*]]: tensor<?x?x?x?xf32>,
+//  CHECK-SAME:        %[[MASK:.*]]: vector<4x[8]xi1>)
+//  CHECK-SAME:        -> tensor<?x?x?x?xf32> {
+//       CHECK:             %[[C0:.*]] = arith.constant 0 : index
+//       CHECK:             %[[R:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[ARG_0]], %[[ARG_1]][%c0, %c0, %c0, %c0] {in_bounds = [true, true], permutation_map = #[[MAP:.*]]} : vector<4x[8]xi16>, tensor<?x?x?x?xf32> } : vector<4x[8]xi1> -> tensor<?x?x?x?xf32>
+//       CHECK:             return %[[R]] : tensor<?x?x?x?xf32>
+func.func @masked_permutation_xfer_write_scalable(%arg0: vector<4x[8]xi16>, %t: tensor<?x?x?x?xf32>, %mask:  vector<4x[8]xi1>) -> tensor<?x?x?x?xf32> {
+     %c0 = arith.constant 0 : index
+     %r = vector.mask %mask { vector.transfer_write %arg0, %t[%c0, %c0, %c0, %c0] {in_bounds = [true, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d1, d2)>
+} : vector<4x[8]xi16>, tensor<?x?x?x?xf32> } : vector<4x[8]xi1> -> tensor<?x?x?x?xf32>
+
+    return %r : tensor<?x?x?x?xf32>
+}
+
+// transfer_write in MaskOp case not supported.
+// CHECK-LABEL: func @masked_non_permutation_xfer_write_fixed_width
+//  CHECK-SAME:      %[[ARG0:.*]]: tensor<?x?x?x?xf32>
+//  CHECK-SAME:      %[[ARG1:.*]]: vector<14x8x16xf32>
+//  CHECK-SAME:      %[[IDX:.*]]: index) -> tensor<?x?x?x?xf32>
+func.func @masked_non_permutation_xfer_write_fixed_width(
+    %arg0 : tensor<?x?x?x?xf32>,
+    %v1 : vector<14x8x16xf32>, %dim : index) -> tensor<?x?x?x?xf32> {
+  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+  %c0 = arith.constant 0 : index
+  %mask = vector.create_mask %dim, %dim, %dim : vector<14x8x16xi1>
+  %0 = vector.mask %mask { vector.transfer_write %v1, %arg0[%c0, %c0, %c0, %c0] {in_bounds = [false, false, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>} : vector<14x8x16xf32>, tensor<?x?x?x?xf32> } : vector<14x8x16xi1> -> tensor<?x?x?x?xf32>
+  // CHECK: %[[masked1:.*]] = vector.mask %0 { vector.transfer_write %[[ARG1]], %[[ARG0]]{{.*}}permutation_map = #[[MAP:.*]]} : vector<14x8x16xf32>, tensor<?x?x?x?xf32> } : vector<14x8x16xi1> -> tensor<?x?x?x?xf32>
+
+  return %0 : tensor<?x?x?x?xf32>
+}
+
 ///----------------------------------------------------------------------------------------
 /// vector.transfer_read
 ///----------------------------------------------------------------------------------------
@@ -101,6 +147,37 @@ func.func @permutation_with_mask_xfer_read_scalable(%mem: memref<?x?xf32>, %dim_
   return %1 : vector<8x[4]x2xf32>
 }
 
+// transfer_read in MaskOp case not supported.
+// CHECK-LABEL: func @masked_permutation_xfer_read_fixed_width
+//  CHECK-SAME:        %[[ARG_0:.*]]: tensor<?x1xf32>,
+//  CHECK-SAME:        %[[ARG_1:.*]]: vector<4x1xi1>
+//       CHECK: vector.mask %[[ARG_1]] { vector.transfer_read %[[ARG_0]]{{.*}}: tensor<?x1xf32>, vector<1x4x4xf32> } : vector<4x1xi1> -> vector<1x4x4xf32>
+func.func @masked_permutation_xfer_read_fixed_width(%arg0: tensor<?x1xf32>, %mask : vector<4x1xi1>) {
+  %cst = arith.constant 0.000000e+00 : f32
+  %c0 = arith.constant 0 : index
+  %3 = vector.mask %mask { vector.transfer_read %arg0[%c0, %c0], %cst {permutation_map = affine_map<(d0, d1) -> (d1, 0, d0)>} : tensor<?x1xf32>, vector<1x4x4xf32> } : vector<4x1xi1> -> vector<1x4x4xf32>
+  call @test.some_use(%3) : (vector<1x4x4xf32>) -> ()
+  return
+}
+func.func private @test.some_use(vector<1x4x4xf32>)
+
+// CHECK-LABEL:   func.func @masked_permutation_xfer_read_scalable(
+// CHECK-SAME:      %[[ARG_0:.*]]: tensor<?x?xf32>,
+// CHECK-SAME:      %[[MASK:.*]]: vector<2x[4]xi1>) -> vector<8x[4]x2xf32> {
+// CHECK:           %[[C0:.*]] = arith.constant 0 : index
+// CHECK:           %[[T_READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[ARG_0]][%[[C0]], %[[C0]]], %cst {in_bounds = [true, true, true], permutation_map = #[[MAP:.*]]} : tensor<?x?xf32>, vector<8x[4]x2xf32> } : vector<2x[4]xi1> -> vector<8x[4]x2xf32>
+// CHECK:           return %[[T_READ]] : vector<8x[4]x2xf32>
+func.func @masked_permutation_xfer_read_scalable(%t: tensor<?x?xf32>, %mask : vector<2x[4]xi1>) -> vector<8x[4]x2xf32> {
+
+  %c0 = arith.constant 0 : index
+  %cst_0 = arith.constant 0.000000e+00 : f32
+
+  %1 = vector.mask %mask { vector.transfer_read %t[%c0, %c0], %cst_0
+    {in_bounds = [true, true, true], permutation_map = affine_map<(d0, d1) -> (0, d1, d0)>}
+    : tensor<?x?xf32>, vector<8x[4]x2xf32> } :vector<2x[4]xi1> -> vector<8x[4]x2xf32>
+  return %1 : vector<8x[4]x2xf32>
+}
+
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%module_op: !transform.any_op {transform.readonly}) {
     %f = transform.structured.match ops{["func.func"]} in %module_op
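
For reference, a minimal harness sketch showing how the patterns exercised by these tests are typically applied. This driver function is not part of the PR, but populateVectorTransferPermutationMapLoweringPatterns and applyPatternsAndFoldGreedily are the existing upstream entry points.

```cpp
#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

// Greedily apply the transfer-permutation lowering patterns. With this PR,
// xfer ops wrapped in vector.mask make the patterns bail out gracefully
// instead of producing incorrect IR.
static mlir::LogicalResult lowerTransferPermutations(mlir::Operation *root) {
  mlir::RewritePatternSet patterns(root->getContext());
  mlir::vector::populateVectorTransferPermutationMapLoweringPatterns(patterns);
  return mlir::applyPatternsAndFoldGreedily(root, std::move(patterns));
}
```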

@banach-space self-requested a review May 13, 2024 18:47
@banach-space (Contributor) left a comment:

Thanks, I really like how the code is gradually becoming self-documenting :)

LGTM!

@nujaa force-pushed the hugo.maskTfPermutationLowering branch from 74894d7 to c11da46 on May 17, 2024 12:27
@banach-space (Contributor) left a comment:

Hey Hugo, sorry for the delay with this.

Having read this again, I am realising that I forgot about MemRef semantics when implementing MaskableOpRewritePattern, thanks for fixing that! I think it would be good to capture that with some additional comments; see my suggestions inline. It would also be good to update the summary accordingly (something along these lines):

Updates MaskableOpRewritePattern so that it works correctly with MemRefs.

Feel free to re-use and/or re-write.

        AffineMapAttr::get(newMap), op.getMask(), newInBoundsAttr);
    if (newWrite.hasPureTensorSemantics())
      return newWrite.getResult();
    // In memref case, MaskableOpRewritePattern cannot replaceOp with result.

Suggested change:
-    // In memref case, MaskableOpRewritePattern cannot replaceOp with result.
+    // In the memref case there's no return value. Use empty value to signal success.

Comment on lines 160 to 163:

    if (rootOp->getNumResults() == 0 || *newOp == Value())
      rewriter.eraseOp(rootOp);
    else
      rewriter.replaceOp(rootOp, *newOp);

IIUC, the only case that we are testing now is when "there's no return value" and "newOp is Value()". Hence I suggest replacing || with &&.

Suggested change:
-    if (rootOp->getNumResults() == 0 || *newOp == Value())
-      rewriter.eraseOp(rootOp);
-    else
-      rewriter.replaceOp(rootOp, *newOp);
+    // In the memref case there won't be a return value to replace. Instead,
+    // use an empty value to signal success.
+    if (rootOp->getNumResults() == 0 && *newOp == Value())
+      rewriter.eraseOp(rootOp);
+    else
+      rewriter.replaceOp(rootOp, *newOp);

@nujaa (Contributor, Author) commented May 20, 2024:

Sorry for the late answer, I have been thinking about it while implementing and did not come up with a solution I liked. With a fresh mind after the weekend, here is my point: returning Value() means the pattern did NOT fail, i.e. code updates happened but there is no value to give back (e.g. the memref case).
If we split the cases:

if (*newOp == Value()) {
  if (rootOp->getNumResults() == 0) {
    // Simple case.
    rewriter.eraseOp(rootOp);
  } else {
    // We would have to replace a real result with Value(), so there might be
    // uses of rootOp left in the rest of the program if we tried to erase it.
    // So I suggest raising an error.
    raiseError();
  }
} else { // The pattern returned a value.
  if (rootOp->getNumResults() == 1) {
    // Simple case.
    rewriter.replaceOp(rootOp, *newOp);
  } else {
    // We created ops producing a value which should replace something without
    // a result. We can't use that value in the program; it will most likely
    // be DCE-ed.
    rewriter.eraseOp(rootOp);
  }
}

This can then be reduced to:

if (failed(newOp))
  return failure();
if (rootOp->getNumResults() == 0) {
  rewriter.eraseOp(rootOp);
} else {
  assert(*newOp != Value() && "Can't replace an op use with Value()");
  rewriter.replaceOp(rootOp, *newOp);
}
return success();

As an additional point: technically, matchAndRewriteMaskableOp could return a ValueRange, since replaceOp takes a ValueRange as input (replaceOp asserts that rootOp->getNumResults() matches newOp.size()). That would allow handling ops with multiple results, but I suggest doing that as part of a separate patch.
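
A hypothetical sketch of that follow-up (not implemented in this PR): the hook would return a vector of replacement values, and the driver would forward the range to replaceOp.

```cpp
// Hypothetical generalisation, not part of this PR: return a range of
// replacement values so patterns on multi-result ops can be supported.
FailureOr<SmallVector<Value>> newValues =
    matchAndRewriteMaskableOp(sourceOp, maskOp, rewriter);
if (failed(newValues))
  return failure();
if (rootOp->getNumResults() == 0)
  rewriter.eraseOp(rootOp);
else
  rewriter.replaceOp(rootOp, *newValues); // Asserts matching result counts.
return success();
```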

@nujaa (Contributor, Author) commented May 20, 2024:

Thanks for your comments. I interpreted them slightly differently. Feel free to debate, or merge if you are satisfied.

@banach-space (Contributor) commented:

We are on the same page here, thanks for seeing this through!

One thing that this discussion makes me question: should vector.mask allow memref semantics? Is that needed at all? I’m leaning towards “no”, but that’s a discussion for a different PR.

@banach-space merged commit fdd245a into llvm:main May 20, 2024
4 checks passed