-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[mlir][Vectorizer] Added support to Vectorize tensor.unpack #76087
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mlir-tensor @llvm/pr-subscribers-mlir Author: Balaji V. Iyer. (bviyer) Changes: Added support to vectorize tensor.unpack. The unpack Op is split into a vector.transfer_read, vector.transpose, vector.shape_cast, and a vector.transfer_write. Full diff: https://github.com/llvm/llvm-project/pull/76087.diff 3 Files Affected:
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 14404d837ff748..4c456a5e671f5d 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -3078,7 +3078,7 @@ DiagnosedSilenceableFailure transform::VectorizeOp::apply(
// TODO: Check that the correct number of vectorSizes was provided.
for (Operation *target : targets) {
- if (!isa<linalg::LinalgOp, tensor::PadOp>(target)) {
+ if (!isa<linalg::LinalgOp, tensor::PadOp, tensor::UnPackOp>(target)) {
return mlir::emitSilenceableFailure(target->getLoc())
<< "Unsupported Op, cannot vectorize";
}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index f9a53a8451a601..7a9846154bf34b 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -19,6 +19,7 @@
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Interfaces/MaskableOpInterface.h"
@@ -1385,6 +1386,88 @@ vectorizeAsLinalgGeneric(RewriterBase &rewriter, VectorizationState &state,
return success();
}
+// Vectorize an `tensor::UnPackOp` without OuterDimsPerms to these 4 Ops:
+// Vector::TransferReadOp - Reads the Vector Array of Source data
+// vector::TransposeOp - Transpose the Source
+// ShapeCastOp - Reshapes the data based on the target.
+// vector::TransferWriteOp. - Write the result vector back.
+
+static LogicalResult vectorizeAsUnpackOp(RewriterBase &rewriter,
+ tensor::UnPackOp unpackOp,
+ ArrayRef<int64_t> inputVectorSizes,
+ SmallVectorImpl<Value> &newResults) {
+
+ if (!unpackOp.getOuterDimsPerm().empty()) {
+ LDBG("outer dimensions perms NYI for: " << unpackOp);
+ return failure();
+ }
+
+ OpBuilder::InsertionGuard g(rewriter);
+ rewriter.setInsertionPoint(unpackOp);
+
+ RankedTensorType packTensorType = unpackOp.getSourceType();
+ auto maskType =
+ VectorType::get(packTensorType.getShape(), rewriter.getI1Type());
+ auto vectorType = VectorType::get(packTensorType.getShape(),
+ packTensorType.getElementType());
+ ReifiedRankedShapedTypeDims reifiedRetShapes;
+ LogicalResult status =
+ cast<ReifyRankedShapedTypeOpInterface>(unpackOp.getOperation())
+ .reifyResultShapes(rewriter, reifiedRetShapes);
+ if (status.failed()) {
+ LDBG("Unable to reify result shapes of " << unpackOp);
+ return failure();
+ }
+
+ arith::ConstantIndexOp zeroOp =
+ rewriter.create<arith::ConstantIndexOp>(unpackOp->getLoc(), 0);
+ Value mask = rewriter.create<vector::CreateMaskOp>(
+ unpackOp.getLoc(), maskType,
+ tensor::getMixedSizes(rewriter, unpackOp.getLoc(), unpackOp.getSource()));
+
+ vector::TransferReadOp readOp = rewriter.create<vector::TransferReadOp>(
+ unpackOp.getLoc(), vectorType, unpackOp.getSource(),
+ SmallVector<Value>(packTensorType.getRank(), zeroOp),
+ rewriter.getMultiDimIdentityMap(packTensorType.getRank()));
+
+ vector::MaskOp maskedOp =
+ cast<vector::MaskOp>(mlir::vector::maskOperation(rewriter, readOp, mask));
+
+ int64_t numPackedDim = unpackOp.getInnerDimsPos().size();
+ int64_t packRank = packTensorType.getRank();
+ auto lastDims =
+ llvm::to_vector(llvm::seq<int64_t>(packRank - numPackedDim, packRank));
+ PackingMetadata packMetadata =
+ computePackingMetadata(packRank, unpackOp.getInnerDimsPos());
+ SmallVector<int64_t> lastDimToInsertPosPerm = computePermutationVector(
+ packRank, lastDims, packMetadata.insertPositions);
+ SmallVector<int64_t> stripMineShape(packTensorType.getShape());
+ applyPermutationToVector(stripMineShape, lastDimToInsertPosPerm);
+
+ RankedTensorType stripMineTensorType =
+ RankedTensorType::Builder(packTensorType).setShape(stripMineShape);
+
+ RankedTensorType collapsedType = tensor::CollapseShapeOp::inferCollapsedType(
+ stripMineTensorType, packMetadata.reassociations);
+ auto vecCollapsedType =
+ VectorType::get(collapsedType.getShape(), collapsedType.getElementType());
+
+ vector::TransposeOp transposeOp = rewriter.create<vector::TransposeOp>(
+ unpackOp.getLoc(), maskedOp.getResult(0), lastDimToInsertPosPerm);
+
+ vector::ShapeCastOp shapeCastOp = rewriter.create<vector::ShapeCastOp>(
+ unpackOp.getLoc(), vecCollapsedType, transposeOp->getResult(0));
+ tensor::EmptyOp emptyOp = rewriter.create<tensor::EmptyOp>(
+ unpackOp.getLoc(), reifiedRetShapes[0], packTensorType.getElementType());
+
+ vector::TransferWriteOp writeOp = rewriter.create<vector::TransferWriteOp>(
+ unpackOp.getLoc(), shapeCastOp->getResult(0), emptyOp,
+ SmallVector<Value>(lastDims.size(), zeroOp),
+ SmallVector<bool>(lastDims.size(), true));
+
+ newResults.push_back(writeOp->getResult(0));
+ return success();
+}
/// Vectorize a `padOp` with (1) static result type, (2) constant padding value
/// and (3) all-zero lowPad to
@@ -1578,6 +1661,12 @@ vectorizeLinalgOpPrecondition(LinalgOp linalgOp,
return success();
}
+static LogicalResult
+vectorizeUnpackOpPrecondition(tensor::UnPackOp unpackOp,
+ ArrayRef<int64_t> inputVectorSizes) {
+ return success();
+}
+
static LogicalResult
vectorizePadOpPrecondition(tensor::PadOp padOp,
ArrayRef<int64_t> inputVectorSizes) {
@@ -1637,6 +1726,9 @@ LogicalResult mlir::linalg::vectorizeOpPrecondition(
.Case<tensor::PadOp>([&](auto padOp) {
return vectorizePadOpPrecondition(padOp, inputVectorSizes);
})
+ .Case<tensor::UnPackOp>([&](auto unpackOp) {
+ return vectorizeUnpackOpPrecondition(unpackOp, inputVectorSizes);
+ })
.Default([](auto) { return failure(); });
}
@@ -1724,6 +1816,10 @@ LogicalResult mlir::linalg::vectorize(RewriterBase &rewriter, Operation *op,
return vectorizeAsTensorPadOp(rewriter, padOp, inputVectorSizes,
results);
})
+ .Case<tensor::UnPackOp>([&](auto unpackOp) {
+ return vectorizeAsUnpackOp(rewriter, unpackOp, inputVectorSizes,
+ results);
+ })
.Default([](auto) { return failure(); });
if (failed(vectorizeResult)) {
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index 610339405d1c2c..acf7276626a4c5 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -419,6 +419,31 @@ module attributes {transform.with_named_sequence} {
// -----
+// CHECK-LABEL: func @test_vectorize_unpack
+func.func @test_vectorize_unpack(%0 : tensor<7x1136x16x16xf32>) -> tensor<100x18176xf32> {
+ // CHECK %[[c0:.*]] = arith.constant 0 : index
+ // CHECK: %[[tr0:.*]] = vector.mask %[[m0:.*]] {{.*}} vector.transfer_read %{{.*}} : tensor<7x1136x16x16xf32>, vector<7x1136x16x16xf32> } : vector<7x1136x16x16xi1> -> vector<7x1136x16x16xf32>
+ // CHECK: %[[trans0:.*]] = vector.transpose %[[tr0]], [0, 2, 1, 3] : vector<7x1136x16x16xf32> to vector<7x16x1136x16xf32>
+ // CHECK: %[[sc0:.*]] = vector.shape_cast %[[trans0]] : vector<7x16x1136x16xf32> to vector<112x18176xf32>
+ // CHECK: %[[empt0:.*]] = tensor.empty() : tensor<100x18176xf32>
+ // CHECK: %[[tw0:.*]] = vector.transfer_write %[[sc0]], %[[empt0]]
+ // CHECK: return %[[tw0]]
+ %8 = tensor.empty() : tensor<100x18176xf32>
+ %unpack = tensor.unpack %0 inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %8 : tensor<7x1136x16x16xf32> -> tensor<100x18176xf32>
+ return %unpack : tensor<100x18176xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["tensor.unpack"]} in %arg1
+ : (!transform.any_op) -> !transform.any_op
+ transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
+ transform.yield
+ }
+}
+
+// -----
+
// CHECK-LABEL: func @test_masked_vectorize_pad
func.func @test_masked_vectorize_pad(
%0 : tensor<?x?xf32>, %h0 : index, %h1 : index)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The approach looks good to me (% Hanhan's comments). Dropped a few comments.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like this needs more discussion. Let's hold on a bit
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A high-level comment: IMO, inputVectorSizes
should be in unpacked domain (i.e., it is for dest shapes). In this context, the behavior will be aligned with tiling. The tile_sizes in TilingInterface implementations are provided for unpacked domain (i.e., dest shape). E.g.,
llvm-project/mlir/test/Dialect/Tensor/tiling.mlir
Lines 401 to 446 in adbf21f
// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 floordiv 32)> | |
// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 mod 32)> | |
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0) -> ((d0 + 1) floordiv 32 - d0 floordiv 32 + 1)> | |
// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0) -> (d0 floordiv 16)> | |
// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0) -> (d0 mod 16)> | |
// CHECK-DAG: #[[MAP6:.+]] = affine_map<(d0) -> ((d0 + 3) floordiv 16 - d0 floordiv 16 + 1)> | |
// CHECK: func.func @NCnc_to_NC | |
// CHECK-SAME: %[[IN:[A-Za-z0-9]+]]: | |
// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: | |
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index | |
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index | |
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index | |
// CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index | |
// CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index | |
// CHECK: %{{.+}} = scf.for %[[I:.+]] = %[[C0]] to %[[C256]] step %[[C2]] | |
// CHECK: %{{.+}} = scf.for %[[J:.+]] = %[[C0]] to %[[C128]] step %[[C4]] | |
// CHECK-DAG: %[[IN_I:.+]] = affine.apply #[[MAP0]](%[[I]]) | |
// CHECK-DAG: %[[OFFSET_I:.+]] = affine.apply #[[MAP1]](%[[I]]) | |
// CHECK-DAG: %[[IN_I_SZ:.+]] = affine.apply #[[MAP2]](%[[I]]) | |
// CHECK-DAG: %[[IN_J:.+]] = affine.apply #[[MAP4]](%[[J]]) | |
// CHECK-DAG: %[[OFFSET_J:.+]] = affine.apply #[[MAP5]](%[[J]]) | |
// CHECK-DAG: %[[IN_J_SZ:.+]] = affine.apply #[[MAP6]](%[[J]]) | |
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[IN]] | |
// CHECK-SAME: [%[[IN_I]], %[[IN_J]], 0, 0] [%[[IN_I_SZ]], %[[IN_J_SZ]], 32, 16] | |
// CHECK-SAME: : tensor<8x8x32x16xf32> to tensor<?x?x32x16xf32> | |
// CHECK: %[[EMPTY:.+]] = tensor.empty | |
// CHECK: %[[UNPACK:.+]] = tensor.unpack | |
// CHECK-SAME: %[[SLICE]] inner_dims_pos = [0, 1] inner_tiles = [32, 16] | |
// CHECK-SAME: into %[[EMPTY]] | |
// CHECK: %[[UNPACK_SLICE:.+]] = tensor.extract_slice %[[UNPACK]] | |
// CHECK-SAME: [%[[OFFSET_I]], %[[OFFSET_J]]] [2, 4] | |
// CHECK: %[[RES:.+]] = tensor.insert_slice %[[UNPACK_SLICE]] | |
// CHECK-SAME: into %{{.+}}[%[[I]], %[[J]]] [2, 4] | |
// CHECK: scf.yield %[[RES]] | |
func.func @NCnc_to_NC(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> { | |
%0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32> | |
return %0 : tensor<256x128xf32> | |
} | |
module attributes {transform.with_named_sequence} { | |
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { | |
%0 = transform.structured.match ops{["tensor.unpack"]} in %arg1 : (!transform.any_op) -> !transform.any_op | |
%1, %loops:2 = transform.structured.tile_using_for %0 [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) | |
transform.yield | |
} | |
} |
I think we really need them being aligned. Otherwise it leads to a lot of corner cases that play with other passes. E.g., IREE CPU backend has lowering_config. We use the tile_sizes to decide input_vector_sizes. If they don't match, you will have to implement other logics to make them align.
This patch allows vectorization of a `tensor.unpack` operation. It also adds a dynamic-shapes test case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I started review and the realized that I couldn't find the code for some of the comments marked as fixed so I'm assuming that perhaps you didn't upload the latest version of the code? :)
432ce04
to
d5a0dec
Compare
I will be out for a week. @Max191 can you help on the review? (Feel free to ping me if you have any questions)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! I took a first look and dropped some comments!
@@ -1559,6 +1571,90 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, | |||
return success(); | |||
} | |||
|
|||
/// Vectorize a `tensor::UnPackOp` without OuterDimsPerms to these 4 Ops: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what happens for cases with OuterDimsPerms?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added this in c7ed75e
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
update doc accordingly?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall, looks good. I think we should try to support outer_dims_perm cases though. I added a comment describing how you can do that.
One of the tests looks wrong to me, although the logic for vectorization seems good to me. Maybe the test is incorrect?
Also, I think we can just use a zero constant as the padding_value for unpack. The padding_value should not show up in the result of the unpack, so it doesn't matter what value it is.
// CHECK: %[[CNST2:.*]] = arith.constant 2 : index | ||
// CHECK: %[[readMsk0:.*]] = vector.create_mask %[[DIM4]], %[[DIM6]], %[[CNST16]], %[[CNST2]] : vector<2x1x16x2xi1> | ||
// CHECK: %[[read0:.*]] = vector.mask %[[readMsk0]] {{.*}} vector.transfer_read %{{.*}} : tensor<?x?x16x2xf32>, vector<2x1x16x2xf32> } : vector<2x1x16x2xi1> -> vector<2x1x16x2xf32> | ||
// CHECK: %[[trans0:.*]] = vector.transpose %[[read0]], [0, 2, 3, 1] : vector<2x1x16x2xf32> to vector<2x16x2x1xf32> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like the inverse of the correct permutation. I think this transpose should be [0, 3, 1, 2]
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure I understand you. It is taking a 2x1x16x2 and converting to 2x16x2x1. This should correspond to [0(2), 2(16), 3(2), 1(1)] (note: shape in parenthesis)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this case, the result type of the transpose should be vector<2x(2)x1x(16)>
, where the inner_tiles are next to their corresponding inner_dim. This would correspond to a permutation of [0, 3, 1, 2]
, which is the inverse of the permutation in the test.
I think you just need to use the inverse of the permutation that you are currently using (see the above comment).
(Except handling of outer Dimensions attribute)
@@ -1559,6 +1571,90 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp, | |||
return success(); | |||
} | |||
|
|||
/// Vectorize a `tensor::UnPackOp` without OuterDimsPerms to these 4 Ops: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
update doc accordingly?
Fly by comment: this is really important, thanks for working on this! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM! Just a few nits. Please, wait for Max's approval before landing
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just a couple comments about some of the permutation logic. I think I gave you some slightly out of order logic for the outer_dims_perm, so I am sorry about that, but the fixes should be pretty quick.
// CHECK: %[[CNST2:.*]] = arith.constant 2 : index | ||
// CHECK: %[[readMsk0:.*]] = vector.create_mask %[[DIM4]], %[[DIM6]], %[[CNST16]], %[[CNST2]] : vector<2x1x16x2xi1> | ||
// CHECK: %[[read0:.*]] = vector.mask %[[readMsk0]] {{.*}} vector.transfer_read %{{.*}} : tensor<?x?x16x2xf32>, vector<2x1x16x2xf32> } : vector<2x1x16x2xi1> -> vector<2x1x16x2xf32> | ||
// CHECK: %[[trans0:.*]] = vector.transpose %[[read0]], [0, 2, 3, 1] : vector<2x1x16x2xf32> to vector<2x16x2x1xf32> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this case, the result type of the transpose should be vector<2x(2)x1x(16)>
, where the inner_tiles are next to their corresponding inner_dim. This would correspond to a permutation of [0, 3, 1, 2]
, which is the inverse of the permutation in the test.
I think you just need to use the inverse of the permutation that you are currently using (see the above comment).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good, thanks for pushing on this!
Added support to vectorize tensor.unpack. The unpack Op is split into a
`vector.transfer_read`,
`vector.transpose`,
`vector.shape_cast`,
and a `vector.transfer_write`.