[mlir][bufferization][NFC] Rename copy_tensor op to materialize_in_destination #65467

Merged 1 commit on Sep 12, 2023.

31 changes: 21 additions & 10 deletions mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
@@ -209,22 +209,33 @@ def Bufferization_CloneOp : Bufferization_Op<"clone", [
}

//===----------------------------------------------------------------------===//
// CopyTensorOp
// MaterializeInDestinationOp
//===----------------------------------------------------------------------===//

def Bufferization_CopyTensorOp : Bufferization_Op<"copy_tensor",
[BufferizableOpInterface, SameOperandsAndResultType,
DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>]> {
def Bufferization_MaterializeInDestinationOp
: Bufferization_Op<"materialize_in_destination",
[BufferizableOpInterface, SameOperandsAndResultType,
DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>]> {
let summary = "copy a tensor";

let description = [{
Copy the contents of the source tensor into the destination tensor. This
operation is guaranteed to bufferize to a memory copy.
This op indicates that the data of the `source` tensor should materialize
in the future buffer of the `dest` tensor. Both tensors must have the same
shape and element type at runtime.

By default, this op bufferizes to a memcpy from the future buffer of the
`source` tensor to the future buffer of the `dest` tensor. However,
transformations such as "empty tensor elimination" may rewrite IR such that
a computation is performed directly in the future buffer of the `dest`
tensor and no memcpy is needed.

Note: "tensor.insert_slice" could be used for the same purpose, but since
tensor dialect ops only indicate *what* should be computed but not *where*,
it could fold away, causing the computation to materialize in a different
buffer.
Review comment on lines +234 to +235 (Member):

If this lowers to a memcpy, doesn't it also materialize in a different buffer? Do you maybe have a concrete example in mind that you could add here? Or could you explain in a bit more detail why materializing in a different buffer is a problem?

Reply (Member Author):

It could lower to something like memref.memcpy %x, %x. That's the case when the source and the destination tensor are equivalent. E.g.:

%0 = arith.select %c, %t, %t
%1 = bufferization.materialize_in_destination %0 in %t

In the above example that's easy to see and the copy could fold away, but it may not be obvious in more complex cases (e.g., with tiling, nested loops, etc.).
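For illustration, a rough sketch of how that case could look after bufferization, assuming %0 and %t both map to one buffer %m (names and types are hypothetical; the actual lowering in this patch goes through memref.tensor_store, see BufferizationOps.cpp below):

  // %m backs both the select result and the destination tensor.
  %m = memref.alloc() : memref<5xf32>
  // The materialization degenerates to a self-copy that later cleanups may
  // remove, but the constraint that the result lives in %t's buffer is kept.
  memref.copy %m, %m : memref<5xf32> to memref<5xf32>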

Reply (Member):

Thanks! That helped a lot. Should we add it to the docs as well?

}];

let arguments = (ins AnyTensor:$source,
AnyTensor:$dest);

let arguments = (ins AnyTensor:$source, AnyTensor:$dest);
let results = (outs AnyTensor:$result);

let extraClassDeclaration = [{
@@ -245,7 +256,7 @@ def Bufferization_CopyTensorOp : Bufferization_Op<"copy_tensor",
}
}];

let assemblyFormat = "$source `,` $dest attr-dict `:` type($source)";
let assemblyFormat = "$source `in` $dest attr-dict `:` type($source)";
}
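As a reference for the renamed op and its new `in` syntax, a minimal sketch (function and value names are hypothetical):

  func.func @example(%source: tensor<5xf32>, %dest: tensor<5xf32>) -> tensor<5xf32> {
    // Request that the contents of %source materialize in the future buffer of %dest.
    %0 = bufferization.materialize_in_destination %source in %dest : tensor<5xf32>
    return %0 : tensor<5xf32>
  }

By default this bufferizes to a copy into the buffer of %dest; transformations such as empty tensor elimination may instead place the producing computation directly in that buffer so that no copy is needed.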

//===----------------------------------------------------------------------===//
@@ -941,7 +941,7 @@ def PadOp : Op<Transform_Dialect, "structured.pad",
the original destination tensor of the targeted op. The op that copies back
the result can be customized with `copy_back_op`:

* "bufferization.copy_tensor" (default)
* "bufferization.materialize_in_destination" (default)
* "linalg.copy"
* "none" (no copy back)
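For illustration, a sketch of selecting the renamed copy-back op from the transform dialect (the handle names are hypothetical and the pad op's other attributes are omitted):

  %padded, %pad, %copy = transform.structured.pad %target {
    copy_back_op = "bufferization.materialize_in_destination"
  } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)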

@@ -966,7 +966,7 @@ def PadOp : Op<Transform_Dialect, "structured.pad",
DefaultValuedAttr<
TypedArrayAttrBase<I64ArrayAttr, "array of arrays of i64">,
"{}">:$transpose_paddings,
DefaultValuedAttr<StrAttr, "::mlir::bufferization::CopyTensorOp::getOperationName()">:$copy_back_op);
DefaultValuedAttr<StrAttr, "::mlir::bufferization::MaterializeInDestinationOp::getOperationName()">:$copy_back_op);
let results = (outs TransformHandleTypeInterface:$padded,
TransformHandleTypeInterface:$pad,
TransformHandleTypeInterface:$copy);
@@ -986,7 +986,7 @@ def PadOp : Op<Transform_Dialect, "structured.pad",
CArg<"ArrayRef<int64_t>", "{}">:$padToMultipleOf,
CArg<"ArrayRef<int64_t>", "{}">:$packPaddings,
CArg<"ArrayRef<Attribute>", "{}">:$transposePaddings,
CArg<"StringRef", "::mlir::bufferization::CopyTensorOp::getOperationName()">:$copyBackOp)>
CArg<"StringRef", "::mlir::bufferization::MaterializeInDestinationOp::getOperationName()">:$copyBackOp)>
];

let extraClassDeclaration = [{
4 changes: 2 additions & 2 deletions mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -299,12 +299,12 @@ struct LinalgPaddingOptions {
}
enum class CopyBackOp : int8_t {
None = 0,
BufferizationCopyTensor = 1,
BufferizationMaterializeInDestination = 1,
LinalgCopy = 2
};
/// The op to be used for copying the padded result to the original
/// destination tensor.
CopyBackOp copyBackOp = CopyBackOp::BufferizationCopyTensor;
CopyBackOp copyBackOp = CopyBackOp::BufferizationMaterializeInDestination;
LinalgPaddingOptions &setCopyBackOp(CopyBackOp op) {
copyBackOp = op;
return *this;
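A minimal C++ usage sketch (illustrative only), assuming the enum and setter declared above:

  LinalgPaddingOptions options;
  options.setCopyBackOp(
      LinalgPaddingOptions::CopyBackOp::BufferizationMaterializeInDestination);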
86 changes: 44 additions & 42 deletions mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -441,48 +441,6 @@ Value AllocTensorOp::getDynamicSize(OpBuilder &b, unsigned idx) {
return getOperand(getIndexOfDynamicSize(idx));
}

//===----------------------------------------------------------------------===//
// CopyTensorOp
//===----------------------------------------------------------------------===//

bool CopyTensorOp::bufferizesToMemoryRead(OpOperand &opOperand,
const AnalysisState &state) {
if (&opOperand == &getOperation()->getOpOperand(0) /*source*/)
return true;
return false;
}

bool CopyTensorOp::bufferizesToMemoryWrite(OpOperand &opOperand,
const AnalysisState &state) {
if (&opOperand == &getOperation()->getOpOperand(1) /*dest*/)
return true;
return false;
}

AliasingValueList CopyTensorOp::getAliasingValues(OpOperand &opOperand,
const AnalysisState &state) {
if (&opOperand == &getOperation()->getOpOperand(1) /*dest*/)
return {{getOperation()->getResult(0), BufferRelation::Equivalent}};
return {};
}

LogicalResult CopyTensorOp::bufferize(RewriterBase &rewriter,
const BufferizationOptions &options) {
FailureOr<Value> buffer = getBuffer(rewriter, getDest(), options);
if (failed(buffer))
return failure();
rewriter.create<memref::TensorStoreOp>(getLoc(), getSource(), *buffer);
replaceOpWithBufferizedValues(rewriter, getOperation(), *buffer);
return success();
}

LogicalResult CopyTensorOp::reifyResultShapes(
OpBuilder &builder, ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
reifiedReturnShapes.resize(1, SmallVector<OpFoldResult>(getType().getRank()));
reifiedReturnShapes[0] = tensor::getMixedSizes(builder, getLoc(), getDest());
return success();
}

//===----------------------------------------------------------------------===//
// CloneOp
//===----------------------------------------------------------------------===//
@@ -585,6 +543,50 @@ LogicalResult DeallocTensorOp::bufferize(RewriterBase &rewriter,
return success();
}

//===----------------------------------------------------------------------===//
// MaterializeInDestinationOp
//===----------------------------------------------------------------------===//

bool MaterializeInDestinationOp::bufferizesToMemoryRead(
OpOperand &opOperand, const AnalysisState &state) {
if (&opOperand == &getOperation()->getOpOperand(0) /*source*/)
return true;
return false;
}

bool MaterializeInDestinationOp::bufferizesToMemoryWrite(
OpOperand &opOperand, const AnalysisState &state) {
if (&opOperand == &getOperation()->getOpOperand(1) /*dest*/)
return true;
return false;
}

AliasingValueList
MaterializeInDestinationOp::getAliasingValues(OpOperand &opOperand,
const AnalysisState &state) {
if (&opOperand == &getOperation()->getOpOperand(1) /*dest*/)
return {{getOperation()->getResult(0), BufferRelation::Equivalent}};
return {};
}

LogicalResult
MaterializeInDestinationOp::bufferize(RewriterBase &rewriter,
const BufferizationOptions &options) {
FailureOr<Value> buffer = getBuffer(rewriter, getDest(), options);
if (failed(buffer))
return failure();
rewriter.create<memref::TensorStoreOp>(getLoc(), getSource(), *buffer);
replaceOpWithBufferizedValues(rewriter, getOperation(), *buffer);
return success();
}

LogicalResult MaterializeInDestinationOp::reifyResultShapes(
OpBuilder &builder, ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
reifiedReturnShapes.resize(1, SmallVector<OpFoldResult>(getType().getRank()));
reifiedReturnShapes[0] = tensor::getMixedSizes(builder, getLoc(), getDest());
return success();
}

//===----------------------------------------------------------------------===//
// ToTensorOp
//===----------------------------------------------------------------------===//
10 changes: 6 additions & 4 deletions mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -1683,9 +1683,10 @@ transform::PadOp::apply(transform::TransformRewriter &rewriter,
options.padToMultipleOf = padToMultipleOf;
options.paddingValues = paddingValues;
options.packPaddings = packPaddings;
if (getCopyBackOp() == bufferization::CopyTensorOp::getOperationName()) {
options.copyBackOp =
LinalgPaddingOptions::CopyBackOp::BufferizationCopyTensor;
if (getCopyBackOp() ==
bufferization::MaterializeInDestinationOp::getOperationName()) {
options.copyBackOp = LinalgPaddingOptions::CopyBackOp::
BufferizationMaterializeInDestination;
} else if (getCopyBackOp() == linalg::CopyOp::getOperationName()) {
options.copyBackOp = LinalgPaddingOptions::CopyBackOp::LinalgCopy;
} else if (getCopyBackOp() == kCopyOpNone) {
@@ -1761,7 +1762,8 @@ LogicalResult transform::PadOp::verify() {
<< attr;
}
}
if (getCopyBackOp() != bufferization::CopyTensorOp::getOperationName() &&
if (getCopyBackOp() !=
bufferization::MaterializeInDestinationOp::getOperationName() &&
getCopyBackOp() != linalg::CopyOp::getOperationName() &&
getCopyBackOp() != kCopyOpNone)
return emitOpError() << "invalid copy_back_op";
8 changes: 5 additions & 3 deletions mlir/lib/Dialect/Linalg/Transforms/Padding.cpp
@@ -245,9 +245,11 @@ linalg::rewriteAsPaddedOp(RewriterBase &rewriter, LinalgOp opToPad,
std::get<1>(it)->get())
.getResult(0));
} else if (options.copyBackOp ==
LinalgPaddingOptions::CopyBackOp::BufferizationCopyTensor) {
replacements.push_back(rewriter.create<bufferization::CopyTensorOp>(
loc, std::get<0>(it), std::get<1>(it)->get()));
LinalgPaddingOptions::CopyBackOp::
BufferizationMaterializeInDestination) {
replacements.push_back(
rewriter.create<bufferization::MaterializeInDestinationOp>(
loc, std::get<0>(it), std::get<1>(it)->get()));
} else {
llvm_unreachable("unsupported copy back op");
}
@@ -224,6 +224,6 @@ func.func @tensor_copy(%arg0: tensor<5xf32>) -> tensor<5xf32> {
// CHECK: memref.dealloc %[[alloc]]
// CHECK: return %[[r]]
%dest = bufferization.alloc_tensor() : tensor<5xf32>
%0 = bufferization.copy_tensor %arg0, %dest : tensor<5xf32>
%0 = bufferization.materialize_in_destination %arg0 in %dest : tensor<5xf32>
return %0 : tensor<5xf32>
}
4 changes: 2 additions & 2 deletions mlir/test/Dialect/Bufferization/invalid.mlir
@@ -99,9 +99,9 @@ func.func @invalid_writable_on_op() {
// -----

// expected-note @below{{prior use here}}
func.func @invalid_tensor_copy(%arg0: tensor<?xf32>, %arg1: tensor<5xf32>) {
func.func @invalid_materialize_in_destination(%arg0: tensor<?xf32>, %arg1: tensor<5xf32>) {
// expected-error @below{{expects different type than prior uses: 'tensor<?xf32>' vs 'tensor<5xf32>'}}
bufferization.copy_tensor %arg0, %arg1 : tensor<?xf32>
bufferization.materialize_in_destination %arg0 in %arg1 : tensor<?xf32>
}

// -----
8 changes: 4 additions & 4 deletions mlir/test/Dialect/Bufferization/ops.mlir
@@ -58,11 +58,11 @@ func.func @test_dealloc_tensor_op(%arg0: tensor<4xi32>) {
return
}

// CHECK-LABEL: func @test_copy_tensor_op
func.func @test_copy_tensor_op(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>)
// CHECK-LABEL: func @test_materialize_in_destination_op
func.func @test_materialize_in_destination_op(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>)
-> tensor<?xf32> {
// CHECK: bufferization.copy_tensor {{.*}} : tensor<?xf32>
%1 = bufferization.copy_tensor %arg0, %arg1 : tensor<?xf32>
// CHECK: bufferization.materialize_in_destination {{.*}} : tensor<?xf32>
%1 = bufferization.materialize_in_destination %arg0 in %arg1 : tensor<?xf32>
return %1 : tensor<?xf32>
}

8 changes: 4 additions & 4 deletions mlir/test/Dialect/Linalg/transform-op-pad.mlir
@@ -27,7 +27,7 @@ func.func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
// CHECK-SAME: outs(%[[T2]] : tensor<4x5xf32>)

// CHECK: %[[T6:.*]] = tensor.extract_slice %[[T5]]
// CHECK: %[[T7:.*]] = bufferization.copy_tensor %[[T6]], %[[T2]]
// CHECK: %[[T7:.*]] = bufferization.materialize_in_destination %[[T6]] in %[[T2]]
%4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
%5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
func.return %5 : tensor<24x25xf32>
@@ -40,9 +40,9 @@ transform.sequence failures(propagate) {
padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
padding_dimensions=[0, 1, 2],
pack_paddings=[1, 1, 0]
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.op<"bufferization.copy_tensor">)
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.op<"bufferization.materialize_in_destination">)
// expected-remark @below {{1}}
test_print_number_of_associated_payload_ir_ops %copy_back : !transform.op<"bufferization.copy_tensor">
test_print_number_of_associated_payload_ir_ops %copy_back : !transform.op<"bufferization.materialize_in_destination">
}

// -----
@@ -272,7 +272,7 @@ func.func @pack_everything(%arg0: tensor<24x12xf32>,
// CHECK: %[[T6:.*]] = tensor.extract_slice %[[T5]]
// Copy back result to the original buffer, so that the destination of the
// computation does not change.
// CHECK: %[[T7:.*]] = bufferization.copy_tensor %[[T6]], %[[T2]]
// CHECK: %[[T7:.*]] = bufferization.materialize_in_destination %[[T6]] in %[[T2]]
%4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>

// CHECK: %[[T8:.*]] = tensor.insert_slice %[[T7]] into %{{.*}}