Skip to content

[MLIR] Bufferization behaving differently based on the position of extract_slice. #122869

Open
@pashu123

Description

@pashu123

Input IR:

// Reproducer variant 1: tensor.extract_slice is placed AFTER vector.transfer_write.
// Truncates %arg1 from f32 to f16, writes it into a fresh tensor.empty, and inserts
// the resulting 64x64 tensor into a 1x64x1x64 slice taken from %arg2.
func.func @test_one(%arg0: index, %arg1: vector<64x64xf32>, %arg2: tensor<2x4096x10x64xf16>) -> tensor<1x64x1x64xf16> {
  %c0 = arith.constant 0 : index
  // Destination of the transfer_write below; this is the tensor.empty the pass is expected to eliminate.
  %0 = tensor.empty() : tensor<64x64xf16>
  %1 = arith.truncf %arg1 : vector<64x64xf32> to vector<64x64xf16>
  // The write into %0 happens before the destination slice is defined.
  %2 = vector.transfer_write %1, %0[%c0, %c0] {in_bounds = [true, true]} : vector<64x64xf16>, tensor<64x64xf16>
  // Slice of %arg2 starting at dynamic offset %arg0 in the first dimension.
  %extracted_slice = tensor.extract_slice %arg2[%arg0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<2x4096x10x64xf16> to tensor<1x64x1x64xf16>
  // Rank-expanding insert: the 64x64 tensor is placed into the 1x64x1x64 slice.
  %inserted_slice = tensor.insert_slice %2 into %extracted_slice[0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<64x64xf16> into tensor<1x64x1x64xf16>
  return %inserted_slice : tensor<1x64x1x64xf16>
}

// Reproducer variant 2: same ops as @test_one, but tensor.extract_slice is placed
// BEFORE vector.transfer_write. This ordering is the only difference between the two.
func.func @test_two(%arg0: index, %arg1: vector<64x64xf32>, %arg2: tensor<2x4096x10x64xf16>) -> tensor<1x64x1x64xf16> {
  %c0 = arith.constant 0 : index
  // Destination of the transfer_write below; this is the tensor.empty the pass is expected to eliminate.
  %0 = tensor.empty() : tensor<64x64xf16>
  %1 = arith.truncf %arg1 : vector<64x64xf32> to vector<64x64xf16>
  // The destination slice is already defined when the transfer_write below executes.
  %extracted_slice = tensor.extract_slice %arg2[%arg0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<2x4096x10x64xf16> to tensor<1x64x1x64xf16>
  %2 = vector.transfer_write %1, %0[%c0, %c0] {in_bounds = [true, true]} : vector<64x64xf16>, tensor<64x64xf16>
  // Rank-expanding insert: the 64x64 tensor is placed into the 1x64x1x64 slice.
  %inserted_slice = tensor.insert_slice %2 into %extracted_slice[0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<64x64xf16> into tensor<1x64x1x64xf16>
  return %inserted_slice : tensor<1x64x1x64xf16>
}

Command: mlir-opt above.mlir -eliminate-empty-tensors -canonicalize

Output

module {
  // NOTE (reviewer annotation, not emitted by mlir-opt): this function is unchanged
  // from the input. The tensor.empty is still present and is still the destination
  // of vector.transfer_write.
  func.func @test_one(%arg0: index, %arg1: vector<64x64xf32>, %arg2: tensor<2x4096x10x64xf16>) -> tensor<1x64x1x64xf16> {
    %c0 = arith.constant 0 : index
    %0 = tensor.empty() : tensor<64x64xf16>
    %1 = arith.truncf %arg1 : vector<64x64xf32> to vector<64x64xf16>
    %2 = vector.transfer_write %1, %0[%c0, %c0] {in_bounds = [true, true]} : vector<64x64xf16>, tensor<64x64xf16>
    %extracted_slice = tensor.extract_slice %arg2[%arg0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<2x4096x10x64xf16> to tensor<1x64x1x64xf16>
    %inserted_slice = tensor.insert_slice %2 into %extracted_slice[0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<64x64xf16> into tensor<1x64x1x64xf16>
    return %inserted_slice : tensor<1x64x1x64xf16>
  }

  // NOTE (reviewer annotation, not emitted by mlir-opt): the tensor.empty is gone here.
  // A rank-reducing extract_slice of %extracted_slice (%extracted_slice_0) has taken
  // its place as the destination of vector.transfer_write.
  func.func @test_two(%arg0: index, %arg1: vector<64x64xf32>, %arg2: tensor<2x4096x10x64xf16>) -> tensor<1x64x1x64xf16> {
    %c0 = arith.constant 0 : index
    %0 = arith.truncf %arg1 : vector<64x64xf32> to vector<64x64xf16>
    %extracted_slice = tensor.extract_slice %arg2[%arg0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<2x4096x10x64xf16> to tensor<1x64x1x64xf16>
    // Replacement for the former tensor.empty: a 64x64 view of the 1x64x1x64 slice.
    %extracted_slice_0 = tensor.extract_slice %extracted_slice[0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<1x64x1x64xf16> to tensor<64x64xf16>
    %1 = vector.transfer_write %0, %extracted_slice_0[%c0, %c0] {in_bounds = [true, true]} : vector<64x64xf16>, tensor<64x64xf16>
    %inserted_slice = tensor.insert_slice %1 into %extracted_slice[0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<64x64xf16> into tensor<1x64x1x64xf16>
    return %inserted_slice : tensor<1x64x1x64xf16>
  }

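The only difference between test_one and test_two is the placement of tensor.extract_slice: in test_one it comes after vector.transfer_write, and in test_two it comes before it. In the output, test_one still contains the tensor.empty (the empty buffer is not eliminated), whereas in test_two the tensor.empty is gone and vector.transfer_write writes directly into a slice of the extracted tensor.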

@matthias-springer Could you explain what is happening here? Is this the intended behaviour?

Metadata

Metadata

Assignees

Labels

Type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions