Open
Description
Input IR:
// Reproducer, variant 1: the tensor.extract_slice of %arg2 is placed AFTER the
// vector.transfer_write that fills the tensor.empty.
func.func @test_one(%arg0: index, %arg1: vector<64x64xf32>, %arg2: tensor<2x4096x10x64xf16>) -> tensor<1x64x1x64xf16> {
%c0 = arith.constant 0 : index
// Destination tensor for the write below; this is the op the pass is expected to eliminate.
%0 = tensor.empty() : tensor<64x64xf16>
// Narrow the f32 input vector to f16.
%1 = arith.truncf %arg1 : vector<64x64xf32> to vector<64x64xf16>
// Write the f16 vector into the empty tensor at [0, 0].
%2 = vector.transfer_write %1, %0[%c0, %c0] {in_bounds = [true, true]} : vector<64x64xf16>, tensor<64x64xf16>
// Slice of %arg2 with sizes [1, 64, 1, 64], offset %arg0 in the first dimension.
// Note that it is defined after the transfer_write above.
%extracted_slice = tensor.extract_slice %arg2[%arg0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<2x4096x10x64xf16> to tensor<1x64x1x64xf16>
// Insert the 64x64 result into the full [1, 64, 1, 64] region of the slice.
%inserted_slice = tensor.insert_slice %2 into %extracted_slice[0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<64x64xf16> into tensor<1x64x1x64xf16>
return %inserted_slice : tensor<1x64x1x64xf16>
}
// Reproducer, variant 2: same ops as @test_one, but the tensor.extract_slice of
// %arg2 is placed BEFORE the vector.transfer_write that fills the tensor.empty.
func.func @test_two(%arg0: index, %arg1: vector<64x64xf32>, %arg2: tensor<2x4096x10x64xf16>) -> tensor<1x64x1x64xf16> {
%c0 = arith.constant 0 : index
// Destination tensor for the write below; this is the op the pass is expected to eliminate.
%0 = tensor.empty() : tensor<64x64xf16>
// Narrow the f32 input vector to f16.
%1 = arith.truncf %arg1 : vector<64x64xf32> to vector<64x64xf16>
// Slice of %arg2 with sizes [1, 64, 1, 64], offset %arg0 in the first dimension.
// Defined after the tensor.empty but before the transfer_write that uses it.
%extracted_slice = tensor.extract_slice %arg2[%arg0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<2x4096x10x64xf16> to tensor<1x64x1x64xf16>
// Write the f16 vector into the empty tensor at [0, 0].
%2 = vector.transfer_write %1, %0[%c0, %c0] {in_bounds = [true, true]} : vector<64x64xf16>, tensor<64x64xf16>
// Insert the 64x64 result into the full [1, 64, 1, 64] region of the slice.
%inserted_slice = tensor.insert_slice %2 into %extracted_slice[0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<64x64xf16> into tensor<1x64x1x64xf16>
return %inserted_slice : tensor<1x64x1x64xf16>
}
Command: mlir-opt above.mlir -eliminate-empty-tensors -canonicalize
Output
module {
// NOTE(review): annotation added to the pasted pass output; the ops are unchanged.
// @test_one comes out of the pipeline with the same ops as the input.
func.func @test_one(%arg0: index, %arg1: vector<64x64xf32>, %arg2: tensor<2x4096x10x64xf16>) -> tensor<1x64x1x64xf16> {
%c0 = arith.constant 0 : index
// NOTE(review): the tensor.empty is still present here, i.e. it was not eliminated.
%0 = tensor.empty() : tensor<64x64xf16>
%1 = arith.truncf %arg1 : vector<64x64xf32> to vector<64x64xf16>
%2 = vector.transfer_write %1, %0[%c0, %c0] {in_bounds = [true, true]} : vector<64x64xf16>, tensor<64x64xf16>
// NOTE(review): presumably the elimination is skipped because this slice is defined
// after the transfer_write that uses the empty tensor; confirm against the pass implementation.
%extracted_slice = tensor.extract_slice %arg2[%arg0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<2x4096x10x64xf16> to tensor<1x64x1x64xf16>
%inserted_slice = tensor.insert_slice %2 into %extracted_slice[0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<64x64xf16> into tensor<1x64x1x64xf16>
return %inserted_slice : tensor<1x64x1x64xf16>
}
// NOTE(review): annotation added to the pasted pass output; the ops are unchanged.
// In @test_two the tensor.empty from the input no longer appears.
func.func @test_two(%arg0: index, %arg1: vector<64x64xf32>, %arg2: tensor<2x4096x10x64xf16>) -> tensor<1x64x1x64xf16> {
%c0 = arith.constant 0 : index
%0 = arith.truncf %arg1 : vector<64x64xf32> to vector<64x64xf16>
%extracted_slice = tensor.extract_slice %arg2[%arg0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<2x4096x10x64xf16> to tensor<1x64x1x64xf16>
// NOTE(review): new op not present in the input. It takes a 64x64 view of the
// 1x64x1x64 slice, and that view replaces the tensor.empty as the write destination.
%extracted_slice_0 = tensor.extract_slice %extracted_slice[0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<1x64x1x64xf16> to tensor<64x64xf16>
// The transfer_write now targets %extracted_slice_0 instead of a tensor.empty.
%1 = vector.transfer_write %0, %extracted_slice_0[%c0, %c0] {in_bounds = [true, true]} : vector<64x64xf16>, tensor<64x64xf16>
%inserted_slice = tensor.insert_slice %1 into %extracted_slice[0, 0, 0, 0] [1, 64, 1, 64] [1, 1, 1, 1] : tensor<64x64xf16> into tensor<1x64x1x64xf16>
return %inserted_slice : tensor<1x64x1x64xf16>
}
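The only difference between `test_one` and `test_two` is the placement of `tensor.extract_slice`: in `test_one` it comes after the `vector.transfer_write`, whereas in `test_two` it comes before it. `test_one` doesn't get rid of the empty buffer, whereas `test_two` gets rid of the empty buffer and reuses the extracted slice.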
@matthias-springer Could you explain what is happening here? Do you know if this is the intended behaviour?