
Commit fc41b3c

larryliu0820 authored and facebook-github-bot committed
Add unfold_copy.out (#8952)
Summary: As titled. Naive kernel for `unfold_copy.out`.

Reviewed By: iseeyuan, Gasoonjia

Differential Revision: D70597013
1 parent: 5dd96c3 · commit: fc41b3c

7 files changed: +276 −0 lines changed
Lines changed: 75 additions & 0 deletions

@@ -0,0 +1,75 @@
#include <c10/util/irange.h>
#include <executorch/kernels/portable/cpu/util/copy_ops_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include <executorch/runtime/platform/assert.h>
#include <cstring>

namespace torch {
namespace executor {
namespace native {

using Tensor = executorch::aten::Tensor;

// unfold_copy(Tensor self, int dimension, int size, int step, *, Tensor(a!)
// out) -> Tensor(a!)
Tensor& unfold_copy_out(
    KernelRuntimeContext& ctx,
    const Tensor& self,
    int64_t dim,
    int64_t size,
    int64_t step,
    Tensor& out) {
  (void)ctx;
  // Check if dimension is valid
  ET_KERNEL_CHECK(
      ctx,
      check_unfold_copy_args(self, dim, size, step, out),
      InvalidArgument,
      out);
  if (dim < 0) {
    dim += nonzero_dim(self);
  }
  // Calculate output size
  Tensor::SizesType expected_output_size[kTensorDimensionLimit];
  size_t expected_out_dim = 0;

  get_unfold_copy_out_target_size(
      self, dim, size, step, expected_output_size, &expected_out_dim);

  ET_KERNEL_CHECK(
      ctx,
      resize_tensor(out, {expected_output_size, expected_out_dim}) == Error::Ok,
      InvalidArgument,
      out);

  // Copy data
  const size_t leading_dims = getLeadingDims(self, dim);
  const size_t trailing_dims = getTrailingDims(self, dim);
  ScalarType in_type = self.scalar_type();
  ScalarType out_type = out.scalar_type();

  ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, "unfold_copy.out", CTYPE_IN, [&]() {
    const CTYPE_IN* input_ptr = self.const_data_ptr<CTYPE_IN>();
    ET_SWITCH_REALHBBF16_TYPES(
        out_type, ctx, "unfold_copy.out", CTYPE_OUT, [&] {
          CTYPE_OUT* out_ptr = out.mutable_data_ptr<CTYPE_OUT>();
          for (const auto i : c10::irange(leading_dims)) {
            const CTYPE_IN* src =
                input_ptr + i * self.size(dim) * trailing_dims;
            for (const auto j : c10::irange(out.size(dim))) {
              const CTYPE_IN* dim_src = src + j * step * trailing_dims;
              for (const auto k : c10::irange(trailing_dims)) {
                for (const auto l : c10::irange(size)) {
                  *out_ptr = convert<CTYPE_OUT, CTYPE_IN>(
                      dim_src[k + l * trailing_dims]);
                  out_ptr++;
                }
              }
            }
          }
        });
  });
  return out;
}
} // namespace native
} // namespace executor
} // namespace torch
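For intuition about the naive copy loop above, the same index arithmetic can be reproduced on a flat row-major buffer in plain C++. This is a standalone sketch for illustration only; the variable names and the hard-coded 3x3 shape are assumptions chosen to mirror the DType test further down, not ExecuTorch APIs:

#include <cassert>
#include <cstdio>
#include <vector>

// Minimal sketch of the unfold_copy traversal on a flat row-major buffer.
// For an input of shape [3, 3], dim = 1, size = 2, step = 1, the output has
// shape [3, 2, 2]: two windows of length 2 per row.
int main() {
  const std::vector<float> input = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  const int dim_size = 3;      // self.size(dim)
  const int leading_dims = 3;  // product of sizes before `dim`
  const int trailing_dims = 1; // product of sizes after `dim`
  const int size = 2;          // window length
  const int step = 1;          // window stride
  const int out_dim = (dim_size - size + step) / step; // number of windows

  std::vector<float> out;
  for (int i = 0; i < leading_dims; ++i) {
    const float* src = input.data() + i * dim_size * trailing_dims;
    for (int j = 0; j < out_dim; ++j) {
      const float* dim_src = src + j * step * trailing_dims;
      for (int k = 0; k < trailing_dims; ++k) {
        for (int l = 0; l < size; ++l) {
          out.push_back(dim_src[k + l * trailing_dims]);
        }
      }
    }
  }

  const std::vector<float> expected = {1, 2, 2, 3, 4, 5, 5, 6, 7, 8, 8, 9};
  assert(out == expected);
  std::printf("unfold_copy sketch produced %zu elements\n", out.size());
  return 0;
}

The traversal writes the output contiguously in [leading, window index, trailing, window element] order, which is why the innermost loop runs over the window length.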

kernels/portable/cpu/util/copy_ops_util.cpp

Lines changed: 44 additions & 0 deletions

@@ -964,5 +964,49 @@ void get_diagonal_copy_out_target_size(
  out_sizes[in.dim() - 2] = diagonal_size;
}

bool check_unfold_copy_args(
    const Tensor& self,
    int64_t dim,
    int64_t size,
    int64_t step,
    Tensor& out) {
  if (dim < 0) {
    dim += nonzero_dim(self);
  }
  ET_LOG_AND_RETURN_IF_FALSE(tensor_has_dim(self, dim));
  ET_CHECK_OR_RETURN_FALSE(
      size >= 0, "size is %" PRId64 " but must be >= 0", size);
  ET_CHECK_OR_RETURN_FALSE(
      size <= self.size(dim),
      "maximum size for tensor at dimension %" PRId64 " is %" PRId64
      " but size is %" PRId64,
      dim,
      self.size(dim),
      size);
  ET_CHECK_OR_RETURN_FALSE(
      step > 0, "step is %" PRId64 " but must be > 0", step);
  return true;
}

void get_unfold_copy_out_target_size(
    const Tensor& self,
    int64_t dim,
    int64_t size,
    int64_t step,
    executorch::aten::SizesType* out_sizes,
    size_t* out_ndim) {
  for (size_t i = 0; i < dim; ++i) {
    out_sizes[i] = self.size(i);
  }
  // At `dim` dimension, we split the tensor into `size` chunks with `step`
  // stride.
  out_sizes[dim] = (self.size(dim) - size + step) / step;
  for (size_t i = dim + 1; i < self.dim(); ++i) {
    out_sizes[i] = self.size(i);
  }
  out_sizes[self.dim()] = size;
  *out_ndim = self.dim() + 1;
}

} // namespace executor
} // namespace torch
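The window-count expression `(self.size(dim) - size + step) / step` is the integer form of floor((L - size) / step) + 1 for L >= size. Below is a standalone sanity-check sketch of that arithmetic (not part of the diff), using the shapes exercised by the tests later in this commit:

#include <cassert>
#include <cstdint>

// Standalone check of the number-of-windows formula used above:
// (L - size + step) / step == floor((L - size) / step) + 1 for L >= size.
int main() {
  auto windows = [](int64_t L, int64_t size, int64_t step) {
    return (L - size + step) / step;
  };
  assert(windows(3, 2, 1) == 2); // DType test: output shape {3, 2, 2}
  assert(windows(3, 2, 2) == 1); // SmokeTest: output shape {3, 1, 2}
  assert(windows(3, 2, 5) == 1); // LargeStep: oversized step still yields one window
  assert(windows(3, 0, 1) == 4); // ZeroSize: output shape {3, 4, 0}
  return 0;
}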

kernels/portable/cpu/util/copy_ops_util.h

Lines changed: 15 additions & 0 deletions

@@ -233,5 +233,20 @@ void get_diagonal_copy_out_target_size(
    executorch::aten::SizesType* out_sizes,
    size_t* out_ndim);

bool check_unfold_copy_args(
    const Tensor& self,
    int64_t dim,
    int64_t size,
    int64_t step,
    Tensor& out);

void get_unfold_copy_out_target_size(
    const Tensor& self,
    int64_t dim,
    int64_t size,
    int64_t step,
    executorch::aten::SizesType* out_sizes,
    size_t* out_ndim);

} // namespace executor
} // namespace torch

kernels/portable/functions.yaml

Lines changed: 5 additions & 0 deletions

@@ -917,6 +917,11 @@
    - arg_meta: null
      kernel_name: torch::executor::unbind_copy_int_out

- op: unfold_copy.out
  kernels:
    - arg_meta: null
      kernel_name: torch::executor::unfold_copy_out

- op: unsqueeze_copy.out
  kernels:
    - arg_meta: null

kernels/test/op_unfold_copy_test.cpp

Lines changed: 130 additions & 0 deletions

@@ -0,0 +1,130 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/kernels/test/FunctionHeaderWrapper.h> // Declares the operator
#include <executorch/kernels/test/supported_features.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>

#include <executorch/kernels/test/TestUtil.h>

#include <gtest/gtest.h>

using namespace ::testing;
using executorch::aten::ScalarType;
using executorch::aten::Tensor;
using torch::executor::testing::TensorFactory;

class OpUnfoldTest : public OperatorTest {
 protected:
  Tensor& op_unfold_copy_out(
      const Tensor& self,
      int64_t dim,
      int64_t size,
      int64_t step,
      Tensor& out) {
    return torch::executor::aten::unfold_copy_outf(
        context_, self, dim, size, step, out);
  }

  template <class CTYPE, ScalarType DTYPE>
  void test_unfold_copy_dtype() {
    TensorFactory<DTYPE> tf;

    auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
    auto expected = tf.make({3, 2, 2}, {1, 2, 2, 3, 4, 5, 5, 6, 7, 8, 8, 9});
    auto actual_out = tf.zeros_like(expected);
    op_unfold_copy_out(input, /*dim=*/1, /*size=*/2, /*step=*/1, actual_out);
    EXPECT_TENSOR_CLOSE(actual_out, expected);
  }
};

TEST_F(OpUnfoldTest, SmokeTest) {
  TensorFactory<ScalarType::Float> tf;
  const auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
  const auto expected = tf.make({3, 1, 2}, {1, 2, 4, 5, 7, 8});
  auto output = tf.zeros_like(expected);

  op_unfold_copy_out(input, /*dim=*/1, /*size=*/2, /*step=*/2, output);
  EXPECT_TENSOR_CLOSE(output, expected);
}

TEST_F(OpUnfoldTest, DType) {
#define TEST_ENTRY(ctype, dtype) \
  test_unfold_copy_dtype<ctype, ScalarType::dtype>();
  ET_FORALL_REALHBF16_TYPES(TEST_ENTRY);
#undef TEST_ENTRY
}

TEST_F(OpUnfoldTest, ZeroDimension) {
  TensorFactory<ScalarType::Float> tf;
  const auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
  const auto expected =
      tf.make({2, 3, 2}, {1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 9});
  auto output = tf.zeros_like(expected);

  op_unfold_copy_out(input, /*dim=*/0, /*size=*/2, /*step=*/1, output);
  EXPECT_TENSOR_CLOSE(output, expected);
}

TEST_F(OpUnfoldTest, NegativeDimension) {
  TensorFactory<ScalarType::Float> tf;
  const auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
  const auto expected = tf.make({3, 1, 2}, {1, 2, 4, 5, 7, 8});
  auto output = tf.zeros_like(expected);

  op_unfold_copy_out(input, /*dim=*/-1, /*size=*/2, /*step=*/2, output);
  EXPECT_TENSOR_CLOSE(output, expected);
}

TEST_F(OpUnfoldTest, LargeStep) {
  TensorFactory<ScalarType::Float> tf;
  const auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
  const auto expected = tf.make({3, 1, 2}, {1, 2, 4, 5, 7, 8});
  auto output = tf.zeros_like(expected);

  op_unfold_copy_out(input, /*dim=*/-1, /*size=*/2, /*step=*/5, output);
  EXPECT_TENSOR_CLOSE(output, expected);
}

TEST_F(OpUnfoldTest, ZeroSize) {
  TensorFactory<ScalarType::Float> tf;
  const auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
  const auto expected = tf.make({3, 4, 0}, {});
  auto output = tf.zeros_like(expected);

  op_unfold_copy_out(input, /*dim=*/1, /*size=*/0, /*step=*/1, output);
  EXPECT_TENSOR_CLOSE(output, expected);
}

TEST_F(OpUnfoldTest, NegativeSizeAndNegativeStepDies) {
  TensorFactory<ScalarType::Float> tf;
  const auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
  auto output = tf.zeros({3, 1, 2});

  ET_EXPECT_KERNEL_FAILURE(
      context_,
      op_unfold_copy_out(input, /*dim=*/1, /*size=*/-1, /*step=*/1, output));
  ET_EXPECT_KERNEL_FAILURE(
      context_,
      op_unfold_copy_out(input, /*dim=*/1, /*size=*/1, /*step=*/-1, output));
}

TEST_F(OpUnfoldTest, InvalidDimAndSizeTooLargeDies) {
  TensorFactory<ScalarType::Float> tf;
  const auto input = tf.make({3, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
  auto output = tf.zeros({3, 1, 2});
  ET_EXPECT_KERNEL_FAILURE(
      context_,
      op_unfold_copy_out(input, /*dim=*/3, /*size=*/2, /*step=*/1, output));
  ET_EXPECT_KERNEL_FAILURE(
      context_,
      op_unfold_copy_out(input, /*dim=*/1, /*size=*/10, /*step=*/1, output));
}

kernels/test/targets.bzl

Lines changed: 1 addition & 0 deletions

@@ -324,6 +324,7 @@ def define_common_targets():
    _common_op_test("op_tril_test", ["aten", "portable"])
    _common_op_test("op_trunc_test", ["aten", "portable"])
    _common_op_test("op_unbind_copy_test", ["aten", "portable"])
    _common_op_test("op_unfold_copy_test", ["aten", "portable"])
    _common_op_test("op_unsqueeze_copy_test", ["aten", "portable"])
    _common_op_test("op_upsample_bilinear2d_test", ["aten", "portable"])
    _common_op_test("op_upsample_nearest2d_test", ["aten", "portable"])

shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl

Lines changed: 6 additions & 0 deletions

@@ -1223,6 +1223,12 @@ ATEN_OPS = (
            "//executorch/kernels/portable/cpu/util:copy_ops_util",
        ],
    ),
    op_target(
        name = "op_unfold_copy",
        deps = [
            "//executorch/kernels/portable/cpu/util:copy_ops_util",
        ],
    ),
    op_target(
        name = "op_unsqueeze_copy",
        deps = [
