
Commit 1813bbe

GregoryComer authored and kirklandsign committed
Support channels_last format in portable upsample kernels
Differential Revision: D71690379

Pull Request resolved: #9526
1 parent 3db27d2 commit 1813bbe

File tree

6 files changed, +349 -9 lines changed


kernels/portable/cpu/op_upsample_bilinear2d.cpp

Lines changed: 96 additions & 3 deletions
@@ -20,7 +20,7 @@ using executorch::aten::SizesType;
 
 namespace {
 template <typename CTYPE>
-void upsample_bilinear2d_kernel_impl(
+void upsample_bilinear2d_kernel_impl_nchw(
     const Tensor& in,
     bool align_corners,
     const float scale_h,
@@ -86,6 +86,99 @@ void upsample_bilinear2d_kernel_impl(
     }
   }
 }
+
+template <typename CTYPE>
+void upsample_bilinear2d_kernel_impl_nhwc(
+    const Tensor& in,
+    bool align_corners,
+    const float scale_h,
+    const float scale_w,
+    Tensor& out) {
+  auto in_data = in.const_data_ptr<CTYPE>();
+  auto out_data = out.mutable_data_ptr<CTYPE>();
+
+  for ([[maybe_unused]] const auto n : c10::irange(out.size(0))) {
+    for (const auto h : c10::irange(out.size(2))) {
+      // Compute source index and weights.
+      int64_t in_h1, in_h2;
+      float weight_h, inv_weight_h;
+
+      compute_source_index_and_lambda(
+          in_h1,
+          in_h2,
+          weight_h,
+          inv_weight_h,
+          scale_h,
+          h,
+          in.sizes()[2],
+          out.sizes()[2],
+          align_corners);
+
+      for (const auto w : c10::irange(out.size(3))) {
+        int64_t in_w1, in_w2;
+        float weight_w, inv_weight_w;
+
+        compute_source_index_and_lambda(
+            in_w1,
+            in_w2,
+            weight_w,
+            inv_weight_w,
+            scale_w,
+            w,
+            in.sizes()[3],
+            out.sizes()[3],
+            align_corners);
+
+        for ([[maybe_unused]] const auto c : c10::irange(out.size(1))) {
+          const auto top_left = in_data
+              [in_h1 * in.strides()[2] + in_w1 * in.strides()[3] +
+               c * in.strides()[1]];
+          const auto top_right = in_data
+              [in_h1 * in.strides()[2] + in_w2 * in.strides()[3] +
+               c * in.strides()[1]];
+          const auto bottom_left = in_data
+              [in_h2 * in.strides()[2] + in_w1 * in.strides()[3] +
+               c * in.strides()[1]];
+          const auto bottom_right = in_data
+              [in_h2 * in.strides()[2] + in_w2 * in.strides()[3] +
+               c * in.strides()[1]];
+
+          const auto top = top_left * weight_w + top_right * inv_weight_w;
+          const auto bottom =
+              bottom_left * weight_w + bottom_right * inv_weight_w;
+          const auto val = top * weight_h + bottom * inv_weight_h;
+
+          *out_data = val;
+          out_data++;
+        }
+      }
+    }
+
+    in_data += in.strides()[0];
+  }
+}
+
+template <typename CTYPE>
+void upsample_bilinear2d_kernel_impl(
+    KernelRuntimeContext& ctx,
+    const Tensor& in,
+    bool align_corners,
+    const float scale_h,
+    const float scale_w,
+    Tensor& out) {
+  if (is_contiguous_dim_order(in.dim_order().data(), in.dim_order().size())) {
+    upsample_bilinear2d_kernel_impl_nchw<CTYPE>(
+        in, align_corners, scale_h, scale_w, out);
+  } else if (is_channels_last_dim_order(
+                 in.dim_order().data(), in.dim_order().size())) {
+    upsample_bilinear2d_kernel_impl_nhwc<CTYPE>(
+        in, align_corners, scale_h, scale_w, out);
+  } else {
+    // Shouldn't be reachable because of args checks, but just in case.
+    ET_LOG(Error, "Unsupported dim order");
+    ctx.fail(Error::InvalidArgument);
+  }
+}
 } // namespace
 
 // Signatures are auto-generated, so disable pass-by-value lint.
@@ -101,7 +194,7 @@ Tensor& upsample_bilinear2d_vec_out(
   // Preconditions (checked in check_..._args):
   //  In and out tensors have same dtype.
   //  In and out tensors are rank 4 and have same dim[0] and dim[1].
-  //  In and out tensors are default dim order (NCHW).
+  //  In and out tensors are NHWC or NCHW dim order.
   ET_KERNEL_CHECK(
       ctx,
       check_upsample_bilinear2d_args(
@@ -127,7 +220,7 @@ Tensor& upsample_bilinear2d_vec_out(
   ET_SWITCH_REALHBF16_TYPES(
       in.scalar_type(), ctx, "upsample_bilinear2d.out", CTYPE, [&]() {
         upsample_bilinear2d_kernel_impl<CTYPE>(
-            in, align_corners, kernel_scale_h, kernel_scale_w, out);
+            ctx, in, align_corners, kernel_scale_h, kernel_scale_w, out);
       });
 
   return out;
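
The NHWC kernel above reuses the same source-index/lambda computation as the NCHW path; only the stride arithmetic and the output traversal order change. As a quick sanity check of the interpolation math, the following is a minimal standalone C++ sketch of the align_corners=false half-pixel mapping (the helper name is hypothetical and stands in for the shared compute_source_index_and_lambda utility); it reproduces the {1.0, 1.75, 3.25, 4.0} expectation used by the Simple1x2To1x4ChannelsLast test further down.

// Standalone sketch of 1-D bilinear upsampling with align_corners=false
// (half-pixel) source mapping. Hypothetical helper names; illustration only.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Map an output index to a pair of source indices plus blend weights.
static void source_index_and_lambda(
    int64_t out_idx,
    int64_t in_size,
    int64_t out_size,
    int64_t& idx0,
    int64_t& idx1,
    float& w0,
    float& w1) {
  const float scale = static_cast<float>(in_size) / out_size;
  // Half-pixel mapping, clamped to the valid range.
  float src = (out_idx + 0.5f) * scale - 0.5f;
  src = std::max(src, 0.0f);
  idx0 = static_cast<int64_t>(src);
  idx1 = std::min(idx0 + 1, in_size - 1);
  w1 = src - idx0; // weight on the upper neighbor
  w0 = 1.0f - w1;  // weight on the lower neighbor
}

int main() {
  const std::vector<float> in = {1.0f, 4.0f}; // the 1x1x1x2 input row
  const int64_t out_size = 4;
  for (int64_t w = 0; w < out_size; ++w) {
    int64_t i0, i1;
    float w0, w1;
    source_index_and_lambda(w, in.size(), out_size, i0, i1, w0, w1);
    const float val = in[i0] * w0 + in[i1] * w1;
    std::printf("%g ", val); // prints: 1 1.75 3.25 4
  }
  std::printf("\n");
  return 0;
}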

kernels/portable/cpu/op_upsample_nearest2d.cpp

Lines changed: 50 additions & 2 deletions
@@ -19,7 +19,7 @@ using executorch::aten::SizesType;
 
 namespace {
 template <typename CTYPE>
-void upsample_nearest2d_kernel_impl(
+void upsample_nearest2d_kernel_impl_nchw(
     const Tensor& in,
     const float scale_h,
     const float scale_w,
@@ -46,6 +46,54 @@ void upsample_nearest2d_kernel_impl(
     }
   }
 }
+
+template <typename CTYPE>
+void upsample_nearest2d_kernel_impl_nhwc(
+    const Tensor& in,
+    const float scale_h,
+    const float scale_w,
+    Tensor& out) {
+  auto in_data = in.const_data_ptr<CTYPE>();
+  auto out_data = out.mutable_data_ptr<CTYPE>();
+
+  for (auto n = 0; n < out.size(0); n++) {
+    for (auto h = 0; h < out.size(2); h++) {
+      const auto in_h =
+          nearest_neighbor_compute_source_index(scale_h, h, in.sizes()[2]);
+      for (auto w = 0; w < out.size(3); w++) {
+        const auto in_w =
+            nearest_neighbor_compute_source_index(scale_w, w, in.sizes()[3]);
+        for (auto c = 0; c < out.size(1); c++) {
+          *out_data = in_data
+              [in_h * in.strides()[2] + in_w * in.strides()[3] +
+               c * in.strides()[1]];
+          out_data++;
+        }
+      }
+    }
+
+    in_data += in.strides()[0];
+  }
+}
+
+template <typename CTYPE>
+void upsample_nearest2d_kernel_impl(
+    KernelRuntimeContext& ctx,
+    const Tensor& in,
+    const float scale_h,
+    const float scale_w,
+    Tensor& out) {
+  if (is_contiguous_dim_order(in.dim_order().data(), in.dim_order().size())) {
+    upsample_nearest2d_kernel_impl_nchw<CTYPE>(in, scale_h, scale_w, out);
+  } else if (is_channels_last_dim_order(
+                 in.dim_order().data(), in.dim_order().size())) {
+    upsample_nearest2d_kernel_impl_nhwc<CTYPE>(in, scale_h, scale_w, out);
+  } else {
+    // Shouldn't be reachable because of args checks, but just in case.
+    ET_LOG(Error, "Unsupported dim order");
+    ctx.fail(Error::InvalidArgument);
+  }
+}
 } // namespace
 
 Tensor& upsample_nearest2d_vec_out(
@@ -82,7 +130,7 @@ Tensor& upsample_nearest2d_vec_out(
   ET_SWITCH_REALHBF16_TYPES(
       in.scalar_type(), ctx, "upsample_nearest2d.out", CTYPE, [&]() {
         upsample_nearest2d_kernel_impl<CTYPE>(
-            in, kernel_scale_h, kernel_scale_w, out);
+            ctx, in, kernel_scale_h, kernel_scale_w, out);
       });
 
   return out;
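
For the nearest path only a single source index per output coordinate is needed. A common formulation, and the behavior assumed in the sketch below, is floor(dst * scale) clamped to the last valid input index; the helper name here is hypothetical and stands in for the shared nearest_neighbor_compute_source_index utility.

// Standalone sketch of a nearest-neighbor source-index rule:
// floor(dst * scale), clamped to the last valid input index.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

static int64_t nearest_source_index(float scale, int64_t dst, int64_t in_size) {
  const auto src = static_cast<int64_t>(std::floor(dst * scale));
  return std::min(src, in_size - 1);
}

int main() {
  const int64_t in_size = 2;
  const int64_t out_size = 5;
  const float scale = static_cast<float>(in_size) / out_size; // 0.4
  for (int64_t w = 0; w < out_size; ++w) {
    // Prints: 0 0 0 1 1 -- the first input pixel covers three outputs.
    std::printf(
        "%lld ", static_cast<long long>(nearest_source_index(scale, w, in_size)));
  }
  std::printf("\n");
  return 0;
}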

kernels/portable/cpu/util/upsample_util.cpp

Lines changed: 3 additions & 2 deletions
@@ -18,10 +18,11 @@ bool check_upsample_2d_common_args(
     const executorch::aten::OptionalArrayRef<double>& scale_factors,
     Tensor& out) {
   ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out));
+  ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dim_order(in, out));
   ET_LOG_AND_RETURN_IF_FALSE(in.dim() == 4);
   ET_LOG_AND_RETURN_IF_FALSE(out.dim() == 4);
-  ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_dim_order(in));
-  ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_dim_order(out));
+  ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_or_channels_last_dim_order(in));
+  ET_LOG_AND_RETURN_IF_FALSE(tensor_is_default_or_channels_last_dim_order(out));
   ET_LOG_AND_RETURN_IF_FALSE(
       output_size.has_value() ^ scale_factors.has_value());
   if (scale_factors.has_value()) {
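
With these relaxed checks, exactly two layouts are accepted and the input and output must agree: the default contiguous order {0, 1, 2, 3} (NCHW) and the channels-last order {0, 2, 3, 1} (NHWC). The sketch below is not ExecuTorch API; it is a minimal illustration of the strides each dim order implies, which is why the NHWC kernels can advance the output pointer one element at a time while gathering input elements through in.strides().

// Sketch: derive strides for an NCHW-shaped size array under a given dim
// order. For sizes {N, C, H, W}, the channels-last order {0, 2, 3, 1}
// yields strides {C*H*W, 1, W*C, C}.
#include <array>
#include <cstdint>
#include <cstdio>

static std::array<int64_t, 4> strides_for_dim_order(
    const std::array<int64_t, 4>& sizes,
    const std::array<int64_t, 4>& dim_order) {
  std::array<int64_t, 4> strides{};
  int64_t running = 1;
  // Walk dims from innermost (last in dim_order) to outermost.
  for (int i = 3; i >= 0; --i) {
    const int64_t d = dim_order[i];
    strides[d] = running;
    running *= sizes[d];
  }
  return strides;
}

int main() {
  const std::array<int64_t, 4> sizes = {1, 2, 3, 4}; // N, C, H, W
  const auto nchw = strides_for_dim_order(sizes, {0, 1, 2, 3});
  const auto nhwc = strides_for_dim_order(sizes, {0, 2, 3, 1});
  // Prints: NCHW strides: 24 12 4 1
  std::printf(
      "NCHW strides: %lld %lld %lld %lld\n",
      (long long)nchw[0], (long long)nchw[1], (long long)nchw[2], (long long)nchw[3]);
  // Prints: NHWC strides: 24 1 8 2
  std::printf(
      "NHWC strides: %lld %lld %lld %lld\n",
      (long long)nhwc[0], (long long)nhwc[1], (long long)nhwc[2], (long long)nhwc[3]);
  return 0;
}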

kernels/test/op_upsample_bilinear2d_test.cpp

Lines changed: 148 additions & 0 deletions
@@ -468,6 +468,28 @@ TEST_F(OpUpsampleBilinear2dTest, ZeroComputedOutputSizeDies) {
           out));
 }
 
+TEST_F(OpUpsampleBilinear2dTest, MismatchedDimOrderDies) {
+  TensorFactory<ScalarType::Float> tf;
+
+  if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
+    GTEST_SKIP() << "ATen kernel can implicitly convert dim order";
+  }
+
+  const auto input = tf.ones({1, 1, 1, 2});
+  auto out = tf.zeros_channels_last({1, 1, 1, 4});
+  std::array<double, 2> scale_factors = {2, 2};
+
+  ET_EXPECT_KERNEL_FAILURE(
+      context_,
+      op_upsample_bilinear2d_vec_out(
+          input,
+          {},
+          false,
+          OptionalArrayRef<double>(
+              {scale_factors.data(), scale_factors.size()}),
+          out));
+}
+
 TEST_F(OpUpsampleBilinear2dTest, NumericsCheck) {
   TensorFactory<ScalarType::Float> tf;
 
@@ -577,3 +599,129 @@ TEST_F(OpUpsampleBilinear2dTest, Simple5x1To4x1AlignCorners) {
 
   EXPECT_TENSOR_CLOSE(out, expected);
 }
+
+TEST_F(OpUpsampleBilinear2dTest, Simple1x2To1x4ChannelsLast) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.make_channels_last({1, 1, 1, 2}, {1.0, 4.0});
+  std::array<int64_t, 2> output_size = {1, 4};
+  auto out = tf.zeros_channels_last({1, 1, 1, 4});
+
+  op_upsample_bilinear2d_vec_out(
+      input,
+      OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+      false,
+      {},
+      out);
+
+  const auto expected =
+      tf.make_channels_last({1, 1, 1, 4}, {1.0, 1.75, 3.25, 4.0});
+
+  EXPECT_TENSOR_EQ(out, expected);
+}
+
+TEST_F(OpUpsampleBilinear2dTest, SmokeTestChannelsLast) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.make_channels_last(
+      {1, 2, 3, 4}, {0.0, 12, 1, 13, 2, 14, 3, 15, 4, 16, 5, 17,
+                     6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23});
+  std::array<int64_t, 2> output_size = {6, 8};
+  auto out = tf.zeros_channels_last({1, 2, 6, 8});
+
+  op_upsample_bilinear2d_vec_out(
+      input,
+      OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+      false,
+      {},
+      out);
+
+  const auto expected = tf.make_channels_last(
+      {1, 2, 6, 8},
+      {0.0000, 12.0000, 0.2500, 12.2500, 0.7500, 12.7500, 1.2500, 13.2500,
+       1.7500, 13.7500, 2.2500, 14.2500, 2.7500, 14.7500, 3.0000, 15.0000,
+       1.0000, 13.0000, 1.2500, 13.2500, 1.7500, 13.7500, 2.2500, 14.2500,
+       2.7500, 14.7500, 3.2500, 15.2500, 3.7500, 15.7500, 4.0000, 16.0000,
+       3.0000, 15.0000, 3.2500, 15.2500, 3.7500, 15.7500, 4.2500, 16.2500,
+       4.7500, 16.7500, 5.2500, 17.2500, 5.7500, 17.7500, 6.0000, 18.0000,
+       5.0000, 17.0000, 5.2500, 17.2500, 5.7500, 17.7500, 6.2500, 18.2500,
+       6.7500, 18.7500, 7.2500, 19.2500, 7.7500, 19.7500, 8.0000, 20.0000,
+       7.0000, 19.0000, 7.2500, 19.2500, 7.7500, 19.7500, 8.2500, 20.2500,
+       8.7500, 20.7500, 9.2500, 21.2500, 9.7500, 21.7500, 10.0000, 22.0000,
+       8.0000, 20.0000, 8.2500, 20.2500, 8.7500, 20.7500, 9.2500, 21.2500,
+       9.7500, 21.7500, 10.2500, 22.2500, 10.7500, 22.7500, 11.0000, 23.0000});
+
+  EXPECT_TENSOR_CLOSE(out, expected);
+}
+
+TEST_F(OpUpsampleBilinear2dTest, NumericsCheckChannelsLast) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.zeros_channels_last({3, 7, 47, 99});
+  auto out = tf.zeros_channels_last({3, 7, 291, 512});
+  std::array<int64_t, 2> output_size = {291, 512};
+
+  auto input_ptr = static_cast<float*>(input.mutable_data_ptr());
+  for (auto i = 0ul; i < input.numel(); i++) {
+    input_ptr[i] = static_cast<float>(i);
+  }
+
+  op_upsample_bilinear2d_vec_out(
+      input,
+      OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+      false,
+      {},
+      out);
+
+  // Indices and expected values to evaluate.
+  std::vector<std::tuple<int, int, int, int, float>> test_values = {
+      {0, 2, 60, 200, 6695.0137},
+      {1, 6, 5, 503, 33524.098},
+      {2, 0, 111, 300, 77678.68},
+  };
+
+  const auto output_data = static_cast<const float*>(out.const_data_ptr());
+  for (const auto& test_case : test_values) {
+    const auto [n, c, h, w, expected] = test_case;
+    const auto actual = output_data
+        [n * out.strides()[0] + c * out.strides()[1] + h * out.strides()[2] +
+         w * out.strides()[3]];
+    EXPECT_FLOAT_EQ(expected, actual);
+  }
+}
+
+TEST_F(OpUpsampleBilinear2dTest, NumericsCheckAlignCornersChannelsLast) {
+  TensorFactory<ScalarType::Float> tf;
+
+  const auto input = tf.zeros_channels_last({3, 7, 47, 99});
+  auto out = tf.zeros_channels_last({3, 7, 291, 512});
+  std::array<int64_t, 2> output_size = {291, 512};
+
+  auto input_ptr = static_cast<float*>(input.mutable_data_ptr());
+  for (auto i = 0ul; i < input.numel(); i++) {
+    input_ptr[i] = static_cast<float>(i);
+  }
+
+  op_upsample_bilinear2d_vec_out(
+      input,
+      OptionalArrayRef<int64_t>({output_size.data(), output_size.size()}),
+      true,
+      {},
+      out);
+
+  // Indices and expected values to evaluate.
+  std::vector<std::tuple<int, int, int, int, float>> test_values = {
+      {0, 2, 60, 200, 6865.9414},
+      {1, 6, 5, 503, 33801.883},
+      {2, 0, 111, 300, 77746.32},
+  };
+
+  const auto output_data = static_cast<const float*>(out.const_data_ptr());
+  for (const auto& test_case : test_values) {
+    const auto [n, c, h, w, expected] = test_case;
+    const auto actual = output_data
+        [n * out.strides()[0] + c * out.strides()[1] + h * out.strides()[2] +
+         w * out.strides()[3]];
+    EXPECT_FLOAT_EQ(expected, actual);
+  }
+}
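
A note on reproducing the NumericsCheck* expected values offline: the fill loop writes i at flat (memory-order) index i, so under the assumption that the channels-last factory uses the standard {C*H*W, 1, W*C, C} strides, the value stored at logical index (n, c, h, w) of the input is n*C*H*W + h*W*C + w*C + c. A minimal sketch of that mapping for the {3, 7, 47, 99} input used above:

// Sketch: value stored at logical (n, c, h, w) after filling a
// channels-last {3, 7, 47, 99} tensor with its own flat index.
// Assumes standard channels-last strides {C*H*W, 1, W*C, C}.
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t C = 7, H = 47, W = 99; // shape is {3, C, H, W}
  const int64_t strides[4] = {C * H * W, 1, W * C, C}; // channels-last
  const int64_t n = 0, c = 2, h = 10, w = 20; // arbitrary example index
  const int64_t flat =
      n * strides[0] + c * strides[1] + h * strides[2] + w * strides[3];
  // input_ptr[flat] was set to flat by the fill loop, so the logical
  // element (0, 2, 10, 20) holds the value 10*99*7 + 20*7 + 2 = 7072.
  std::printf("value at (0, 2, 10, 20) = %lld\n", static_cast<long long>(flat));
  return 0;
}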
