Skip to content

Commit 4fe7367

Browse files
authored
Save some size in dtype_util when dtype selective build is not in use (#9842)
We duplicate many functions per operator name so that dtype selective build works. When dtype selective build is not in use, we can detect that and stop duplicating. Test Plan: Saves 28288 bytes of text in size_test_all_optimized_ops compared to the previous PR on my Mac. Pull Request resolved: #9742
1 parent b01c7de commit 4fe7367

File tree

1 file changed

+37
-2
lines changed

1 file changed

+37
-2
lines changed

kernels/portable/cpu/util/dtype_util.h

+37-2
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ enum class SupportedTensorDtypes {
228228
namespace internal {
229229

230230
template <typename CTYPE_COMPUTE, const char* op_name>
231-
load_to_compute_fn<CTYPE_COMPUTE> get_load_to_compute_fn(
231+
load_to_compute_fn<CTYPE_COMPUTE> get_load_to_compute_fn_impl(
232232
const Tensor& t,
233233
SupportedTensorDtypes dtypes) {
234234
switch (dtypes) {
@@ -252,7 +252,7 @@ load_to_compute_fn<CTYPE_COMPUTE> get_load_to_compute_fn(
252252
}
253253

254254
template <typename CTYPE_COMPUTE, const char* op_name>
255-
store_compute_to_tensor_fn<CTYPE_COMPUTE> get_store_compute_to_tensor_fn(
255+
store_compute_to_tensor_fn<CTYPE_COMPUTE> get_store_compute_to_tensor_fn_impl(
256256
const Tensor& t,
257257
SupportedTensorDtypes dtypes) {
258258
switch (dtypes) {
@@ -285,6 +285,41 @@ store_compute_to_tensor_fn<CTYPE_COMPUTE> get_store_compute_to_tensor_fn(
285285
return nullptr;
286286
}
287287

288+
#ifndef EXECUTORCH_SELECTIVE_BUILD_DTYPE
// Shared operator name used for every elementwise op when dtype selective
// build is off, so all ops collapse onto a single template instantiation.
//
// `inline` (C++17) is required here: a plain namespace-scope `constexpr`
// array in a header has internal linkage, giving each translation unit a
// distinct object with a distinct address. Because the *address* is the
// non-type template argument, that would instantiate the templates once per
// TU and partially defeat the code-size saving. `inline constexpr` yields one
// program-wide object, hence one shared instantiation.
inline constexpr const char kGenericElementwiseOpName[] =
    "generic_elementwise_op";
#endif // EXECUTORCH_SELECTIVE_BUILD_DTYPE
291+
292+
template <typename CTYPE_COMPUTE, const char* op_name>
293+
load_to_compute_fn<CTYPE_COMPUTE> get_load_to_compute_fn(
294+
const Tensor& t,
295+
SupportedTensorDtypes dtypes) {
296+
// NOTE: Selective build relies on the operator name being passed
297+
// here. When it's *not* active, using the same operator name
298+
// everywhere saves on size because we don't require a new template
299+
// instantiation for every operator.
300+
return get_load_to_compute_fn_impl<
301+
CTYPE_COMPUTE,
302+
#ifdef EXECUTORCH_SELECTIVE_BUILD_DTYPE
303+
op_name
304+
#else // EXECUTORCH_SELECTIVE_BUILD_DTYPE
305+
kGenericElementwiseOpName
306+
#endif // EXECUTORCH_SELECTIVE_BUILD_DTYPE
307+
>(t, dtypes);
308+
}
309+
310+
template <typename CTYPE_COMPUTE, const char* op_name>
311+
store_compute_to_tensor_fn<CTYPE_COMPUTE> get_store_compute_to_tensor_fn(
312+
const Tensor& t,
313+
SupportedTensorDtypes dtypes) {
314+
return get_store_compute_to_tensor_fn_impl<
315+
CTYPE_COMPUTE,
316+
#ifdef EXECUTORCH_SELECTIVE_BUILD_DTYPE
317+
op_name
318+
#else // EXECUTORCH_SELECTIVE_BUILD_DTYPE
319+
kGenericElementwiseOpName
320+
#endif // EXECUTORCH_SELECTIVE_BUILD_DTYPE
321+
>(t, dtypes);
322+
}
288323
bool check_tensor_dtype(
289324
const Tensor t,
290325
SupportedTensorDtypes dtypes,

0 commit comments

Comments
 (0)