-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[libc][math] Optimize nearest integer functions using builtins when available #98376
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
cc @lntue |
@llvm/pr-subscribers-libc Author: OverMighty (overmighty) Changes: Patch is 34.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/98376.diff 27 Files Affected:
diff --git a/libc/cmake/modules/CheckCompilerFeatures.cmake b/libc/cmake/modules/CheckCompilerFeatures.cmake
index d84c07b35d2d7..83822892c8096 100644
--- a/libc/cmake/modules/CheckCompilerFeatures.cmake
+++ b/libc/cmake/modules/CheckCompilerFeatures.cmake
@@ -2,7 +2,15 @@
# Compiler features definition and flags
# ------------------------------------------------------------------------------
-set(ALL_COMPILER_FEATURES "float16" "float128" "fixed_point")
+set(
+ ALL_COMPILER_FEATURES
+ "builtin_ceil_floor_trunc"
+ "builtin_round"
+ "builtin_roundeven"
+ "float16"
+ "float128"
+ "fixed_point"
+)
# Making sure ALL_COMPILER_FEATURES is sorted.
list(SORT ALL_COMPILER_FEATURES)
@@ -39,11 +47,19 @@ endfunction()
set(AVAILABLE_COMPILER_FEATURES "")
# Try compile a C file to check if flag is supported.
-set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
foreach(feature IN LISTS ALL_COMPILER_FEATURES)
+ set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
set(compile_options ${LIBC_COMPILE_OPTIONS_NATIVE})
if(${feature} STREQUAL "fixed_point")
list(APPEND compile_options "-ffixed-point")
+ elseif(${feature} MATCHES "^builtin_")
+ set(compile_options ${LIBC_COMPILE_OPTIONS_DEFAULT})
+ # The compiler might handle calls to rounding builtins by generating calls
+ # to the respective libc math functions, in which case we cannot use these
+ # builtins in our implementations of these functions. We check that this is
+ # not the case by trying to link an executable, since linking would fail due
+ # to unresolved references if calls to libc functions were generated.
+ set(CMAKE_TRY_COMPILE_TARGET_TYPE EXECUTABLE)
endif()
try_compile(
@@ -60,6 +76,12 @@ foreach(feature IN LISTS ALL_COMPILER_FEATURES)
set(LIBC_TYPES_HAS_FLOAT128 TRUE)
elseif(${feature} STREQUAL "fixed_point")
set(LIBC_COMPILER_HAS_FIXED_POINT TRUE)
+ elseif(${feature} STREQUAL "builtin_ceil_floor_trunc")
+ set(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_TRUNC TRUE)
+ elseif(${feature} STREQUAL "builtin_round")
+ set(LIBC_COMPILER_HAS_BUILTIN_ROUND TRUE)
+ elseif(${feature} STREQUAL "builtin_roundeven")
+ set(LIBC_COMPILER_HAS_BUILTIN_ROUNDEVEN TRUE)
endif()
endif()
endforeach()
diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index c5e7dfe8abd0f..855d69d2a0fc9 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -4,7 +4,7 @@ function(_get_compile_options_from_flags output_var)
if(LIBC_TARGET_ARCHITECTURE_IS_RISCV64 OR(LIBC_CPU_FEATURES MATCHES "FMA"))
check_flag(ADD_FMA_FLAG ${FMA_OPT_FLAG} ${ARGN})
endif()
- check_flag(ADD_SSE4_2_FLAG ${ROUND_OPT_FLAG} ${ARGN})
+ check_flag(ADD_ROUND_OPT_FLAG ${ROUND_OPT_FLAG} ${ARGN})
check_flag(ADD_EXPLICIT_SIMD_OPT_FLAG ${EXPLICIT_SIMD_OPT_FLAG} ${ARGN})
if(LLVM_COMPILER_IS_GCC_COMPATIBLE)
@@ -16,8 +16,22 @@ function(_get_compile_options_from_flags output_var)
list(APPEND compile_options "-D__LIBC_RISCV_USE_FMA")
endif()
endif()
- if(ADD_SSE4_2_FLAG)
- list(APPEND compile_options "-msse4.2")
+ if(ADD_ROUND_OPT_FLAG)
+ if(LIBC_TARGET_ARCHITECTURE_IS_X86)
+ # ROUND_OPT_FLAG is only enabled if SSE4.2 is detected, not just SSE4.1,
+ # because there was code to check for SSE4.2 already, and few CPUs only
+ # have SSE4.1.
+ list(APPEND compile_options "-msse4.2")
+ endif()
+ if(LIBC_COMPILER_HAS_BUILTIN_CEIL_FLOOR_TRUNC)
+ list(APPEND compile_options "-D__LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC")
+ endif()
+ if(LIBC_COMPILER_HAS_BUILTIN_ROUND)
+ list(APPEND compile_options "-D__LIBC_USE_BUILTIN_ROUND")
+ endif()
+ if(LIBC_COMPILER_HAS_BUILTIN_ROUNDEVEN)
+ list(APPEND compile_options "-D__LIBC_USE_BUILTIN_ROUNDEVEN")
+ endif()
endif()
if(ADD_EXPLICIT_SIMD_OPT_FLAG)
list(APPEND compile_options "-D__LIBC_EXPLICIT_SIMD_OPT")
diff --git a/libc/cmake/modules/LLVMLibCFlagRules.cmake b/libc/cmake/modules/LLVMLibCFlagRules.cmake
index 18e36dfde5cc1..eca7ba8d183e6 100644
--- a/libc/cmake/modules/LLVMLibCFlagRules.cmake
+++ b/libc/cmake/modules/LLVMLibCFlagRules.cmake
@@ -277,6 +277,7 @@ if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE2")))
endif()
# Skip ROUND_OPT flag for targets that don't support SSE 4.2.
-if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")))
+if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")) OR
+ LIBC_TARGET_ARCHITECTURE_IS_AARCH64))
set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE)
endif()
diff --git a/libc/cmake/modules/compiler_features/check_builtin_ceil_floor_trunc.cpp b/libc/cmake/modules/compiler_features/check_builtin_ceil_floor_trunc.cpp
new file mode 100644
index 0000000000000..031dd9376f3c1
--- /dev/null
+++ b/libc/cmake/modules/compiler_features/check_builtin_ceil_floor_trunc.cpp
@@ -0,0 +1,9 @@
+float try_builtin_ceilf(float x) { return __builtin_ceilf(x); }
+float try_builtin_floorf(float x) { return __builtin_ceilf(x); }
+float try_builtin_truncf(float x) { return __builtin_truncf(x); }
+
+double try_builtin_ceil(double x) { return __builtin_ceil(x); }
+double try_builtin_floor(double x) { return __builtin_ceil(x); }
+double try_builtin_trunc(double x) { return __builtin_trunc(x); }
+
+int main() {}
diff --git a/libc/cmake/modules/compiler_features/check_builtin_round.cpp b/libc/cmake/modules/compiler_features/check_builtin_round.cpp
new file mode 100644
index 0000000000000..8c3065c2de06a
--- /dev/null
+++ b/libc/cmake/modules/compiler_features/check_builtin_round.cpp
@@ -0,0 +1,5 @@
+float try_builtin_roundf(float x) { return __builtin_roundf(x); }
+
+double try_builtin_round(double x) { return __builtin_round(x); }
+
+int main() {}
diff --git a/libc/cmake/modules/compiler_features/check_builtin_roundeven.cpp b/libc/cmake/modules/compiler_features/check_builtin_roundeven.cpp
new file mode 100644
index 0000000000000..2480abae84c36
--- /dev/null
+++ b/libc/cmake/modules/compiler_features/check_builtin_roundeven.cpp
@@ -0,0 +1,5 @@
+float try_builtin_roundevenf(float x) { return __builtin_roundevenf(x); }
+
+double try_builtin_roundeven(double x) { return __builtin_roundeven(x); }
+
+int main() {}
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 030c3d3a99a02..515c472ef309e 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -489,6 +489,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.rintl
libc.src.math.round
libc.src.math.roundf
+ libc.src.math.roundevenf
libc.src.math.roundl
libc.src.math.scalbn
libc.src.math.scalbnf
diff --git a/libc/src/__support/FPUtil/NearestIntegerOperations.h b/libc/src/__support/FPUtil/NearestIntegerOperations.h
index cff32938229d0..a9a0a97eebb5c 100644
--- a/libc/src/__support/FPUtil/NearestIntegerOperations.h
+++ b/libc/src/__support/FPUtil/NearestIntegerOperations.h
@@ -75,15 +75,17 @@ LIBC_INLINE T ceil(T x) {
}
uint32_t trim_size = FPBits<T>::FRACTION_LEN - exponent;
- StorageType trunc_mantissa =
- static_cast<StorageType>((bits.get_mantissa() >> trim_size) << trim_size);
- bits.set_mantissa(trunc_mantissa);
- T trunc_value = bits.get_val();
+ StorageType x_u = bits.uintval();
+ StorageType trunc_u =
+ static_cast<StorageType>((x_u >> trim_size) << trim_size);
// If x is already an integer, return it.
- if (trunc_value == x)
+ if (trunc_u == x_u)
return x;
+ bits.set_uintval(trunc_u);
+ T trunc_value = bits.get_val();
+
// If x is negative, the ceil operation is equivalent to the trunc operation.
if (is_neg)
return trunc_value;
@@ -130,15 +132,17 @@ LIBC_INLINE T round(T x) {
uint32_t trim_size = FPBits<T>::FRACTION_LEN - exponent;
bool half_bit_set =
bool(bits.get_mantissa() & (StorageType(1) << (trim_size - 1)));
- StorageType trunc_mantissa =
- static_cast<StorageType>((bits.get_mantissa() >> trim_size) << trim_size);
- bits.set_mantissa(trunc_mantissa);
- T trunc_value = bits.get_val();
+ StorageType x_u = bits.uintval();
+ StorageType trunc_u =
+ static_cast<StorageType>((x_u >> trim_size) << trim_size);
// If x is already an integer, return it.
- if (trunc_value == x)
+ if (trunc_u == x_u)
return x;
+ bits.set_uintval(trunc_u);
+ T trunc_value = bits.get_val();
+
if (!half_bit_set) {
// Fractional part is less than 0.5 so round value is the
// same as the trunc value.
@@ -188,16 +192,17 @@ round_using_specific_rounding_mode(T x, int rnd) {
}
uint32_t trim_size = FPBits<T>::FRACTION_LEN - exponent;
- FPBits<T> new_bits = bits;
- StorageType trunc_mantissa =
- static_cast<StorageType>((bits.get_mantissa() >> trim_size) << trim_size);
- new_bits.set_mantissa(trunc_mantissa);
- T trunc_value = new_bits.get_val();
+ StorageType x_u = bits.uintval();
+ StorageType trunc_u =
+ static_cast<StorageType>((x_u >> trim_size) << trim_size);
// If x is already an integer, return it.
- if (trunc_value == x)
+ if (trunc_u == x_u)
return x;
+ FPBits<T> new_bits(trunc_u);
+ T trunc_value = new_bits.get_val();
+
StorageType trim_value =
bits.get_mantissa() &
static_cast<StorageType>(((StorageType(1) << trim_size) - 1));
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 5e920307d39de..915fc076826f9 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -70,6 +70,8 @@ add_entrypoint_object(
-O3
DEPENDS
libc.src.__support.FPUtil.nearest_integer_operations
+ FLAGS
+ ROUND_OPT
)
add_entrypoint_object(
@@ -82,6 +84,8 @@ add_entrypoint_object(
-O3
DEPENDS
libc.src.__support.FPUtil.nearest_integer_operations
+ FLAGS
+ ROUND_OPT
)
add_entrypoint_object(
@@ -107,6 +111,9 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
+ libc.src.__support.macros.properties.architectures
+ FLAGS
+ ROUND_OPT
)
add_entrypoint_object(
@@ -455,6 +462,8 @@ add_entrypoint_object(
-O3
DEPENDS
libc.src.__support.FPUtil.nearest_integer_operations
+ FLAGS
+ ROUND_OPT
)
add_entrypoint_object(
@@ -467,6 +476,8 @@ add_entrypoint_object(
-O3
DEPENDS
libc.src.__support.FPUtil.nearest_integer_operations
+ FLAGS
+ ROUND_OPT
)
add_entrypoint_object(
@@ -492,6 +503,9 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
+ libc.src.__support.macros.properties.architectures
+ FLAGS
+ ROUND_OPT
)
add_entrypoint_object(
@@ -517,6 +531,8 @@ add_entrypoint_object(
-O3
DEPENDS
libc.src.__support.FPUtil.nearest_integer_operations
+ FLAGS
+ ROUND_OPT
)
add_entrypoint_object(
@@ -529,6 +545,8 @@ add_entrypoint_object(
-O3
DEPENDS
libc.src.__support.FPUtil.nearest_integer_operations
+ FLAGS
+ ROUND_OPT
)
add_entrypoint_object(
@@ -554,6 +572,9 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
+ libc.src.__support.macros.properties.architectures
+ FLAGS
+ ROUND_OPT
)
add_entrypoint_object(
@@ -579,6 +600,8 @@ add_entrypoint_object(
-O3
DEPENDS
libc.src.__support.FPUtil.nearest_integer_operations
+ FLAGS
+ ROUND_OPT
)
add_entrypoint_object(
@@ -591,6 +614,8 @@ add_entrypoint_object(
-O3
DEPENDS
libc.src.__support.FPUtil.nearest_integer_operations
+ FLAGS
+ ROUND_OPT
)
add_entrypoint_object(
@@ -616,6 +641,9 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
+ libc.src.__support.macros.properties.architectures
+ FLAGS
+ ROUND_OPT
)
add_entrypoint_object(
@@ -641,6 +669,8 @@ add_entrypoint_object(
-O3
DEPENDS
libc.src.__support.FPUtil.nearest_integer_operations
+ FLAGS
+ ROUND_OPT
)
add_entrypoint_object(
@@ -653,6 +683,8 @@ add_entrypoint_object(
-O3
DEPENDS
libc.src.__support.FPUtil.nearest_integer_operations
+ FLAGS
+ ROUND_OPT
)
add_entrypoint_object(
@@ -678,6 +710,9 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.nearest_integer_operations
+ libc.src.__support.macros.properties.architectures
+ FLAGS
+ ROUND_OPT
)
add_entrypoint_object(
diff --git a/libc/src/math/generic/ceil.cpp b/libc/src/math/generic/ceil.cpp
index efd0f246a9b90..63da803033e22 100644
--- a/libc/src/math/generic/ceil.cpp
+++ b/libc/src/math/generic/ceil.cpp
@@ -12,6 +12,12 @@
namespace LIBC_NAMESPACE {
-LLVM_LIBC_FUNCTION(double, ceil, (double x)) { return fputil::ceil(x); }
+LLVM_LIBC_FUNCTION(double, ceil, (double x)) {
+#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC
+ return __builtin_ceil(x);
+#else
+ return fputil::ceil(x);
+#endif
+}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/ceilf.cpp b/libc/src/math/generic/ceilf.cpp
index d49b34242da4f..51ef68f1dd871 100644
--- a/libc/src/math/generic/ceilf.cpp
+++ b/libc/src/math/generic/ceilf.cpp
@@ -12,6 +12,12 @@
namespace LIBC_NAMESPACE {
-LLVM_LIBC_FUNCTION(float, ceilf, (float x)) { return fputil::ceil(x); }
+LLVM_LIBC_FUNCTION(float, ceilf, (float x)) {
+#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC
+ return __builtin_ceilf(x);
+#else
+ return fputil::ceil(x);
+#endif
+}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/ceilf16.cpp b/libc/src/math/generic/ceilf16.cpp
index 205d7428f66e6..ee584c25a4ae9 100644
--- a/libc/src/math/generic/ceilf16.cpp
+++ b/libc/src/math/generic/ceilf16.cpp
@@ -9,9 +9,17 @@
#include "src/math/ceilf16.h"
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
+#include "src/__support/macros/properties/architectures.h"
namespace LIBC_NAMESPACE {
-LLVM_LIBC_FUNCTION(float16, ceilf16, (float16 x)) { return fputil::ceil(x); }
+LLVM_LIBC_FUNCTION(float16, ceilf16, (float16 x)) {
+#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC) && \
+ defined(LIBC_TARGET_ARCH_IS_AARCH64)
+ return static_cast<float16>(__builtin_ceilf(x));
+#else
+ return fputil::ceil(x);
+#endif
+}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/floor.cpp b/libc/src/math/generic/floor.cpp
index 60386f0c9cf81..bb58ca6a35402 100644
--- a/libc/src/math/generic/floor.cpp
+++ b/libc/src/math/generic/floor.cpp
@@ -12,6 +12,12 @@
namespace LIBC_NAMESPACE {
-LLVM_LIBC_FUNCTION(double, floor, (double x)) { return fputil::floor(x); }
+LLVM_LIBC_FUNCTION(double, floor, (double x)) {
+#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC
+ return __builtin_floor(x);
+#else
+ return fputil::floor(x);
+#endif
+}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/floorf.cpp b/libc/src/math/generic/floorf.cpp
index 85666688685dc..459f338d897be 100644
--- a/libc/src/math/generic/floorf.cpp
+++ b/libc/src/math/generic/floorf.cpp
@@ -12,6 +12,12 @@
namespace LIBC_NAMESPACE {
-LLVM_LIBC_FUNCTION(float, floorf, (float x)) { return fputil::floor(x); }
+LLVM_LIBC_FUNCTION(float, floorf, (float x)) {
+#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC
+ return __builtin_floorf(x);
+#else
+ return fputil::floor(x);
+#endif
+}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/floorf16.cpp b/libc/src/math/generic/floorf16.cpp
index db0b326c0e5f6..6d8c497946c84 100644
--- a/libc/src/math/generic/floorf16.cpp
+++ b/libc/src/math/generic/floorf16.cpp
@@ -9,9 +9,17 @@
#include "src/math/floorf16.h"
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
+#include "src/__support/macros/properties/architectures.h"
namespace LIBC_NAMESPACE {
-LLVM_LIBC_FUNCTION(float16, floorf16, (float16 x)) { return fputil::floor(x); }
+LLVM_LIBC_FUNCTION(float16, floorf16, (float16 x)) {
+#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC) && \
+ defined(LIBC_TARGET_ARCH_IS_AARCH64)
+ return static_cast<float16>(__builtin_floorf(x));
+#else
+ return fputil::floor(x);
+#endif
+}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/round.cpp b/libc/src/math/generic/round.cpp
index ca8f19f35f7fe..d873524ad9f42 100644
--- a/libc/src/math/generic/round.cpp
+++ b/libc/src/math/generic/round.cpp
@@ -12,6 +12,12 @@
namespace LIBC_NAMESPACE {
-LLVM_LIBC_FUNCTION(double, round, (double x)) { return fputil::round(x); }
+LLVM_LIBC_FUNCTION(double, round, (double x)) {
+#ifdef __LIBC_USE_BUILTIN_ROUND
+ return __builtin_round(x);
+#else
+ return fputil::round(x);
+#endif
+}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/roundeven.cpp b/libc/src/math/generic/roundeven.cpp
index 5f2adf9b5fce6..76409d526e208 100644
--- a/libc/src/math/generic/roundeven.cpp
+++ b/libc/src/math/generic/roundeven.cpp
@@ -13,7 +13,11 @@
namespace LIBC_NAMESPACE {
LLVM_LIBC_FUNCTION(double, roundeven, (double x)) {
+#ifdef __LIBC_USE_BUILTIN_ROUNDEVEN
+ return __builtin_roundeven(x);
+#else
return fputil::round_using_specific_rounding_mode(x, FP_INT_TONEAREST);
+#endif
}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/roundevenf.cpp b/libc/src/math/generic/roundevenf.cpp
index 353bec74ecf02..22538272bedbd 100644
--- a/libc/src/math/generic/roundevenf.cpp
+++ b/libc/src/math/generic/roundevenf.cpp
@@ -13,7 +13,11 @@
namespace LIBC_NAMESPACE {
LLVM_LIBC_FUNCTION(float, roundevenf, (float x)) {
+#ifdef __LIBC_USE_BUILTIN_ROUNDEVEN
+ return __builtin_roundevenf(x);
+#else
return fputil::round_using_specific_rounding_mode(x, FP_INT_TONEAREST);
+#endif
}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/roundevenf16.cpp b/libc/src/math/generic/roundevenf16.cpp
index 9ecf79ce6f6c2..90c75a10d3ddb 100644
--- a/libc/src/math/generic/roundevenf16.cpp
+++ b/libc/src/math/generic/roundevenf16.cpp
@@ -9,11 +9,17 @@
#include "src/math/roundevenf16.h"
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
+#include "src/__support/macros/properties/architectures.h"
namespace LIBC_NAMESPACE {
LLVM_LIBC_FUNCTION(float16, roundevenf16, (float16 x)) {
+#if defined(__LIBC_USE_BUILTIN_ROUNDEVEN) && \
+ defined(LIBC_TARGET_ARCH_IS_AARCH64)
+ return static_cast<float16>(__builtin_roundevenf(x));
+#else
return fputil::round_using_specific_rounding_mode(x, FP_INT_TONEAREST);
+#endif
}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/roundf.cpp b/libc/src/math/generic/roundf.cpp
index 9627390ea8b8d..8b3add7cb9e2d 100644
--- a/libc/src/math/generic/roundf.cpp
+++ b/libc/src/math/generic/roundf.cpp
@@ -12,6 +12,12 @@
namespace LIBC_NAMESPACE {
-LLVM_LIBC_FUNCTION(float, roundf, (float x)) { return fputil::round(x); }
+LLVM_LIBC_FUNCTION(float, roundf, (float x)) {
+#ifdef __LIBC_USE_BUILTIN_ROUND
+ return __builtin_roundf(x);
+#else
+ return fputil::round(x);
+#endif
+}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/roundf16.cpp b/libc/src/math/generic/roundf16.cpp
index 75a255d7798d5..fca0194ec5dbb 100644
--- a/libc/src/math/generic/roundf16.cpp
+++ b/libc/src/math/generic/roundf16.cpp
@@ -9,9 +9,16 @@
#include "src/math/roundf16.h"
#include "src/__support/FPUtil/NearestIntegerOperations.h"
#include "src/__support/common.h"
+#include "src/__support/macros/properties/architectures.h"
namespace LIBC_NAMESPACE {
-LLVM_LIBC_FUNCTION(float16, roundf16, (float16 x)) { return fputil::round(x); }
+LLVM_LIBC_FUNCTION(float16, roundf16, (float16 x)) {
+#if defined(__LIBC_USE_BUILTIN_ROUND) && defined(LIBC_TARGET_ARCH_IS_AARCH64)
+ return static_cast<float16>(__builtin_roundf(x));
+#else
+ return fputil::round(x);
+#endif
+}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/trunc.cpp b/libc/src/math/generic/trunc.cpp
index d171ab1f092fd..5761565646c36 100644
--- a/libc/src/math/generic/trunc.cpp
+++ b/libc/src/math/generic/trunc.cpp
@@ -12,6 +12,12 @@
namespace L...
[truncated]
|
if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")) OR | ||
LIBC_TARGET_ARCHITECTURE_IS_AARCH64)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be nice if we had a CMake formatter. I copied the style from the lines above, but the OR line is longer than 80 chars (just like above).
libc.src.__support.macros.properties.types | ||
libc.src.__support.FPUtil.nearest_integer_operations | ||
libc.src.__support.macros.properties.architectures |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are CMakeLists.txt files where libc.src.__support.macros* appears before libc.src.__support.FPUtil.* and files where it appears after. I assumed that it was because the header's dependencies are listed first and then the implementation file's, not because the ordering is random.
libc/src/math/generic/ceilf16.cpp
Outdated
#if defined(__LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC) && \ | ||
defined(LIBC_TARGET_ARCH_IS_AARCH64) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably would be faster than the generic implementation on x86-64 with AVX-512 FP16, but I don't have a CPU that supports it. It could also be interesting to try on a Zen 2 or later CPU, because F16C instructions have lower latencies on those: https://uops.info/table.html?search=vcvtph2ps%20(xmm%2C%20xmm)&cb_lat=on&cb_tp=on&cb_uops=on&cb_ports=on&cb_ADLP=on&cb_ADLE=on&cb_ZENp=on&cb_ZEN2=on&cb_measurements=on&cb_doc=on&cb_others=on.
On my i7-13700H however, using F16C instructions is slower than using the generic functions.
Edit: builtins are actually faster but only with GCC and -march=native.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Relevant: #98630.
libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp
Outdated
Show resolved
Hide resolved
#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC | ||
return __builtin_ceilf(x); | ||
#else | ||
return fputil::ceil(x); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we not just make fputil::ceil() use the builtin? That way internal usage gets the more optimal version.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I talked about this with Tue yesterday. We were initially talking about doing this in FPUtil, but yesterday he suggested having the builtin/generic switch in the entrypoints directly because fputil::{ceil,floor,round,trunc} are only used in the respective entrypoints (actually fputil::trunc is used in unit tests for modf, but nowhere in libc/src/).
libc/src/math/generic/ceilf.cpp
Outdated
@@ -12,6 +12,12 @@ | |||
|
|||
namespace LIBC_NAMESPACE { | |||
|
|||
LLVM_LIBC_FUNCTION(float, ceilf, (float x)) { return fputil::ceil(x); } | |||
LLVM_LIBC_FUNCTION(float, ceilf, (float x)) { | |||
#ifdef __LIBC_USE_BUILTIN_CEIL_FLOOR_TRUNC |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We shouldn't use double underscores for the name.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
libc/cmake/modules/LLVMLibCCompileOptionRules.cmake defines __LIBC_RISCV_USE_FMA and __LIBC_EXPLICIT_SIMD_OPT. I guessed that a macro defined through compile options should have __.
34ffd03
to
11e4992
Compare
|
11e4992
to
50f1237
Compare
float try_builtin_ceilf(float x) { return __builtin_ceilf(x); } | ||
float try_builtin_floorf(float x) { return __builtin_ceilf(x); } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oops, try_builtin_floorf checks for __builtin_ceilf.
Before:
|
After:
|
… when available Fix check_builtin_ceil_floor_trunc.cpp not checking for __builtin_floor{,f}.
… when available Optimize rint{,f,f16} using __builtin_rint{,f} when available.
… when available Nit: sort roundeven after round.
… when available Fix builtin detection succeeding even when calls to functions provided by the system libc are generated.
… when available Delete inline assembly implementations for AArch64.
… when available Fix GCC warning "variable ‘result’ set but not used".
… when available Fix builtin detection on Android due to missing _start being an error with LLD from Android NDK r26d.
911d1ef
to
7f212a3
Compare
Rebased to fix the merge conflicts. I still get these "undefined hidden symbol" linking errors I've been getting since the switch to LIBC_NAMESPACE_DECL, unless I change MPFRUtils to use LIBC_NAMESPACE:
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index b67a9da40bd7..3f1758b20519 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -23,7 +23,7 @@
template <typename T> using FPBits = LIBC_NAMESPACE::fputil::FPBits<T>;
-namespace LIBC_NAMESPACE_DECL {
+namespace LIBC_NAMESPACE {
namespace testing {
namespace mpfr {
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index 28390af9ee6d..9b0002209bed 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -16,7 +16,7 @@
#include <stdint.h>
-namespace LIBC_NAMESPACE_DECL {
+namespace LIBC_NAMESPACE {
namespace testing {
namespace mpfr { |
…vailable (#98376) Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251690
No description provided.