-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[libc++] Optimize std::minmax_element #135495
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-libcxx Author: Leslie (wsehjk) Changes — Full diff: https://github.com/llvm/llvm-project/pull/135495.diff 1 File Affected:
diff --git a/libcxx/include/__algorithm/minmax_element.h b/libcxx/include/__algorithm/minmax_element.h
index dc0c3a818cd57..9f6ca60267e42 100644
--- a/libcxx/include/__algorithm/minmax_element.h
+++ b/libcxx/include/__algorithm/minmax_element.h
@@ -15,6 +15,7 @@
#include <__iterator/iterator_traits.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_callable.h>
+#include <__type_traits/is_integral.h>
#include <__utility/pair.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -38,9 +39,10 @@ class _MinmaxElementLessFunc {
}
};
-template <class _Iter, class _Sent, class _Proj, class _Comp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
-__minmax_element_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) {
+template<class _Iter, class _Sent, class _Proj, class _Comp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
+__minmax_element_loop(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) {
+ __builtin_printf("Debug: __minmax_element_impl called, %d\n", __LINE__); // 不需要 iostream
auto __less = _MinmaxElementLessFunc<_Comp, _Proj>(__comp, __proj);
pair<_Iter, _Iter> __result(__first, __first);
@@ -78,6 +80,50 @@ __minmax_element_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj)
return __result;
}
+
+// template<class _Tp>
+// typename std::iterator_traits<_Iter>::value_type
+// __minmax_element_vectorized(_Tp __first, _Tp __last) {
+
+// }
+
+
+template <class _Iter, class _Proj, class _Comp,
+ __enable_if_t<is_integral_v<typename std::iterator_traits<_Iter>::value_type>
+ && __is_identity<_Proj>::value && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>,
+ int> = 0
+ >
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
+__minmax_element_impl(_Iter __first, _Iter __last, _Comp& __comp, _Proj& __proj) {
+ if (__libcpp_is_constant_evaluated()) {
+ return __minmax_element_loop(__first, __last, __comp, __proj);
+ } else {
+
+ }
+}
+
+template <class _Iter, class _Proj, class _Comp,
+ __enable_if_t<!is_integral_v<typename std::iterator_traits<_Iter>::value_type>
+ && __can_map_to_integer_v<typename std::iterator_traits<_Iter>::value_type>
+ && __libcpp_is_trivially_equality_comparable<typename std::iterator_traits<_Iter>::value_type, typename std::iterator_traits<_Iter>::value_type>::value
+ && __is_identity<_Proj>::value && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>,
+ int> = 0
+ >
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
+__minmax_element_impl(_Iter __first, _Iter __last, _Comp& __comp, _Proj& __proj) {
+ if (__libcpp_is_constant_evaluated()) {
+ return __minmax_element_loop(__first, __last, __comp, __proj);
+ } else {
+
+ }
+}
+
+template <class _Iter, class _Sent, class _Proj, class _Comp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
+__minmax_element_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) {
+ return std::__minmax_element_loop(__first, __last, __comp, __proj);
+}
+
template <class _ForwardIterator, class _Compare>
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_ForwardIterator, _ForwardIterator>
minmax_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) {
|
You can test this locally with the following command: git-clang-format --diff HEAD~1 HEAD --extensions h -- libcxx/include/__algorithm/minmax_element.h View the diff from clang-format here. diff --git a/libcxx/include/__algorithm/minmax_element.h b/libcxx/include/__algorithm/minmax_element.h
index 67287b413..8894e0003 100644
--- a/libcxx/include/__algorithm/minmax_element.h
+++ b/libcxx/include/__algorithm/minmax_element.h
@@ -41,8 +41,8 @@ public:
}
};
-template<class _Iter, class _Sent, class _Proj, class _Comp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
+template <class _Iter, class _Sent, class _Proj, class _Comp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
__minmax_element_loop(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) {
auto __less = _MinmaxElementLessFunc<_Comp, _Proj>(__comp, __proj);
@@ -82,8 +82,8 @@ __minmax_element_loop(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj)
}
#if _LIBCPP_VECTORIZE_ALGORITHMS
-template<class _Iter>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
+template <class _Iter>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
__minmax_element_vectorized(_Iter __first, _Iter __last) {
using __value_type = __iter_value_type<_Iter>;
constexpr size_t __unroll_count = 4;
@@ -100,63 +100,63 @@ __minmax_element_vectorized(_Iter __first, _Iter __last) {
__value_type __max_element = *__first;
_Iter __min_block_start = __first;
- _Iter __min_block_end = __last + 1;
+ _Iter __min_block_end = __last + 1;
_Iter __max_block_start = __first;
- _Iter __max_block_end = __last + 1;
-
- while(static_cast<size_t>(__last - __first) >= __unroll_count * __vec_size) [[__likely__]]{
+ _Iter __max_block_end = __last + 1;
+
+ while (static_cast<size_t>(__last - __first) >= __unroll_count * __vec_size) [[__likely__]] {
__vec_type __vec[__unroll_count];
- for(size_t __i = 0; __i < __unroll_count; ++__i) {
+ for (size_t __i = 0; __i < __unroll_count; ++__i) {
__vec[__i] = std::__load_vector<__vec_type>(__first + __i * __vec_size);
// block min
auto __block_min_element = __builtin_reduce_min(__vec[__i]);
if (__block_min_element < __min_element) {
- __min_element = __block_min_element;
+ __min_element = __block_min_element;
__min_block_start = __first + __i * __vec_size;
- __min_block_end = __first + (__i + 1) * __vec_size;
+ __min_block_end = __first + (__i + 1) * __vec_size;
}
// block max
auto __block_max_element = __builtin_reduce_max(__vec[__i]);
if (__block_max_element >= __max_element) {
- __max_element = __block_max_element;
+ __max_element = __block_max_element;
__max_block_start = __first + __i * __vec_size;
- __max_block_end = __first + (__i + 1) * __vec_size;
+ __max_block_end = __first + (__i + 1) * __vec_size;
}
}
__first += __unroll_count * __vec_size;
}
- // remaining vectors
- while(static_cast<size_t>(__last - __first) >= __vec_size) {
- __vec_type __vec = std::__load_vector<__vec_type>(__first);
- auto __block_min_element = __builtin_reduce_min(__vec);
- if (__block_min_element < __min_element) {
- __min_element = __block_min_element;
- __min_block_start = __first;
- __min_block_end = __first + __vec_size;
- }
- // max
- auto __block_max_element = __builtin_reduce_max(__vec);
- if (__block_max_element >= __max_element) {
- __max_element = __block_max_element;
- __max_block_start = __first;
- __max_block_end = __first + __vec_size;
- }
- __first += __vec_size;
+ // remaining vectors
+ while (static_cast<size_t>(__last - __first) >= __vec_size) {
+ __vec_type __vec = std::__load_vector<__vec_type>(__first);
+ auto __block_min_element = __builtin_reduce_min(__vec);
+ if (__block_min_element < __min_element) {
+ __min_element = __block_min_element;
+ __min_block_start = __first;
+ __min_block_end = __first + __vec_size;
+ }
+ // max
+ auto __block_max_element = __builtin_reduce_max(__vec);
+ if (__block_max_element >= __max_element) {
+ __max_element = __block_max_element;
+ __max_block_start = __first;
+ __max_block_end = __first + __vec_size;
+ }
+ __first += __vec_size;
}
if (__last > __first) {
- auto __epilogue = std::__minmax_element_loop(__first, __last, __comp, __proj);
+ auto __epilogue = std::__minmax_element_loop(__first, __last, __comp, __proj);
__value_type __epilogue_min_element = *__epilogue.first;
__value_type __epilogue_max_element = *__epilogue.second;
if (__epilogue_min_element < __min_element && __epilogue_max_element >= __max_element) {
return __epilogue;
} else if (__epilogue_min_element < __min_element) {
- __min_element = __epilogue_min_element;
+ __min_element = __epilogue_min_element;
__min_block_start = __epilogue.first;
__min_block_end = __epilogue.first; // this is global min_element
} else if (__epilogue_max_element >= __max_element) {
- __max_element = __epilogue_max_element;
+ __max_element = __epilogue_max_element;
__max_block_start = __epilogue.second;
__max_block_end = __epilogue.second; // this is global max_element
}
@@ -179,14 +179,13 @@ __minmax_element_vectorized(_Iter __first, _Iter __last) {
return {__min_block_start, __max_block_start};
}
-template <class _Iter, class _Proj, class _Comp,
- __enable_if_t
- <is_integral_v<__iter_value_type<_Iter>>
- && is_same_v<__iterator_category_type<_Iter>, random_access_iterator_tag>
- && __is_identity<_Proj>::value
- && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>,
- int> = 0
- >
+template <class _Iter,
+ class _Proj,
+ class _Comp,
+ __enable_if_t<is_integral_v<__iter_value_type<_Iter>> &&
+ is_same_v<__iterator_category_type<_Iter>, random_access_iterator_tag> &&
+ __is_identity<_Proj>::value && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>,
+ int> = 0 >
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter, _Iter>
__minmax_element_impl(_Iter __first, _Iter __last, _Comp& __comp, _Proj& __proj) {
if (__libcpp_is_constant_evaluated()) {
@@ -200,9 +199,9 @@ __minmax_element_impl(_Iter __first, _Iter __last, _Comp& __comp, _Proj& __proj)
// __enable_if_t
// <!is_integral_v<__iter_value_type<_Iter>>
// && is_same_v<__iterator_category_type<_Iter>, random_access_iterator_tag>
-// && __can_map_to_integer_v<__iter_value_type<_Iter>>
+// && __can_map_to_integer_v<__iter_value_type<_Iter>>
// && __libcpp_is_trivially_equality_comparable<__iter_value_type<_Iter>, __iter_value_type<_Iter>>::value
-// && __is_identity<_Proj>::value
+// && __is_identity<_Proj>::value
// && __desugars_to_v<__less_tag, _Comp, _Iter, _Iter>,
// int> = 0
// >
|
Hi @hiraditya @philnik777, could you please review my code? Thanks |
It seems like you are getting a speedup on large sizes (>64, etc.). Maybe use the default algorithm for small sizes and switch to the new implementation otherwise? |
No, the speedups for |
I think the main problem is that you're currently reducing in every single iteration. If we search for the minimum and maximum element by line instead, I think the performance would be significantly better, since we'd be able to reduce only once in the end instead. |
Hi, I don't quite get it. I'm reducing in every block to get the |
This PR closes #112397.
This method is inspired by find and locate. I slice the input into fixed-size blocks and update the `__max_element`, `__max_block_start`, and `__max_block_end` variables as each block is processed. In the end, the code iterates over the block delimited by `__max_block_start` and `__max_block_end` to locate the `__max_element`. The position of the min element is found in the same way.
However, the benchmark result is not as promising as expected. This may be because I'm testing on macOS, which only supports
__sse__