-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[libc++] Optimize ranges::rotate for vector<bool>::iterator #121168
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
05e0cb3
to
e1bd88f
Compare
0130b4a
to
0d08932
Compare
4a2d895
to
8fce8d6
Compare
8fce8d6
to
19cdccc
Compare
@llvm/pr-subscribers-libcxx Author: Peng Liu (winner245) ChangesThis PR optimizes the performance of
Closes #64038. (Note: I've adopted the same benchmark code Full diff: https://github.com/llvm/llvm-project/pull/121168.diff 8 Files Affected:
diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index 633d114693c82..f41cf9b51c292 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -46,7 +46,7 @@ Implemented Papers
Improvements and New Features
-----------------------------
-- The ``std::ranges::{copy, copy_n, copy_backward, move, move_backward}`` algorithms have been optimized for
+- The ``std::ranges::{copy, copy_n, copy_backward, move, move_backward, rotate}`` algorithms have been optimized for
``std::vector<bool>::iterator``, resulting in a performance improvement of up to 2000x.
- The ``std::ranges::equal`` algorithm has been optimized for ``std::vector<bool>::iterator``, resulting in a performance
diff --git a/libcxx/include/__algorithm/rotate.h b/libcxx/include/__algorithm/rotate.h
index df4ca95aac95b..c676980f0c1ca 100644
--- a/libcxx/include/__algorithm/rotate.h
+++ b/libcxx/include/__algorithm/rotate.h
@@ -9,12 +9,19 @@
#ifndef _LIBCPP___ALGORITHM_ROTATE_H
#define _LIBCPP___ALGORITHM_ROTATE_H
+#include <__algorithm/copy.h>
+#include <__algorithm/copy_backward.h>
#include <__algorithm/iterator_operations.h>
#include <__algorithm/move.h>
#include <__algorithm/move_backward.h>
#include <__algorithm/swap_ranges.h>
#include <__config>
+#include <__cstddef/size_t.h>
+#include <__fwd/bit_reference.h>
#include <__iterator/iterator_traits.h>
+#include <__memory/construct_at.h>
+#include <__memory/pointer_traits.h>
+#include <__type_traits/is_constant_evaluated.h>
#include <__type_traits/is_trivially_assignable.h>
#include <__utility/move.h>
#include <__utility/pair.h>
@@ -185,6 +192,44 @@ __rotate(_Iterator __first, _Iterator __middle, _Sentinel __last) {
return _Ret(std::move(__result), std::move(__last_iter));
}
+template <class, class _Cp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, false>, __bit_iterator<_Cp, false> >
+__rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, __bit_iterator<_Cp, false> __last) {
+ using _I1 = __bit_iterator<_Cp, false>;
+ using difference_type = typename _I1::difference_type;
+ difference_type __d1 = __middle - __first;
+ difference_type __d2 = __last - __middle;
+ _I1 __r = __first + __d2;
+ while (__d1 != 0 && __d2 != 0) {
+ if (__d1 <= __d2) {
+ if (__d1 <= __bit_array<_Cp>::capacity()) {
+ __bit_array<_Cp> __b(__d1);
+ std::copy(__first, __middle, __b.begin());
+ std::copy(__b.begin(), __b.end(), std::copy(__middle, __last, __first));
+ break;
+ } else {
+ __bit_iterator<_Cp, false> __mp = std::swap_ranges(__first, __middle, __middle);
+ __first = __middle;
+ __middle = __mp;
+ __d2 -= __d1;
+ }
+ } else {
+ if (__d2 <= __bit_array<_Cp>::capacity()) {
+ __bit_array<_Cp> __b(__d2);
+ std::copy(__middle, __last, __b.begin());
+ std::copy_backward(__b.begin(), __b.end(), std::copy_backward(__first, __middle, __last));
+ break;
+ } else {
+ __bit_iterator<_Cp, false> __mp = __first + __d2;
+ std::swap_ranges(__first, __mp, __middle);
+ __first = __mp;
+ __d1 -= __d2;
+ }
+ }
+ }
+ return std::make_pair(__r, __last);
+}
+
template <class _ForwardIterator>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) {
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index f91250c4a440d..af88c253762b5 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -16,6 +16,7 @@
#include <__algorithm/copy_n.h>
#include <__algorithm/equal.h>
#include <__algorithm/min.h>
+#include <__algorithm/rotate.h>
#include <__algorithm/swap_ranges.h>
#include <__assert>
#include <__bit/countr.h>
@@ -216,8 +217,6 @@ private:
__mask_(__m) {}
};
-// rotate
-
template <class _Cp>
struct __bit_array {
using difference_type _LIBCPP_NODEBUG = typename __size_difference_type_traits<_Cp>::difference_type;
@@ -249,45 +248,6 @@ struct __bit_array {
}
};
-template <class _Cp>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false>
-rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, __bit_iterator<_Cp, false> __last) {
- using _I1 = __bit_iterator<_Cp, false>;
- using difference_type = typename _I1::difference_type;
-
- difference_type __d1 = __middle - __first;
- difference_type __d2 = __last - __middle;
- _I1 __r = __first + __d2;
- while (__d1 != 0 && __d2 != 0) {
- if (__d1 <= __d2) {
- if (__d1 <= __bit_array<_Cp>::capacity()) {
- __bit_array<_Cp> __b(__d1);
- std::copy(__first, __middle, __b.begin());
- std::copy(__b.begin(), __b.end(), std::copy(__middle, __last, __first));
- break;
- } else {
- __bit_iterator<_Cp, false> __mp = std::swap_ranges(__first, __middle, __middle);
- __first = __middle;
- __middle = __mp;
- __d2 -= __d1;
- }
- } else {
- if (__d2 <= __bit_array<_Cp>::capacity()) {
- __bit_array<_Cp> __b(__d2);
- std::copy(__middle, __last, __b.begin());
- std::copy_backward(__b.begin(), __b.end(), std::copy_backward(__first, __middle, __last));
- break;
- } else {
- __bit_iterator<_Cp, false> __mp = __first + __d2;
- std::swap_ranges(__first, __mp, __middle);
- __first = __mp;
- __d1 -= __d2;
- }
- }
- }
- return __r;
-}
-
template <class _Cp, bool _IsConst, typename _Cp::__storage_type>
class __bit_iterator {
public:
@@ -507,9 +467,9 @@ private:
template <class, class _Cl, class _Cr>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
__swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
- template <class _Dp>
- _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
- rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
+ template <class, class _Dp>
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false> >
+ __rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
template <class _Dp, bool _IsConst1, bool _IsConst2>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
__equal_aligned(__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>);
diff --git a/libcxx/include/__fwd/bit_reference.h b/libcxx/include/__fwd/bit_reference.h
index d65f043e89ad6..451e76d548dc3 100644
--- a/libcxx/include/__fwd/bit_reference.h
+++ b/libcxx/include/__fwd/bit_reference.h
@@ -23,6 +23,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _Cp, bool _IsConst, typename _Cp::__storage_type = 0>
class __bit_iterator;
+template <class _Cp>
+struct __bit_array;
+
template <class, class = void>
struct __size_difference_type_traits;
diff --git a/libcxx/include/__vector/vector_bool.h b/libcxx/include/__vector/vector_bool.h
index 1d3960bd01597..b02b0dc1725f2 100644
--- a/libcxx/include/__vector/vector_bool.h
+++ b/libcxx/include/__vector/vector_bool.h
@@ -14,6 +14,7 @@
#include <__algorithm/fill_n.h>
#include <__algorithm/iterator_operations.h>
#include <__algorithm/max.h>
+#include <__algorithm/rotate.h>
#include <__assert>
#include <__bit_reference>
#include <__config>
diff --git a/libcxx/test/benchmarks/algorithms/modifying/rotate.bench.cpp b/libcxx/test/benchmarks/algorithms/modifying/rotate.bench.cpp
new file mode 100644
index 0000000000000..eb84eb5cdf25c
--- /dev/null
+++ b/libcxx/test/benchmarks/algorithms/modifying/rotate.bench.cpp
@@ -0,0 +1,64 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include <algorithm>
+#include <cstddef>
+#include <deque>
+#include <iterator>
+#include <list>
+#include <ranges>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "../../GenerateInput.h"
+#include "test_macros.h"
+
+int main(int argc, char** argv) {
+ auto std_rotate = [](auto first, auto middle, auto last) { return std::rotate(first, middle, last); };
+
+ // Benchmark {std,ranges}::rotate where we rotate various fractions of the range. It is possible to
+ // special-case some of these fractions to cleverly perform swap_ranges.
+ {
+ auto bm = []<class Container>(std::string name, auto rotate, double fraction) {
+ benchmark::RegisterBenchmark(
+ name,
+ [=](auto& st) {
+ std::size_t const size = st.range(0);
+ using ValueType = typename Container::value_type;
+ Container c;
+ std::generate_n(std::back_inserter(c), size, [] { return Generate<ValueType>::random(); });
+
+ auto nth = std::next(c.begin(), static_cast<std::size_t>(size * fraction));
+ for ([[maybe_unused]] auto _ : st) {
+ benchmark::DoNotOptimize(c);
+ auto result = rotate(c.begin(), nth, c.end());
+ benchmark::DoNotOptimize(result);
+ }
+ })
+ ->Arg(32)
+ ->Arg(50) // non power-of-two
+ ->RangeMultiplier(2)
+ ->Range(64, 1 << 20);
+ };
+
+ bm.operator()<std::vector<bool>>("std::rotate(vector<bool>) (by 1/4)", std_rotate, 0.25);
+ bm.operator()<std::vector<bool>>("std::rotate(vector<bool>) (by 51%)", std_rotate, 0.51);
+#if TEST_STD_VER >= 23 // vector<bool>::iterator is not std::permutable before C++23
+ bm.operator()<std::vector<bool>>("rng::rotate(vector<bool>) (by 1/4)", std::ranges::rotate, 0.25);
+ bm.operator()<std::vector<bool>>("rng::rotate(vector<bool>) (by 51%)", std::ranges::rotate, 0.51);
+#endif
+ }
+
+ benchmark::Initialize(&argc, argv);
+ benchmark::RunSpecifiedBenchmarks();
+ benchmark::Shutdown();
+ return 0;
+}
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp
index 56d0dfaf9ee27..5f594400e8321 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp
@@ -21,8 +21,10 @@
#include <array>
#include <cassert>
#include <ranges>
+#include <vector>
#include "almost_satisfies_types.h"
+#include "test_macros.h"
#include "test_iterators.h"
#include "type_algorithms.h"
@@ -131,6 +133,29 @@ constexpr void test_iter_sent() {
test_one<Iter, Sent, 7>({1, 2, 3, 4, 5, 6, 7}, 7, {1, 2, 3, 4, 5, 6, 7});
}
+#if TEST_STD_VER >= 23
+template <std::size_t N>
+TEST_CONSTEXPR_CXX20 bool test_vector_bool() {
+ for (int offset = -4; offset <= 4; ++offset) {
+ std::vector<bool> a(N, false);
+ std::size_t mid = N / 2 + offset;
+ for (std::size_t i = mid; i < N; ++i)
+ a[i] = true;
+
+ // (iterator, sentinel)-overload
+ std::ranges::rotate(std::ranges::begin(a), std::ranges::begin(a) + mid, std::ranges::end(a));
+ for (std::size_t i = 0; i < N; ++i)
+ assert(a[i] == (i < N - mid));
+
+ // range-overload
+ std::ranges::rotate(a, std::ranges::begin(a) + (N - mid));
+ for (std::size_t i = 0; i < N; ++i)
+ assert(a[i] == (i >= mid));
+ }
+ return true;
+};
+#endif
+
constexpr bool test() {
types::for_each(types::forward_iterator_list<int*>(), []<class Iter>() {
test_iter_sent<Iter, Iter>();
@@ -167,6 +192,16 @@ constexpr bool test() {
}
}
+#if TEST_STD_VER >= 23
+ test_vector_bool<8>();
+ test_vector_bool<19>();
+ test_vector_bool<32>();
+ test_vector_bool<49>();
+ test_vector_bool<64>();
+ test_vector_bool<199>();
+ test_vector_bool<256>();
+#endif
+
return true;
}
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp
index ce45bb1916816..448abdddbb4cb 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp
@@ -15,6 +15,7 @@
#include <cassert>
#include <memory>
#include <type_traits>
+#include <vector>
#include "test_macros.h"
#include "test_iterators.h"
@@ -420,6 +421,20 @@ struct TestUniquePtr {
#endif // TEST_STD_VER >= 11
+template <std::size_t N>
+TEST_CONSTEXPR_CXX20 bool test_vector_bool() {
+ for (int offset = -4; offset <= 4; ++offset) {
+ std::vector<bool> a(N, false);
+ std::size_t mid = N / 2 + offset;
+ for (std::size_t i = mid; i < N; ++i)
+ a[i] = true;
+ std::rotate(a.begin(), a.begin() + mid, a.end());
+ for (std::size_t i = 0; i < N; ++i)
+ assert(a[i] == (i < N - mid));
+ }
+ return true;
+};
+
TEST_CONSTEXPR_CXX20 bool test() {
types::for_each(types::forward_iterator_list<int*>(), TestIter());
@@ -428,6 +443,14 @@ TEST_CONSTEXPR_CXX20 bool test() {
types::for_each(types::forward_iterator_list<std::unique_ptr<int>*>(), TestUniquePtr());
#endif
+ test_vector_bool<8>();
+ test_vector_bool<19>();
+ test_vector_bool<32>();
+ test_vector_bool<49>();
+ test_vector_bool<64>();
+ test_vector_bool<199>();
+ test_vector_bool<256>();
+
return true;
}
|
) This PR optimizes the performance of `std::ranges::rotate` for `vector<bool>::iterator`. The optimization yields a performance improvement of up to 2096x. Closes llvm#64038.
This PR optimizes the performance of
std::ranges::rotate
forvector<bool>::iterator
. The optimization yields a performance improvement of up to 2096x.Closes #64038.
(Note: I've adopted the same benchmark code
modifying/rotate.bench.cpp
from #127354 for consistency. This ensures minimum conflicts when merging them.)