Skip to content

[libc++] Optimize ranges::rotate for vector<bool>::iterator #121168

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion libcxx/docs/ReleaseNotes/21.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Implemented Papers
Improvements and New Features
-----------------------------

- The ``std::ranges::{copy, copy_n, copy_backward, move, move_backward}`` algorithms have been optimized for
- The ``std::ranges::{copy, copy_n, copy_backward, move, move_backward, rotate}`` algorithms have been optimized for
``std::vector<bool>::iterator``, resulting in a performance improvement of up to 2000x.

- The ``std::ranges::equal`` algorithm has been optimized for ``std::vector<bool>::iterator``, resulting in a performance
Expand Down
45 changes: 45 additions & 0 deletions libcxx/include/__algorithm/rotate.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,19 @@
#ifndef _LIBCPP___ALGORITHM_ROTATE_H
#define _LIBCPP___ALGORITHM_ROTATE_H

#include <__algorithm/copy.h>
#include <__algorithm/copy_backward.h>
#include <__algorithm/iterator_operations.h>
#include <__algorithm/move.h>
#include <__algorithm/move_backward.h>
#include <__algorithm/swap_ranges.h>
#include <__config>
#include <__cstddef/size_t.h>
#include <__fwd/bit_reference.h>
#include <__iterator/iterator_traits.h>
#include <__memory/construct_at.h>
#include <__memory/pointer_traits.h>
#include <__type_traits/is_constant_evaluated.h>
#include <__type_traits/is_trivially_assignable.h>
#include <__utility/move.h>
#include <__utility/pair.h>
Expand Down Expand Up @@ -185,6 +192,44 @@ __rotate(_Iterator __first, _Iterator __middle, _Sentinel __last) {
return _Ret(std::move(__result), std::move(__last_iter));
}

template <class, class _Cp>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, false>, __bit_iterator<_Cp, false> >
__rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, __bit_iterator<_Cp, false> __last) {
using _I1 = __bit_iterator<_Cp, false>;
using difference_type = typename _I1::difference_type;
difference_type __d1 = __middle - __first;
difference_type __d2 = __last - __middle;
_I1 __r = __first + __d2;
while (__d1 != 0 && __d2 != 0) {
if (__d1 <= __d2) {
if (__d1 <= __bit_array<_Cp>::capacity()) {
__bit_array<_Cp> __b(__d1);
std::copy(__first, __middle, __b.begin());
std::copy(__b.begin(), __b.end(), std::copy(__middle, __last, __first));
break;
} else {
__bit_iterator<_Cp, false> __mp = std::swap_ranges(__first, __middle, __middle);
__first = __middle;
__middle = __mp;
__d2 -= __d1;
}
} else {
if (__d2 <= __bit_array<_Cp>::capacity()) {
__bit_array<_Cp> __b(__d2);
std::copy(__middle, __last, __b.begin());
std::copy_backward(__b.begin(), __b.end(), std::copy_backward(__first, __middle, __last));
break;
} else {
__bit_iterator<_Cp, false> __mp = __first + __d2;
std::swap_ranges(__first, __mp, __middle);
__first = __mp;
__d1 -= __d2;
}
}
}
return std::make_pair(__r, __last);
}

template <class _ForwardIterator>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) {
Expand Down
48 changes: 4 additions & 44 deletions libcxx/include/__bit_reference
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <__algorithm/copy_n.h>
#include <__algorithm/equal.h>
#include <__algorithm/min.h>
#include <__algorithm/rotate.h>
#include <__algorithm/swap_ranges.h>
#include <__assert>
#include <__bit/countr.h>
Expand Down Expand Up @@ -216,8 +217,6 @@ private:
__mask_(__m) {}
};

// rotate

template <class _Cp>
struct __bit_array {
using difference_type _LIBCPP_NODEBUG = typename __size_difference_type_traits<_Cp>::difference_type;
Expand Down Expand Up @@ -249,45 +248,6 @@ struct __bit_array {
}
};

template <class _Cp>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false>
rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, __bit_iterator<_Cp, false> __last) {
using _I1 = __bit_iterator<_Cp, false>;
using difference_type = typename _I1::difference_type;

difference_type __d1 = __middle - __first;
difference_type __d2 = __last - __middle;
_I1 __r = __first + __d2;
while (__d1 != 0 && __d2 != 0) {
if (__d1 <= __d2) {
if (__d1 <= __bit_array<_Cp>::capacity()) {
__bit_array<_Cp> __b(__d1);
std::copy(__first, __middle, __b.begin());
std::copy(__b.begin(), __b.end(), std::copy(__middle, __last, __first));
break;
} else {
__bit_iterator<_Cp, false> __mp = std::swap_ranges(__first, __middle, __middle);
__first = __middle;
__middle = __mp;
__d2 -= __d1;
}
} else {
if (__d2 <= __bit_array<_Cp>::capacity()) {
__bit_array<_Cp> __b(__d2);
std::copy(__middle, __last, __b.begin());
std::copy_backward(__b.begin(), __b.end(), std::copy_backward(__first, __middle, __last));
break;
} else {
__bit_iterator<_Cp, false> __mp = __first + __d2;
std::swap_ranges(__first, __mp, __middle);
__first = __mp;
__d1 -= __d2;
}
}
}
return __r;
}

template <class _Cp, bool _IsConst, typename _Cp::__storage_type>
class __bit_iterator {
public:
Expand Down Expand Up @@ -507,9 +467,9 @@ private:
template <class, class _Cl, class _Cr>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
__swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
template <class _Dp>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
template <class, class _Dp>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false> >
__rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
template <class _Dp, bool _IsConst1, bool _IsConst2>
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
__equal_aligned(__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>);
Expand Down
3 changes: 3 additions & 0 deletions libcxx/include/__fwd/bit_reference.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _Cp, bool _IsConst, typename _Cp::__storage_type = 0>
class __bit_iterator;

template <class _Cp>
struct __bit_array;

template <class, class = void>
struct __size_difference_type_traits;

Expand Down
1 change: 1 addition & 0 deletions libcxx/include/__vector/vector_bool.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <__algorithm/fill_n.h>
#include <__algorithm/iterator_operations.h>
#include <__algorithm/max.h>
#include <__algorithm/rotate.h>
#include <__assert>
#include <__bit_reference>
#include <__config>
Expand Down
64 changes: 64 additions & 0 deletions libcxx/test/benchmarks/algorithms/modifying/rotate.bench.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17

#include <algorithm>
#include <cstddef>
#include <deque>
#include <iterator>
#include <list>
#include <ranges>
#include <string>
#include <vector>

#include "benchmark/benchmark.h"
#include "../../GenerateInput.h"
#include "test_macros.h"

int main(int argc, char** argv) {
auto std_rotate = [](auto first, auto middle, auto last) { return std::rotate(first, middle, last); };

// Benchmark {std,ranges}::rotate where we rotate various fractions of the range. It is possible to
// special-case some of these fractions to cleverly perform swap_ranges.
{
auto bm = []<class Container>(std::string name, auto rotate, double fraction) {
benchmark::RegisterBenchmark(
name,
[=](auto& st) {
std::size_t const size = st.range(0);
using ValueType = typename Container::value_type;
Container c;
std::generate_n(std::back_inserter(c), size, [] { return Generate<ValueType>::random(); });

auto nth = std::next(c.begin(), static_cast<std::size_t>(size * fraction));
for ([[maybe_unused]] auto _ : st) {
benchmark::DoNotOptimize(c);
auto result = rotate(c.begin(), nth, c.end());
benchmark::DoNotOptimize(result);
}
})
->Arg(32)
->Arg(50) // non power-of-two
->RangeMultiplier(2)
->Range(64, 1 << 20);
};

bm.operator()<std::vector<bool>>("std::rotate(vector<bool>) (by 1/4)", std_rotate, 0.25);
bm.operator()<std::vector<bool>>("std::rotate(vector<bool>) (by 51%)", std_rotate, 0.51);
#if TEST_STD_VER >= 23 // vector<bool>::iterator is not std::permutable before C++23
bm.operator()<std::vector<bool>>("rng::rotate(vector<bool>) (by 1/4)", std::ranges::rotate, 0.25);
bm.operator()<std::vector<bool>>("rng::rotate(vector<bool>) (by 51%)", std::ranges::rotate, 0.51);
#endif
}

benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
benchmark::Shutdown();
return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
#include <array>
#include <cassert>
#include <ranges>
#include <vector>

#include "almost_satisfies_types.h"
#include "test_macros.h"
#include "test_iterators.h"
#include "type_algorithms.h"

Expand Down Expand Up @@ -131,6 +133,29 @@ constexpr void test_iter_sent() {
test_one<Iter, Sent, 7>({1, 2, 3, 4, 5, 6, 7}, 7, {1, 2, 3, 4, 5, 6, 7});
}

#if TEST_STD_VER >= 23
template <std::size_t N>
TEST_CONSTEXPR_CXX20 bool test_vector_bool() {
for (int offset = -4; offset <= 4; ++offset) {
std::vector<bool> a(N, false);
std::size_t mid = N / 2 + offset;
for (std::size_t i = mid; i < N; ++i)
a[i] = true;

// (iterator, sentinel)-overload
std::ranges::rotate(std::ranges::begin(a), std::ranges::begin(a) + mid, std::ranges::end(a));
for (std::size_t i = 0; i < N; ++i)
assert(a[i] == (i < N - mid));

// range-overload
std::ranges::rotate(a, std::ranges::begin(a) + (N - mid));
for (std::size_t i = 0; i < N; ++i)
assert(a[i] == (i >= mid));
}
return true;
};
#endif

constexpr bool test() {
types::for_each(types::forward_iterator_list<int*>(), []<class Iter>() {
test_iter_sent<Iter, Iter>();
Expand Down Expand Up @@ -167,6 +192,16 @@ constexpr bool test() {
}
}

#if TEST_STD_VER >= 23
test_vector_bool<8>();
test_vector_bool<19>();
test_vector_bool<32>();
test_vector_bool<49>();
test_vector_bool<64>();
test_vector_bool<199>();
test_vector_bool<256>();
#endif

return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <cassert>
#include <memory>
#include <type_traits>
#include <vector>

#include "test_macros.h"
#include "test_iterators.h"
Expand Down Expand Up @@ -420,6 +421,20 @@ struct TestUniquePtr {

#endif // TEST_STD_VER >= 11

template <std::size_t N>
TEST_CONSTEXPR_CXX20 bool test_vector_bool() {
for (int offset = -4; offset <= 4; ++offset) {
std::vector<bool> a(N, false);
std::size_t mid = N / 2 + offset;
for (std::size_t i = mid; i < N; ++i)
a[i] = true;
std::rotate(a.begin(), a.begin() + mid, a.end());
for (std::size_t i = 0; i < N; ++i)
assert(a[i] == (i < N - mid));
}
return true;
};

TEST_CONSTEXPR_CXX20 bool test() {
types::for_each(types::forward_iterator_list<int*>(), TestIter());

Expand All @@ -428,6 +443,14 @@ TEST_CONSTEXPR_CXX20 bool test() {
types::for_each(types::forward_iterator_list<std::unique_ptr<int>*>(), TestUniquePtr());
#endif

test_vector_bool<8>();
test_vector_bool<19>();
test_vector_bool<32>();
test_vector_bool<49>();
test_vector_bool<64>();
test_vector_bool<199>();
test_vector_bool<256>();

return true;
}

Expand Down