Skip to content

Commit 1b80c49

Browse files
winner245 authored and frederik-h committed
[libc++] Optimize ranges::rotate for vector<bool>::iterator (llvm#121168)
This PR optimizes the performance of `std::ranges::rotate` for `vector<bool>::iterator`. The optimization yields a performance improvement of up to 2096x. Closes llvm#64038.
1 parent 7ca3e88 commit 1b80c49

File tree

8 files changed

+176
-45
lines changed

8 files changed

+176
-45
lines changed

libcxx/docs/ReleaseNotes/21.rst

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ Implemented Papers
4646
Improvements and New Features
4747
-----------------------------
4848

49-
- The ``std::ranges::{copy, copy_n, copy_backward, move, move_backward}`` algorithms have been optimized for
49+
- The ``std::ranges::{copy, copy_n, copy_backward, move, move_backward, rotate}`` algorithms have been optimized for
5050
``std::vector<bool>::iterator``, resulting in a performance improvement of up to 2000x.
5151

5252
- The ``std::ranges::equal`` algorithm has been optimized for ``std::vector<bool>::iterator``, resulting in a performance

libcxx/include/__algorithm/rotate.h

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,19 @@
99
#ifndef _LIBCPP___ALGORITHM_ROTATE_H
1010
#define _LIBCPP___ALGORITHM_ROTATE_H
1111

12+
#include <__algorithm/copy.h>
13+
#include <__algorithm/copy_backward.h>
1214
#include <__algorithm/iterator_operations.h>
1315
#include <__algorithm/move.h>
1416
#include <__algorithm/move_backward.h>
1517
#include <__algorithm/swap_ranges.h>
1618
#include <__config>
19+
#include <__cstddef/size_t.h>
20+
#include <__fwd/bit_reference.h>
1721
#include <__iterator/iterator_traits.h>
22+
#include <__memory/construct_at.h>
23+
#include <__memory/pointer_traits.h>
24+
#include <__type_traits/is_constant_evaluated.h>
1825
#include <__type_traits/is_trivially_assignable.h>
1926
#include <__utility/move.h>
2027
#include <__utility/pair.h>
@@ -185,6 +192,44 @@ __rotate(_Iterator __first, _Iterator __middle, _Sentinel __last) {
185192
return _Ret(std::move(__result), std::move(__last_iter));
186193
}
187194

195+
template <class, class _Cp>
196+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, false>, __bit_iterator<_Cp, false> >
197+
__rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, __bit_iterator<_Cp, false> __last) {
198+
using _I1 = __bit_iterator<_Cp, false>;
199+
using difference_type = typename _I1::difference_type;
200+
difference_type __d1 = __middle - __first;
201+
difference_type __d2 = __last - __middle;
202+
_I1 __r = __first + __d2;
203+
while (__d1 != 0 && __d2 != 0) {
204+
if (__d1 <= __d2) {
205+
if (__d1 <= __bit_array<_Cp>::capacity()) {
206+
__bit_array<_Cp> __b(__d1);
207+
std::copy(__first, __middle, __b.begin());
208+
std::copy(__b.begin(), __b.end(), std::copy(__middle, __last, __first));
209+
break;
210+
} else {
211+
__bit_iterator<_Cp, false> __mp = std::swap_ranges(__first, __middle, __middle);
212+
__first = __middle;
213+
__middle = __mp;
214+
__d2 -= __d1;
215+
}
216+
} else {
217+
if (__d2 <= __bit_array<_Cp>::capacity()) {
218+
__bit_array<_Cp> __b(__d2);
219+
std::copy(__middle, __last, __b.begin());
220+
std::copy_backward(__b.begin(), __b.end(), std::copy_backward(__first, __middle, __last));
221+
break;
222+
} else {
223+
__bit_iterator<_Cp, false> __mp = __first + __d2;
224+
std::swap_ranges(__first, __mp, __middle);
225+
__first = __mp;
226+
__d1 -= __d2;
227+
}
228+
}
229+
}
230+
return std::make_pair(__r, __last);
231+
}
232+
188233
template <class _ForwardIterator>
189234
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
190235
rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) {

libcxx/include/__bit_reference

Lines changed: 4 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#include <__algorithm/copy_n.h>
1717
#include <__algorithm/equal.h>
1818
#include <__algorithm/min.h>
19+
#include <__algorithm/rotate.h>
1920
#include <__algorithm/swap_ranges.h>
2021
#include <__assert>
2122
#include <__bit/countr.h>
@@ -216,8 +217,6 @@ private:
216217
__mask_(__m) {}
217218
};
218219

219-
// rotate
220-
221220
template <class _Cp>
222221
struct __bit_array {
223222
using difference_type _LIBCPP_NODEBUG = typename __size_difference_type_traits<_Cp>::difference_type;
@@ -249,45 +248,6 @@ struct __bit_array {
249248
}
250249
};
251250

252-
template <class _Cp>
253-
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false>
254-
rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, __bit_iterator<_Cp, false> __last) {
255-
using _I1 = __bit_iterator<_Cp, false>;
256-
using difference_type = typename _I1::difference_type;
257-
258-
difference_type __d1 = __middle - __first;
259-
difference_type __d2 = __last - __middle;
260-
_I1 __r = __first + __d2;
261-
while (__d1 != 0 && __d2 != 0) {
262-
if (__d1 <= __d2) {
263-
if (__d1 <= __bit_array<_Cp>::capacity()) {
264-
__bit_array<_Cp> __b(__d1);
265-
std::copy(__first, __middle, __b.begin());
266-
std::copy(__b.begin(), __b.end(), std::copy(__middle, __last, __first));
267-
break;
268-
} else {
269-
__bit_iterator<_Cp, false> __mp = std::swap_ranges(__first, __middle, __middle);
270-
__first = __middle;
271-
__middle = __mp;
272-
__d2 -= __d1;
273-
}
274-
} else {
275-
if (__d2 <= __bit_array<_Cp>::capacity()) {
276-
__bit_array<_Cp> __b(__d2);
277-
std::copy(__middle, __last, __b.begin());
278-
std::copy_backward(__b.begin(), __b.end(), std::copy_backward(__first, __middle, __last));
279-
break;
280-
} else {
281-
__bit_iterator<_Cp, false> __mp = __first + __d2;
282-
std::swap_ranges(__first, __mp, __middle);
283-
__first = __mp;
284-
__d1 -= __d2;
285-
}
286-
}
287-
}
288-
return __r;
289-
}
290-
291251
template <class _Cp, bool _IsConst, typename _Cp::__storage_type>
292252
class __bit_iterator {
293253
public:
@@ -507,9 +467,9 @@ private:
507467
template <class, class _Cl, class _Cr>
508468
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Cl, false>, __bit_iterator<_Cr, false> >
509469
__swap_ranges(__bit_iterator<_Cl, false>, __bit_iterator<_Cl, false>, __bit_iterator<_Cr, false>);
510-
template <class _Dp>
511-
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
512-
rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
470+
template <class, class _Dp>
471+
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false> >
472+
__rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>);
513473
template <class _Dp, bool _IsConst1, bool _IsConst2>
514474
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend bool
515475
__equal_aligned(__bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst1>, __bit_iterator<_Dp, _IsConst2>);

libcxx/include/__fwd/bit_reference.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
2323
template <class _Cp, bool _IsConst, typename _Cp::__storage_type = 0>
2424
class __bit_iterator;
2525

26+
template <class _Cp>
27+
struct __bit_array;
28+
2629
template <class, class = void>
2730
struct __size_difference_type_traits;
2831

libcxx/include/__vector/vector_bool.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
#include <__algorithm/fill_n.h>
1515
#include <__algorithm/iterator_operations.h>
1616
#include <__algorithm/max.h>
17+
#include <__algorithm/rotate.h>
1718
#include <__assert>
1819
#include <__bit_reference>
1920
#include <__config>
Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,64 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03, c++11, c++14, c++17
10+
11+
#include <algorithm>
12+
#include <cstddef>
13+
#include <deque>
14+
#include <iterator>
15+
#include <list>
16+
#include <ranges>
17+
#include <string>
18+
#include <vector>
19+
20+
#include "benchmark/benchmark.h"
21+
#include "../../GenerateInput.h"
22+
#include "test_macros.h"
23+
24+
int main(int argc, char** argv) {
25+
auto std_rotate = [](auto first, auto middle, auto last) { return std::rotate(first, middle, last); };
26+
27+
// Benchmark {std,ranges}::rotate where we rotate various fractions of the range. It is possible to
28+
// special-case some of these fractions to cleverly perform swap_ranges.
29+
{
30+
auto bm = []<class Container>(std::string name, auto rotate, double fraction) {
31+
benchmark::RegisterBenchmark(
32+
name,
33+
[=](auto& st) {
34+
std::size_t const size = st.range(0);
35+
using ValueType = typename Container::value_type;
36+
Container c;
37+
std::generate_n(std::back_inserter(c), size, [] { return Generate<ValueType>::random(); });
38+
39+
auto nth = std::next(c.begin(), static_cast<std::size_t>(size * fraction));
40+
for ([[maybe_unused]] auto _ : st) {
41+
benchmark::DoNotOptimize(c);
42+
auto result = rotate(c.begin(), nth, c.end());
43+
benchmark::DoNotOptimize(result);
44+
}
45+
})
46+
->Arg(32)
47+
->Arg(50) // non power-of-two
48+
->RangeMultiplier(2)
49+
->Range(64, 1 << 20);
50+
};
51+
52+
bm.operator()<std::vector<bool>>("std::rotate(vector<bool>) (by 1/4)", std_rotate, 0.25);
53+
bm.operator()<std::vector<bool>>("std::rotate(vector<bool>) (by 51%)", std_rotate, 0.51);
54+
#if TEST_STD_VER >= 23 // vector<bool>::iterator is not std::permutable before C++23
55+
bm.operator()<std::vector<bool>>("rng::rotate(vector<bool>) (by 1/4)", std::ranges::rotate, 0.25);
56+
bm.operator()<std::vector<bool>>("rng::rotate(vector<bool>) (by 51%)", std::ranges::rotate, 0.51);
57+
#endif
58+
}
59+
60+
benchmark::Initialize(&argc, argv);
61+
benchmark::RunSpecifiedBenchmarks();
62+
benchmark::Shutdown();
63+
return 0;
64+
}

libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,10 @@
2121
#include <array>
2222
#include <cassert>
2323
#include <ranges>
24+
#include <vector>
2425

2526
#include "almost_satisfies_types.h"
27+
#include "test_macros.h"
2628
#include "test_iterators.h"
2729
#include "type_algorithms.h"
2830

@@ -131,6 +133,29 @@ constexpr void test_iter_sent() {
131133
test_one<Iter, Sent, 7>({1, 2, 3, 4, 5, 6, 7}, 7, {1, 2, 3, 4, 5, 6, 7});
132134
}
133135

136+
#if TEST_STD_VER >= 23
137+
template <std::size_t N>
138+
TEST_CONSTEXPR_CXX20 bool test_vector_bool() {
139+
for (int offset = -4; offset <= 4; ++offset) {
140+
std::vector<bool> a(N, false);
141+
std::size_t mid = N / 2 + offset;
142+
for (std::size_t i = mid; i < N; ++i)
143+
a[i] = true;
144+
145+
// (iterator, sentinel)-overload
146+
std::ranges::rotate(std::ranges::begin(a), std::ranges::begin(a) + mid, std::ranges::end(a));
147+
for (std::size_t i = 0; i < N; ++i)
148+
assert(a[i] == (i < N - mid));
149+
150+
// range-overload
151+
std::ranges::rotate(a, std::ranges::begin(a) + (N - mid));
152+
for (std::size_t i = 0; i < N; ++i)
153+
assert(a[i] == (i >= mid));
154+
}
155+
return true;
156+
};
157+
#endif
158+
134159
constexpr bool test() {
135160
types::for_each(types::forward_iterator_list<int*>(), []<class Iter>() {
136161
test_iter_sent<Iter, Iter>();
@@ -167,6 +192,16 @@ constexpr bool test() {
167192
}
168193
}
169194

195+
#if TEST_STD_VER >= 23
196+
test_vector_bool<8>();
197+
test_vector_bool<19>();
198+
test_vector_bool<32>();
199+
test_vector_bool<49>();
200+
test_vector_bool<64>();
201+
test_vector_bool<199>();
202+
test_vector_bool<256>();
203+
#endif
204+
170205
return true;
171206
}
172207

libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate.pass.cpp

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
#include <cassert>
1616
#include <memory>
1717
#include <type_traits>
18+
#include <vector>
1819

1920
#include "test_macros.h"
2021
#include "test_iterators.h"
@@ -420,6 +421,20 @@ struct TestUniquePtr {
420421

421422
#endif // TEST_STD_VER >= 11
422423

424+
template <std::size_t N>
425+
TEST_CONSTEXPR_CXX20 bool test_vector_bool() {
426+
for (int offset = -4; offset <= 4; ++offset) {
427+
std::vector<bool> a(N, false);
428+
std::size_t mid = N / 2 + offset;
429+
for (std::size_t i = mid; i < N; ++i)
430+
a[i] = true;
431+
std::rotate(a.begin(), a.begin() + mid, a.end());
432+
for (std::size_t i = 0; i < N; ++i)
433+
assert(a[i] == (i < N - mid));
434+
}
435+
return true;
436+
};
437+
423438
TEST_CONSTEXPR_CXX20 bool test() {
424439
types::for_each(types::forward_iterator_list<int*>(), TestIter());
425440

@@ -428,6 +443,14 @@ TEST_CONSTEXPR_CXX20 bool test() {
428443
types::for_each(types::forward_iterator_list<std::unique_ptr<int>*>(), TestUniquePtr());
429444
#endif
430445

446+
test_vector_bool<8>();
447+
test_vector_bool<19>();
448+
test_vector_bool<32>();
449+
test_vector_bool<49>();
450+
test_vector_bool<64>();
451+
test_vector_bool<199>();
452+
test_vector_bool<256>();
453+
431454
return true;
432455
}
433456

0 commit comments

Comments
 (0)