Skip to content

Commit b439565

Browse files
committed
Optimize ranges::copy{, _n} for vector<bool>::iterator
1 parent 13c6abf commit b439565

File tree

9 files changed

+420
-185
lines changed

9 files changed

+420
-185
lines changed

libcxx/docs/ReleaseNotes/20.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,9 @@ Improvements and New Features
111111
std::errc::not_a_directory``, or use ``err.default_error_condition()`` to map to an ``error_condition``, and then test
112112
its ``value()`` and ``category()``.
113113

114+
- The ``std::ranges::copy`` and ``std::ranges::copy_n`` algorithms have been optimized for ``std::vector<bool>::iterator``\s,
115+
resulting in a performance improvement of up to 2000x.
116+
114117
Deprecations and Removals
115118
-------------------------
116119

libcxx/include/__algorithm/copy.h

Lines changed: 133 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313
#include <__algorithm/for_each_segment.h>
1414
#include <__algorithm/min.h>
1515
#include <__config>
16+
#include <__fwd/bit_reference.h>
1617
#include <__iterator/iterator_traits.h>
1718
#include <__iterator/segmented_iterator.h>
19+
#include <__memory/pointer_traits.h>
1820
#include <__type_traits/common_type.h>
1921
#include <__type_traits/enable_if.h>
2022
#include <__utility/move.h>
@@ -29,9 +31,129 @@ _LIBCPP_PUSH_MACROS
2931

3032
_LIBCPP_BEGIN_NAMESPACE_STD
3133

34+
// Forward declaration of std::copy. __copy_aligned, defined below, calls std::copy on the underlying storage words,
// while the definition of std::copy itself only appears later in this header.
template <class _InputIterator, class _OutputIterator>
35+
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
36+
copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result);
37+
3238
template <class _InIter, class _Sent, class _OutIter>
3339
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __copy(_InIter, _Sent, _OutIter);
3440

41+
// Copies the bits in [__first, __last) to the range starting at __result. The partial first and last words are merged
// into the destination through a mask, and the whole words in between are copied with std::copy on the underlying
// storage. The first-word mask is built from __first.__ctz_ and applied unshifted to *__result.__seg_, so the code
// relies on both iterators having the same bit offset; the __bit_iterator overload of __copy_impl::operator() calls
// this function when __first.__ctz_ == __result.__ctz_.
// Returns __result advanced past the last bit written.
template <class _Cp, bool _IsConst>
42+
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_aligned(
43+
__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
44+
using _In = __bit_iterator<_Cp, _IsConst>;
45+
using difference_type = typename _In::difference_type;
46+
using __storage_type = typename _In::__storage_type;
47+
48+
const int __bits_per_word = _In::__bits_per_word;
49+
difference_type __n = __last - __first;
50+
if (__n > 0) {
51+
// do first word
52+
if (__first.__ctz_ != 0) {
53+
unsigned __clz = __bits_per_word - __first.__ctz_;
54+
difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
55+
__n -= __dn;
56+
// __m selects the __dn bits of the first word that start at bit __first.__ctz_
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
57+
__storage_type __b = *__first.__seg_ & __m;
58+
*__result.__seg_ &= ~__m;
59+
*__result.__seg_ |= __b;
60+
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
61+
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
62+
++__first.__seg_;
63+
// __first.__ctz_ = 0;
64+
}
65+
// __first.__ctz_ == 0;
66+
// do middle words
67+
// __nw is the number of whole storage words that remain to be copied
__storage_type __nw = __n / __bits_per_word;
68+
std::copy(std::__to_address(__first.__seg_),
69+
std::__to_address(__first.__seg_ + __nw),
70+
std::__to_address(__result.__seg_));
71+
__n -= __nw * __bits_per_word;
72+
__result.__seg_ += __nw;
73+
// do last word
74+
if (__n > 0) {
75+
__first.__seg_ += __nw;
76+
// __m selects the low __n bits of the last word
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
77+
__storage_type __b = *__first.__seg_ & __m;
78+
*__result.__seg_ &= ~__m;
79+
*__result.__seg_ |= __b;
80+
__result.__ctz_ = static_cast<unsigned>(__n);
81+
}
82+
}
83+
return __result;
84+
}
85+
86+
// Copies the bits in [__first, __last) to the range starting at __result for the case in which the source and
// destination bit offsets differ; the __bit_iterator overload of __copy_impl::operator() calls this function when
// __first.__ctz_ != __result.__ctz_. Each group of source bits is shifted into position and may be split across two
// consecutive destination words.
// Returns __result advanced past the last bit written.
template <class _Cp, bool _IsConst>
87+
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_unaligned(
88+
__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
89+
using _In = __bit_iterator<_Cp, _IsConst>;
90+
using difference_type = typename _In::difference_type;
91+
using __storage_type = typename _In::__storage_type;
92+
93+
const int __bits_per_word = _In::__bits_per_word;
94+
difference_type __n = __last - __first;
95+
if (__n > 0) {
96+
// do first word
97+
if (__first.__ctz_ != 0) {
98+
unsigned __clz_f = __bits_per_word - __first.__ctz_;
99+
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
100+
__n -= __dn;
101+
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
102+
// __b holds the __dn source bits, still at their position within the source word
__storage_type __b = *__first.__seg_ & __m;
103+
unsigned __clz_r = __bits_per_word - __result.__ctz_;
104+
// __ddn is how many of those bits fit into the rest of the current destination word
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
105+
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
106+
*__result.__seg_ &= ~__m;
107+
if (__result.__ctz_ > __first.__ctz_)
108+
*__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_);
109+
else
110+
*__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_);
111+
__result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
112+
__result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
113+
__dn -= __ddn;
114+
// the bits that did not fit are written to the low bits of the next destination word
if (__dn > 0) {
115+
__m = ~__storage_type(0) >> (__bits_per_word - __dn);
116+
*__result.__seg_ &= ~__m;
117+
*__result.__seg_ |= __b >> (__first.__ctz_ + __ddn);
118+
__result.__ctz_ = static_cast<unsigned>(__dn);
119+
}
120+
++__first.__seg_;
121+
// __first.__ctz_ = 0;
122+
}
123+
// __first.__ctz_ == 0;
124+
// do middle words
125+
unsigned __clz_r = __bits_per_word - __result.__ctz_;
126+
__storage_type __m = ~__storage_type(0) << __result.__ctz_;
127+
// each full source word is written in two pieces: its low bits fill the current destination word from bit
// __result.__ctz_ upwards, and its high bits fill the low bits of the following destination word
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
128+
__storage_type __b = *__first.__seg_;
129+
*__result.__seg_ &= ~__m;
130+
*__result.__seg_ |= __b << __result.__ctz_;
131+
++__result.__seg_;
132+
*__result.__seg_ &= __m;
133+
*__result.__seg_ |= __b >> __clz_r;
134+
}
135+
// do last word
136+
if (__n > 0) {
137+
__m = ~__storage_type(0) >> (__bits_per_word - __n);
138+
__storage_type __b = *__first.__seg_ & __m;
139+
__storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
140+
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
141+
*__result.__seg_ &= ~__m;
142+
*__result.__seg_ |= __b << __result.__ctz_;
143+
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
144+
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
145+
__n -= __dn;
146+
// the bits that did not fit are written to the low bits of the next destination word
if (__n > 0) {
147+
__m = ~__storage_type(0) >> (__bits_per_word - __n);
148+
*__result.__seg_ &= ~__m;
149+
*__result.__seg_ |= __b >> __dn;
150+
__result.__ctz_ = static_cast<unsigned>(__n);
151+
}
152+
}
153+
}
154+
return __result;
155+
}
156+
35157
struct __copy_impl {
36158
template <class _InIter, class _Sent, class _OutIter>
37159
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
@@ -95,6 +217,16 @@ struct __copy_impl {
95217
}
96218
}
97219

220+
// Overload for __bit_iterator ranges: copies [__first, __last) to __result with __copy_aligned when both iterators
// have the same bit offset (__ctz_) and with __copy_unaligned otherwise. Returns a pair made of __last and the
// iterator returned by the selected helper.
template <class _Cp, bool _IsConst>
221+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> >
222+
operator()(__bit_iterator<_Cp, _IsConst> __first,
223+
__bit_iterator<_Cp, _IsConst> __last,
224+
__bit_iterator<_Cp, false> __result) {
225+
if (__first.__ctz_ == __result.__ctz_)
226+
return std::make_pair(__last, std::__copy_aligned(__first, __last, __result));
227+
return std::make_pair(__last, std::__copy_unaligned(__first, __last, __result));
228+
}
229+
98230
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
99231
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0>
100232
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>
@@ -110,7 +242,7 @@ __copy(_InIter __first, _Sent __last, _OutIter __result) {
110242
}
111243

112244
// Copies [__first, __last) to __result by forwarding to std::__copy and returns the output iterator, i.e. the second
// member of the pair that std::__copy returns.
template <class _InputIterator, class _OutputIterator>
113-
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
245+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
114246
copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result) {
115247
return std::__copy(__first, __last, __result).second;
116248
}

libcxx/include/__bit_reference

Lines changed: 5 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#ifndef _LIBCPP___BIT_REFERENCE
1111
#define _LIBCPP___BIT_REFERENCE
1212

13+
#include <__algorithm/copy.h>
1314
#include <__algorithm/copy_n.h>
1415
#include <__algorithm/min.h>
1516
#include <__bit/countr.h>
@@ -24,6 +25,7 @@
2425
#include <__type_traits/conditional.h>
2526
#include <__type_traits/is_constant_evaluated.h>
2627
#include <__type_traits/void_t.h>
28+
#include <__utility/pair.h>
2729
#include <__utility/swap.h>
2830

2931
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -183,130 +185,6 @@ private:
183185
__mask_(__m) {}
184186
};
185187

186-
// copy
187-
188-
// Copies the bits in [__first, __last) to the range starting at __result. The partial first and last words are merged
// into the destination through a mask, and the whole words in between are copied with std::copy_n on the underlying
// storage. The first-word mask is built from __first.__ctz_ and applied unshifted to *__result.__seg_, so the code
// relies on both iterators having the same bit offset; the __bit_iterator overload of copy in this file calls this
// function when __first.__ctz_ == __result.__ctz_.
// Returns __result advanced past the last bit written.
template <class _Cp, bool _IsConst>
189-
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_aligned(
190-
__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
191-
using _In = __bit_iterator<_Cp, _IsConst>;
192-
using difference_type = typename _In::difference_type;
193-
using __storage_type = typename _In::__storage_type;
194-
195-
const int __bits_per_word = _In::__bits_per_word;
196-
difference_type __n = __last - __first;
197-
if (__n > 0) {
198-
// do first word
199-
if (__first.__ctz_ != 0) {
200-
unsigned __clz = __bits_per_word - __first.__ctz_;
201-
difference_type __dn = std::min(static_cast<difference_type>(__clz), __n);
202-
__n -= __dn;
203-
// __m selects the __dn bits of the first word that start at bit __first.__ctz_
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn));
204-
__storage_type __b = *__first.__seg_ & __m;
205-
*__result.__seg_ &= ~__m;
206-
*__result.__seg_ |= __b;
207-
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
208-
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
209-
++__first.__seg_;
210-
// __first.__ctz_ = 0;
211-
}
212-
// __first.__ctz_ == 0;
213-
// do middle words
214-
// __nw is the number of whole storage words that remain to be copied
__storage_type __nw = __n / __bits_per_word;
215-
std::copy_n(std::__to_address(__first.__seg_), __nw, std::__to_address(__result.__seg_));
216-
__n -= __nw * __bits_per_word;
217-
__result.__seg_ += __nw;
218-
// do last word
219-
if (__n > 0) {
220-
__first.__seg_ += __nw;
221-
// __m selects the low __n bits of the last word
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
222-
__storage_type __b = *__first.__seg_ & __m;
223-
*__result.__seg_ &= ~__m;
224-
*__result.__seg_ |= __b;
225-
__result.__ctz_ = static_cast<unsigned>(__n);
226-
}
227-
}
228-
return __result;
229-
}
230-
231-
// Copies the bits in [__first, __last) to the range starting at __result for the case in which the source and
// destination bit offsets differ; the __bit_iterator overload of copy in this file calls this function when
// __first.__ctz_ != __result.__ctz_. Each group of source bits is shifted into position and may be split across two
// consecutive destination words.
// Returns __result advanced past the last bit written.
template <class _Cp, bool _IsConst>
232-
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> __copy_unaligned(
233-
__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
234-
using _In = __bit_iterator<_Cp, _IsConst>;
235-
using difference_type = typename _In::difference_type;
236-
using __storage_type = typename _In::__storage_type;
237-
238-
const int __bits_per_word = _In::__bits_per_word;
239-
difference_type __n = __last - __first;
240-
if (__n > 0) {
241-
// do first word
242-
if (__first.__ctz_ != 0) {
243-
unsigned __clz_f = __bits_per_word - __first.__ctz_;
244-
difference_type __dn = std::min(static_cast<difference_type>(__clz_f), __n);
245-
__n -= __dn;
246-
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
247-
// __b holds the __dn source bits, still at their position within the source word
__storage_type __b = *__first.__seg_ & __m;
248-
unsigned __clz_r = __bits_per_word - __result.__ctz_;
249-
// __ddn is how many of those bits fit into the rest of the current destination word
__storage_type __ddn = std::min<__storage_type>(__dn, __clz_r);
250-
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn));
251-
*__result.__seg_ &= ~__m;
252-
if (__result.__ctz_ > __first.__ctz_)
253-
*__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_);
254-
else
255-
*__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_);
256-
__result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word;
257-
__result.__ctz_ = static_cast<unsigned>((__ddn + __result.__ctz_) % __bits_per_word);
258-
__dn -= __ddn;
259-
// the bits that did not fit are written to the low bits of the next destination word
if (__dn > 0) {
260-
__m = ~__storage_type(0) >> (__bits_per_word - __dn);
261-
*__result.__seg_ &= ~__m;
262-
*__result.__seg_ |= __b >> (__first.__ctz_ + __ddn);
263-
__result.__ctz_ = static_cast<unsigned>(__dn);
264-
}
265-
++__first.__seg_;
266-
// __first.__ctz_ = 0;
267-
}
268-
// __first.__ctz_ == 0;
269-
// do middle words
270-
unsigned __clz_r = __bits_per_word - __result.__ctz_;
271-
__storage_type __m = ~__storage_type(0) << __result.__ctz_;
272-
// each full source word is written in two pieces: its low bits fill the current destination word from bit
// __result.__ctz_ upwards, and its high bits fill the low bits of the following destination word
for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) {
273-
__storage_type __b = *__first.__seg_;
274-
*__result.__seg_ &= ~__m;
275-
*__result.__seg_ |= __b << __result.__ctz_;
276-
++__result.__seg_;
277-
*__result.__seg_ &= __m;
278-
*__result.__seg_ |= __b >> __clz_r;
279-
}
280-
// do last word
281-
if (__n > 0) {
282-
__m = ~__storage_type(0) >> (__bits_per_word - __n);
283-
__storage_type __b = *__first.__seg_ & __m;
284-
__storage_type __dn = std::min(__n, static_cast<difference_type>(__clz_r));
285-
__m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn));
286-
*__result.__seg_ &= ~__m;
287-
*__result.__seg_ |= __b << __result.__ctz_;
288-
__result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word;
289-
__result.__ctz_ = static_cast<unsigned>((__dn + __result.__ctz_) % __bits_per_word);
290-
__n -= __dn;
291-
// the bits that did not fit are written to the low bits of the next destination word
if (__n > 0) {
292-
__m = ~__storage_type(0) >> (__bits_per_word - __n);
293-
*__result.__seg_ &= ~__m;
294-
*__result.__seg_ |= __b >> __dn;
295-
__result.__ctz_ = static_cast<unsigned>(__n);
296-
}
297-
}
298-
}
299-
return __result;
300-
}
301-
302-
// copy overload for __bit_iterator: copies [__first, __last) to __result with __copy_aligned when both iterators have
// the same bit offset (__ctz_) and with __copy_unaligned otherwise, and returns the iterator produced by that helper.
template <class _Cp, bool _IsConst>
303-
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false>
304-
copy(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
305-
if (__first.__ctz_ == __result.__ctz_)
306-
return std::__copy_aligned(__first, __last, __result);
307-
return std::__copy_unaligned(__first, __last, __result);
308-
}
309-
310188
// copy_backward
311189

312190
template <class _Cp, bool _IsConst>
@@ -989,8 +867,9 @@ private:
989867
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_unaligned(
990868
__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
991869
template <class _Dp, bool _IC>
992-
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false>
993-
copy(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
870+
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend pair<__bit_iterator<_Dp, _IC>, __bit_iterator<_Dp, false> >
871+
__copy_impl::operator()(
872+
__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);
994873
template <class _Dp, bool _IC>
995874
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_backward_aligned(
996875
__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result);

libcxx/include/bitset

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ template <size_t N> struct hash<std::bitset<N>>;
129129
#if __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
130130
# include <__cxx03/bitset>
131131
#else
132+
# include <__algorithm/copy.h>
133+
# include <__algorithm/copy_backward.h>
132134
# include <__algorithm/count.h>
133135
# include <__algorithm/fill.h>
134136
# include <__algorithm/fill_n.h>

0 commit comments

Comments
 (0)