Skip to content

Commit a9138cd

Browse files
committed
[libc++] Optimize ranges::count for __bit_iterators
```
---------------------------------------------------------------
Benchmark                                       old         new
---------------------------------------------------------------
bm_vector_bool_count/1                      1.92 ns     1.92 ns
bm_vector_bool_count/2                      1.92 ns     1.92 ns
bm_vector_bool_count/3                      1.92 ns     1.92 ns
bm_vector_bool_count/4                      1.92 ns     1.92 ns
bm_vector_bool_count/5                      1.92 ns     1.92 ns
bm_vector_bool_count/6                      1.92 ns     1.92 ns
bm_vector_bool_count/7                      1.92 ns     1.92 ns
bm_vector_bool_count/8                      1.92 ns     1.92 ns
bm_vector_bool_count/16                     1.92 ns     1.92 ns
bm_vector_bool_count/64                     2.24 ns     2.25 ns
bm_vector_bool_count/512                    3.19 ns     3.20 ns
bm_vector_bool_count/4096                   14.1 ns     12.3 ns
bm_vector_bool_count/32768                  84.0 ns     83.6 ns
bm_vector_bool_count/262144                  664 ns      661 ns
bm_vector_bool_count/1048576                2623 ns     2628 ns
bm_vector_bool_ranges_count/1               1.07 ns     1.92 ns
bm_vector_bool_ranges_count/2               1.65 ns     1.92 ns
bm_vector_bool_ranges_count/3               2.27 ns     1.92 ns
bm_vector_bool_ranges_count/4               2.68 ns     1.92 ns
bm_vector_bool_ranges_count/5               3.33 ns     1.92 ns
bm_vector_bool_ranges_count/6               3.99 ns     1.92 ns
bm_vector_bool_ranges_count/7               4.67 ns     1.92 ns
bm_vector_bool_ranges_count/8               5.19 ns     1.92 ns
bm_vector_bool_ranges_count/16              11.1 ns     1.92 ns
bm_vector_bool_ranges_count/64              52.2 ns     2.24 ns
bm_vector_bool_ranges_count/512              452 ns     3.20 ns
bm_vector_bool_ranges_count/4096            3577 ns     12.1 ns
bm_vector_bool_ranges_count/32768          28725 ns     83.7 ns
bm_vector_bool_ranges_count/262144        229676 ns      662 ns
bm_vector_bool_ranges_count/1048576       905574 ns     2625 ns
```

Reviewed By: #libc, ldionne

Spies: arichardson, ldionne, libcxx-commits

Differential Revision: https://reviews.llvm.org/D156956
1 parent 5d2a710 commit a9138cd

File tree

10 files changed

+162
-75
lines changed

10 files changed

+162
-75
lines changed

libcxx/benchmarks/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ endfunction()
173173
#==============================================================================
174174
set(BENCHMARK_TESTS
175175
algorithms.partition_point.bench.cpp
176+
algorithms/count.bench.cpp
176177
algorithms/equal.bench.cpp
177178
algorithms/find.bench.cpp
178179
algorithms/lower_bound.bench.cpp
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <algorithm>
10+
#include <benchmark/benchmark.h>
11+
#include <cstring>
12+
#include <random>
13+
#include <vector>
14+
15+
// Benchmark: std::count over a std::vector<bool> holding state.range()
// elements, all initialised to false, searching for `true`.
static void bm_vector_bool_count(benchmark::State& state) {
16+
std::vector<bool> vec1(state.range(), false);
17+
18+
for (auto _ : state) {
19+
// Route both the input and the result through DoNotOptimize so the
// measured call is not removed as dead code.
benchmark::DoNotOptimize(vec1);
20+
benchmark::DoNotOptimize(std::count(vec1.begin(), vec1.end(), true));
21+
}
22+
}
23+
BENCHMARK(bm_vector_bool_count)->DenseRange(1, 8)->Range(16, 1 << 20);
24+
25+
// Benchmark: std::ranges::count (iterator/sentinel form) over a
// std::vector<bool> holding state.range() elements, all initialised to false,
// searching for `true`. Same setup as bm_vector_bool_count so the two
// results can be compared directly.
static void bm_vector_bool_ranges_count(benchmark::State& state) {
26+
std::vector<bool> vec1(state.range(), false);
27+
28+
for (auto _ : state) {
29+
// Route both the input and the result through DoNotOptimize so the
// measured call is not removed as dead code.
benchmark::DoNotOptimize(vec1);
30+
benchmark::DoNotOptimize(std::ranges::count(vec1.begin(), vec1.end(), true));
31+
}
32+
}
33+
BENCHMARK(bm_vector_bool_ranges_count)->DenseRange(1, 8)->Range(16, 1 << 20);
34+
35+
BENCHMARK_MAIN();

libcxx/docs/ReleaseNotes/18.rst

+3
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ Implemented Papers
5858
Improvements and New Features
5959
-----------------------------
6060

61+
- ``std::ranges::count`` is now optimized for ``vector<bool>::iterator``, which
62+
can lead to up to 350x performance improvements.
63+
6164
- The library now provides a hardened mode under which common cases of library undefined behavior will be turned into
6265
a reliable program termination. Vendors can configure whether the hardened mode is enabled by default with the
6366
``LIBCXX_HARDENING_MODE`` variable at CMake configuration time. Users can control whether the hardened mode is

libcxx/include/__algorithm/count.h

+63-6
Original file line numberDiff line numberDiff line change
@@ -10,26 +10,83 @@
1010
#ifndef _LIBCPP___ALGORITHM_COUNT_H
1111
#define _LIBCPP___ALGORITHM_COUNT_H
1212

13+
#include <__algorithm/iterator_operations.h>
14+
#include <__algorithm/min.h>
15+
#include <__bit/invert_if.h>
16+
#include <__bit/popcount.h>
1317
#include <__config>
18+
#include <__functional/identity.h>
19+
#include <__functional/invoke.h>
20+
#include <__fwd/bit_reference.h>
1421
#include <__iterator/iterator_traits.h>
1522

1623
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
1724
# pragma GCC system_header
1825
#endif
1926

27+
_LIBCPP_PUSH_MACROS
28+
#include <__undef_macros>
29+
2030
_LIBCPP_BEGIN_NAMESPACE_STD
2131

22-
template <class _InputIterator, class _Tp>
23-
_LIBCPP_NODISCARD_EXT inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_SINCE_CXX20
24-
typename iterator_traits<_InputIterator>::difference_type
25-
count(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
26-
typename iterator_traits<_InputIterator>::difference_type __r(0);
32+
// generic implementation
33+
// Shared implementation behind std::count and std::ranges::count.
// Walks [__first, __last) once and returns how many elements compare equal to
// __value after being passed through __proj. The result type is the
// iterator's difference type as selected by _IterOps<_AlgPolicy>.
// __proj is taken by non-const reference, so callers must pass an lvalue.
template <class _AlgPolicy, class _Iter, class _Sent, class _Tp, class _Proj>
34+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename _IterOps<_AlgPolicy>::template __difference_type<_Iter>
35+
__count(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) {
36+
typename _IterOps<_AlgPolicy>::template __difference_type<_Iter> __r(0);
2737
for (; __first != __last; ++__first)
28-
if (*__first == __value)
38+
if (std::__invoke(__proj, *__first) == __value)
2939
++__r;
3040
return __r;
3141
}
3242

43+
// __bit_iterator implementation
44+
// Counts, among the __n bits starting at __first, those equal to _ToCount.
// Works one storage word at a time with popcount instead of visiting each bit:
// an optional leading partial word, then whole words, then an optional
// trailing partial word.
// NOTE(review): __invert_if<!_ToCount> presumably flips the word when counting
// zero bits so that popcount always counts the bits of interest - confirm
// against <__bit/invert_if.h>.
template <bool _ToCount, class _Cp, bool _IsConst>
45+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename __bit_iterator<_Cp, _IsConst>::difference_type
46+
__count_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) {
47+
using _It = __bit_iterator<_Cp, _IsConst>;
48+
using __storage_type = typename _It::__storage_type;
49+
using difference_type = typename _It::difference_type;
50+
51+
const int __bits_per_word = _It::__bits_per_word;
52+
difference_type __r = 0;
53+
// do first partial word
54+
if (__first.__ctz_ != 0) {
55+
// Number of bits from the current bit position to the end of this word.
__storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
56+
// Bits of this word that actually belong to the range (the range may end
// inside the first word).
__storage_type __dn = std::min(__clz_f, __n);
57+
// Mask selecting exactly __dn bits starting at bit position __ctz_: the left
// shift clears the bits below __ctz_, the right shift clears the
// (__clz_f - __dn) bits above the end of the range.
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
58+
__r = std::__libcpp_popcount(std::__invert_if<!_ToCount>(*__first.__seg_) & __m);
59+
__n -= __dn;
60+
++__first.__seg_;
61+
}
62+
// do middle whole words
63+
for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word)
64+
__r += std::__libcpp_popcount(std::__invert_if<!_ToCount>(*__first.__seg_));
65+
// do last partial word
66+
if (__n > 0) {
67+
// Mask selecting the low __n bits of the final word.
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
68+
__r += std::__libcpp_popcount(std::__invert_if<!_ToCount>(*__first.__seg_) & __m);
69+
}
70+
return __r;
71+
}
72+
73+
// __count overload for __bit_iterator ranges. It is only enabled when the
// projection type satisfies __is_identity, so the projection argument is
// ignored. The first (unnamed) template parameter occupies the slot where
// callers pass the algorithm policy and is not used here.
// Dispatches on the truthiness of __value to the word-wise __count_bool,
// passing the range length (__last - __first) converted to _Cp::size_type.
template <class, class _Cp, bool _IsConst, class _Tp, class _Proj, __enable_if_t<__is_identity<_Proj>::value, int> = 0>
74+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<__bit_iterator<_Cp, _IsConst> >
75+
__count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value, _Proj&) {
76+
if (__value)
77+
return std::__count_bool<true>(__first, static_cast<typename _Cp::size_type>(__last - __first));
78+
return std::__count_bool<false>(__first, static_cast<typename _Cp::size_type>(__last - __first));
79+
}
80+
81+
// std::count: returns the number of elements in [__first, __last) equal to
// __value. Forwards to __count with the classic algorithm policy and an
// identity projection, so overload resolution can pick a specialised
// __count for the iterator type.
template <class _InputIterator, class _Tp>
82+
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<_InputIterator>
83+
count(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
84+
// Named local because __count takes the projection by non-const reference.
__identity __proj;
85+
return std::__count<_ClassicAlgPolicy>(__first, __last, __value, __proj);
86+
}
87+
3388
_LIBCPP_END_NAMESPACE_STD
3489

90+
_LIBCPP_POP_MACROS
91+
3592
#endif // _LIBCPP___ALGORITHM_COUNT_H

libcxx/include/__algorithm/ranges_count.h

+4-5
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
#ifndef _LIBCPP___ALGORITHM_RANGES_COUNT_H
1010
#define _LIBCPP___ALGORITHM_RANGES_COUNT_H
1111

12-
#include <__algorithm/ranges_count_if.h>
12+
#include <__algorithm/count.h>
13+
#include <__algorithm/iterator_operations.h>
1314
#include <__config>
1415
#include <__functional/identity.h>
1516
#include <__functional/ranges_operations.h>
@@ -36,16 +37,14 @@ struct __fn {
3637
requires indirect_binary_predicate<ranges::equal_to, projected<_Iter, _Proj>, const _Type*>
3738
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Iter>
3839
operator()(_Iter __first, _Sent __last, const _Type& __value, _Proj __proj = {}) const {
39-
auto __pred = [&](auto&& __e) { return __e == __value; };
40-
return ranges::__count_if_impl(std::move(__first), std::move(__last), __pred, __proj);
40+
return std::__count<_RangeAlgPolicy>(std::move(__first), std::move(__last), __value, __proj);
4141
}
4242

4343
template <input_range _Range, class _Type, class _Proj = identity>
4444
requires indirect_binary_predicate<ranges::equal_to, projected<iterator_t<_Range>, _Proj>, const _Type*>
4545
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr range_difference_t<_Range>
4646
operator()(_Range&& __r, const _Type& __value, _Proj __proj = {}) const {
47-
auto __pred = [&](auto&& __e) { return __e == __value; };
48-
return ranges::__count_if_impl(ranges::begin(__r), ranges::end(__r), __pred, __proj);
47+
return std::__count<_RangeAlgPolicy>(ranges::begin(__r), ranges::end(__r), __value, __proj);
4948
}
5049
};
5150
} // namespace __count

libcxx/include/__bit_reference

+1-40
Original file line numberDiff line numberDiff line change
@@ -171,45 +171,6 @@ private:
171171
__bit_const_reference& operator=(const __bit_const_reference&) = delete;
172172
};
173173

174-
// count
175-
176-
template <bool _ToCount, class _Cp, bool _IsConst>
177-
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 typename __bit_iterator<_Cp, _IsConst>::difference_type
178-
__count_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) {
179-
using _It = __bit_iterator<_Cp, _IsConst>;
180-
using __storage_type = typename _It::__storage_type;
181-
using difference_type = typename _It::difference_type;
182-
183-
const int __bits_per_word = _It::__bits_per_word;
184-
difference_type __r = 0;
185-
// do first partial word
186-
if (__first.__ctz_ != 0) {
187-
__storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
188-
__storage_type __dn = std::min(__clz_f, __n);
189-
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
190-
__r = std::__libcpp_popcount(std::__invert_if<!_ToCount>(*__first.__seg_) & __m);
191-
__n -= __dn;
192-
++__first.__seg_;
193-
}
194-
// do middle whole words
195-
for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word)
196-
__r += std::__libcpp_popcount(std::__invert_if<!_ToCount>(*__first.__seg_));
197-
// do last partial word
198-
if (__n > 0) {
199-
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
200-
__r += std::__libcpp_popcount(std::__invert_if<!_ToCount>(*__first.__seg_) & __m);
201-
}
202-
return __r;
203-
}
204-
205-
template <class _Cp, bool _IsConst, class _Tp>
206-
inline _LIBCPP_HIDE_FROM_ABI typename __bit_iterator<_Cp, _IsConst>::difference_type
207-
count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value) {
208-
if (static_cast<bool>(__value))
209-
return std::__count_bool<true>(__first, static_cast<typename _Cp::size_type>(__last - __first));
210-
return std::__count_bool<false>(__first, static_cast<typename _Cp::size_type>(__last - __first));
211-
}
212-
213174
// fill_n
214175

215176
template <bool _FillValue, class _Cp>
@@ -1092,7 +1053,7 @@ private:
10921053
_LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, _IC>
10931054
__find_bool(__bit_iterator<_Dp, _IC>, typename _Dp::size_type);
10941055
template <bool _ToCount, class _Dp, bool _IC>
1095-
friend typename __bit_iterator<_Dp, _IC>::difference_type _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23
1056+
friend typename __bit_iterator<_Dp, _IC>::difference_type _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
10961057
__count_bool(__bit_iterator<_Dp, _IC>, typename _Dp::size_type);
10971058
};
10981059

libcxx/include/bitset

+2-1
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ template <size_t N> struct hash<std::bitset<N>>;
122122
123123
*/
124124

125+
#include <__algorithm/count.h>
125126
#include <__algorithm/fill.h>
126127
#include <__algorithm/find.h>
127128
#include <__assert> // all public C++ headers provide the assertion handler
@@ -1042,7 +1043,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23
10421043
size_t
10431044
bitset<_Size>::count() const _NOEXCEPT
10441045
{
1045-
return static_cast<size_t>(_VSTD::__count_bool<true>(base::__make_iter(0), _Size));
1046+
return static_cast<size_t>(std::count(base::__make_iter(0), base::__make_iter(_Size), true));
10461047
}
10471048

10481049
template <size_t _Size>

libcxx/test/std/algorithms/alg.nonmodifying/alg.count/count.pass.cpp

+37-22
Original file line numberDiff line numberDiff line change
@@ -13,35 +13,50 @@
1313
// constexpr Iter::difference_type // constexpr after C++17
1414
// count(Iter first, Iter last, const T& value);
1515

16+
// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=20000000
17+
// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-ops-limit): -fconstexpr-ops-limit=70000000
18+
1619
#include <algorithm>
1720
#include <cassert>
21+
#include <vector>
1822

1923
#include "test_macros.h"
2024
#include "test_iterators.h"
21-
22-
#if TEST_STD_VER > 17
23-
TEST_CONSTEXPR bool test_constexpr() {
24-
int ia[] = {0, 1, 2, 2, 0, 1, 2, 3};
25-
int ib[] = {1, 2, 3, 4, 5, 6};
26-
return (std::count(std::begin(ia), std::end(ia), 2) == 3)
27-
&& (std::count(std::begin(ib), std::end(ib), 9) == 0)
28-
;
25+
#include "type_algorithms.h"
26+
27+
// Functor instantiated once per iterator type: wraps a fixed int array in
// Iter and checks std::count for a value that occurs three times, a value
// that does not occur, and an empty range.
struct Test {
28+
template <class Iter>
29+
TEST_CONSTEXPR_CXX20 void operator()() {
30+
int ia[] = {0, 1, 2, 2, 0, 1, 2, 3};
31+
// Element count of ia.
const unsigned sa = sizeof(ia) / sizeof(ia[0]);
32+
assert(std::count(Iter(ia), Iter(ia + sa), 2) == 3);
33+
assert(std::count(Iter(ia), Iter(ia + sa), 7) == 0);
34+
// Empty range: first == last.
assert(std::count(Iter(ia), Iter(ia), 2) == 0);
35+
}
36+
};
37+
38+
// Test driver, run at runtime and (from C++20) inside a static_assert.
// First runs Test for every iterator type in
// types::cpp17_input_iterator_list<const int*>, then exercises std::count on
// std::vector<bool>. Always returns true; failures surface through assert.
TEST_CONSTEXPR_CXX20 bool test() {
39+
types::for_each(types::cpp17_input_iterator_list<const int*>(), Test());
40+
41+
// NOTE(review): presumably guards against using vector<bool> during constant
// evaluation before C++20 - confirm.
if (!TEST_IS_CONSTANT_EVALUATED || TEST_STD_VER >= 20) {
42+
// 256-bit window plus room for every start offset in [0, 64).
std::vector<bool> vec(256 + 64);
43+
// i: number of consecutive true bits; offset: bit position where both the
// run of true bits and the counted 256-bit window start.
for (ptrdiff_t i = 0; i != 256; ++i) {
44+
for (size_t offset = 0; offset != 64; ++offset) {
45+
std::fill(vec.begin(), vec.end(), false);
46+
std::fill(vec.begin() + offset, vec.begin() + i + offset, true);
47+
assert(std::count(vec.begin() + offset, vec.begin() + offset + 256, true) == i);
48+
assert(std::count(vec.begin() + offset, vec.begin() + offset + 256, false) == 256 - i);
49+
}
2950
}
30-
#endif
51+
}
52+
53+
return true;
54+
}
3155

32-
int main(int, char**)
33-
{
34-
int ia[] = {0, 1, 2, 2, 0, 1, 2, 3};
35-
const unsigned sa = sizeof(ia)/sizeof(ia[0]);
36-
assert(std::count(cpp17_input_iterator<const int*>(ia),
37-
cpp17_input_iterator<const int*>(ia + sa), 2) == 3);
38-
assert(std::count(cpp17_input_iterator<const int*>(ia),
39-
cpp17_input_iterator<const int*>(ia + sa), 7) == 0);
40-
assert(std::count(cpp17_input_iterator<const int*>(ia),
41-
cpp17_input_iterator<const int*>(ia), 2) == 0);
42-
43-
#if TEST_STD_VER > 17
44-
static_assert(test_constexpr());
56+
int main(int, char**) {
57+
test();
58+
#if TEST_STD_VER >= 20
59+
static_assert(test());
4560
#endif
4661

4762
return 0;

libcxx/test/std/algorithms/alg.nonmodifying/alg.count/ranges.count.pass.cpp

+16
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010

1111
// UNSUPPORTED: c++03, c++11, c++14, c++17
1212

13+
// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=20000000
14+
// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-ops-limit): -fconstexpr-ops-limit=70000000
15+
1316
// template<input_iterator I, sentinel_for<I> S, class T, class Proj = identity>
1417
// requires indirect_binary_predicate<ranges::equal_to, projected<I, Proj>, const T*>
1518
// constexpr iter_difference_t<I>
@@ -23,6 +26,7 @@
2326
#include <array>
2427
#include <cassert>
2528
#include <ranges>
29+
#include <vector>
2630

2731
#include "almost_satisfies_types.h"
2832
#include "test_iterators.h"
@@ -253,6 +257,18 @@ constexpr bool test() {
253257
}
254258
}
255259

260+
{ // check that __bit_iterator optimizations work as expected
261+
std::vector<bool> vec(256 + 64);
262+
for (ptrdiff_t i = 0; i != 256; ++i) {
263+
for (size_t offset = 0; offset != 64; ++offset) {
264+
std::fill(vec.begin(), vec.end(), false);
265+
std::fill(vec.begin() + offset, vec.begin() + i + offset, true);
266+
assert(std::ranges::count(vec.begin() + offset, vec.begin() + offset + 256, true) == i);
267+
assert(std::ranges::count(vec.begin() + offset, vec.begin() + offset + 256, false) == 256 - i);
268+
}
269+
}
270+
}
271+
256272
return true;
257273
}
258274

libcxx/utils/data/ignore_format.txt

-1
Original file line numberDiff line numberDiff line change
@@ -1133,7 +1133,6 @@ libcxx/test/std/algorithms/alg.nonmodifying/alg.all_of/ranges.all_of.pass.cpp
11331133
libcxx/test/std/algorithms/alg.nonmodifying/alg.any_of/any_of.pass.cpp
11341134
libcxx/test/std/algorithms/alg.nonmodifying/alg.any_of/ranges.any_of.pass.cpp
11351135
libcxx/test/std/algorithms/alg.nonmodifying/alg.count/count_if.pass.cpp
1136-
libcxx/test/std/algorithms/alg.nonmodifying/alg.count/count.pass.cpp
11371136
libcxx/test/std/algorithms/alg.nonmodifying/alg.count/pstl.count_if.pass.cpp
11381137
libcxx/test/std/algorithms/alg.nonmodifying/alg.count/pstl.count.pass.cpp
11391138
libcxx/test/std/algorithms/alg.nonmodifying/alg.count/ranges.count_if.pass.cpp

0 commit comments

Comments
 (0)