|
11 | 11 | #define _LIBCPP___ALGORITHM_MISMATCH_H
|
12 | 12 |
|
13 | 13 | #include <__algorithm/comp.h>
|
| 14 | +#include <__algorithm/unwrap_iter.h> |
| 15 | +#include <__algorithm/vectorization.h> |
14 | 16 | #include <__config>
|
| 17 | +#include <__functional/identity.h> |
15 | 18 | #include <__iterator/iterator_traits.h>
|
| 19 | +#include <__type_traits/invoke.h> |
| 20 | +#include <__type_traits/is_equality_comparable.h> |
| 21 | +#include <__utility/align_down.h> |
| 22 | +#include <__utility/move.h> |
16 | 23 | #include <__utility/pair.h>
|
| 24 | +#include <experimental/__simd/feature_traits.h> |
| 25 | +#include <experimental/__simd/simd.h> |
| 26 | +#include <experimental/__simd/simd_mask.h> |
17 | 27 |
|
18 | 28 | #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
|
19 | 29 | # pragma GCC system_header
|
20 | 30 | #endif
|
21 | 31 |
|
22 | 32 | _LIBCPP_BEGIN_NAMESPACE_STD
|
23 | 33 |
|
| 34 | +template <class _InIter1, class _Sent1, class _InIter2, class _Pred, class _Proj1, class _Proj2> |
| 35 | +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InIter1, _InIter2> |
| 36 | +__mismatch_loop(_InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Pred __pred, _Proj1 __proj1, _Proj2 __proj2) { |
| 37 | + while (__first1 != __last1) { |
| 38 | + if (!std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) |
| 39 | + break; |
| 40 | + ++__first1; |
| 41 | + ++__first2; |
| 42 | + } |
| 43 | + return {std::move(__first1), std::move(__first2)}; |
| 44 | +} |
| 45 | + |
// NOTE(review): the macro spelling "ALGORIHTMS" is kept exactly as found; presumably it matches the spelling used
// where the macro is defined -- confirm before changing it, otherwise this whole block is silently compiled out.
#if _LIBCPP_CAN_VECTORIZE_ALGORIHTMS
// SIMD building blocks for mismatch over two contiguous ranges of _Tp.
//
// The search is split into three stages that all take (__first1, __last1, __first2):
//   __prologue - masked compare of the elements in front of the next vector boundary (maskload targets only),
//   __loop     - compare of __unroll_count full vectors per iteration,
//   __epilogue - compare of everything that is left; always yields the final answer.
template <class _Tp>
struct __mismatch_vector_impl {
  // True when ranges of _Tp may be compared through this class: either _Tp is trivially equality comparable, fits
  // into a vector and is at least as aligned as the arithmetic type it is loaded as, or it is a floating-point type
  // and the caller opted into floating-point vectorization.
  template <bool _VectorizeFloatingPoint>
  static constexpr bool __can_vectorize =
      (__libcpp_is_trivially_equality_comparable<_Tp, _Tp>::value && __fits_in_vector<_Tp> &&
       alignof(_Tp) >= alignof(__get_arithmetic_type<_Tp>)) ||
      (_VectorizeFloatingPoint && is_floating_point_v<_Tp>);

  using __vec         = __arithmetic_vec<_Tp>;
  using __mask_traits = experimental::__mask_traits<typename __vec::value_type, typename __vec::abi_type>;
  // Number of vectors compared per iteration of __loop.
  static constexpr size_t __unroll_count = 4;

  // Outcome of one stage. When __matched is true the search is finished and (__iter1, __iter2) is the final answer
  // (a mismatch position, or the end of the first range). When it is false, the search has to continue from
  // (__iter1, __iter2).
  struct __result {
    _Tp* __iter1;
    _Tp* __iter2;
    bool __matched;
  };

  // Compares the elements from __first1 up to the next vector boundary with a single masked load per range.
  // Returns the inputs unchanged (with __matched == false) when masked loads are unavailable or when the range
  // ends before that boundary.
  //
  // NOTE(review): assumes __align_down(__vec::size(), p) rounds p down to a vector boundary and that
  // __mask_with_last_enabled(n) enables the last n lanes -- confirm against their definitions.
  _LIBCPP_HIDE_FROM_ABI static __result __prologue(_Tp* __first1, _Tp* __last1, _Tp* __first2) {
    if constexpr (__mask_traits::__has_maskload) {
      auto __first_aligned = std::__align_down(__vec::size(), __first1);
      auto __offset        = __first1 - __first_aligned;
      auto __checked_size  = __vec::size() - __offset;
      // The head chunk must lie completely inside [__first1, __last1). Otherwise elements past __last1 would be
      // compared and the returned pointers could end up beyond __last1.
      if (__checked_size > static_cast<size_t>(__last1 - __first1))
        return {__first1, __first2, false};
      auto __second_aligned = __first2 - __offset;
      auto __mask           = __mask_traits::__mask_with_last_enabled(__checked_size);
      __vec __lhs =
          __mask_traits::__maskload_unaligned(reinterpret_cast<typename __vec::value_type*>(__first_aligned), __mask);
      __vec __rhs =
          __mask_traits::__maskload_unaligned(reinterpret_cast<typename __vec::value_type*>(__second_aligned), __mask);
      auto __res = __mask_traits::__mask_cmp_eq(__mask, __lhs, __rhs);
      // Lanes that are enabled and did not compare equal. Restricting to __mask keeps disabled lanes (which lie in
      // front of __first1) from being reported; the cast undoes the integer promotion performed by '~'.
      using __mask_data       = decltype(__res.__get_data().__mask_);
      const auto __mismatched = static_cast<__mask_data>(~__res.__get_data().__mask_ & __mask.__get_data().__mask_);
      if (__mismatched != __mask_data{}) {
        auto __match_offset =
            experimental::find_first_set(decltype(__mask){experimental::__from_storage, {__mismatched}});
        return {__first_aligned + __match_offset, __second_aligned + __match_offset, true};
      }
      return {__first_aligned + __vec::size(), __second_aligned + __vec::size(), false};
    } else {
      return {__first1, __first2, false};
    }
  }

  // Compares __unroll_count vectors per iteration for as long as that many elements remain.
  // Returns the first mismatch with __matched == true. Otherwise returns the position where the loop stopped, with
  // __matched == true only if that position is __last1.
  _LIBCPP_HIDE_FROM_ABI _LIBCPP_ALWAYS_INLINE static __result __loop(_Tp* __first1, _Tp* __last1, _Tp* __first2) {
    while (static_cast<size_t>(__last1 - __first1) >= __unroll_count * __vec::size()) {
      __vec __lhs[__unroll_count];
      __vec __rhs[__unroll_count];

      // Issue all loads before any compare.
      for (size_t __i = 0; __i != __unroll_count; ++__i) {
        __lhs[__i] = std::__load_as_arithmetic(__first1 + __i * __vec::size());
        __rhs[__i] = std::__load_as_arithmetic(__first2 + __i * __vec::size());
      }

      for (size_t __i = 0; __i != __unroll_count; ++__i) {
        if (auto __res = __lhs[__i] == __rhs[__i]; !experimental::all_of(__res)) {
          // __res is set where the lanes are EQUAL, so the mismatch is the first lane set in its negation.
          auto __offset = __i * __vec::size() + experimental::find_first_set(!__res);
          return {__first1 + __offset, __first2 + __offset, true};
        }
      }

      __first1 += __unroll_count * __vec::size();
      __first2 += __unroll_count * __vec::size();
    }
    return {__first1, __first2, __first1 == __last1};
  }

  // Finishes the search over [__first1, __last1) and returns the mismatch position, or (__last1, the matching
  // position in the second range) when every element compares equal.
  //
  // With masked loads: whole vectors are compared first (up to __unroll_count - 1 of them can be left over after
  // __loop), then the remaining partial vector is compared with one masked load per range.
  // Without masked loads: falls back to the scalar loop.
  //
  // NOTE(review): assumes __mask_with_first_enabled(n) enables the first n lanes -- confirm.
  _LIBCPP_HIDE_FROM_ABI static pair<_Tp*, _Tp*> __epilogue(_Tp* __first1, _Tp* __last1, _Tp* __first2) {
    if constexpr (__mask_traits::__has_maskload) {
      // A single masked load covers fewer than __vec::size() elements, so consume full vectors first.
      while (static_cast<size_t>(__last1 - __first1) >= __vec::size()) {
        __vec __lhs = std::__load_as_arithmetic(__first1);
        __vec __rhs = std::__load_as_arithmetic(__first2);
        if (auto __res = __lhs == __rhs; !experimental::all_of(__res)) {
          auto __offset = experimental::find_first_set(!__res);
          return {__first1 + __offset, __first2 + __offset};
        }
        __first1 += __vec::size();
        __first2 += __vec::size();
      }

      // Nothing left: do not touch memory at the end of the ranges.
      if (__first1 == __last1)
        return {__first1, __first2};

      auto __size = __last1 - __first1;
      auto __mask = __mask_traits::__mask_with_first_enabled(__size);
      __vec __lhs =
          __mask_traits::__maskload_unaligned(reinterpret_cast<typename __vec::value_type*>(__first1), __mask);
      __vec __rhs =
          __mask_traits::__maskload_unaligned(reinterpret_cast<typename __vec::value_type*>(__first2), __mask);
      auto __res = __mask_traits::__mask_cmp_eq(__mask, __lhs, __rhs);
      // Lanes that are enabled and did not compare equal; the cast undoes the integer promotion performed by '~'.
      using __mask_data       = decltype(__res.__get_data().__mask_);
      const auto __mismatched = static_cast<__mask_data>(~__res.__get_data().__mask_ & __mask.__get_data().__mask_);
      if (__mismatched != __mask_data{}) {
        auto __offset = experimental::find_first_set(decltype(__mask){experimental::__from_storage, {__mismatched}});
        return {__first1 + __offset, __first2 + __offset};
      }
      return {__first1 + __size, __first2 + __size};
    } else {
      return std::__mismatch_loop(__first1, __last1, __first2, __equal_to(), __identity(), __identity());
    }
  }
};
#endif // _LIBCPP_CAN_VECTORIZE_ALGORIHTMS
| 135 | + |
| 136 | +template <class _InIter1, class _Sent1, class _InIter2, class _Pred, class _Proj1, class _Proj2> |
| 137 | +_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InIter1, _InIter2> |
| 138 | +__mismatch(_InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Pred __pred, _Proj1 __proj1, _Proj2 __proj2) { |
| 139 | + return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2); |
| 140 | +} |
| 141 | + |
#if _LIBCPP_VECTORIZE_CLASSIC_ALGORITHMS
// Vectorized __mismatch overload for two pointer ranges of the same element type.
//
// Selected only when the predicate desugars to equality on _Tp, both projections are the identity, and
// __mismatch_vector_impl<_Tp> reports that _Tp can be vectorized. During constant evaluation the scalar loop is
// used instead.
//
// Returns the first position at which the ranges differ, or (__last1, corresponding position in the second range)
// when they do not differ.
template <
    class _Tp,
    class _Pred,
    class _Proj1,
    class _Proj2,
    enable_if_t<
        __desugars_to<__equal_tag, _Pred, _Tp, _Tp>::value && __is_identity<_Proj1>::value &&
            __is_identity<_Proj2>::value &&
            __mismatch_vector_impl<_Tp>::template __can_vectorize<_LIBCPP_VECTORIZE_FLOATING_POINT_CLASSIC_ALGORITHMS>,
        int> = 0>
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI inline constexpr pair<_Tp*, _Tp*>
__mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred __pred, _Proj1 __proj1, _Proj2 __proj2) {
  if (__libcpp_is_constant_evaluated())
    return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2);

  using __impl = __mismatch_vector_impl<_Tp>;

  // The prologue stage is currently disabled:
  // auto [__piter1, __piter2, __pmatch] = __impl::__prologue(__first1, __last1, __first2);
  // if (__pmatch)
  //   return {__piter1, __piter2};

  auto [__iter1, __iter2, __matched] = __impl::__loop(__first1, __last1, __first2);
  if (__matched)
    return {__iter1, __iter2};

  // Continue from where the vector loop stopped. Restarting at __first1 would rescan elements that are already
  // known to be equal and hand the epilogue a length it is not meant for.
  return __impl::__epilogue(__iter1, __last1, __iter2);
}
#endif // _LIBCPP_VECTORIZE_CLASSIC_ALGORITHMS
| 171 | + |
24 | 172 | template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
|
25 |
| -_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> |
| 173 | +_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> |
26 | 174 | mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate __pred) {
|
27 |
| - for (; __first1 != __last1; ++__first1, (void)++__first2) |
28 |
| - if (!__pred(*__first1, *__first2)) |
29 |
| - break; |
30 |
| - return pair<_InputIterator1, _InputIterator2>(__first1, __first2); |
| 175 | + __identity __proj; |
| 176 | + auto __res = std::__mismatch( |
| 177 | + std::__unwrap_iter(__first1), std::__unwrap_iter(__last1), std::__unwrap_iter(__first2), __pred, __proj, __proj); |
| 178 | + return std::make_pair(std::__rewrap_iter(__first1, __res.first), std::__rewrap_iter(__first2, __res.second)); |
31 | 179 | }
|
32 | 180 |
|
33 | 181 | template <class _InputIterator1, class _InputIterator2>
|
|
0 commit comments