Skip to content

Commit 058e445

Browse files
authored
[libc++] <experimental/simd> Add copy functions for class simd/simd_mask (#78935)
1 parent c7e9b49 commit 058e445

File tree

7 files changed

+342
-0
lines changed

7 files changed

+342
-0
lines changed

libcxx/docs/Status/ParallelismProjects.csv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Section,Description,Dependencies,Assignee,Complete
2424
| `[parallel.simd.class] <https://wg21.link/N4808>`_, "`simd generate constructor <https://reviews.llvm.org/D159442>`_", None, Yin Zhang, |Complete|
2525
| `[parallel.simd.class] <https://wg21.link/N4808>`_, "`simd load constructor <https://github.com/llvm/llvm-project/pull/76610>`_", None, Yin Zhang, |Complete|
2626
| `[parallel.simd.class] <https://wg21.link/N4808>`_, "`simd subscript operators <https://github.com/llvm/llvm-project/pull/68960>`_", None, Yin Zhang, |Complete|
27+
| `[parallel.simd.class] <https://wg21.link/N4808>`_, "`simd copy functions <https://github.com/llvm/llvm-project/pull/78935>`_", None, Yin Zhang, |Complete|
2728
| `[parallel.simd.class] <https://wg21.link/N4808>`_, "Class template simd implementation", None, Yin Zhang, |In Progress|
2829
| `[parallel.simd.nonmembers] <https://wg21.link/N4808>`_, "simd non-member operations", None, Yin Zhang, |In Progress|
2930
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "`Class template simd_mask declaration and alias <https://reviews.llvm.org/D144362>`_", [parallel.simd.abi], Yin Zhang, |Complete|
@@ -33,5 +34,6 @@ Section,Description,Dependencies,Assignee,Complete
3334
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "`simd_mask implicit type conversion constructor <https://github.com/llvm/llvm-project/pull/71132>`_", None, Yin Zhang, |Complete|
3435
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "`simd_mask load constructor <https://github.com/llvm/llvm-project/pull/76610>`_", None, Yin Zhang, |Complete|
3536
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "`simd_mask subscript operators <https://github.com/llvm/llvm-project/pull/68960>`_", None, Yin Zhang, |Complete|
37+
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "`simd_mask copy functions <https://github.com/llvm/llvm-project/pull/78935>`_", None, Yin Zhang, |Complete|
3638
| `[parallel.simd.mask.class] <https://wg21.link/N4808>`_, "Class template simd_mask implementation", None, Yin Zhang, |In Progress|
3739
| `[parallel.simd.mask.nonmembers] <https://wg21.link/N4808>`_, "simd_mask non-member operations", None, Yin Zhang, |In Progress|

libcxx/include/experimental/__simd/scalar.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ struct __simd_operations<_Tp, simd_abi::__scalar> {
6262
static _LIBCPP_HIDE_FROM_ABI void __load(_SimdStorage& __s, const _Up* __mem) noexcept {
6363
__s.__data = static_cast<_Tp>(__mem[0]);
6464
}
65+
66+
template <class _Up>
67+
static _LIBCPP_HIDE_FROM_ABI void __store(_SimdStorage __s, _Up* __mem) noexcept {
68+
*__mem = static_cast<_Up>(__s.__data);
69+
}
6570
};
6671

6772
template <class _Tp>
@@ -71,6 +76,8 @@ struct __mask_operations<_Tp, simd_abi::__scalar> {
7176
static _LIBCPP_HIDE_FROM_ABI _MaskStorage __broadcast(bool __v) noexcept { return {__v}; }
7277

7378
static _LIBCPP_HIDE_FROM_ABI void __load(_MaskStorage& __s, const bool* __mem) noexcept { __s.__data = __mem[0]; }
79+
80+
// Writes the single mask value held in __s to the first element of __mem.
static _LIBCPP_HIDE_FROM_ABI void __store(_MaskStorage __s, bool* __mem) noexcept {
  *__mem = __s.__data;
}
7481
};
7582

7683
} // namespace parallelism_v2

libcxx/include/experimental/__simd/simd.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,17 @@ class simd {
7070
_Impl::__load(__s_, _Flags::template __apply<simd>(__mem));
7171
}
7272

73+
// copy functions
74+
template <class _Up, class _Flags, enable_if_t<__is_vectorizable_v<_Up> && is_simd_flag_type_v<_Flags>, int> = 0>
75+
_LIBCPP_HIDE_FROM_ABI void copy_from(const _Up* __mem, _Flags) {
76+
_Impl::__load(__s_, _Flags::template __apply<simd>(__mem));
77+
}
78+
79+
template <class _Up, class _Flags, enable_if_t<__is_vectorizable_v<_Up> && is_simd_flag_type_v<_Flags>, int> = 0>
80+
_LIBCPP_HIDE_FROM_ABI void copy_to(_Up* __mem, _Flags) const {
81+
_Impl::__store(__s_, _Flags::template __apply<simd>(__mem));
82+
}
83+
7384
// scalar access [simd.subscr]
7485
_LIBCPP_HIDE_FROM_ABI reference operator[](size_t __i) noexcept { return reference(__s_, __i); }
7586
_LIBCPP_HIDE_FROM_ABI value_type operator[](size_t __i) const noexcept { return __s_.__get(__i); }

libcxx/include/experimental/__simd/simd_mask.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,17 @@ class simd_mask {
5858
_Impl::__load(__s_, _Flags::template __apply<simd_mask>(__mem));
5959
}
6060

61+
// copy functions
62+
template <class _Flags, enable_if_t<is_simd_flag_type_v<_Flags>, int> = 0>
63+
_LIBCPP_HIDE_FROM_ABI void copy_from(const value_type* __mem, _Flags) {
64+
_Impl::__load(__s_, _Flags::template __apply<simd_mask>(__mem));
65+
}
66+
67+
template <class _Flags, enable_if_t<is_simd_flag_type_v<_Flags>, int> = 0>
68+
_LIBCPP_HIDE_FROM_ABI void copy_to(value_type* __mem, _Flags) const {
69+
_Impl::__store(__s_, _Flags::template __apply<simd_mask>(__mem));
70+
}
71+
6172
// scalar access [simd.mask.subscr]
6273
_LIBCPP_HIDE_FROM_ABI reference operator[](size_t __i) noexcept { return reference(__s_, __i); }
6374
_LIBCPP_HIDE_FROM_ABI value_type operator[](size_t __i) const noexcept { return __s_.__get(__i); }

libcxx/include/experimental/__simd/vec_ext.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,12 @@ struct __simd_operations<_Tp, simd_abi::__vec_ext<_Np>> {
8080
for (size_t __i = 0; __i < _Np; __i++)
8181
__s.__data[__i] = static_cast<_Tp>(__mem[__i]);
8282
}
83+
84+
template <class _Up>
85+
static _LIBCPP_HIDE_FROM_ABI void __store(_SimdStorage __s, _Up* __mem) noexcept {
86+
for (size_t __i = 0; __i < _Np; __i++)
87+
__mem[__i] = static_cast<_Up>(__s.__data[__i]);
88+
}
8389
};
8490

8591
template <class _Tp, int _Np>
@@ -99,6 +105,11 @@ struct __mask_operations<_Tp, simd_abi::__vec_ext<_Np>> {
99105
for (size_t __i = 0; __i < _Np; __i++)
100106
__s.__data[__i] = experimental::__set_all_bits<_Tp>(__mem[__i]);
101107
}
108+
109+
// Writes the _Np mask elements held in __s to __mem[0] .. __mem[_Np - 1],
// converting each stored element to bool.
static _LIBCPP_HIDE_FROM_ABI void __store(_MaskStorage __s, bool* __mem) noexcept {
  for (size_t __lane = 0; __lane < _Np; ++__lane) {
    __mem[__lane] = static_cast<bool>(__s.__data[__lane]);
  }
}
102113
};
103114

104115
} // namespace parallelism_v2
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03, c++11, c++14
10+
11+
// FIXME: Fatal error with following targets (remove XFAIL when fixed):
12+
// Pass-by-value arguments with alignment greater than register width are not supported.
13+
// XFAIL: target=powerpc{{.*}}-ibm-aix7.2.5.7
14+
15+
// <experimental/simd>
16+
//
17+
// [simd.class]
18+
// template<class U, class Flags> void copy_from(const U* mem, Flags);
19+
// template<class U, class Flags> void copy_to(U* mem, Flags) const;
20+
21+
#include "../test_utils.h"
22+
23+
namespace ex = std::experimental::parallelism_v2;
24+
25+
template <class T, class SimdAbi, std::size_t array_size>
26+
struct ElementAlignedCopyFromHelper {
27+
template <class U>
28+
void operator()() const {
29+
U buffer[array_size];
30+
for (size_t i = 0; i < array_size; ++i)
31+
buffer[i] = static_cast<U>(i);
32+
ex::simd<T, SimdAbi> origin_simd;
33+
origin_simd.copy_from(buffer, ex::element_aligned_tag());
34+
assert_simd_values_equal(origin_simd, buffer);
35+
}
36+
};
37+
38+
template <class T, class SimdAbi, std::size_t array_size>
39+
struct VectorAlignedCopyFromHelper {
40+
template <class U>
41+
void operator()() const {
42+
alignas(ex::memory_alignment_v<ex::simd<T, SimdAbi>, U>) U buffer[array_size];
43+
for (size_t i = 0; i < array_size; ++i)
44+
buffer[i] = static_cast<U>(i);
45+
ex::simd<T, SimdAbi> origin_simd;
46+
origin_simd.copy_from(buffer, ex::vector_aligned_tag());
47+
assert_simd_values_equal(origin_simd, buffer);
48+
}
49+
};
50+
51+
template <class T, class SimdAbi, std::size_t array_size>
52+
struct OveralignedCopyFromHelper {
53+
template <class U>
54+
void operator()() const {
55+
alignas(bit_ceil(sizeof(U) + 1)) U buffer[array_size];
56+
for (size_t i = 0; i < array_size; ++i)
57+
buffer[i] = static_cast<U>(i);
58+
ex::simd<T, SimdAbi> origin_simd;
59+
origin_simd.copy_from(buffer, ex::overaligned_tag<bit_ceil(sizeof(U) + 1)>());
60+
assert_simd_values_equal(origin_simd, buffer);
61+
}
62+
};
63+
64+
template <class T, std::size_t>
65+
struct CheckSimdCopyFrom {
66+
template <class SimdAbi>
67+
void operator()() {
68+
constexpr std::size_t array_size = ex::simd_size_v<T, SimdAbi>;
69+
70+
types::for_each(simd_test_types(), ElementAlignedCopyFromHelper<T, SimdAbi, array_size>());
71+
types::for_each(simd_test_types(), VectorAlignedCopyFromHelper<T, SimdAbi, array_size>());
72+
types::for_each(simd_test_types(), OveralignedCopyFromHelper<T, SimdAbi, array_size>());
73+
}
74+
};
75+
76+
template <class T, class SimdAbi, std::size_t array_size>
77+
struct ElementAlignedCopyToHelper {
78+
template <class U>
79+
void operator()() const {
80+
U buffer[array_size];
81+
ex::simd<T, SimdAbi> origin_simd([](T i) { return i; });
82+
origin_simd.copy_to(buffer, ex::element_aligned_tag());
83+
assert_simd_values_equal(origin_simd, buffer);
84+
}
85+
};
86+
87+
template <class T, class SimdAbi, std::size_t array_size>
88+
struct VectorAlignedCopyToHelper {
89+
template <class U>
90+
void operator()() const {
91+
alignas(ex::memory_alignment_v<ex::simd<T, SimdAbi>, U>) U buffer[array_size];
92+
ex::simd<T, SimdAbi> origin_simd([](T i) { return i; });
93+
origin_simd.copy_to(buffer, ex::vector_aligned_tag());
94+
assert_simd_values_equal(origin_simd, buffer);
95+
}
96+
};
97+
98+
template <class T, class SimdAbi, std::size_t array_size>
99+
struct OveralignedCopyToHelper {
100+
template <class U>
101+
void operator()() const {
102+
alignas(bit_ceil(sizeof(U) + 1)) U buffer[array_size];
103+
ex::simd<T, SimdAbi> origin_simd([](T i) { return i; });
104+
origin_simd.copy_to(buffer, ex::overaligned_tag<bit_ceil(sizeof(U) + 1)>());
105+
assert_simd_values_equal(origin_simd, buffer);
106+
}
107+
};
108+
109+
template <class T, std::size_t>
110+
struct CheckSimdCopyTo {
111+
template <class SimdAbi>
112+
void operator()() {
113+
constexpr std::size_t array_size = ex::simd_size_v<T, SimdAbi>;
114+
115+
types::for_each(simd_test_types(), ElementAlignedCopyToHelper<T, SimdAbi, array_size>());
116+
types::for_each(simd_test_types(), VectorAlignedCopyToHelper<T, SimdAbi, array_size>());
117+
types::for_each(simd_test_types(), OveralignedCopyToHelper<T, SimdAbi, array_size>());
118+
}
119+
};
120+
121+
template <class U, class T, class Flags, class SimdAbi = ex::simd_abi::compatible<T>, class = void>
122+
struct has_copy_from : std::false_type {};
123+
124+
template <class U, class T, class Flags, class SimdAbi>
125+
struct has_copy_from<U,
126+
T,
127+
Flags,
128+
SimdAbi,
129+
std::void_t<decltype(std::declval<ex::simd<T, SimdAbi>>().copy_from(
130+
std::declval<const U*>(), std::declval<Flags>()))>> : std::true_type {};
131+
132+
template <class U, class T, class Flags, class SimdAbi = ex::simd_abi::compatible<T>, class = void>
133+
struct has_copy_to : std::false_type {};
134+
135+
template <class U, class T, class Flags, class SimdAbi>
136+
struct has_copy_to<
137+
U,
138+
T,
139+
Flags,
140+
SimdAbi,
141+
std::void_t<decltype(std::declval<ex::simd<T, SimdAbi>>().copy_to(std::declval<U*>(), std::declval<Flags>()))>>
142+
: std::true_type {};
143+
144+
template <class T, std::size_t>
145+
struct CheckSimdCopyTraits {
146+
template <class SimdAbi>
147+
void operator()() {
148+
// These functions shall not participate in overload resolution unless
149+
// is_simd_flag_type_v<Flags> is true, and
150+
// U is a vectorizable type.
151+
static_assert(has_copy_from<int, T, ex::element_aligned_tag, SimdAbi>::value);
152+
static_assert(has_copy_to<int, T, ex::element_aligned_tag, SimdAbi>::value);
153+
154+
// is_simd_flag_type_v<Flags> is false
155+
static_assert(!has_copy_from<int, T, T, SimdAbi>::value);
156+
static_assert(!has_copy_to<int, T, T, SimdAbi>::value);
157+
static_assert(!has_copy_from<int, T, SimdAbi, SimdAbi>::value);
158+
static_assert(!has_copy_to<int, T, SimdAbi, SimdAbi>::value);
159+
160+
// U is not a vectorizable type.
161+
static_assert(!has_copy_from<SimdAbi, T, ex::element_aligned_tag, SimdAbi>::value);
162+
static_assert(!has_copy_to<SimdAbi, T, ex::element_aligned_tag, SimdAbi>::value);
163+
static_assert(!has_copy_from<ex::element_aligned_tag, T, ex::element_aligned_tag, SimdAbi>::value);
164+
static_assert(!has_copy_to<ex::element_aligned_tag, T, ex::element_aligned_tag, SimdAbi>::value);
165+
}
166+
};
167+
168+
// Test driver: runs the copy_from checks, the copy_to checks and the
// overload-resolution checks through test_all_simd_abi.
int main(int, char**) {
  test_all_simd_abi<CheckSimdCopyFrom>();   // simd::copy_from
  test_all_simd_abi<CheckSimdCopyTo>();     // simd::copy_to
  test_all_simd_abi<CheckSimdCopyTraits>(); // constraints on both

  return 0;
}
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03, c++11, c++14
10+
11+
// <experimental/simd>
12+
//
13+
// [simd.mask.class]
14+
// template<class Flags> void copy_from(const value_type* mem, Flags);
15+
// template<class Flags> void copy_to(value_type* mem, Flags) const;
16+
17+
#include "../test_utils.h"
18+
19+
namespace ex = std::experimental::parallelism_v2;
20+
21+
template <class T, std::size_t>
22+
struct CheckSimdMaskCopyFrom {
23+
template <class SimdAbi>
24+
void operator()() {
25+
constexpr std::size_t array_size = ex::simd_size_v<T, SimdAbi>;
26+
27+
// element aligned tag
28+
constexpr std::size_t element_alignas_size = alignof(bool);
29+
alignas(element_alignas_size) bool element_buffer[array_size];
30+
for (size_t i = 0; i < array_size; ++i)
31+
element_buffer[i] = static_cast<bool>(i % 2);
32+
ex::simd_mask<T, SimdAbi> element_mask;
33+
element_mask.copy_from(element_buffer, ex::element_aligned_tag());
34+
assert_simd_mask_values_equal(element_mask, element_buffer);
35+
36+
// vector aligned tag
37+
constexpr std::size_t vector_alignas_size = ex::memory_alignment_v<ex::simd_mask<T, SimdAbi>>;
38+
alignas(vector_alignas_size) bool vector_buffer[array_size];
39+
for (size_t i = 0; i < array_size; ++i)
40+
vector_buffer[i] = static_cast<bool>(i % 2);
41+
ex::simd_mask<T, SimdAbi> vector_mask;
42+
vector_mask.copy_from(vector_buffer, ex::vector_aligned_tag());
43+
assert_simd_mask_values_equal(vector_mask, vector_buffer);
44+
45+
// overaligned tag
46+
constexpr std::size_t over_alignas_size = bit_ceil(sizeof(bool) + 1);
47+
alignas(over_alignas_size) bool overaligned_buffer[array_size];
48+
for (size_t i = 0; i < array_size; ++i)
49+
overaligned_buffer[i] = static_cast<bool>(i % 2);
50+
ex::simd_mask<T, SimdAbi> overaligned_mask;
51+
overaligned_mask.copy_from(overaligned_buffer, ex::overaligned_tag<over_alignas_size>());
52+
assert_simd_mask_values_equal(overaligned_mask, overaligned_buffer);
53+
}
54+
};
55+
56+
template <class T, std::size_t>
57+
struct CheckSimdMaskCopyTo {
58+
template <class SimdAbi>
59+
void operator()() {
60+
constexpr std::size_t array_size = ex::simd_size_v<T, SimdAbi>;
61+
62+
// element aligned tag
63+
constexpr std::size_t element_alignas_size = alignof(bool);
64+
alignas(element_alignas_size) bool element_buffer[array_size];
65+
ex::simd_mask<T, SimdAbi> element_mask(true);
66+
element_mask.copy_to(element_buffer, ex::element_aligned_tag());
67+
assert_simd_mask_values_equal(element_mask, element_buffer);
68+
69+
// vector aligned tag
70+
constexpr std::size_t vector_alignas_size = ex::memory_alignment_v<ex::simd_mask<T, SimdAbi>>;
71+
alignas(vector_alignas_size) bool vector_buffer[array_size];
72+
ex::simd_mask<T, SimdAbi> vector_mask(false);
73+
vector_mask.copy_to(vector_buffer, ex::vector_aligned_tag());
74+
assert_simd_mask_values_equal(vector_mask, vector_buffer);
75+
76+
// overaligned tag
77+
constexpr std::size_t over_alignas_size = bit_ceil(sizeof(bool) + 1);
78+
alignas(over_alignas_size) bool overaligned_buffer[array_size];
79+
ex::simd_mask<T, SimdAbi> overaligned_mask(true);
80+
overaligned_mask.copy_to(overaligned_buffer, ex::overaligned_tag<over_alignas_size>());
81+
assert_simd_mask_values_equal(overaligned_mask, overaligned_buffer);
82+
}
83+
};
84+
85+
template <class T, class Flags, class SimdAbi = ex::simd_abi::compatible<T>, class = void>
86+
struct has_copy_from : std::false_type {};
87+
88+
template <class T, class Flags, class SimdAbi>
89+
struct has_copy_from<T,
90+
Flags,
91+
SimdAbi,
92+
std::void_t<decltype(std::declval<ex::simd_mask<T, SimdAbi>>().copy_from(
93+
std::declval<const bool*>(), std::declval<Flags>()))>> : std::true_type {};
94+
95+
template <class T, class Flags, class SimdAbi = ex::simd_abi::compatible<T>, class = void>
96+
struct has_copy_to : std::false_type {};
97+
98+
template <class T, class Flags, class SimdAbi>
99+
struct has_copy_to<T,
100+
Flags,
101+
SimdAbi,
102+
std::void_t<decltype(std::declval<ex::simd_mask<T, SimdAbi>>().copy_to(
103+
std::declval<bool*>(), std::declval<Flags>()))>> : std::true_type {};
104+
105+
template <class T, std::size_t>
106+
struct CheckSimdMaskCopyTraits {
107+
template <class SimdAbi>
108+
void operator()() {
109+
// These functions shall not participate in overload resolution unless
110+
// is_simd_flag_type_v<Flags> is true
111+
static_assert(has_copy_from<T, ex::element_aligned_tag, SimdAbi>::value);
112+
static_assert(has_copy_to<T, ex::element_aligned_tag, SimdAbi>::value);
113+
114+
// is_simd_flag_type_v<Flags> is false
115+
static_assert(!has_copy_from<T, T, SimdAbi>::value);
116+
static_assert(!has_copy_to<T, T, SimdAbi>::value);
117+
static_assert(!has_copy_from<T, SimdAbi, SimdAbi>::value);
118+
static_assert(!has_copy_to<T, SimdAbi, SimdAbi>::value);
119+
}
120+
};
121+
122+
// Test driver: runs the copy_from checks, the copy_to checks and the
// overload-resolution checks through test_all_simd_abi.
int main(int, char**) {
  test_all_simd_abi<CheckSimdMaskCopyFrom>();   // simd_mask::copy_from
  test_all_simd_abi<CheckSimdMaskCopyTo>();     // simd_mask::copy_to
  test_all_simd_abi<CheckSimdMaskCopyTraits>(); // constraint on both

  return 0;
}

0 commit comments

Comments
 (0)