|
13 | 13 | #define OMPTARGET_DEVICERTL_SYNCHRONIZATION_H
|
14 | 14 |
|
15 | 15 | #include "DeviceTypes.h"
|
| 16 | +#include "DeviceUtils.h" |
16 | 17 |
|
17 |
| -namespace ompx { |
| 18 | +#pragma omp begin declare target device_type(nohost) |
18 | 19 |
|
| 20 | +namespace ompx { |
19 | 21 | namespace atomic {
|
20 | 22 |
|
21 | 23 | enum OrderingTy {
|
@@ -48,51 +50,124 @@ uint32_t inc(uint32_t *Addr, uint32_t V, OrderingTy Ordering,
|
48 | 50 | /// result is stored in \p *Addr;
|
49 | 51 | /// {
|
50 | 52 |
|
51 |
| -#define ATOMIC_COMMON_OP(TY) \ |
52 |
| - TY add(TY *Addr, TY V, OrderingTy Ordering); \ |
53 |
| - TY mul(TY *Addr, TY V, OrderingTy Ordering); \ |
54 |
| - TY load(TY *Addr, OrderingTy Ordering); \ |
55 |
| - void store(TY *Addr, TY V, OrderingTy Ordering); \ |
56 |
| - bool cas(TY *Addr, TY ExpectedV, TY DesiredV, OrderingTy OrderingSucc, \ |
57 |
| - OrderingTy OrderingFail); |
58 |
| - |
59 |
| -#define ATOMIC_FP_ONLY_OP(TY) \ |
60 |
| - TY min(TY *Addr, TY V, OrderingTy Ordering); \ |
61 |
| - TY max(TY *Addr, TY V, OrderingTy Ordering); |
62 |
| - |
63 |
| -#define ATOMIC_INT_ONLY_OP(TY) \ |
64 |
| - TY min(TY *Addr, TY V, OrderingTy Ordering); \ |
65 |
| - TY max(TY *Addr, TY V, OrderingTy Ordering); \ |
66 |
| - TY bit_or(TY *Addr, TY V, OrderingTy Ordering); \ |
67 |
| - TY bit_and(TY *Addr, TY V, OrderingTy Ordering); \ |
68 |
| - TY bit_xor(TY *Addr, TY V, OrderingTy Ordering); |
69 |
| - |
70 |
| -#define ATOMIC_FP_OP(TY) \ |
71 |
| - ATOMIC_FP_ONLY_OP(TY) \ |
72 |
| - ATOMIC_COMMON_OP(TY) |
73 |
| - |
74 |
| -#define ATOMIC_INT_OP(TY) \ |
75 |
| - ATOMIC_INT_ONLY_OP(TY) \ |
76 |
| - ATOMIC_COMMON_OP(TY) |
77 |
| - |
78 |
| -// This needs to be kept in sync with the header. Also the reason we don't use |
79 |
| -// templates here. |
80 |
| -ATOMIC_INT_OP(int8_t) |
81 |
| -ATOMIC_INT_OP(int16_t) |
82 |
| -ATOMIC_INT_OP(int32_t) |
83 |
| -ATOMIC_INT_OP(int64_t) |
84 |
| -ATOMIC_INT_OP(uint8_t) |
85 |
| -ATOMIC_INT_OP(uint16_t) |
86 |
| -ATOMIC_INT_OP(uint32_t) |
87 |
| -ATOMIC_INT_OP(uint64_t) |
88 |
| -ATOMIC_FP_OP(float) |
89 |
| -ATOMIC_FP_OP(double) |
90 |
| - |
91 |
| -#undef ATOMIC_INT_ONLY_OP |
92 |
| -#undef ATOMIC_FP_ONLY_OP |
93 |
| -#undef ATOMIC_COMMON_OP |
94 |
| -#undef ATOMIC_INT_OP |
95 |
| -#undef ATOMIC_FP_OP |
| 53 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 54 | +bool cas(Ty *Address, V ExpectedV, V DesiredV, atomic::OrderingTy OrderingSucc, |
| 55 | + atomic::OrderingTy OrderingFail) { |
| 56 | + return __scoped_atomic_compare_exchange(Address, &ExpectedV, &DesiredV, false, |
| 57 | + OrderingSucc, OrderingFail, |
| 58 | + __MEMORY_SCOPE_DEVICE); |
| 59 | +} |
| 60 | + |
| 61 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 62 | +V add(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 63 | + return __scoped_atomic_fetch_add(Address, Val, Ordering, |
| 64 | + __MEMORY_SCOPE_DEVICE); |
| 65 | +} |
| 66 | + |
| 67 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 68 | +V load(Ty *Address, atomic::OrderingTy Ordering) { |
| 69 | + return add(Address, Ty(0), Ordering); |
| 70 | +} |
| 71 | + |
| 72 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 73 | +void store(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 74 | + __scoped_atomic_store_n(Address, Val, Ordering, __MEMORY_SCOPE_DEVICE); |
| 75 | +} |
| 76 | + |
| 77 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 78 | +V mul(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 79 | + Ty TypedCurrentVal, TypedResultVal, TypedNewVal; |
| 80 | + bool Success; |
| 81 | + do { |
| 82 | + TypedCurrentVal = atomic::load(Address, Ordering); |
| 83 | + TypedNewVal = TypedCurrentVal * Val; |
| 84 | + Success = atomic::cas(Address, TypedCurrentVal, TypedNewVal, Ordering, |
| 85 | + atomic::relaxed); |
| 86 | + } while (!Success); |
| 87 | + return TypedResultVal; |
| 88 | +} |
| 89 | + |
| 90 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 91 | +utils::enable_if_t<!utils::is_floating_point_v<V>, V> |
| 92 | +max(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 93 | + return __scoped_atomic_fetch_max(Address, Val, Ordering, |
| 94 | + __MEMORY_SCOPE_DEVICE); |
| 95 | +} |
| 96 | + |
| 97 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 98 | +utils::enable_if_t<utils::is_same_v<V, float>, V> |
| 99 | +max(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 100 | + if (Val >= 0) |
| 101 | + return utils::convertViaPun<float>( |
| 102 | + max((int32_t *)Address, utils::convertViaPun<int32_t>(Val), Ordering)); |
| 103 | + return utils::convertViaPun<float>( |
| 104 | + min((uint32_t *)Address, utils::convertViaPun<uint32_t>(Val), Ordering)); |
| 105 | +} |
| 106 | + |
| 107 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 108 | +utils::enable_if_t<utils::is_same_v<V, double>, V> |
| 109 | +max(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 110 | + if (Val >= 0) |
| 111 | + return utils::convertViaPun<double>( |
| 112 | + max((int64_t *)Address, utils::convertViaPun<int64_t>(Val), Ordering)); |
| 113 | + return utils::convertViaPun<double>( |
| 114 | + min((uint64_t *)Address, utils::convertViaPun<uint64_t>(Val), Ordering)); |
| 115 | +} |
| 116 | + |
| 117 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 118 | +utils::enable_if_t<!utils::is_floating_point_v<V>, V> |
| 119 | +min(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 120 | + return __scoped_atomic_fetch_min(Address, Val, Ordering, |
| 121 | + __MEMORY_SCOPE_DEVICE); |
| 122 | +} |
| 123 | + |
| 124 | +// TODO: Implement this with __atomic_fetch_max and remove the duplication. |
| 125 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 126 | +utils::enable_if_t<utils::is_same_v<V, float>, V> |
| 127 | +min(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 128 | + if (Val >= 0) |
| 129 | + return utils::convertViaPun<float>( |
| 130 | + min((int32_t *)Address, utils::convertViaPun<int32_t>(Val), Ordering)); |
| 131 | + return utils::convertViaPun<float>( |
| 132 | + max((uint32_t *)Address, utils::convertViaPun<uint32_t>(Val), Ordering)); |
| 133 | +} |
| 134 | + |
| 135 | +// TODO: Implement this with __atomic_fetch_max and remove the duplication. |
| 136 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 137 | +utils::enable_if_t<utils::is_same_v<V, double>, V> |
| 138 | +min(Ty *Address, utils::remove_addrspace_t<Ty> Val, |
| 139 | + atomic::OrderingTy Ordering) { |
| 140 | + if (Val >= 0) |
| 141 | + return utils::convertViaPun<double>( |
| 142 | + min((int64_t *)Address, utils::convertViaPun<int64_t>(Val), Ordering)); |
| 143 | + return utils::convertViaPun<double>( |
| 144 | + max((uint64_t *)Address, utils::convertViaPun<uint64_t>(Val), Ordering)); |
| 145 | +} |
| 146 | + |
| 147 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 148 | +V bit_or(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 149 | + return __scoped_atomic_fetch_or(Address, Val, Ordering, |
| 150 | + __MEMORY_SCOPE_DEVICE); |
| 151 | +} |
| 152 | + |
| 153 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 154 | +V bit_and(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 155 | + return __scoped_atomic_fetch_and(Address, Val, Ordering, |
| 156 | + __MEMORY_SCOPE_DEVICE); |
| 157 | +} |
| 158 | + |
| 159 | +template <typename Ty, typename V = utils::remove_addrspace_t<Ty>> |
| 160 | +V bit_xor(Ty *Address, V Val, atomic::OrderingTy Ordering) { |
| 161 | + return __scoped_atomic_fetch_xor(Address, Val, Ordering, |
| 162 | + __MEMORY_SCOPE_DEVICE); |
| 163 | +} |
| 164 | + |
| 165 | +static inline uint32_t atomicExchange(uint32_t *Address, uint32_t Val, |
| 166 | + atomic::OrderingTy Ordering) { |
| 167 | + uint32_t R; |
| 168 | + __scoped_atomic_exchange(Address, &Val, &R, Ordering, __MEMORY_SCOPE_DEVICE); |
| 169 | + return R; |
| 170 | +} |
96 | 171 |
|
97 | 172 | ///}
|
98 | 173 |
|
@@ -145,4 +220,6 @@ void system(atomic::OrderingTy Ordering);
|
145 | 220 |
|
146 | 221 | } // namespace ompx
|
147 | 222 |
|
| 223 | +#pragma omp end declare target |
| 224 | + |
148 | 225 | #endif
|
0 commit comments