Skip to content

Commit daeb58b

Browse files
[SYCL] Implement enqueue free functions extension (#13512)
This commit implements the [sycl_ext_oneapi_enqueue_functions](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_enqueue_functions.asciidoc) extension with enqueue free functions. Optimization for avoiding event creation will happen in a follow-up patch. --------- Signed-off-by: Larsen, Steffen <[email protected]>
1 parent 719207d commit daeb58b

13 files changed

+1891
-0
lines changed
Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
//==------ enqueue_functions.hpp ------- SYCL enqueue free functions -------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <utility> // for std::forward
12+
13+
#include <sycl/event.hpp>
14+
#include <sycl/ext/oneapi/properties/properties.hpp>
15+
#include <sycl/handler.hpp>
16+
#include <sycl/nd_range.hpp>
17+
#include <sycl/queue.hpp>
18+
#include <sycl/range.hpp>
19+
20+
namespace sycl {
21+
inline namespace _V1 {
22+
namespace ext::oneapi::experimental {
23+
24+
namespace detail {
25+
// Trait for identifying sycl::range and sycl::nd_range.
26+
template <typename RangeT> struct is_range_or_nd_range : std::false_type {};
27+
template <int Dimensions>
28+
struct is_range_or_nd_range<range<Dimensions>> : std::true_type {};
29+
template <int Dimensions>
30+
struct is_range_or_nd_range<nd_range<Dimensions>> : std::true_type {};
31+
32+
template <typename RangeT>
33+
constexpr bool is_range_or_nd_range_v = is_range_or_nd_range<RangeT>::value;
34+
35+
template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess;
36+
} // namespace detail
37+
38+
// Available only when Range is range or nd_range
39+
template <
40+
typename RangeT, typename PropertiesT = empty_properties_t,
41+
typename = std::enable_if_t<
42+
ext::oneapi::experimental::detail::is_range_or_nd_range_v<RangeT>>>
43+
class launch_config {
44+
public:
45+
launch_config(RangeT Range, PropertiesT Properties = {})
46+
: MRange{Range}, MProperties{Properties} {}
47+
48+
private:
49+
RangeT MRange;
50+
PropertiesT MProperties;
51+
52+
const RangeT &getRange() const noexcept { return MRange; }
53+
54+
const PropertiesT &getProperties() const noexcept { return MProperties; }
55+
56+
template <typename LCRangeT, typename LCPropertiesT>
57+
friend struct detail::LaunchConfigAccess;
58+
};
59+
60+
namespace detail {
61+
// Helper for accessing the members of launch_config.
62+
template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess {
63+
LaunchConfigAccess(const launch_config<LCRangeT, LCPropertiesT> &LaunchConfig)
64+
: MLaunchConfig{LaunchConfig} {}
65+
66+
const launch_config<LCRangeT, LCPropertiesT> &MLaunchConfig;
67+
68+
const LCRangeT &getRange() const noexcept { return MLaunchConfig.getRange(); }
69+
70+
const LCPropertiesT &getProperties() const noexcept {
71+
return MLaunchConfig.getProperties();
72+
}
73+
};
74+
} // namespace detail
75+
76+
template <typename CommandGroupFunc>
77+
void submit(queue Q, CommandGroupFunc &&CGF) {
78+
// TODO: Use new submit without Events.
79+
Q.submit(std::forward<CommandGroupFunc>(CGF));
80+
}
81+
82+
template <typename CommandGroupFunc>
83+
event submit_with_event(queue Q, CommandGroupFunc &&CGF) {
84+
return Q.submit(std::forward<CommandGroupFunc>(CGF));
85+
}
86+
87+
template <typename KernelName = sycl::detail::auto_name, typename KernelType>
88+
void single_task(handler &CGH, const KernelType &KernelObj) {
89+
CGH.single_task<KernelName>(KernelObj);
90+
}
91+
92+
template <typename KernelName = sycl::detail::auto_name, typename KernelType>
93+
void single_task(queue Q, const KernelType &KernelObj) {
94+
submit(Q, [&](handler &CGH) { single_task<KernelName>(CGH, KernelObj); });
95+
}
96+
97+
template <typename... ArgsT>
98+
void single_task(handler &CGH, const kernel &KernelObj, ArgsT &&...Args) {
99+
CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
100+
CGH.single_task(KernelObj);
101+
}
102+
103+
template <typename... ArgsT>
104+
void single_task(queue Q, const kernel &KernelObj, ArgsT &&...Args) {
105+
submit(Q, [&](handler &CGH) {
106+
single_task(CGH, KernelObj, std::forward<ArgsT>(Args)...);
107+
});
108+
}
109+
110+
// TODO: Make overloads for scalar arguments for range.
111+
template <typename KernelName = sycl::detail::auto_name, int Dimensions,
112+
typename KernelType, typename... ReductionsT>
113+
void parallel_for(handler &CGH, range<Dimensions> Range,
114+
const KernelType &KernelObj, ReductionsT &&...Reductions) {
115+
CGH.parallel_for<KernelName>(Range, std::forward<ReductionsT>(Reductions)...,
116+
KernelObj);
117+
}
118+
119+
template <typename KernelName = sycl::detail::auto_name, int Dimensions,
120+
typename KernelType, typename... ReductionsT>
121+
void parallel_for(queue Q, range<Dimensions> Range, const KernelType &KernelObj,
122+
ReductionsT &&...Reductions) {
123+
submit(Q, [&](handler &CGH) {
124+
parallel_for<KernelName>(CGH, Range, KernelObj,
125+
std::forward<ReductionsT>(Reductions)...);
126+
});
127+
}
128+
129+
template <typename KernelName = sycl::detail::auto_name, int Dimensions,
130+
typename Properties, typename KernelType, typename... ReductionsT>
131+
void parallel_for(handler &CGH,
132+
launch_config<range<Dimensions>, Properties> Config,
133+
const KernelType &KernelObj, ReductionsT &&...Reductions) {
134+
ext::oneapi::experimental::detail::LaunchConfigAccess<range<Dimensions>,
135+
Properties>
136+
ConfigAccess(Config);
137+
CGH.parallel_for<KernelName>(ConfigAccess.getRange(),
138+
std::forward<ReductionsT>(Reductions)...,
139+
KernelObj);
140+
}
141+
142+
template <typename KernelName = sycl::detail::auto_name, int Dimensions,
143+
typename Properties, typename KernelType, typename... ReductionsT>
144+
void parallel_for(queue Q, launch_config<range<Dimensions>, Properties> Config,
145+
const KernelType &KernelObj, ReductionsT &&...Reductions) {
146+
submit(Q, [&](handler &CGH) {
147+
parallel_for<KernelName>(CGH, Config, KernelObj,
148+
std::forward<ReductionsT>(Reductions)...);
149+
});
150+
}
151+
152+
template <int Dimensions, typename... ArgsT>
153+
void parallel_for(handler &CGH, range<Dimensions> Range,
154+
const kernel &KernelObj, ArgsT &&...Args) {
155+
CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
156+
CGH.parallel_for(Range, KernelObj);
157+
}
158+
159+
template <int Dimensions, typename... ArgsT>
160+
void parallel_for(queue Q, range<Dimensions> Range, const kernel &KernelObj,
161+
ArgsT &&...Args) {
162+
submit(Q, [&](handler &CGH) {
163+
parallel_for(CGH, Range, KernelObj, std::forward<ArgsT>(Args)...);
164+
});
165+
}
166+
167+
template <int Dimensions, typename Properties, typename... ArgsT>
168+
void parallel_for(handler &CGH,
169+
launch_config<range<Dimensions>, Properties> Config,
170+
const kernel &KernelObj, ArgsT &&...Args) {
171+
ext::oneapi::experimental::detail::LaunchConfigAccess<range<Dimensions>,
172+
Properties>
173+
ConfigAccess(Config);
174+
CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
175+
CGH.parallel_for(ConfigAccess.getRange(), KernelObj);
176+
}
177+
178+
template <int Dimensions, typename Properties, typename... ArgsT>
179+
void parallel_for(queue Q, launch_config<range<Dimensions>, Properties> Config,
180+
const kernel &KernelObj, ArgsT &&...Args) {
181+
submit(Q, [&](handler &CGH) {
182+
parallel_for(CGH, Config, KernelObj, std::forward<ArgsT>(Args)...);
183+
});
184+
}
185+
186+
template <typename KernelName = sycl::detail::auto_name, int Dimensions,
187+
typename KernelType, typename... ReductionsT>
188+
void nd_launch(handler &CGH, nd_range<Dimensions> Range,
189+
const KernelType &KernelObj, ReductionsT &&...Reductions) {
190+
CGH.parallel_for<KernelName>(Range, std::forward<ReductionsT>(Reductions)...,
191+
KernelObj);
192+
}
193+
194+
template <typename KernelName = sycl::detail::auto_name, int Dimensions,
195+
typename KernelType, typename... ReductionsT>
196+
void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
197+
ReductionsT &&...Reductions) {
198+
submit(Q, [&](handler &CGH) {
199+
nd_launch(CGH, Range, KernelObj, std::forward<ReductionsT>(Reductions)...);
200+
});
201+
}
202+
203+
template <typename KernelName = sycl::detail::auto_name, int Dimensions,
204+
typename Properties, typename KernelType, typename... ReductionsT>
205+
void nd_launch(handler &CGH,
206+
launch_config<nd_range<Dimensions>, Properties> Config,
207+
const KernelType &KernelObj, ReductionsT &&...Reductions) {
208+
209+
ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
210+
Properties>
211+
ConfigAccess(Config);
212+
CGH.parallel_for<KernelName>(ConfigAccess.getRange(),
213+
std::forward<ReductionsT>(Reductions)...,
214+
KernelObj);
215+
}
216+
217+
template <typename KernelName = sycl::detail::auto_name, int Dimensions,
218+
typename Properties, typename KernelType, typename... ReductionsT>
219+
void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
220+
const KernelType &KernelObj, ReductionsT &&...Reductions) {
221+
submit(Q, [&](handler &CGH) {
222+
nd_launch(CGH, Config, KernelObj, std::forward<ReductionsT>(Reductions)...);
223+
});
224+
}
225+
226+
template <int Dimensions, typename... ArgsT>
227+
void nd_launch(handler &CGH, nd_range<Dimensions> Range,
228+
const kernel &KernelObj, ArgsT &&...Args) {
229+
CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
230+
CGH.parallel_for(Range, KernelObj);
231+
}
232+
233+
template <int Dimensions, typename... ArgsT>
234+
void nd_launch(queue Q, nd_range<Dimensions> Range, const kernel &KernelObj,
235+
ArgsT &&...Args) {
236+
submit(Q, [&](handler &CGH) {
237+
nd_launch(CGH, Range, KernelObj, std::forward<ArgsT>(Args)...);
238+
});
239+
}
240+
241+
template <int Dimensions, typename Properties, typename... ArgsT>
242+
void nd_launch(handler &CGH,
243+
launch_config<nd_range<Dimensions>, Properties> Config,
244+
const kernel &KernelObj, ArgsT &&...Args) {
245+
ext::oneapi::experimental::detail::LaunchConfigAccess<nd_range<Dimensions>,
246+
Properties>
247+
ConfigAccess(Config);
248+
CGH.set_args<ArgsT...>(std::forward<ArgsT>(Args)...);
249+
CGH.parallel_for(ConfigAccess.getRange(), KernelObj);
250+
}
251+
252+
template <int Dimensions, typename Properties, typename... ArgsT>
253+
void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config,
254+
const kernel &KernelObj, ArgsT &&...Args) {
255+
submit(Q, [&](handler &CGH) {
256+
nd_launch(CGH, Config, KernelObj, std::forward<ArgsT>(Args)...);
257+
});
258+
}
259+
260+
inline void memcpy(handler &CGH, void *Dest, const void *Src, size_t NumBytes) {
261+
CGH.memcpy(Dest, Src, NumBytes);
262+
}
263+
264+
inline void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes) {
265+
submit(Q, [&](handler &CGH) { memcpy(CGH, Dest, Src, NumBytes); });
266+
}
267+
268+
template <typename T>
269+
void copy(handler &CGH, const T *Src, T *Dest, size_t Count) {
270+
CGH.copy<T>(Src, Dest, Count);
271+
}
272+
273+
template <typename T> void copy(queue Q, const T *Src, T *Dest, size_t Count) {
274+
submit(Q, [&](handler &CGH) { copy<T>(CGH, Src, Dest, Count); });
275+
}
276+
277+
inline void memset(handler &CGH, void *Ptr, int Value, size_t NumBytes) {
278+
CGH.memset(Ptr, Value, NumBytes);
279+
}
280+
281+
inline void memset(queue Q, void *Ptr, int Value, size_t NumBytes) {
282+
submit(Q, [&](handler &CGH) { memset(CGH, Ptr, Value, NumBytes); });
283+
}
284+
285+
template <typename T>
286+
void fill(sycl::handler &CGH, T *Ptr, const T &Pattern, size_t Count) {
287+
CGH.fill(Ptr, Pattern, Count);
288+
}
289+
290+
template <typename T>
291+
void fill(sycl::queue Q, T *Ptr, const T &Pattern, size_t Count) {
292+
submit(Q, [&](handler &CGH) { fill<T>(CGH, Ptr, Pattern, Count); });
293+
}
294+
295+
inline void prefetch(handler &CGH, void *Ptr, size_t NumBytes) {
296+
CGH.prefetch(Ptr, NumBytes);
297+
}
298+
299+
inline void prefetch(queue Q, void *Ptr, size_t NumBytes) {
300+
submit(Q, [&](handler &CGH) { prefetch(CGH, Ptr, NumBytes); });
301+
}
302+
303+
inline void mem_advise(handler &CGH, void *Ptr, size_t NumBytes, int Advice) {
304+
CGH.mem_advise(Ptr, NumBytes, Advice);
305+
}
306+
307+
inline void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice) {
308+
submit(Q, [&](handler &CGH) { mem_advise(CGH, Ptr, NumBytes, Advice); });
309+
}
310+
311+
inline void barrier(handler &CGH) { CGH.ext_oneapi_barrier(); }
312+
313+
inline void barrier(queue Q) {
314+
submit(Q, [&](handler &CGH) { barrier(CGH); });
315+
}
316+
317+
inline void partial_barrier(handler &CGH, const std::vector<event> &Events) {
318+
CGH.ext_oneapi_barrier(Events);
319+
}
320+
321+
inline void partial_barrier(queue Q, const std::vector<event> &Events) {
322+
submit(Q, [&](handler &CGH) { partial_barrier(CGH, Events); });
323+
}
324+
325+
} // namespace ext::oneapi::experimental
326+
} // namespace _V1
327+
} // namespace sycl

sycl/include/sycl/sycl.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
#include <sycl/ext/oneapi/experimental/builtins.hpp>
8989
#include <sycl/ext/oneapi/experimental/composite_device.hpp>
9090
#include <sycl/ext/oneapi/experimental/cuda/barrier.hpp>
91+
#include <sycl/ext/oneapi/experimental/enqueue_functions.hpp>
9192
#include <sycl/ext/oneapi/experimental/fixed_size_group.hpp>
9293
#include <sycl/ext/oneapi/experimental/group_load_store.hpp>
9394
#include <sycl/ext/oneapi/experimental/opportunistic_group.hpp>
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: env SYCL_PI_TRACE=2 %{run} %t.out 2>&1 | FileCheck %s
3+
4+
// Tests the enqueue free function barriers.
5+
6+
#include <sycl/detail/core.hpp>
7+
#include <sycl/ext/oneapi/experimental/enqueue_functions.hpp>
8+
9+
namespace oneapiext = sycl::ext::oneapi::experimental;
10+
11+
int main() {
12+
sycl::context Context;
13+
sycl::queue Q1(Context, sycl::default_selector_v);
14+
15+
oneapiext::single_task(Q1, []() {});
16+
oneapiext::single_task(Q1, []() {});
17+
18+
oneapiext::barrier(Q1);
19+
20+
oneapiext::single_task(Q1, []() {});
21+
oneapiext::single_task(Q1, []() {});
22+
23+
oneapiext::barrier(Q1);
24+
25+
sycl::queue Q2(Context, sycl::default_selector_v);
26+
sycl::queue Q3(Context, sycl::default_selector_v);
27+
28+
sycl::event Event1 = oneapiext::submit_with_event(
29+
Q1, [&](sycl::handler &CGH) { oneapiext::single_task(CGH, []() {}); });
30+
31+
sycl::event Event2 = oneapiext::submit_with_event(
32+
Q2, [&](sycl::handler &CGH) { oneapiext::single_task(CGH, []() {}); });
33+
34+
oneapiext::partial_barrier(Q3, {Event1, Event2});
35+
36+
oneapiext::single_task(Q3, []() {});
37+
38+
sycl::event Event3 = oneapiext::submit_with_event(
39+
Q1, [&](sycl::handler &CGH) { oneapiext::single_task(CGH, []() {}); });
40+
41+
sycl::event Event4 = oneapiext::submit_with_event(
42+
Q2, [&](sycl::handler &CGH) { oneapiext::single_task(CGH, []() {}); });
43+
44+
oneapiext::partial_barrier(Q3, {Event3, Event4});
45+
46+
oneapiext::single_task(Q3, []() {});
47+
48+
Q1.wait();
49+
50+
return 0;
51+
}
52+
53+
// CHECK-COUNT-4:---> piEnqueueEventsWaitWithBarrier
54+
// CHECK-NOT:---> piEnqueueEventsWaitWithBarrier

0 commit comments

Comments
 (0)