Skip to content

Commit 430c1fd

Browse files
authored
[libomptarget][NFC] Outline parallel SPMD function (#78642)
This patch outlines the SPMD code path into a separate function that can be called directly.
1 parent ae8005f commit 430c1fd

File tree

1 file changed

+62
-46
lines changed

1 file changed

+62
-46
lines changed

openmp/libomptarget/DeviceRTL/src/Parallelism.cpp

Lines changed: 62 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,64 @@ uint32_t determineNumberOfThreads(int32_t NumThreadsClause) {
8383

8484
extern "C" {
8585

86+
/// Execute a parallel region in SPMD mode for the calling team.
///
/// Every thread of the block is expected to call this function: all
/// `synchronize::threadsAligned` calls below are unconditional, so each thread
/// participates in every barrier, whether or not it runs the microtask.
///
/// \param ident       Source location information, forwarded to the state
///                    RAII objects and the `assert_eq` checks.
/// \param num_threads Value passed to `determineNumberOfThreads` to compute
///                    the size of the parallel team.
/// \param fn          Outlined parallel region, handed to `invokeMicrotask`.
/// \param args        Argument array forwarded to `invokeMicrotask`.
/// \param nargs       Number of entries in \p args, forwarded to
///                    `invokeMicrotask`.
[[clang::always_inline]] void __kmpc_parallel_spmd(IdentTy *ident,
87+
int32_t num_threads,
88+
void *fn, void **args,
89+
const int64_t nargs) {
90+
uint32_t TId = mapping::getThreadIdInBlock();
91+
uint32_t NumThreads = determineNumberOfThreads(num_threads);
92+
// A parallel team size of 0 encodes "all threads participate"; see the
// `!PTeamSize || TId < PTeamSize` check before the microtask is invoked.
uint32_t PTeamSize =
93+
NumThreads == mapping::getMaxTeamThreads() ? 0 : NumThreads;
94+
// NOTE(review): the read of `icv::Level` referred to as "above" is not
// visible in this function; presumably it happens in the caller
// (`__kmpc_parallel_51`) before the SPMD path is taken -- confirm.
// Avoid the race between the read of the `icv::Level` above and the write
95+
// below by synchronizing all threads here.
96+
synchronize::threadsAligned(atomic::seq_cst);
97+
{
98+
// Note that the order here is important. `icv::Level` has to be updated
99+
// last or the other updates will cause a thread specific state to be
100+
// created.
101+
state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, PTeamSize,
102+
1u, TId == 0, ident,
103+
/*ForceTeamState=*/true);
104+
state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0, ident,
105+
/*ForceTeamState=*/true);
106+
state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0, ident,
107+
/*ForceTeamState=*/true);
108+
109+
// Synchronize all threads after the main thread (TId == 0) set up the
110+
// team state properly.
111+
synchronize::threadsAligned(atomic::acq_rel);
112+
113+
state::ParallelTeamSize.assert_eq(PTeamSize, ident,
114+
/*ForceTeamState=*/true);
115+
icv::ActiveLevel.assert_eq(1u, ident, /*ForceTeamState=*/true);
116+
icv::Level.assert_eq(1u, ident, /*ForceTeamState=*/true);
117+
118+
// Ensure we synchronize before we run user code to avoid invalidating the
119+
// assumptions above.
120+
synchronize::threadsAligned(atomic::relaxed);
121+
122+
// Threads with an id at or beyond a non-zero PTeamSize skip the user code
// but still reach the barriers that follow.
if (!PTeamSize || TId < PTeamSize)
123+
invokeMicrotask(TId, 0, fn, args, nargs);
124+
125+
// Synchronize all threads at the end of a parallel region.
126+
synchronize::threadsAligned(atomic::seq_cst);
127+
}
128+
129+
// Synchronize all threads to make sure every thread exits the scope above;
130+
// otherwise the following assertions and the assumption in
131+
// __kmpc_target_deinit may not hold.
132+
synchronize::threadsAligned(atomic::acq_rel);
133+
134+
// The checks below expect the values from before the region: team size 1
// and both level ICVs at 0.
state::ParallelTeamSize.assert_eq(1u, ident, /*ForceTeamState=*/true);
135+
icv::ActiveLevel.assert_eq(0u, ident, /*ForceTeamState=*/true);
136+
icv::Level.assert_eq(0u, ident, /*ForceTeamState=*/true);
137+
138+
// Ensure we synchronize to create an aligned region around the assumptions.
139+
synchronize::threadsAligned(atomic::relaxed);
140+
141+
return;
142+
}
143+
86144
[[clang::always_inline]] void
87145
__kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
88146
int32_t num_threads, int proc_bind, void *fn,
@@ -112,52 +170,10 @@ __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
112170
uint32_t MaxTeamThreads = mapping::getMaxTeamThreads();
113171
uint32_t PTeamSize = NumThreads == MaxTeamThreads ? 0 : NumThreads;
114172
if (mapping::isSPMDMode()) {
115-
// Avoid the race between the read of the `icv::Level` above and the write
116-
// below by synchronizing all threads here.
117-
synchronize::threadsAligned(atomic::seq_cst);
118-
{
119-
// Note that the order here is important. `icv::Level` has to be updated
120-
// last or the other updates will cause a thread specific state to be
121-
// created.
122-
state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, PTeamSize,
123-
1u, TId == 0, ident,
124-
/*ForceTeamState=*/true);
125-
state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0,
126-
ident, /*ForceTeamState=*/true);
127-
state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0, ident,
128-
/*ForceTeamState=*/true);
129-
130-
// Synchronize all threads after the main thread (TId == 0) set up the
131-
// team state properly.
132-
synchronize::threadsAligned(atomic::acq_rel);
133-
134-
state::ParallelTeamSize.assert_eq(PTeamSize, ident,
135-
/*ForceTeamState=*/true);
136-
icv::ActiveLevel.assert_eq(1u, ident, /*ForceTeamState=*/true);
137-
icv::Level.assert_eq(1u, ident, /*ForceTeamState=*/true);
138-
139-
// Ensure we synchronize before we run user code to avoid invalidating the
140-
// assumptions above.
141-
synchronize::threadsAligned(atomic::relaxed);
142-
143-
if (!PTeamSize || TId < PTeamSize)
144-
invokeMicrotask(TId, 0, fn, args, nargs);
145-
146-
// Synchronize all threads at the end of a parallel region.
147-
synchronize::threadsAligned(atomic::seq_cst);
148-
}
149-
150-
// Synchronize all threads to make sure every thread exits the scope above;
151-
// otherwise the following assertions and the assumption in
152-
// __kmpc_target_deinit may not hold.
153-
synchronize::threadsAligned(atomic::acq_rel);
154-
155-
state::ParallelTeamSize.assert_eq(1u, ident, /*ForceTeamState=*/true);
156-
icv::ActiveLevel.assert_eq(0u, ident, /*ForceTeamState=*/true);
157-
icv::Level.assert_eq(0u, ident, /*ForceTeamState=*/true);
158-
159-
// Ensure we synchronize to create an aligned region around the assumptions.
160-
synchronize::threadsAligned(atomic::relaxed);
173+
// This was moved to its own routine so it could be called directly
174+
// in certain situations to avoid resource consumption of unused
175+
// logic in parallel_51.
176+
__kmpc_parallel_spmd(ident, num_threads, fn, args, nargs);
161177

162178
return;
163179
}

0 commit comments

Comments
 (0)