@@ -83,6 +83,64 @@ uint32_t determineNumberOfThreads(int32_t NumThreadsClause) {
 
 extern "C" {
 
+[[clang::always_inline]] void __kmpc_parallel_spmd(IdentTy *ident,
+                                                   int32_t num_threads,
+                                                   void *fn, void **args,
+                                                   const int64_t nargs) {
+  uint32_t TId = mapping::getThreadIdInBlock();
+  uint32_t NumThreads = determineNumberOfThreads(num_threads);
+  uint32_t PTeamSize =
+      NumThreads == mapping::getMaxTeamThreads() ? 0 : NumThreads;
+  // Avoid the race between the read of the `icv::Level` above and the write
+  // below by synchronizing all threads here.
+  synchronize::threadsAligned(atomic::seq_cst);
+  {
+    // Note that the order here is important. `icv::Level` has to be updated
+    // last or the other updates will cause a thread specific state to be
+    // created.
+    state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, PTeamSize,
+                                          1u, TId == 0, ident,
+                                          /*ForceTeamState=*/true);
+    state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0, ident,
+                                     /*ForceTeamState=*/true);
+    state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0, ident,
+                               /*ForceTeamState=*/true);
+
+    // Synchronize all threads after the main thread (TId == 0) set up the
+    // team state properly.
+    synchronize::threadsAligned(atomic::acq_rel);
+
+    state::ParallelTeamSize.assert_eq(PTeamSize, ident,
+                                      /*ForceTeamState=*/true);
+    icv::ActiveLevel.assert_eq(1u, ident, /*ForceTeamState=*/true);
+    icv::Level.assert_eq(1u, ident, /*ForceTeamState=*/true);
+
+    // Ensure we synchronize before we run user code to avoid invalidating the
+    // assumptions above.
+    synchronize::threadsAligned(atomic::relaxed);
+
+    if (!PTeamSize || TId < PTeamSize)
+      invokeMicrotask(TId, 0, fn, args, nargs);
+
+    // Synchronize all threads at the end of a parallel region.
+    synchronize::threadsAligned(atomic::seq_cst);
+  }
+
+  // Synchronize all threads to make sure every thread exits the scope above;
+  // otherwise the following assertions and the assumption in
+  // __kmpc_target_deinit may not hold.
+  synchronize::threadsAligned(atomic::acq_rel);
+
+  state::ParallelTeamSize.assert_eq(1u, ident, /*ForceTeamState=*/true);
+  icv::ActiveLevel.assert_eq(0u, ident, /*ForceTeamState=*/true);
+  icv::Level.assert_eq(0u, ident, /*ForceTeamState=*/true);
+
+  // Ensure we synchronize to create an aligned region around the assumptions.
+  synchronize::threadsAligned(atomic::relaxed);
+
+  return;
+}
+
 [[clang::always_inline]] void
 __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
                    int32_t num_threads, int proc_bind, void *fn,
@@ -112,52 +170,10 @@ __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
   uint32_t MaxTeamThreads = mapping::getMaxTeamThreads();
   uint32_t PTeamSize = NumThreads == MaxTeamThreads ? 0 : NumThreads;
   if (mapping::isSPMDMode()) {
-    // Avoid the race between the read of the `icv::Level` above and the write
-    // below by synchronizing all threads here.
-    synchronize::threadsAligned(atomic::seq_cst);
-    {
-      // Note that the order here is important. `icv::Level` has to be updated
-      // last or the other updates will cause a thread specific state to be
-      // created.
-      state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, PTeamSize,
-                                            1u, TId == 0, ident,
-                                            /*ForceTeamState=*/true);
-      state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0,
-                                       ident, /*ForceTeamState=*/true);
-      state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0, ident,
-                                 /*ForceTeamState=*/true);
-
-      // Synchronize all threads after the main thread (TId == 0) set up the
-      // team state properly.
-      synchronize::threadsAligned(atomic::acq_rel);
-
-      state::ParallelTeamSize.assert_eq(PTeamSize, ident,
-                                        /*ForceTeamState=*/true);
-      icv::ActiveLevel.assert_eq(1u, ident, /*ForceTeamState=*/true);
-      icv::Level.assert_eq(1u, ident, /*ForceTeamState=*/true);
-
-      // Ensure we synchronize before we run user code to avoid invalidating the
-      // assumptions above.
-      synchronize::threadsAligned(atomic::relaxed);
-
-      if (!PTeamSize || TId < PTeamSize)
-        invokeMicrotask(TId, 0, fn, args, nargs);
-
-      // Synchronize all threads at the end of a parallel region.
-      synchronize::threadsAligned(atomic::seq_cst);
-    }
-
-    // Synchronize all threads to make sure every thread exits the scope above;
-    // otherwise the following assertions and the assumption in
-    // __kmpc_target_deinit may not hold.
-    synchronize::threadsAligned(atomic::acq_rel);
-
-    state::ParallelTeamSize.assert_eq(1u, ident, /*ForceTeamState=*/true);
-    icv::ActiveLevel.assert_eq(0u, ident, /*ForceTeamState=*/true);
-    icv::Level.assert_eq(0u, ident, /*ForceTeamState=*/true);
-
-    // Ensure we synchronize to create an aligned region around the assumptions.
-    synchronize::threadsAligned(atomic::relaxed);
+    // This was moved into its own routine, __kmpc_parallel_spmd, so that it
+    // can be called directly in certain situations, avoiding the resource
+    // consumption of the unused generic logic in __kmpc_parallel_51.
+    __kmpc_parallel_spmd(ident, num_threads, fn, args, nargs);
 
     return;
   }
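
A note on the state::ValueRAII pattern the new routine leans on: conceptually, each ValueRAII publishes a new team-state value on construction (guarded by the TId == 0 argument so only the main thread writes) and restores the previous value when its scope closes, which is why icv::Level must be updated last and why the trailing barriers bracket the scope exit. Below is a minimal, self-contained sketch of that save/set/restore idea, using hypothetical stand-in types rather than the actual DeviceRTL classes:

#include <cassert>

// Hypothetical stand-in for a team-wide ICV slot; the real DeviceRTL state
// machinery is more involved (team vs. thread state, ForceTeamState, etc.).
struct Value {
  unsigned Val = 0;
};

// Minimal RAII sketch: set a new value on entry (only when Active, mirroring
// the TId == 0 guard in the diff) and restore the old value on scope exit.
struct ValueRAII {
  Value &V;
  unsigned Old;
  bool Active;
  ValueRAII(Value &V, unsigned NewVal, unsigned OldVal, bool Active)
      : V(V), Old(OldVal), Active(Active) {
    if (Active)
      V.Val = NewVal;
  }
  ~ValueRAII() {
    if (Active)
      V.Val = Old;
  }
};

int main() {
  Value Level; // starts at 0, like icv::Level outside a parallel region
  {
    ValueRAII LevelRAII(Level, 1u, 0u, /*Active=*/true);
    assert(Level.Val == 1u); // inside the "parallel region"
  }
  assert(Level.Val == 0u); // restored when the scope closes
  return 0;
}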
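
For context on how this fast path is reached: __kmpc_parallel_51 is emitted by the compiler rather than called from user code, and in SPMD mode it now simply forwards to __kmpc_parallel_spmd. The following is a small example of the kind of construct that is typically compiled in SPMD mode; whether a given region actually takes this path depends on the compiler's SPMD-mode analysis, so treat the pairing as illustrative, not guaranteed:

#include <stdio.h>

int main(void) {
  int Sum = 0;
  // A combined target construct like this is commonly lowered in SPMD mode,
  // where every device thread enters the parallel region together.
#pragma omp target teams distribute parallel for reduction(+ : Sum)
  for (int I = 0; I < 1024; ++I)
    Sum += I;
  printf("Sum = %d\n", Sum); // 523776
  return 0;
}

Built with an offloading compiler (for example clang with -fopenmp and an offload target), the device-side parallel region is what ultimately invokes the runtime entry points shown in the diff above.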