Skip to content

Commit 50b68a3

Browse files
committed
[OpenMP][host runtime] Add support for teams affinity
This patch implements teams affinity on the host. The default is spread. A user can specify spread, close, or primary using the KMP_TEAMS_PROC_BIND environment variable. Unlike OMP_PROC_BIND, KMP_TEAMS_PROC_BIND takes a single value rather than a list of values. The values follow the same semantics that the OpenMP specification defines for parallel regions, except that T is the number of teams in a league instead of the number of threads in a parallel region. Differential Revision: https://reviews.llvm.org/D109921
1 parent 414abff commit 50b68a3

File tree

6 files changed

+606
-22
lines changed

6 files changed

+606
-22
lines changed

openmp/runtime/src/kmp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -849,6 +849,7 @@ typedef struct kmp_nested_proc_bind_t {
849849
} kmp_nested_proc_bind_t;
850850

851851
extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
852+
extern kmp_proc_bind_t __kmp_teams_proc_bind;
852853

853854
extern int __kmp_display_affinity;
854855
extern char *__kmp_affinity_format;

openmp/runtime/src/kmp_global.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ char *__kmp_cpuinfo_file = NULL;
280280
#endif /* KMP_AFFINITY_SUPPORTED */
281281

282282
kmp_nested_proc_bind_t __kmp_nested_proc_bind = {NULL, 0, 0};
283+
kmp_proc_bind_t __kmp_teams_proc_bind = proc_bind_spread;
283284
int __kmp_affinity_num_places = 0;
284285
int __kmp_display_affinity = FALSE;
285286
char *__kmp_affinity_format = NULL;

openmp/runtime/src/kmp_runtime.cpp

Lines changed: 96 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -914,7 +914,8 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
914914
assured that there are enough threads available, because we checked on that
915915
earlier within critical section forkjoin */
916916
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
917-
kmp_info_t *master_th, int master_gtid) {
917+
kmp_info_t *master_th, int master_gtid,
918+
int fork_teams_workers) {
918919
int i;
919920
int use_hot_team;
920921

@@ -1003,7 +1004,12 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
10031004
}
10041005

10051006
#if KMP_AFFINITY_SUPPORTED
1006-
__kmp_partition_places(team);
1007+
// Do not partition the places list for teams construct workers who
1008+
// haven't actually been forked to do real work yet. This partitioning
1009+
// will take place in the parallel region nested within the teams construct.
1010+
if (!fork_teams_workers) {
1011+
__kmp_partition_places(team);
1012+
}
10071013
#endif
10081014
}
10091015

@@ -1597,6 +1603,41 @@ int __kmp_fork_call(ident_t *loc, int gtid,
15971603
}
15981604
#endif
15991605

1606+
// Figure out the proc_bind policy for the nested parallel within teams
1607+
kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1608+
// proc_bind_default means don't update
1609+
kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1610+
if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1611+
proc_bind = proc_bind_false;
1612+
} else {
1613+
// No proc_bind clause specified; use current proc-bind-var
1614+
if (proc_bind == proc_bind_default) {
1615+
proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1616+
}
1617+
/* else: The proc_bind policy was specified explicitly on parallel
1618+
clause.
1619+
This overrides proc-bind-var for this parallel region, but does not
1620+
change proc-bind-var. */
1621+
// Figure the value of proc-bind-var for the child threads.
1622+
if ((level + 1 < __kmp_nested_proc_bind.used) &&
1623+
(__kmp_nested_proc_bind.bind_types[level + 1] !=
1624+
master_th->th.th_current_task->td_icvs.proc_bind)) {
1625+
proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1626+
}
1627+
}
1628+
KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
1629+
// Need to change the bind-var ICV to correct value for each implicit task
1630+
if (proc_bind_icv != proc_bind_default &&
1631+
master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1632+
kmp_info_t **other_threads = parent_team->t.t_threads;
1633+
for (i = 0; i < master_th->th.th_team_nproc; ++i) {
1634+
other_threads[i]->th.th_current_task->td_icvs.proc_bind =
1635+
proc_bind_icv;
1636+
}
1637+
}
1638+
// Reset for next parallel region
1639+
master_th->th.th_set_proc_bind = proc_bind_default;
1640+
16001641
#if USE_ITT_BUILD && USE_ITT_NOTIFY
16011642
if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
16021643
KMP_ITT_DEBUG) &&
@@ -1613,6 +1654,9 @@ int __kmp_fork_call(ident_t *loc, int gtid,
16131654
parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
16141655
}
16151656
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1657+
#if KMP_AFFINITY_SUPPORTED
1658+
__kmp_partition_places(parent_team);
1659+
#endif
16161660

16171661
KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
16181662
"master_th=%p, gtid=%d\n",
@@ -1953,24 +1997,33 @@ int __kmp_fork_call(ident_t *loc, int gtid,
19531997

19541998
// Figure out the proc_bind_policy for the new team.
19551999
kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1956-
kmp_proc_bind_t proc_bind_icv =
1957-
proc_bind_default; // proc_bind_default means don't update
2000+
// proc_bind_default means don't update
2001+
kmp_proc_bind_t proc_bind_icv = proc_bind_default;
19582002
if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
19592003
proc_bind = proc_bind_false;
19602004
} else {
2005+
// No proc_bind clause specified; use current proc-bind-var for this
2006+
// parallel region
19612007
if (proc_bind == proc_bind_default) {
1962-
// No proc_bind clause specified; use current proc-bind-var for this
1963-
// parallel region
19642008
proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
19652009
}
2010+
// Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
2011+
if (master_th->th.th_teams_microtask &&
2012+
microtask == (microtask_t)__kmp_teams_master) {
2013+
proc_bind = __kmp_teams_proc_bind;
2014+
}
19662015
/* else: The proc_bind policy was specified explicitly on parallel clause.
19672016
This overrides proc-bind-var for this parallel region, but does not
19682017
change proc-bind-var. */
19692018
// Figure the value of proc-bind-var for the child threads.
19702019
if ((level + 1 < __kmp_nested_proc_bind.used) &&
19712020
(__kmp_nested_proc_bind.bind_types[level + 1] !=
19722021
master_th->th.th_current_task->td_icvs.proc_bind)) {
1973-
proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2022+
// Do not modify the proc bind icv for the two teams construct forks
2023+
// They just let the proc bind icv pass through
2024+
if (!master_th->th.th_teams_microtask ||
2025+
!(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
2026+
proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
19742027
}
19752028
}
19762029

@@ -2142,7 +2195,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
21422195
if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
21432196
root->r.r_active = TRUE;
21442197

2145-
__kmp_fork_team_threads(root, team, master_th, gtid);
2198+
__kmp_fork_team_threads(root, team, master_th, gtid, !ap);
21462199
__kmp_setup_icv_copy(team, nthreads,
21472200
&master_th->th.th_current_task->td_icvs, loc);
21482201

@@ -2411,6 +2464,14 @@ void __kmp_join_call(ident_t *loc, int gtid
24112464
} // active_level == 1
24122465
#endif /* USE_ITT_BUILD */
24132466

2467+
#if KMP_AFFINITY_SUPPORTED
2468+
if (!exit_teams) {
2469+
// Restore master thread's partition.
2470+
master_th->th.th_first_place = team->t.t_first_place;
2471+
master_th->th.th_last_place = team->t.t_last_place;
2472+
}
2473+
#endif // KMP_AFFINITY_SUPPORTED
2474+
24142475
if (master_th->th.th_teams_microtask && !exit_teams &&
24152476
team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
24162477
team->t.t_level == master_th->th.th_teams_level + 1) {
@@ -2518,11 +2579,6 @@ void __kmp_join_call(ident_t *loc, int gtid
25182579
master_th, team));
25192580
__kmp_pop_current_task_from_thread(master_th);
25202581

2521-
#if KMP_AFFINITY_SUPPORTED
2522-
// Restore master thread's partition.
2523-
master_th->th.th_first_place = team->t.t_first_place;
2524-
master_th->th.th_last_place = team->t.t_last_place;
2525-
#endif // KMP_AFFINITY_SUPPORTED
25262582
master_th->th.th_def_allocator = team->t.t_def_allocator;
25272583

25282584
#if OMPD_SUPPORT
@@ -5016,6 +5072,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
50165072
kmp_team_t *team;
50175073
int use_hot_team = !root->r.r_active;
50185074
int level = 0;
5075+
int do_place_partition = 1;
50195076

50205077
KA_TRACE(20, ("__kmp_allocate_team: called\n"));
50215078
KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
@@ -5037,6 +5094,12 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
50375094
++level; // not increment if #teams==1, or for outer fork of the teams;
50385095
// increment otherwise
50395096
}
5097+
// Do not perform the place partition if inner fork of the teams
5098+
// Wait until nested parallel region encountered inside teams construct
5099+
if ((master->th.th_teams_size.nteams == 1 &&
5100+
master->th.th_teams_level >= team->t.t_level) ||
5101+
(team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5102+
do_place_partition = 0;
50405103
}
50415104
hot_teams = master->th.th_hot_teams;
50425105
if (level < __kmp_hot_teams_max_level && hot_teams &&
@@ -5074,6 +5137,10 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
50745137
__kmp_resize_dist_barrier(team, old_nthr, new_nproc);
50755138
}
50765139

5140+
// If not doing the place partition, then reset the team's proc bind
5141+
// to indicate that partitioning of all threads still needs to take place
5142+
if (do_place_partition == 0)
5143+
team->t.t_proc_bind = proc_bind_default;
50775144
// Has the number of threads changed?
50785145
/* Let's assume the most common case is that the number of threads is
50795146
unchanged, and put that case first. */
@@ -5103,16 +5170,20 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
51035170
if ((team->t.t_size_changed == 0) &&
51045171
(team->t.t_proc_bind == new_proc_bind)) {
51055172
if (new_proc_bind == proc_bind_spread) {
5106-
__kmp_partition_places(
5107-
team, 1); // add flag to update only master for spread
5173+
if (do_place_partition) {
5174+
// add flag to update only master for spread
5175+
__kmp_partition_places(team, 1);
5176+
}
51085177
}
51095178
KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
51105179
"proc_bind = %d, partition = [%d,%d]\n",
51115180
team->t.t_id, new_proc_bind, team->t.t_first_place,
51125181
team->t.t_last_place));
51135182
} else {
5114-
KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5115-
__kmp_partition_places(team);
5183+
if (do_place_partition) {
5184+
KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5185+
__kmp_partition_places(team);
5186+
}
51165187
}
51175188
#else
51185189
KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
@@ -5189,10 +5260,12 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
51895260
}
51905261
#endif
51915262

5192-
KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5263+
if (do_place_partition) {
5264+
KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
51935265
#if KMP_AFFINITY_SUPPORTED
5194-
__kmp_partition_places(team);
5266+
__kmp_partition_places(team);
51955267
#endif
5268+
}
51965269
} else { // team->t.t_nproc < new_nproc
51975270
#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
51985271
kmp_affin_mask_t *old_mask;
@@ -5328,10 +5401,12 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
53285401
}
53295402
#endif
53305403

5331-
KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5404+
if (do_place_partition) {
5405+
KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
53325406
#if KMP_AFFINITY_SUPPORTED
5333-
__kmp_partition_places(team);
5407+
__kmp_partition_places(team);
53345408
#endif
5409+
}
53355410
} // Check changes in number of threads
53365411

53375412
kmp_info_t *master = team->t.t_threads[0];

openmp/runtime/src/kmp_settings.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3207,6 +3207,47 @@ static void __kmp_stg_print_topology_method(kmp_str_buf_t *buffer,
32073207
}
32083208
} // __kmp_stg_print_topology_method
32093209

3210+
// KMP_TEAMS_PROC_BIND
3211+
// One row of the KMP_TEAMS_PROC_BIND lookup table: pairs a textual spelling
// of the setting with the kmp_proc_bind_t policy it selects.
struct kmp_proc_bind_info_t {
3212+
// Spelling compared against the user's value when parsing, and emitted
// when printing the setting.
const char *name;
3213+
// Policy assigned to __kmp_teams_proc_bind when `name` matches.
kmp_proc_bind_t proc_bind;
3214+
};
3215+
// Accepted spellings for KMP_TEAMS_PROC_BIND and the policy each selects.
// Both the parse and print routines below scan this table front to back and
// stop at the first hit, so entry order is significant: "true" is an alias
// for "spread" and "false" is an alias for "primary".
// NOTE(review): because "false" precedes "primary", the print routine reports
// proc_bind_primary as "false" rather than "primary" -- confirm this is the
// intended display value.
static kmp_proc_bind_info_t proc_bind_table[] = {
3216+
{"spread", proc_bind_spread},
3217+
{"true", proc_bind_spread},
3218+
{"close", proc_bind_close},
3219+
// teams-bind = false means "replicate the primary thread's affinity"
3220+
{"false", proc_bind_primary},
3221+
{"primary", proc_bind_primary}};
3222+
// Parse handler for the KMP_TEAMS_PROC_BIND setting.
//
// Walks proc_bind_table in order and, for the first entry whose name
// __kmp_match_str() accepts for `value`, stores that entry's policy in the
// global __kmp_teams_proc_bind. If no entry matches, a StgInvalidValue
// warning naming the setting and the offending value is issued and
// __kmp_teams_proc_bind is left untouched.
//
// name:  setting name, used only in the warning message.
// value: user-supplied text to interpret.
// data:  not referenced by this handler.
static void __kmp_stg_parse_teams_proc_bind(char const *name, char const *value,
3223+
void *data) {
3224+
int valid;
3225+
// Passed to __kmp_match_str() by address but never read afterwards.
// NOTE(review): presumably this marks where the match stopped; since it is
// ignored, trailing text after a recognized keyword may go undiagnosed --
// confirm against __kmp_match_str()'s contract.
const char *end;
3226+
valid = 0;
3227+
for (size_t i = 0; i < sizeof(proc_bind_table) / sizeof(proc_bind_table[0]);
3228+
++i) {
3229+
if (__kmp_match_str(proc_bind_table[i].name, value, &end)) {
3230+
__kmp_teams_proc_bind = proc_bind_table[i].proc_bind;
3231+
valid = 1;
3232+
// First matching table entry wins.
break;
3233+
}
3234+
}
3235+
if (!valid) {
3236+
KMP_WARNING(StgInvalidValue, name, value);
3237+
}
3238+
}
3239+
// Print handler for the KMP_TEAMS_PROC_BIND setting.
//
// Looks up the current __kmp_teams_proc_bind in proc_bind_table and writes
// the name of the first entry carrying that policy to `buffer` through
// __kmp_stg_print_str(). If no entry carries the current policy, the
// KMP_I18N_STR(NotDefined) string is printed instead.
//
// buffer: output string buffer handed to __kmp_stg_print_str().
// name:   setting name handed to __kmp_stg_print_str().
// data:   not referenced by this handler.
static void __kmp_stg_print_teams_proc_bind(kmp_str_buf_t *buffer,
3240+
char const *name, void *data) {
3241+
// Fallback text used when the current policy is not in the table.
const char *value = KMP_I18N_STR(NotDefined);
3242+
for (size_t i = 0; i < sizeof(proc_bind_table) / sizeof(proc_bind_table[0]);
3243+
++i) {
3244+
if (__kmp_teams_proc_bind == proc_bind_table[i].proc_bind) {
3245+
// First table entry with this policy supplies the printed name, so
// aliases listed later in the table are never printed.
value = proc_bind_table[i].name;
3246+
break;
3247+
}
3248+
}
3249+
__kmp_stg_print_str(buffer, name, value);
3250+
}
32103251
#endif /* KMP_AFFINITY_SUPPORTED */
32113252

32123253
// OMP_PROC_BIND / bind-var is functional on all 4.0 builds, including OS X*
@@ -5312,6 +5353,8 @@ static kmp_setting_t __kmp_stg_table[] = {
53125353
#endif /* KMP_GOMP_COMPAT */
53135354
{"OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind,
53145355
NULL, 0, 0},
5356+
{"KMP_TEAMS_PROC_BIND", __kmp_stg_parse_teams_proc_bind,
5357+
__kmp_stg_print_teams_proc_bind, NULL, 0, 0},
53155358
{"OMP_PLACES", __kmp_stg_parse_places, __kmp_stg_print_places, NULL, 0, 0},
53165359
{"KMP_TOPOLOGY_METHOD", __kmp_stg_parse_topology_method,
53175360
__kmp_stg_print_topology_method, NULL, 0, 0},

0 commit comments

Comments
 (0)