@@ -914,7 +914,8 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
    assured that there are enough threads available, because we checked on that
    earlier within critical section forkjoin */
 static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
-                                    kmp_info_t *master_th, int master_gtid) {
+                                    kmp_info_t *master_th, int master_gtid,
+                                    int fork_teams_workers) {
   int i;
   int use_hot_team;
 
@@ -1003,7 +1004,12 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
   }
 
 #if KMP_AFFINITY_SUPPORTED
-  __kmp_partition_places(team);
+  // Do not partition the places list for teams construct workers who
+  // haven't actually been forked to do real work yet. This partitioning
+  // will take place in the parallel region nested within the teams construct.
+  if (!fork_teams_workers) {
+    __kmp_partition_places(team);
+  }
 #endif
 }
 
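Note on the hunk above: worker threads created for a teams construct now skip place partitioning until they reach a parallel region that does real work. A minimal host-teams sketch of the behavior (illustrative only, not part of the patch; assumes a compiler with host teams support, e.g. clang -fopenmp):

// teams_partition.c -- places are partitioned at the nested parallel fork,
// not when the teams workers themselves are created.
#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp teams num_teams(2)
  {
    // With the patch, each team's workers receive their place partition
    // here, at the nested parallel region inside the teams construct.
#pragma omp parallel num_threads(2)
    printf("team %d, thread %d\n", omp_get_team_num(), omp_get_thread_num());
  }
  return 0;
}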
@@ -1597,6 +1603,41 @@ int __kmp_fork_call(ident_t *loc, int gtid,
     }
 #endif
 
+    // Figure out the proc_bind policy for the nested parallel within teams
+    kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
+    // proc_bind_default means don't update
+    kmp_proc_bind_t proc_bind_icv = proc_bind_default;
+    if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
+      proc_bind = proc_bind_false;
+    } else {
+      // No proc_bind clause specified; use current proc-bind-var
+      if (proc_bind == proc_bind_default) {
+        proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
+      }
+      /* else: The proc_bind policy was specified explicitly on parallel
+         clause.
+         This overrides proc-bind-var for this parallel region, but does not
+         change proc-bind-var. */
+      // Figure the value of proc-bind-var for the child threads.
+      if ((level + 1 < __kmp_nested_proc_bind.used) &&
+          (__kmp_nested_proc_bind.bind_types[level + 1] !=
+           master_th->th.th_current_task->td_icvs.proc_bind)) {
+        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
+      }
+    }
+    KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
+    // Need to change the bind-var ICV to correct value for each implicit task
+    if (proc_bind_icv != proc_bind_default &&
+        master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
+      kmp_info_t **other_threads = parent_team->t.t_threads;
+      for (i = 0; i < master_th->th.th_team_nproc; ++i) {
+        other_threads[i]->th.th_current_task->td_icvs.proc_bind =
+            proc_bind_icv;
+      }
+    }
+    // Reset for next parallel region
+    master_th->th.th_set_proc_bind = proc_bind_default;
+
 #if USE_ITT_BUILD && USE_ITT_NOTIFY
     if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
          KMP_ITT_DEBUG) &&
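The block above applies the usual proc_bind resolution to the parallel region nested in a teams construct: proc_bind_false disables binding, an explicit proc_bind clause overrides proc-bind-var for that region only, and otherwise the current proc-bind-var is inherited. An assumed user-level illustration of that precedence (not from the patch):

#include <omp.h>

int main(void) {
#pragma omp teams num_teams(2)
  {
    // Explicit clause: overrides proc-bind-var for this one region.
#pragma omp parallel proc_bind(close) num_threads(4)
    { /* workers bound close to their team's master */ }

    // No clause: the region inherits proc-bind-var, e.g. from OMP_PROC_BIND.
#pragma omp parallel num_threads(4)
    { /* binding follows the current proc-bind-var */ }
  }
  return 0;
}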
@@ -1613,6 +1654,9 @@ int __kmp_fork_call(ident_t *loc, int gtid,
       parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
     }
 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
+#if KMP_AFFINITY_SUPPORTED
+    __kmp_partition_places(parent_team);
+#endif
 
     KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                   "master_th=%p, gtid=%d\n",
@@ -1953,24 +1997,33 @@ int __kmp_fork_call(ident_t *loc, int gtid,
1953
1997
1954
1998
// Figure out the proc_bind_policy for the new team.
1955
1999
kmp_proc_bind_t proc_bind = master_th->th .th_set_proc_bind ;
1956
- kmp_proc_bind_t proc_bind_icv =
1957
- proc_bind_default; // proc_bind_default means don't update
2000
+ // proc_bind_default means don't update
2001
+ kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1958
2002
if (master_th->th .th_current_task ->td_icvs .proc_bind == proc_bind_false) {
1959
2003
proc_bind = proc_bind_false;
1960
2004
} else {
2005
+ // No proc_bind clause specified; use current proc-bind-var for this
2006
+ // parallel region
1961
2007
if (proc_bind == proc_bind_default) {
1962
- // No proc_bind clause specified; use current proc-bind-var for this
1963
- // parallel region
1964
2008
proc_bind = master_th->th .th_current_task ->td_icvs .proc_bind ;
1965
2009
}
2010
+ // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
2011
+ if (master_th->th .th_teams_microtask &&
2012
+ microtask == (microtask_t )__kmp_teams_master) {
2013
+ proc_bind = __kmp_teams_proc_bind;
2014
+ }
1966
2015
/* else: The proc_bind policy was specified explicitly on parallel clause.
1967
2016
This overrides proc-bind-var for this parallel region, but does not
1968
2017
change proc-bind-var. */
1969
2018
// Figure the value of proc-bind-var for the child threads.
1970
2019
if ((level + 1 < __kmp_nested_proc_bind.used ) &&
1971
2020
(__kmp_nested_proc_bind.bind_types [level + 1 ] !=
1972
2021
master_th->th .th_current_task ->td_icvs .proc_bind )) {
1973
- proc_bind_icv = __kmp_nested_proc_bind.bind_types [level + 1 ];
2022
+ // Do not modify the proc bind icv for the two teams construct forks
2023
+ // They just let the proc bind icv pass through
2024
+ if (!master_th->th .th_teams_microtask ||
2025
+ !(microtask == (microtask_t )__kmp_teams_master || ap == NULL ))
2026
+ proc_bind_icv = __kmp_nested_proc_bind.bind_types [level + 1 ];
1974
2027
}
1975
2028
}
1976
2029
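With this change, the outer fork that launches the teams masters takes its bind policy from __kmp_teams_proc_bind, which this runtime populates from the KMP_TEAMS_PROC_BIND environment variable; OMP_PROC_BIND and any proc_bind clause continue to govern the parallel regions nested inside each team. A usage sketch (illustrative; run e.g. as KMP_TEAMS_PROC_BIND=spread ./a.out):

#include <omp.h>
#include <stdio.h>

int main(void) {
  // KMP_TEAMS_PROC_BIND=spread spreads the team masters across places;
  // the proc_bind(close) clause then binds each team's workers near them.
#pragma omp teams num_teams(4)
#pragma omp parallel proc_bind(close)
  if (omp_get_thread_num() == 0)
    printf("team %d master on place %d\n", omp_get_team_num(),
           omp_get_place_num());
  return 0;
}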
@@ -2142,7 +2195,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
   if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
     root->r.r_active = TRUE;
 
-  __kmp_fork_team_threads(root, team, master_th, gtid);
+  __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
   __kmp_setup_icv_copy(team, nthreads,
                        &master_th->th.th_current_task->td_icvs, loc);
 
@@ -2411,6 +2464,14 @@ void __kmp_join_call(ident_t *loc, int gtid
   } // active_level == 1
 #endif /* USE_ITT_BUILD */
 
+#if KMP_AFFINITY_SUPPORTED
+  if (!exit_teams) {
+    // Restore master thread's partition.
+    master_th->th.th_first_place = team->t.t_first_place;
+    master_th->th.th_last_place = team->t.t_last_place;
+  }
+#endif // KMP_AFFINITY_SUPPORTED
+
   if (master_th->th.th_teams_microtask && !exit_teams &&
       team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
       team->t.t_level == master_th->th.th_teams_level + 1) {
@@ -2518,11 +2579,6 @@ void __kmp_join_call(ident_t *loc, int gtid
                 master_th, team));
   __kmp_pop_current_task_from_thread(master_th);
 
-#if KMP_AFFINITY_SUPPORTED
-  // Restore master thread's partition.
-  master_th->th.th_first_place = team->t.t_first_place;
-  master_th->th.th_last_place = team->t.t_last_place;
-#endif // KMP_AFFINITY_SUPPORTED
   master_th->th.th_def_allocator = team->t.t_def_allocator;
 
 #if OMPD_SUPPORT
@@ -5016,6 +5072,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
   kmp_team_t *team;
   int use_hot_team = !root->r.r_active;
   int level = 0;
+  int do_place_partition = 1;
 
   KA_TRACE(20, ("__kmp_allocate_team: called\n"));
   KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
@@ -5037,6 +5094,12 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
       ++level; // not increment if #teams==1, or for outer fork of the teams;
       // increment otherwise
     }
+    // Do not perform the place partition if inner fork of the teams
+    // Wait until nested parallel region encountered inside teams construct
+    if ((master->th.th_teams_size.nteams == 1 &&
+         master->th.th_teams_level >= team->t.t_level) ||
+        (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
+      do_place_partition = 0;
   }
   hot_teams = master->th.th_hot_teams;
   if (level < __kmp_hot_teams_max_level && hot_teams &&
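The guard added above can be read as a predicate over the teams state: partitioning is deferred both for the inner fork of a single-team teams construct and for the outer fork that runs __kmp_teams_master. A standalone restatement for clarity (the names mirror the runtime fields, but the function itself is illustrative and not in the patch):

// Returns 1 when __kmp_allocate_team should skip the place partition.
static int defer_place_partition(int nteams, int teams_level, int team_level,
                                 int is_teams_master_fork) {
  // Inner fork of the teams construct itself: only one team, and we have
  // not yet descended below the teams level.
  if (nteams == 1 && teams_level >= team_level)
    return 1;
  // Outer fork that launches the teams masters.
  if (is_teams_master_fork)
    return 1;
  return 0; // a genuine nested parallel region: partition places now
}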
@@ -5074,6 +5137,10 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
       __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
     }
 
+    // If not doing the place partition, then reset the team's proc bind
+    // to indicate that partitioning of all threads still needs to take place
+    if (do_place_partition == 0)
+      team->t.t_proc_bind = proc_bind_default;
     // Has the number of threads changed?
     /* Let's assume the most common case is that the number of threads is
        unchanged, and put that case first. */
@@ -5103,16 +5170,20 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
       if ((team->t.t_size_changed == 0) &&
           (team->t.t_proc_bind == new_proc_bind)) {
         if (new_proc_bind == proc_bind_spread) {
-          __kmp_partition_places(
-              team, 1); // add flag to update only master for spread
+          if (do_place_partition) {
+            // add flag to update only master for spread
+            __kmp_partition_places(team, 1);
+          }
         }
         KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
                        "proc_bind = %d, partition = [%d,%d]\n",
                        team->t.t_id, new_proc_bind, team->t.t_first_place,
                        team->t.t_last_place));
       } else {
-        KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
-        __kmp_partition_places(team);
+        if (do_place_partition) {
+          KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
+          __kmp_partition_places(team);
+        }
       }
 #else
       KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
@@ -5189,10 +5260,12 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
       }
 #endif
 
-      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
+      if (do_place_partition) {
+        KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
 #if KMP_AFFINITY_SUPPORTED
-      __kmp_partition_places(team);
+        __kmp_partition_places(team);
 #endif
+      }
     } else { // team->t.t_nproc < new_nproc
 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
       kmp_affin_mask_t *old_mask;
@@ -5328,10 +5401,12 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
       }
 #endif
 
-      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
+      if (do_place_partition) {
+        KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
 #if KMP_AFFINITY_SUPPORTED
-      __kmp_partition_places(team);
+        __kmp_partition_places(team);
 #endif
+      }
     } // Check changes in number of threads
 
     kmp_info_t *master = team->t.t_threads[0];