Skip to content

Commit 77acb60

Browse files
committed
[OpenMP] Fix endian bug in hierarchical barrier code
The "oncore" flavor of the hierarchical barrier code uses a single uint64 to hold both a barrier state *and* a set of flags for the current node's "leaf kids" in the tree. To make this work, the barrier state is placed in the least-significant byte of the uint64, and the flags for leaf kids are placed in other bytes of the uint64 starting from the most-significant byte to represent the first child. At least, this is how it works on little-endian systems. On big-endian hosts, the current code unfortunately places the leaf kid flags starting from the *least* significant byte of the uint64, where they will overlap and clobber the barrier state byte, resulting in corrupted barrier behavior. To fix this, this PR changes the child flag offsets to always start with the MSB, on both big- and little- endian hosts. This was complicated a bit by the following issues: - The byte offset was stored "off-by-one" for some reason. This makes it impossible to represent the MSB on big- endian systems (i.e. byte 0, which would have to be represented by -1 if offset by one, which does not fit into the uint8 offset field). Fixed by storing and using the "offset" field as the actual byte number, not off-by-one. - The "offset" field was never used for the master node, but it was still computed. That computed value was also invalid as it pointed outside the target uint64, but that didn't matter as it was unused. For big-endian systems, however, the computation would have resulted in a *negative* value, which again cannot be stored in the uint8 field. Fixed by skipping the (unused) computation on the master node and setting the "offset" field to a dummy value. This is intended to be NFC on little-endian machines. Fixes: #116215
1 parent f06c187 commit 77acb60

File tree

1 file changed

+23
-8
lines changed

1 file changed

+23
-8
lines changed

openmp/runtime/src/kmp_barrier.cpp

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1292,6 +1292,15 @@ static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
12921292
kmp_bstate_t *thr_bar,
12931293
kmp_uint32 nproc, int gtid,
12941294
int tid, kmp_team_t *team) {
1295+
// Helper macro to identify bytes in a kmp_uint64 in an endian-independent
1296+
// way. Input 0 results in the byte address of the MSB, input 7 results
1297+
// in the byte address of the LSB.
1298+
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
1299+
#define __kmp_msb_byteoffset(offset) (offset)
1300+
#else
1301+
#define __kmp_msb_byteoffset(offset) (7 - (offset))
1302+
#endif
1303+
12951304
// Checks to determine if (re-)initialization is needed
12961305
bool uninitialized = thr_bar->team == NULL;
12971306
bool team_changed = team != thr_bar->team;
@@ -1306,6 +1315,7 @@ static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
13061315
if (uninitialized || team_sz_changed || tid_changed) {
13071316
thr_bar->my_level = thr_bar->depth - 1; // default for primary thread
13081317
thr_bar->parent_tid = -1; // default for primary thread
1318+
thr_bar->offset = -1; // unused for primary thread
13091319
if (!KMP_MASTER_TID(tid)) {
13101320
// if not primary thread, find parent thread in hierarchy
13111321
kmp_uint32 d = 0;
@@ -1325,10 +1335,14 @@ static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
13251335
}
13261336
++d;
13271337
}
1338+
1339+
kmp_uint32 offset = ((kmp_uint32)tid - thr_bar->parent_tid) /
1340+
thr_bar->skip_per_level[thr_bar->my_level];
1341+
offset = offset - 1;
1342+
KMP_ASSERT(offset < 7);
1343+
__kmp_type_convert(__kmp_msb_byteoffset(offset), &(thr_bar->offset));
13281344
}
1329-
__kmp_type_convert(7 - ((tid - thr_bar->parent_tid) /
1330-
(thr_bar->skip_per_level[thr_bar->my_level])),
1331-
&(thr_bar->offset));
1345+
13321346
thr_bar->old_tid = tid;
13331347
thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
13341348
thr_bar->team = team;
@@ -1350,9 +1364,11 @@ static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
13501364
__kmp_type_convert(nproc - tid - 1, &(thr_bar->leaf_kids));
13511365
thr_bar->leaf_state = 0;
13521366
for (int i = 0; i < thr_bar->leaf_kids; ++i)
1353-
((char *)&(thr_bar->leaf_state))[7 - i] = 1;
1367+
((char *)&(thr_bar->leaf_state))[__kmp_msb_byteoffset(i)] = 1;
13541368
}
13551369
return retval;
1370+
1371+
#undef __kmp_msb_byteoffset
13561372
}
13571373

13581374
static void __kmp_hierarchical_barrier_gather(
@@ -1506,8 +1522,7 @@ static void __kmp_hierarchical_barrier_gather(
15061522
} else {
15071523
// Leaf does special release on "offset" bits of parent's b_arrived flag
15081524
thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
1509-
kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived,
1510-
thr_bar->offset + 1);
1525+
kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset);
15111526
flag.set_waiter(other_threads[thr_bar->parent_tid]);
15121527
flag.release();
15131528
}
@@ -1555,7 +1570,7 @@ static void __kmp_hierarchical_barrier_release(
15551570
// Wait on my "offset" bits on parent's b_go flag
15561571
thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
15571572
kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP,
1558-
thr_bar->offset + 1, bt,
1573+
thr_bar->offset, bt,
15591574
this_thr USE_ITT_BUILD_ARG(itt_sync_obj));
15601575
flag.wait(this_thr, TRUE);
15611576
if (thr_bar->wait_flag ==
@@ -1564,7 +1579,7 @@ static void __kmp_hierarchical_barrier_release(
15641579
KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
15651580
} else { // Reset my bits on parent's b_go flag
15661581
(RCAST(volatile char *,
1567-
&(thr_bar->parent_bar->b_go)))[thr_bar->offset + 1] = 0;
1582+
&(thr_bar->parent_bar->b_go)))[thr_bar->offset] = 0;
15681583
}
15691584
}
15701585
thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;

0 commit comments

Comments
 (0)