Skip to content

[OpenMP] Add interface for flushing with memory order and scope #133325

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions openmp/runtime/src/dllexports
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@
# __kmpc_end_taskq 128
# __kmpc_end_taskq_task 129
__kmpc_flush 130
__kmpc_flush_explicit
__kmpc_for_static_fini 135
__kmpc_for_static_init_4 136
__kmpc_for_static_init_8 137
Expand Down
2 changes: 2 additions & 0 deletions openmp/runtime/src/kmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -4204,6 +4204,8 @@ KMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid);
KMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid);

KMP_EXPORT void __kmpc_flush(ident_t *);
KMP_EXPORT void __kmpc_flush_explicit(ident_t *, kmp_int32 order,
kmp_int32 scope);
KMP_EXPORT void __kmpc_barrier(ident_t *, kmp_int32 global_tid);
KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
KMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
Expand Down
33 changes: 33 additions & 0 deletions openmp/runtime/src/kmp_csupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,39 @@ void __kmpc_flush(ident_t *loc) {
#endif
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information.
@param order memory order input from user.
@param scope memory scope input from user.
Perform memory fence with explicit memory semantics.
*/
void __kmpc_flush_explicit(ident_t *loc, kmp_int32 order, kmp_int32 scope) {
// `scope` is not used on the initial device.
switch (order) {
case std::memory_order_relaxed:
[[fallthrough]];
case std::memory_order_acquire:
[[fallthrough]];
case std::memory_order_release:
[[fallthrough]];
case std::memory_order_acq_rel:
[[fallthrough]];
case std::memory_order_seq_cst:
std::atomic_thread_fence(static_cast<std::memory_order>(order));
break;
default:
KMP_BUILTIN_UNREACHABLE;
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_flush) {
ompt_callbacks.ompt_callback(ompt_callback_flush)(
__ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
}
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
Expand Down
58 changes: 58 additions & 0 deletions openmp/runtime/test/flush/omp_flush_acquire_release.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// RUN: %libomp-compile-and-run
// REQUIRES: clang

// Test is based on OpenMP API Example (omp_5.0) acquire_release3.c
// https://github.com/OpenMP/Examples/blob/main/synchronization/sources/acquire_release.3.c

#include <stdio.h>
#include <omp.h>

typedef void ident_t;
extern void __kmpc_flush_explicit(ident_t *, int order, int scope);

int test_memorder(int write_order, int read_order) {
int x = 0, y = 0;
int num_fails = 0;
#pragma omp parallel num_threads(2)
{
int thrd = omp_get_thread_num();
if (thrd == 0) {
x = 10;
__kmpc_flush_explicit(NULL, write_order, 0);
#pragma omp atomic write // or with relaxed clause
y = 1;
} else {
int tmp = 0;
while (tmp == 0) {
#pragma omp atomic read // or with relaxed clause
tmp = y;
}
__kmpc_flush_explicit(NULL, read_order, 0);
// printf("x = %d\n", x); // always "x = 10"
if (x != 10)
num_fails++;
}
}
return num_fails;
}

int main() {
// Clang-based compiler has predefined macro __ATOMIC_<memory_order>.
int write_order[3] = {__ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL, __ATOMIC_RELEASE};
int read_order[3] = {__ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE};

// Repeat 1000 times
for (int n = 0; n < 1000; n++) {
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
if (test_memorder(write_order[i], read_order[j])) {
printf("failed\n");
exit(EXIT_FAILURE);
}
}
}
}

printf("passed\n");
return EXIT_SUCCESS;
}