Skip to content

Commit 8adae60

Browse files
[libomptarget][nfc] Extract function from data_sharing, move to common
Summary:
[libomptarget][nfc] Extract function from data_sharing, move to common

Finding the first active thread in the warp is different on nvptx and amdgcn, mostly due to warp size and the desire for efficiency.

Reviewers: ABataev, jdoerfert, grokos
Reviewed By: jdoerfert
Subscribers: jvesely, mgorny, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D71643
1 parent 9d38fd8 commit 8adae60

File tree

5 files changed

+21
-17
lines changed

5 files changed

+21
-17
lines changed

openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt

+3-2
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,13 @@ get_filename_component(devicertl_base_directory
5757
set(cuda_sources
5858
${devicertl_base_directory}/common/src/cancel.cu
5959
${devicertl_base_directory}/common/src/critical.cu
60-
${devicertl_base_directory}/common/src/loop.cu
60+
${devicertl_base_directory}/common/src/data_sharing.cu
6161
${devicertl_base_directory}/common/src/libcall.cu
62-
${devicertl_base_directory}/common/src/reduction.cu
62+
${devicertl_base_directory}/common/src/loop.cu
6363
${devicertl_base_directory}/common/src/omp_data.cu
6464
${devicertl_base_directory}/common/src/omptarget.cu
6565
${devicertl_base_directory}/common/src/parallel.cu
66+
${devicertl_base_directory}/common/src/reduction.cu
6667
${devicertl_base_directory}/common/src/sync.cu
6768
${devicertl_base_directory}/common/src/task.cu)
6869

openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h

+2
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
101101
return __lanemask_gt();
102102
}
103103

104+
EXTERN bool __kmpc_impl_is_first_active_thread();
105+
104106
INLINE uint32_t __kmpc_impl_smid() {
105107
return __smid();
106108
}

openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu renamed to openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu

+5-13
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,18 @@
1-
//===----- data_sharing.cu - NVPTX OpenMP debug utilities -------- CUDA -*-===//
1+
//===----- data_sharing.cu - OpenMP GPU data sharing ------------- CUDA -*-===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
88
//
9-
// This file contains the implementation of data sharing environments/
9+
// This file contains the implementation of data sharing environments
1010
//
1111
//===----------------------------------------------------------------------===//
1212
#include "common/omptarget.h"
1313
#include "target_impl.h"
1414
#include <stdio.h>
1515

16-
// Return true if this is the first active thread in the warp.
17-
INLINE static bool IsWarpMasterActiveThread() {
18-
unsigned long long Mask = __kmpc_impl_activemask();
19-
unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
20-
unsigned long long Sh = Mask << ShNum;
21-
// Truncate Sh to the 32 lower bits
22-
return (unsigned)Sh == 0;
23-
}
2416
// Return true if this is the master thread.
2517
INLINE static bool IsMasterThread(bool isSPMDExecutionMode) {
2618
return !isSPMDExecutionMode && GetMasterThreadID() == GetThreadIdInBlock();
@@ -128,7 +120,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
128120
DSPRINT(DSFLAG, "Active threads: %08x \n", (unsigned)ActiveT);
129121

130122
// Only the warp active master needs to grow the stack.
131-
if (IsWarpMasterActiveThread()) {
123+
if (__kmpc_impl_is_first_active_thread()) {
132124
// Save the current active threads.
133125
ActiveT = CurActiveThreads;
134126

@@ -229,7 +221,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
229221
unsigned WID = GetWarpId();
230222

231223
if (IsEntryPoint) {
232-
if (IsWarpMasterActiveThread()) {
224+
if (__kmpc_impl_is_first_active_thread()) {
233225
DSPRINT0(DSFLAG, "Doing clean up\n");
234226

235227
// The master thread cleans the saved slot, because this is an environment
@@ -255,7 +247,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
255247
// warp diverged and returns in different places). This only works if we
256248
// assume that threads will converge right after the call site that started
257249
// the environment.
258-
if (IsWarpMasterActiveThread()) {
250+
if (__kmpc_impl_is_first_active_thread()) {
259251
__kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID];
260252

261253
DSPRINT0(DSFLAG, "Before restoring the stack\n");

openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,16 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
5353
set(cuda_src_files
5454
${devicertl_common_directory}/src/cancel.cu
5555
${devicertl_common_directory}/src/critical.cu
56-
src/data_sharing.cu
56+
${devicertl_common_directory}/src/data_sharing.cu
5757
${devicertl_common_directory}/src/libcall.cu
58-
src/target_impl.cu
5958
${devicertl_common_directory}/src/loop.cu
6059
${devicertl_common_directory}/src/omptarget.cu
6160
${devicertl_common_directory}/src/parallel.cu
6261
${devicertl_common_directory}/src/reduction.cu
6362
${devicertl_common_directory}/src/support.cu
6463
${devicertl_common_directory}/src/sync.cu
6564
${devicertl_common_directory}/src/task.cu
65+
src/target_impl.cu
6666
)
6767

6868
set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)

openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h

+9
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,15 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
9494
return res;
9595
}
9696

97+
// Return true if this is the first active thread in the warp.
98+
INLINE bool __kmpc_impl_is_first_active_thread() {
99+
unsigned long long Mask = __kmpc_impl_activemask();
100+
unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
101+
unsigned long long Sh = Mask << ShNum;
102+
// Truncate Sh to the 32 lower bits
103+
return (unsigned)Sh == 0;
104+
}
105+
97106
INLINE uint32_t __kmpc_impl_smid() {
98107
uint32_t id;
99108
asm("mov.u32 %0, %%smid;" : "=r"(id));

0 commit comments

Comments
 (0)