Skip to content

Commit d8eeb33

Browse files
committed
[OpenMP] Change build of OpenMP device runtime to be a separate runtime
Summary: Currently we build the OpenMP device runtime as part of the `offload/` project. This is problematic because it has several restrictions when compared to the normal offloading runtime. It can only be built with an up-to-date clang and we need to set the target appropriately. Currently we hack around this by creating the compiler invocation manually, but this patch moves it into a separate runtimes build. This follows the same build we use for libc, libc++, compiler-rt, and flang-rt. This also moves it from `offload/` into `openmp/` because it is still the `openmp/` runtime and I feel it is more appropriate. We do want a generic `offload/` library at some point, but it would be trivial to then add that as a separate library now that we have the infrastructure that makes adding these new libraries trivial. This most importantly will require that users update their build configs, mostly adding the following lines at a minimum. I was debating whether or not I should 'auto-upgrade' this, but I just went with a warning. ``` -DLLVM_RUNTIME_TARGETS='default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda' \ -DRUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES=openmp \ -DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=openmp \ ``` This also changed where the `.bc` version of the library lives, but it's still created.
1 parent 208257f commit d8eeb33

35 files changed

+158
-29
lines changed

offload/CMakeLists.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,13 @@ else()
113113
set(CMAKE_CXX_EXTENSIONS NO)
114114
endif()
115115

116+
# Emit a warning for people who haven't updated their build.
# The OpenMP device runtime is no longer built from offload/; it is produced by
# a per-target runtimes build. Warn when neither GPU triple lists "openmp" in
# its RUNTIMES_<triple>_LLVM_ENABLE_RUNTIMES setting.
117+
if(NOT "openmp" IN_LIST RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES AND
118+
NOT "openmp" IN_LIST RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES)
119+
message(WARNING "Building the offloading runtime with no device library. See "
120+
"https://openmp.llvm.org/SupportAndFAQ.html for help.")
121+
endif()
122+
116123
# Set the path of all resulting libraries to a unified location so that it can
117124
# be used for testing.
118125
set(LIBOMPTARGET_LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
@@ -373,7 +380,6 @@ set(LIBOMPTARGET_LLVM_LIBRARY_INTDIR "${LIBOMPTARGET_INTDIR}" CACHE STRING
373380

374381
# Build offloading plugins and device RTLs if they are available.
375382
add_subdirectory(plugins-nextgen)
376-
add_subdirectory(DeviceRTL)
377383
add_subdirectory(tools)
378384

379385
# Build target agnostic offloading library.

offload/cmake/caches/AMDGPUBot.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,6 @@ set(LLVM_LIT_ARGS "-v --show-unsupported --timeout 100 --show-xfail -j 32" CACHE
1919

2020
set(CLANG_DEFAULT_LINKER "lld" CACHE STRING "")
2121
# Must be a CACHE entry: without the CACHE keyword this would create a normal
# variable holding the list "compiler-rt;STRING;" instead of a cache setting.
set(CLANG_DEFAULT_RTLIB "compiler-rt" CACHE STRING "")
22+
23+
set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa CACHE STRING "")
24+
set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "openmp" CACHE STRING "")

offload/cmake/caches/Offload.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@ set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "")
55
set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda CACHE STRING "")
66
set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
77
set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
8-
set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;libcxx;libcxxabi" CACHE STRING "")
9-
set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;libcxx;libcxxabi" CACHE STRING "")
8+
set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi" CACHE STRING "")
9+
set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi" CACHE STRING "")

openmp/CMakeLists.txt

Lines changed: 40 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,14 @@ else()
8888
set(CMAKE_CXX_EXTENSIONS NO)
8989
endif()
9090

91+
# Targeting the GPU directly requires a few flags to make CMake happy.
92+
if("${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^amdgcn")
93+
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nogpulib")
94+
elseif("${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^nvptx")
95+
set(CMAKE_REQUIRED_FLAGS
96+
"${CMAKE_REQUIRED_FLAGS} -flto -c -Wno-unused-command-line-argument")
97+
endif()
98+
9199
# Check and set up common compiler flags.
92100
include(config-ix)
93101
include(HandleOpenMPOptions)
@@ -122,35 +130,41 @@ else()
122130
get_clang_resource_dir(LIBOMP_HEADERS_INSTALL_PATH SUBDIR include)
123131
endif()
124132

125-
# Build host runtime library, after LIBOMPTARGET variables are set since they are needed
126-
# to enable time profiling support in the OpenMP runtime.
127-
add_subdirectory(runtime)
128-
129-
set(ENABLE_OMPT_TOOLS ON)
130-
# Currently tools are not tested well on Windows or MacOS X.
131-
if (APPLE OR WIN32)
132-
set(ENABLE_OMPT_TOOLS OFF)
133-
endif()
133+
# Use the current compiler target to determine the appropriate runtime to build.
134+
if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn|^nvptx" OR
135+
"${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^amdgcn|^nvptx")
136+
add_subdirectory(device)
137+
else()
138+
# Build host runtime library, after LIBOMPTARGET variables are set since they
139+
# are needed to enable time profiling support in the OpenMP runtime.
140+
add_subdirectory(runtime)
141+
142+
set(ENABLE_OMPT_TOOLS ON)
143+
# Currently tools are not tested well on Windows or MacOS X.
144+
if (APPLE OR WIN32)
145+
set(ENABLE_OMPT_TOOLS OFF)
146+
endif()
134147

135-
option(OPENMP_ENABLE_OMPT_TOOLS "Enable building ompt based tools for OpenMP."
136-
${ENABLE_OMPT_TOOLS})
137-
if (OPENMP_ENABLE_OMPT_TOOLS)
138-
add_subdirectory(tools)
139-
endif()
148+
option(OPENMP_ENABLE_OMPT_TOOLS "Enable building ompt based tools for OpenMP."
149+
${ENABLE_OMPT_TOOLS})
150+
if (OPENMP_ENABLE_OMPT_TOOLS)
151+
add_subdirectory(tools)
152+
endif()
140153

141-
# Propagate OMPT support to offload
142-
if(NOT ${OPENMP_STANDALONE_BUILD})
143-
set(LIBOMP_HAVE_OMPT_SUPPORT ${LIBOMP_HAVE_OMPT_SUPPORT} PARENT_SCOPE)
144-
set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${LIBOMP_OMP_TOOLS_INCLUDE_DIR} PARENT_SCOPE)
145-
endif()
154+
# Propagate OMPT support to offload
155+
if(NOT ${OPENMP_STANDALONE_BUILD})
156+
set(LIBOMP_HAVE_OMPT_SUPPORT ${LIBOMP_HAVE_OMPT_SUPPORT} PARENT_SCOPE)
157+
set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${LIBOMP_OMP_TOOLS_INCLUDE_DIR} PARENT_SCOPE)
158+
endif()
146159

147-
option(OPENMP_MSVC_NAME_SCHEME "Build dll with MSVC naming scheme." OFF)
160+
option(OPENMP_MSVC_NAME_SCHEME "Build dll with MSVC naming scheme." OFF)
148161

149-
# Build libompd.so
150-
add_subdirectory(libompd)
162+
# Build libompd.so
163+
add_subdirectory(libompd)
151164

152-
# Build documentation
153-
add_subdirectory(docs)
165+
# Build documentation
166+
add_subdirectory(docs)
154167

155-
# Now that we have seen all testsuites, create the check-openmp target.
156-
construct_check_openmp_target()
168+
# Now that we have seen all testsuites, create the check-openmp target.
169+
construct_check_openmp_target()
170+
endif()

openmp/device/CMakeLists.txt

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# Ensure the compiler is a valid clang when building the GPU target.
# The check only runs when LLVM_VERSION_MAJOR is set; it then requires the
# CMake C++ compiler to be Clang with exactly the LLVM version being built.
2+
set(req_ver "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}")
3+
if(LLVM_VERSION_MAJOR AND NOT (CMAKE_CXX_COMPILER_ID MATCHES "[Cc]lang" AND
4+
"${CMAKE_CXX_COMPILER_VERSION}" VERSION_EQUAL "${req_ver}"))
5+
message(FATAL_ERROR "Cannot build GPU device runtime. CMake compiler "
6+
"'${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}' "
7+
"is not 'Clang ${req_ver}'.")
8+
endif()
9+
10+
set(src_files
11+
${CMAKE_CURRENT_SOURCE_DIR}/src/Allocator.cpp
12+
${CMAKE_CURRENT_SOURCE_DIR}/src/Configuration.cpp
13+
${CMAKE_CURRENT_SOURCE_DIR}/src/Debug.cpp
14+
${CMAKE_CURRENT_SOURCE_DIR}/src/Kernel.cpp
15+
${CMAKE_CURRENT_SOURCE_DIR}/src/LibC.cpp
16+
${CMAKE_CURRENT_SOURCE_DIR}/src/Mapping.cpp
17+
${CMAKE_CURRENT_SOURCE_DIR}/src/Misc.cpp
18+
${CMAKE_CURRENT_SOURCE_DIR}/src/Parallelism.cpp
19+
${CMAKE_CURRENT_SOURCE_DIR}/src/Profiling.cpp
20+
${CMAKE_CURRENT_SOURCE_DIR}/src/Reduction.cpp
21+
${CMAKE_CURRENT_SOURCE_DIR}/src/State.cpp
22+
${CMAKE_CURRENT_SOURCE_DIR}/src/Synchronization.cpp
23+
${CMAKE_CURRENT_SOURCE_DIR}/src/Tasking.cpp
24+
${CMAKE_CURRENT_SOURCE_DIR}/src/DeviceUtils.cpp
25+
${CMAKE_CURRENT_SOURCE_DIR}/src/Workshare.cpp
26+
)
27+
28+
list(APPEND compile_options -flto)
29+
list(APPEND compile_options -fvisibility=hidden)
30+
list(APPEND compile_options -nogpulib)
31+
list(APPEND compile_options -nostdlibinc)
32+
list(APPEND compile_options -fno-rtti)
33+
list(APPEND compile_options -fno-exceptions)
34+
list(APPEND compile_options -fconvergent-functions)
35+
list(APPEND compile_options -Wno-unknown-cuda-version)
36+
if(LLVM_DEFAULT_TARGET_TRIPLE)
37+
list(APPEND compile_options --target=${LLVM_DEFAULT_TARGET_TRIPLE})
38+
endif()
39+
40+
# We disable the slp vectorizer during the runtime optimization to avoid
41+
# vectorized accesses to the shared state. Generally, those are "good" but
42+
# the optimizer pipeline (esp. Attributor) does not fully support vectorized
43+
# instructions yet and we end up missing out on way more important constant
44+
# propagation. That said, we will run the vectorizer again after the runtime
45+
# has been linked into the user program.
# NOTE: every flag here is appended to `compile_options`, the only list that is
# handed to target_compile_options() for the device runtime further down; a
# differently named list would be silently ignored.
46+
list(APPEND compile_options "SHELL: -mllvm -vectorize-slp=false")
# Pick the output name suffix and the target-specific flags from the triple.
47+
if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn" OR
48+
"${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^amdgcn")
49+
set(target_name "amdgpu")
50+
list(APPEND compile_options "SHELL:-Xclang -mcode-object-version=none")
51+
elseif("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^nvptx" OR
52+
"${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^nvptx")
53+
set(target_name "nvptx")
54+
list(APPEND compile_options --cuda-feature=+ptx63)
55+
endif()
56+
57+
# Trick to combine these into a bitcode file via the linker's LTO pass.
58+
add_executable(libompdevice ${src_files})
59+
set_target_properties(libompdevice PROPERTIES
60+
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
61+
LINKER_LANGUAGE CXX
62+
BUILD_RPATH ""
63+
INSTALL_RPATH ""
64+
RUNTIME_OUTPUT_NAME libomptarget-${target_name}.bc)
65+
66+
# If the user built with the GPU C library enabled we will use that instead.
67+
if(LIBOMPTARGET_GPU_LIBC_SUPPORT)
68+
target_compile_definitions(libompdevice PRIVATE OMPTARGET_HAS_LIBC)
69+
endif()
70+
target_compile_definitions(libompdevice PRIVATE SHARED_SCRATCHPAD_SIZE=512)
71+
72+
target_include_directories(libompdevice PRIVATE
73+
${CMAKE_CURRENT_SOURCE_DIR}/include
74+
${CMAKE_CURRENT_SOURCE_DIR}/../../libc
75+
${CMAKE_CURRENT_SOURCE_DIR}/../../offload/include)
76+
target_compile_options(libompdevice PRIVATE ${compile_options})
77+
target_link_options(libompdevice PRIVATE
78+
"-flto" "-r" "-nostdlib" "-Wl,--lto-emit-llvm")
79+
if(LLVM_DEFAULT_TARGET_TRIPLE)
80+
target_link_options(libompdevice PRIVATE "--target=${LLVM_DEFAULT_TARGET_TRIPLE}")
81+
endif()
82+
install(TARGETS libompdevice
83+
PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ
84+
DESTINATION ${OPENMP_INSTALL_LIBDIR})
85+
86+
# Expose the bitcode file emitted by the libompdevice target (same directory and
# file name as its RUNTIME_OUTPUT_* properties) as an imported object library so
# that it can be placed into an archive.
add_library(ompdevice.all_objs OBJECT IMPORTED)
87+
set_property(TARGET ompdevice.all_objs APPEND PROPERTY IMPORTED_OBJECTS
88+
${CMAKE_CURRENT_BINARY_DIR}/libomptarget-${target_name}.bc)
89+
90+
# Archive all the object files generated above into a static library
91+
add_library(ompdevice STATIC)
92+
# The imported object carries no build rule of its own, so order the archive
# after the target that actually produces the bitcode file.
add_dependencies(ompdevice libompdevice)
93+
set_target_properties(ompdevice PROPERTIES
94+
ARCHIVE_OUTPUT_DIRECTORY "${OPENMP_INSTALL_LIBDIR}"
95+
ARCHIVE_OUTPUT_NAME ompdevice
96+
LINKER_LANGUAGE CXX
97+
)
98+
target_link_libraries(ompdevice PRIVATE ompdevice.all_objs)
99+
install(TARGETS ompdevice ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}")
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

openmp/docs/SupportAndFAQ.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,13 @@ Clang will be built with all backends enabled. When building with
7878
``LLVM_ENABLE_RUNTIMES="openmp"`` OpenMP should not be enabled in
7979
``LLVM_ENABLE_PROJECTS`` because it is enabled by default.
8080

81+
Support for the device library comes from a separate build of the OpenMP library
82+
that targets the GPU architecture. Building it requires enabling the runtime
83+
targets, or setting the target manually when doing a standalone build. This is
84+
done with the ``LLVM_RUNTIME_TARGETS`` option and then enabling the OpenMP
85+
runtime for the GPU target via ``RUNTIMES_<triple>_LLVM_ENABLE_RUNTIMES``. Refer to
86+
the ``offload/cmake/caches/Offload.cmake`` cache file for the specific invocation.
87+
8188
For Nvidia offload, please see :ref:`build_nvidia_offload_capable_compiler`.
8289
For AMDGPU offload, please see :ref:`build_amdgpu_offload_capable_compiler`.
8390

0 commit comments

Comments
 (0)