Reapply "[libc] Remove 'packaged' GPU build support (#100208)"

jhuber6 · jhuber6 · commit 8d8fa01a66d5 · 2024-07-24T10:24:53.000-05:00
This reverts commit 550b83d.
diff --git a/libc/cmake/modules/LLVMLibCLibraryRules.cmake b/libc/cmake/modules/LLVMLibCLibraryRules.cmake
@@ -83,97 +83,6 @@ function(get_all_object_file_deps result fq_deps_list)
   set(${result} ${all_deps} PARENT_SCOPE)
 endfunction()
 
-# A rule to build a library from a collection of entrypoint objects and bundle
-# it into a GPU fatbinary. Usage is the same as 'add_entrypoint_library'.
-# Usage:
-#     add_gpu_entrypoint_library(
-#       DEPENDS <list of add_entrypoint_object targets>
-#     )
-function(add_gpu_entrypoint_library target_name base_target_name)
-  cmake_parse_arguments(
-    "ENTRYPOINT_LIBRARY"
-    "" # No optional arguments
-    "" # No single value arguments
-    "DEPENDS" # Multi-value arguments
-    ${ARGN}
-  )
-  if(NOT ENTRYPOINT_LIBRARY_DEPENDS)
-    message(FATAL_ERROR "'add_entrypoint_library' target requires a DEPENDS list "
-                        "of 'add_entrypoint_object' targets.")
-  endif()
-
-  get_fq_deps_list(fq_deps_list ${ENTRYPOINT_LIBRARY_DEPENDS})
-  get_all_object_file_deps(all_deps "${fq_deps_list}")
-
-  # The GPU 'libc' needs to be exported in a format that can be linked with
-  # offloading langauges like OpenMP or CUDA. This wraps every GPU object into a
-  # fat binary and adds them to a static library.
-  set(objects "")
-  foreach(dep IN LISTS all_deps)
-    set(object $<$<STREQUAL:$<TARGET_NAME_IF_EXISTS:${dep}>,${dep}>:$<TARGET_OBJECTS:${dep}>>)
-    string(FIND ${dep} "." last_dot_loc REVERSE)
-    math(EXPR name_loc "${last_dot_loc} + 1")
-    string(SUBSTRING ${dep} ${name_loc} -1 name)
-    if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
-      set(prefix --image=arch=generic,triple=nvptx64-nvidia-cuda,feature=+ptx63)
-    elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
-      set(prefix --image=arch=generic,triple=amdgcn-amd-amdhsa)
-    endif()
-
-    # Use the 'clang-offload-packager' to merge these files into a binary blob.
-    add_custom_command(
-      OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin"
-      COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/binary
-      COMMAND ${LIBC_CLANG_OFFLOAD_PACKAGER}
-              "${prefix},file=$<JOIN:${object},,file=>" -o
-              ${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin
-      DEPENDS ${dep} ${base_target_name}
-      COMMENT "Packaging LLVM offloading binary for '${object}'"
-    )
-    add_custom_target(${dep}.__gpubin__ DEPENDS ${dep}
-                      "${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin")
-    if(TARGET clang-offload-packager)
-      add_dependencies(${dep}.__gpubin__ clang-offload-packager)
-    endif()
-
-    # CMake does not permit setting the name on object files. In order to have
-    # human readable names we create an empty stub file with the entrypoint
-    # name. This empty file will then have the created binary blob embedded.
-    add_custom_command(
-      OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp"
-      COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/stubs
-      COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp
-      DEPENDS ${dep} ${dep}.__gpubin__ ${base_target_name}
-    )
-    add_custom_target(${dep}.__stub__
-                      DEPENDS ${dep}.__gpubin__ "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp")
-
-    add_library(${dep}.__fatbin__
-      EXCLUDE_FROM_ALL OBJECT
-      "${CMAKE_CURRENT_BINARY_DIR}/stubs/${name}.cpp"
-    )
-
-    # This is always compiled for the LLVM host triple instead of the native GPU
-    # triple that is used by default in the build.
-    target_compile_options(${dep}.__fatbin__ BEFORE PRIVATE -nostdlib)
-    target_compile_options(${dep}.__fatbin__ PRIVATE
-      --target=${LLVM_HOST_TRIPLE}
-      "SHELL:-Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR}/binary/${name}.gpubin")
-    add_dependencies(${dep}.__fatbin__
-                     ${dep} ${dep}.__stub__ ${dep}.__gpubin__ ${base_target_name})
-
-    # Set the list of newly create fat binaries containing embedded device code.
-    list(APPEND objects $<TARGET_OBJECTS:${dep}.__fatbin__>)
-  endforeach()
-
-  add_library(
-    ${target_name}
-    STATIC
-      ${objects}
-  )
-  set_target_properties(${target_name} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${LIBC_LIBRARY_DIR})
-endfunction(add_gpu_entrypoint_library)
-
 # A rule to build a library from a collection of entrypoint objects and bundle
 # it in a single LLVM-IR bitcode file.
 # Usage:
diff --git a/libc/docs/gpu/building.rst b/libc/docs/gpu/building.rst
@@ -151,25 +151,6 @@ Build overview
 Once installed, the GPU build will create several files used for different
 targets. This section will briefly describe their purpose.
 
-**lib/<host-triple>/libcgpu-amdgpu.a or lib/libcgpu-amdgpu.a**
-  A static library containing fat binaries supporting AMD GPUs. These are built
-  using the support described in the `clang documentation
-  <https://clang.llvm.org/docs/OffloadingDesign.html>`_. These are intended to
-  be static libraries included natively for offloading languages like CUDA, HIP,
-  or OpenMP. This implements the standard C library.
-
-**lib/<host-triple>/libmgpu-amdgpu.a or lib/libmgpu-amdgpu.a**
-  A static library containing fat binaries that implements the standard math
-  library for AMD GPUs.
-
-**lib/<host-triple>/libcgpu-nvptx.a or lib/libcgpu-nvptx.a**
-  A static library containing fat binaries that implement the standard C library
-  for NVIDIA GPUs.
-
-**lib/<host-triple>/libmgpu-nvptx.a or lib/libmgpu-nvptx.a**
-  A static library containing fat binaries that implement the standard math
-  library for NVIDIA GPUs.
-
 **include/<target-triple>**
   The include directory where all of the generated headers for the target will
   go. These definitions are strictly for the GPU when being targeted directly.
diff --git a/libc/docs/gpu/using.rst b/libc/docs/gpu/using.rst
@@ -34,16 +34,17 @@ described in the `clang documentation
 by the OpenMP toolchain, but is currently opt-in for the CUDA and HIP toolchains
 through the ``--offload-new-driver``` and ``-fgpu-rdc`` flags.
 
-The installation should contain a static library called ``libcgpu-amdgpu.a`` or
-``libcgpu-nvptx.a`` depending on which GPU architectures your build targeted.
-These contain fat binaries compatible with the offloading toolchain such that
-they can be used directly.
+In order to link the GPU runtime, we simply pass this library to the embedded
+device linker job. This can be done using the ``-Xoffload-linker`` option, which
+forwards an argument to a ``clang`` job used to create the final GPU executable.
+The toolchain should pick up the C libraries automatically in most cases, so
+this shouldn't be necessary.
 
 .. code-block:: sh
 
-  $> clang openmp.c -fopenmp --offload-arch=gfx90a -lcgpu-amdgpu
-  $> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc -lcgpu-nvptx
-  $> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc -lcgpu-amdgpu
+  $> clang openmp.c -fopenmp --offload-arch=gfx90a -Xoffload-linker -lc
+  $> clang cuda.cu --offload-arch=sm_80 --offload-new-driver -fgpu-rdc -Xoffload-linker -lc
+  $> clang hip.hip --offload-arch=gfx940 --offload-new-driver -fgpu-rdc -Xoffload-linker -lc
 
 This will automatically link in the needed function definitions if they were
 required by the user's application. Normally using the ``-fgpu-rdc`` option
diff --git a/libc/lib/CMakeLists.txt b/libc/lib/CMakeLists.txt
@@ -40,20 +40,6 @@ foreach(archive IN ZIP_LISTS
   # Add the offloading version of the library for offloading languages. These
   # are installed in the standard search path separate from the other libraries.
   if(LIBC_TARGET_OS_IS_GPU)
-    add_gpu_entrypoint_library(
-      ${archive_1}gpu
-      ${archive_1}
-      DEPENDS
-        ${${archive_2}}
-    )
-    set_target_properties(
-      ${archive_1}gpu
-      PROPERTIES
-        ARCHIVE_OUTPUT_NAME ${archive_0}gpu-${LIBC_TARGET_ARCHITECTURE}
-        ARCHIVE_OUTPUT_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR}
-    )
-    list(APPEND added_gpu_archive_targets ${archive_1}gpu)
-
     add_bitcode_entrypoint_library(
       ${archive_1}bitcode
       ${archive_1}
@@ -65,7 +51,6 @@ foreach(archive IN ZIP_LISTS
       PROPERTIES
         OUTPUT_NAME ${archive_1}.bc
     )
-    add_dependencies(${archive_1}gpu ${archive_1}bitcode)
     list(APPEND added_gpu_bitcode_targets ${archive_1}bitcode)
   endif()
 endforeach()
diff --git a/offload/test/libc/assert.c b/offload/test/libc/assert.c
@@ -3,8 +3,9 @@
 
 // REQUIRES: libc
 
-// NVPTX without LTO uses the implementation in OpenMP currently.
+// AMDGPU and NVPTX without LTO use the implementation in OpenMP currently.
 // UNSUPPORTED: nvptx64-nvidia-cuda
+// UNSUPPORTED: amdgcn-amd-amdhsa
 // REQUIRES: gpu
 
 #include <assert.h>
diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg
@@ -179,13 +179,6 @@ def remove_suffix_if_present(name):
         return name
 
 def add_libraries(source):
-    if config.libomptarget_has_libc:
-        if config.libomptarget_current_target.startswith('nvptx'):
-            return source + " " + config.llvm_library_dir + "/libcgpu-nvptx.a " + \
-                   config.llvm_library_intdir + "/libomptarget.devicertl.a"
-        elif config.libomptarget_current_target.startswith('amdgcn'):
-            return source + " " + config.llvm_library_dir + "/libcgpu-amdgpu.a " + \
-                   config.llvm_library_intdir + "/libomptarget.devicertl.a"
     return source + " " + config.llvm_library_intdir + "/libomptarget.devicertl.a"
 
 # Add platform targets