Reapply "[LLVM] Make the GPU loader utilities an LLVM tool (#132096)" #132277

jhuber6 · 2025-03-20T20:04:41Z

Summary:
There were a few issues with the first one, leading to some errors and
warnings. Most importantly, this was building on MSVC which isn't
supported.

Summary: There were a few issues with the first one, leading to some errors and warnings. Most importantly, this was building on MSVC, which isn't supported.

llvmbot · 2025-03-20T20:05:15Z

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-backend-nvptx

Author: Joseph Huber (jhuber6)

Changes

Summary:
There were a few issues with the first one, leading to some errors and
warnings. Most importantly, this was building on MSVC which isn't
supported.

Patch is 23.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132277.diff

15 Files Affected:

(modified) libc/CMakeLists.txt (-7)
(modified) libc/src/__support/RPC/rpc_server.h (+6-1)
(removed) libc/utils/gpu/CMakeLists.txt (-1)
(removed) libc/utils/gpu/loader/CMakeLists.txt (-54)
(removed) libc/utils/gpu/loader/amdgpu/CMakeLists.txt (-10)
(removed) libc/utils/gpu/loader/nvptx/CMakeLists.txt (-9)
(modified) llvm/CMakeLists.txt (-4)
(modified) llvm/runtimes/CMakeLists.txt (-14)
(modified) llvm/tools/CMakeLists.txt (+4)
(added) llvm/tools/llvm-gpu-loader/CMakeLists.txt (+46)
(renamed) llvm/tools/llvm-gpu-loader/amdhsa.cpp (+7-7)
(renamed) llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp (+43-30)
(added) llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h (+110)
(renamed) llvm/tools/llvm-gpu-loader/nvptx.cpp (+5-4)
(renamed) llvm/tools/llvm-gpu-loader/server.h (+7-95)

diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt
index ad39ff6fbcb1e..b264dcb4974c7 100644
--- a/libc/CMakeLists.txt
+++ b/libc/CMakeLists.txt
@@ -59,13 +59,6 @@ set(LIBC_NAMESPACE ${default_namespace}
   CACHE STRING "The namespace to use to enclose internal implementations. Must start with '__llvm_libc'."
 )
 
-# We will build the GPU utilities if we are not doing a runtimes build.
-option(LIBC_BUILD_GPU_LOADER "Always build the GPU loader utilities" OFF)
-if(LIBC_BUILD_GPU_LOADER OR ((NOT LLVM_RUNTIMES_BUILD) AND LLVM_LIBC_GPU_BUILD))
-  add_subdirectory(utils/gpu)
-  return()
-endif()
-
 option(LIBC_CMAKE_VERBOSE_LOGGING
   "Log details warnings and notifications during CMake configuration." OFF)
 
diff --git a/libc/src/__support/RPC/rpc_server.h b/libc/src/__support/RPC/rpc_server.h
index 7387eba9ceb26..dc3d8030caa47 100644
--- a/libc/src/__support/RPC/rpc_server.h
+++ b/libc/src/__support/RPC/rpc_server.h
@@ -20,6 +20,11 @@
 #define __has_builtin(x) 0
 #endif
 
+// Workaround for missing __builtin_is_constant_evaluated in < GCC 10.
+#ifndef __builtin_is_constant_evaluated
+#define __builtin_is_constant_evaluated(x) 0
+#endif
+
 // Configs for using the LLVM libc writer interface.
 #define LIBC_COPT_USE_C_ASSERT
 #define LIBC_COPT_MEMCPY_USE_EMBEDDED_TINY
@@ -28,7 +33,7 @@
 #define LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
 #define LIBC_COPT_PRINTF_DISABLE_STRERROR
 
-// The 'long double' type is 8 byte
+// The 'long double' type is 8 bytes.
 #define LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64
 
 #include "shared/rpc.h"
diff --git a/libc/utils/gpu/CMakeLists.txt b/libc/utils/gpu/CMakeLists.txt
deleted file mode 100644
index e529646a1206e..0000000000000
--- a/libc/utils/gpu/CMakeLists.txt
+++ /dev/null
@@ -1 +0,0 @@
-add_subdirectory(loader)
diff --git a/libc/utils/gpu/loader/CMakeLists.txt b/libc/utils/gpu/loader/CMakeLists.txt
deleted file mode 100644
index 9b3bd009dc0f1..0000000000000
--- a/libc/utils/gpu/loader/CMakeLists.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-add_library(gpu_loader OBJECT Main.cpp)
-
-include(FindLibcCommonUtils)
-target_link_libraries(gpu_loader PUBLIC llvm-libc-common-utilities)
-
-target_include_directories(gpu_loader PUBLIC
-  ${CMAKE_CURRENT_SOURCE_DIR}
-  ${LIBC_SOURCE_DIR}/include
-  ${LIBC_SOURCE_DIR}
-  ${LLVM_MAIN_INCLUDE_DIR}
-  ${LLVM_BINARY_DIR}/include
-)
-if(NOT LLVM_ENABLE_RTTI)
-  target_compile_options(gpu_loader PUBLIC -fno-rtti)
-endif()
-
-find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
-if(hsa-runtime64_FOUND)
-  add_subdirectory(amdgpu)
-endif()
-
-# The CUDA loader requires LLVM to traverse the ELF image for symbols.
-find_package(CUDAToolkit 11.2 QUIET)
-if(CUDAToolkit_FOUND)
-  add_subdirectory(nvptx)
-endif()
-
-if(TARGET amdhsa-loader AND LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
-  add_custom_target(libc.utils.gpu.loader)
-  add_dependencies(libc.utils.gpu.loader amdhsa-loader)
-  set_target_properties(
-    libc.utils.gpu.loader
-    PROPERTIES
-      TARGET amdhsa-loader
-      EXECUTABLE "$<TARGET_FILE:amdhsa-loader>"
-  )
-elseif(TARGET nvptx-loader AND LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
-  add_custom_target(libc.utils.gpu.loader)
-  add_dependencies(libc.utils.gpu.loader nvptx-loader)
-  set_target_properties(
-    libc.utils.gpu.loader
-    PROPERTIES
-      TARGET nvptx-loader
-      EXECUTABLE "$<TARGET_FILE:nvptx-loader>"
-  )
-endif()
-
-foreach(gpu_loader_tgt amdhsa-loader nvptx-loader)
-  if(TARGET ${gpu_loader_tgt})
-    install(TARGETS ${gpu_loader_tgt}
-            DESTINATION ${CMAKE_INSTALL_BINDIR}
-            COMPONENT libc)
-  endif()
-endforeach()
diff --git a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt b/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
deleted file mode 100644
index 17878daf0b6fe..0000000000000
--- a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-set(LLVM_LINK_COMPONENTS
-  BinaryFormat
-  Object
-  Option
-  Support
-  FrontendOffloading
-  )
-
-add_llvm_executable(amdhsa-loader amdhsa-loader.cpp)
-target_link_libraries(amdhsa-loader PRIVATE hsa-runtime64::hsa-runtime64 gpu_loader)
diff --git a/libc/utils/gpu/loader/nvptx/CMakeLists.txt b/libc/utils/gpu/loader/nvptx/CMakeLists.txt
deleted file mode 100644
index 42510ac31dad4..0000000000000
--- a/libc/utils/gpu/loader/nvptx/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-set(LLVM_LINK_COMPONENTS
-  BinaryFormat
-  Object
-  Option
-  Support
-  )
-
-add_llvm_executable(nvptx-loader nvptx-loader.cpp)
-target_link_libraries(nvptx-loader PRIVATE gpu_loader CUDA::cuda_driver)
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 5639061bea206..e76bc9b9ab778 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -210,10 +210,6 @@ if("${LIBC_TARGET_TRIPLE}" STREQUAL "amdgcn-amd-amdhsa" OR
    "${LIBC_TARGET_TRIPLE}" STREQUAL "nvptx64-nvidia-cuda")
   set(LLVM_LIBC_GPU_BUILD ON)
 endif()
-if (NOT "libc" IN_LIST LLVM_ENABLE_PROJECTS AND LLVM_LIBC_GPU_BUILD)
-  message(STATUS "Enabling libc project to build libc testing tools")
-  list(APPEND LLVM_ENABLE_PROJECTS "libc")
-endif()
 
 # LLVM_ENABLE_PROJECTS_USED is `ON` if the user has ever used the
 # `LLVM_ENABLE_PROJECTS` CMake cache variable.  This exists for
diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index 136099dc48ab8..51433d1ec9831 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -534,20 +534,6 @@ if(build_runtimes)
   endif()
   if(LLVM_LIBC_GPU_BUILD)
     list(APPEND extra_cmake_args "-DLLVM_LIBC_GPU_BUILD=ON")
-    if("libc" IN_LIST RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES)
-      if(TARGET amdhsa-loader)
-        list(APPEND extra_cmake_args
-             "-DRUNTIMES_amdgcn-amd-amdhsa_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:amdhsa-loader>")
-        list(APPEND extra_deps amdhsa-loader)
-      endif()
-    endif()
-    if("libc" IN_LIST RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES)
-      if(TARGET nvptx-loader)
-        list(APPEND extra_cmake_args
-             "-DRUNTIMES_nvptx64-nvidia-cuda_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:nvptx-loader>")
-        list(APPEND extra_deps nvptx-loader)
-      endif()
-    endif()
     if(TARGET clang-offload-packager)
       list(APPEND extra_deps clang-offload-packager)
     endif()
diff --git a/llvm/tools/CMakeLists.txt b/llvm/tools/CMakeLists.txt
index b9c5a79849ec8..9fe6f8c6b9c21 100644
--- a/llvm/tools/CMakeLists.txt
+++ b/llvm/tools/CMakeLists.txt
@@ -9,6 +9,10 @@
 # traversing each directory.
 create_llvm_tool_options()
 
+if(NOT LLVM_COMPILER_IS_GCC_COMPATIBLE)
+  set(LLVM_TOOL_LLVM_GPU_LOADER_BUILD OFF)
+endif()
+
 if(NOT LLVM_BUILD_LLVM_DYLIB AND NOT LLVM_BUILD_LLVM_C_DYLIB)
   set(LLVM_TOOL_LLVM_SHLIB_BUILD Off)
 endif()
diff --git a/llvm/tools/llvm-gpu-loader/CMakeLists.txt b/llvm/tools/llvm-gpu-loader/CMakeLists.txt
new file mode 100644
index 0000000000000..b35a702476ada
--- /dev/null
+++ b/llvm/tools/llvm-gpu-loader/CMakeLists.txt
@@ -0,0 +1,46 @@
+set(LLVM_LINK_COMPONENTS
+  BinaryFormat
+  Object
+  Option
+  Support
+  FrontendOffloading
+  TargetParser
+)
+
+add_llvm_tool(llvm-gpu-loader
+  llvm-gpu-loader.cpp
+
+  # TODO: We intentionally split this currently due to statically linking the
+  #       GPU runtimes. Dynamically load the dependencies, possibly using the
+  #       LLVM offloading API when it is complete.
+  PARTIAL_SOURCES_INTENDED
+
+  DEPENDS
+  intrinsics_gen
+)
+
+# Locate the RPC server handling interface.
+include(FindLibcCommonUtils)
+target_link_libraries(llvm-gpu-loader PUBLIC llvm-libc-common-utilities)
+
+# Check for HSA support for targeting AMD GPUs.
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if(hsa-runtime64_FOUND)
+  target_sources(llvm-gpu-loader PRIVATE amdhsa.cpp)
+  target_compile_definitions(llvm-gpu-loader PRIVATE AMDHSA_SUPPORT)
+  target_link_libraries(llvm-gpu-loader PRIVATE hsa-runtime64::hsa-runtime64)
+
+  # Compatibility with the old amdhsa-loader name.
+  add_llvm_tool_symlink(amdhsa-loader llvm-gpu-loader)
+endif()
+
+# Check for CUDA support for targeting NVIDIA GPUs.
+find_package(CUDAToolkit 11.2 QUIET)
+if(CUDAToolkit_FOUND)
+  target_sources(llvm-gpu-loader PRIVATE nvptx.cpp)
+  target_compile_definitions(llvm-gpu-loader PRIVATE NVPTX_SUPPORT)
+  target_link_libraries(llvm-gpu-loader PRIVATE CUDA::cuda_driver)
+
+  # Compatibility with the old nvptx-loader name.
+  add_llvm_tool_symlink(nvptx-loader llvm-gpu-loader)
+endif()
diff --git a/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp b/llvm/tools/llvm-gpu-loader/amdhsa.cpp
similarity index 98%
rename from libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp
rename to llvm/tools/llvm-gpu-loader/amdhsa.cpp
index 00fde147b0abd..be1b6b7993920 100644
--- a/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp
+++ b/llvm/tools/llvm-gpu-loader/amdhsa.cpp
@@ -13,7 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Loader.h"
+#include "llvm-gpu-loader.h"
+#include "server.h"
 
 #include "hsa/hsa.h"
 #include "hsa/hsa_ext_amd.h"
@@ -260,9 +261,8 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
         // Register RPC callbacks for the malloc and free functions on HSA.
         auto malloc_handler = [&](size_t size) -> void * {
           void *dev_ptr = nullptr;
-          if (hsa_status_t err =
-                  hsa_amd_memory_pool_allocate(coarsegrained_pool, size,
-                                               /*flags=*/0, &dev_ptr))
+          if (hsa_amd_memory_pool_allocate(coarsegrained_pool, size,
+                                           /*flags=*/0, &dev_ptr))
             dev_ptr = nullptr;
           hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_ptr);
           return dev_ptr;
@@ -330,9 +330,9 @@ static hsa_status_t hsa_memcpy(void *dst, hsa_agent_t dst_agent,
   return HSA_STATUS_SUCCESS;
 }
 
-int load(int argc, const char **argv, const char **envp, void *image,
-         size_t size, const LaunchParameters &params,
-         bool print_resource_usage) {
+int load_amdhsa(int argc, const char **argv, const char **envp, void *image,
+                size_t size, const LaunchParameters &params,
+                bool print_resource_usage) {
   // Initialize the HSA runtime used to communicate with the device.
   if (hsa_status_t err = hsa_init())
     handle_error(err);
diff --git a/libc/utils/gpu/loader/Main.cpp b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp
similarity index 69%
rename from libc/utils/gpu/loader/Main.cpp
rename to llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp
index c3aeeffd56368..a8204664e85eb 100644
--- a/libc/utils/gpu/loader/Main.cpp
+++ b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp
@@ -6,14 +6,17 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file opens a device image passed on the command line and passes it to
-// one of the loader implementations for launch.
+// This utility is used to launch standard programs onto the GPU in conjunction
+// with the LLVM 'libc' project. It is designed to mimic a standard emulator
+// workflow, allowing for unit tests to be run on the GPU directly.
 //
 //===----------------------------------------------------------------------===//
 
-#include "Loader.h"
+#include "llvm-gpu-loader.h"
 
 #include "llvm/BinaryFormat/Magic.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
@@ -21,6 +24,7 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/WithColor.h"
+#include "llvm/TargetParser/Triple.h"
 
 #include <cerrno>
 #include <cstdio>
@@ -67,12 +71,6 @@ static cl::opt<bool>
                          cl::desc("Output resource usage of launched kernels"),
                          cl::init(false), cl::cat(loader_category));
 
-static cl::opt<bool>
-    no_parallelism("no-parallelism",
-                   cl::desc("Allows only a single process to use the GPU at a "
-                            "time. Useful to suppress out-of-resource errors"),
-                   cl::init(false), cl::cat(loader_category));
-
 static cl::opt<std::string> file(cl::Positional, cl::Required,
                                  cl::desc("<gpu executable>"),
                                  cl::cat(loader_category));
@@ -115,27 +113,42 @@ int main(int argc, const char **argv, const char **envp) {
   llvm::transform(args, std::back_inserter(new_argv),
                   [](const std::string &arg) { return arg.c_str(); });
 
-  // Claim a file lock on the executable so only a single process can enter this
-  // region if requested. This prevents the loader from spurious failures.
-  int fd = -1;
-  if (no_parallelism) {
-    fd = open(get_main_executable(argv[0]).c_str(), O_RDONLY);
-    if (flock(fd, LOCK_EX) == -1)
-      report_error(createStringError("Failed to lock '%s': %s", argv[0],
-                                     strerror(errno)));
-  }
-
-  // Drop the loader from the program arguments.
-  LaunchParameters params{threads_x, threads_y, threads_z,
-                          blocks_x,  blocks_y,  blocks_z};
-  int ret = load(new_argv.size(), new_argv.data(), envp,
-                 const_cast<char *>(image.getBufferStart()),
-                 image.getBufferSize(), params, print_resource_usage);
-
-  if (no_parallelism) {
-    if (flock(fd, LOCK_UN) == -1)
-      report_error(createStringError("Failed to unlock '%s': %s", argv[0],
-                                     strerror(errno)));
+  Expected<llvm::object::ELF64LEObjectFile> elf_or_err =
+      llvm::object::ELF64LEObjectFile::create(image);
+  if (!elf_or_err)
+    report_error(elf_or_err.takeError());
+
+  int ret = 1;
+  if (elf_or_err->getArch() == Triple::amdgcn) {
+#ifdef AMDHSA_SUPPORT
+    LaunchParameters params{threads_x, threads_y, threads_z,
+                            blocks_x,  blocks_y,  blocks_z};
+
+    ret = load_amdhsa(new_argv.size(), new_argv.data(), envp,
+                      const_cast<char *>(image.getBufferStart()),
+                      image.getBufferSize(), params, print_resource_usage);
+#else
+    report_error(createStringError(
+        "Unsupported architecture; %s",
+        Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin()));
+#endif
+  } else if (elf_or_err->getArch() == Triple::nvptx64) {
+#ifdef NVPTX_SUPPORT
+    LaunchParameters params{threads_x, threads_y, threads_z,
+                            blocks_x,  blocks_y,  blocks_z};
+
+    ret = load_nvptx(new_argv.size(), new_argv.data(), envp,
+                     const_cast<char *>(image.getBufferStart()),
+                     image.getBufferSize(), params, print_resource_usage);
+#else
+    report_error(createStringError(
+        "Unsupported architecture; %s",
+        Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin()));
+#endif
+  } else {
+    report_error(createStringError(
+        "Unsupported architecture; %s",
+        Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin()));
   }
 
   return ret;
diff --git a/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h
new file mode 100644
index 0000000000000..ed34d0bace978
--- /dev/null
+++ b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h
@@ -0,0 +1,110 @@
+//===-- Generic device loader interface -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_GPU_LOADER_LLVM_GPU_LOADER_H
+#define LLVM_TOOLS_LLVM_GPU_LOADER_LLVM_GPU_LOADER_H
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+/// Generic launch parameters for configuring the number of blocks / threads.
+struct LaunchParameters {
+  uint32_t num_threads_x;
+  uint32_t num_threads_y;
+  uint32_t num_threads_z;
+  uint32_t num_blocks_x;
+  uint32_t num_blocks_y;
+  uint32_t num_blocks_z;
+};
+
+/// The arguments to the '_begin' kernel.
+struct begin_args_t {
+  int argc;
+  void *argv;
+  void *envp;
+};
+
+/// The arguments to the '_start' kernel.
+struct start_args_t {
+  int argc;
+  void *argv;
+  void *envp;
+  void *ret;
+};
+
+/// The arguments to the '_end' kernel.
+struct end_args_t {
+  int argc;
+};
+
+/// Generic interface to load the \p image and launch execution of the _start
+/// kernel on the target device. Copies \p argc and \p argv to the device.
+/// Returns the final value of the `main` function on the device.
+#ifdef AMDHSA_SUPPORT
+int load_amdhsa(int argc, const char **argv, const char **evnp, void *image,
+                size_t size, const LaunchParameters &params,
+                bool print_resource_usage);
+#endif
+#ifdef NVPTX_SUPPORT
+int load_nvptx(int argc, const char **argv, const char **evnp, void *image,
+               size_t size, const LaunchParameters &params,
+               bool print_resource_usage);
+#endif
+
+/// Return \p val aligned "upwards" according to \p align.
+template <typename V, typename A> inline V align_up(V val, A align) {
+  return ((val + V(align) - 1) / V(align)) * V(align);
+}
+
+/// Copy the system's argument vector to GPU memory allocated using \p alloc.
+template <typename Allocator>
+void *copy_argument_vector(int argc, const char **argv, Allocator alloc) {
+  size_t argv_size = sizeof(char *) * (argc + 1);
+  size_t str_size = 0;
+  for (int i = 0; i < argc; ++i)
+    str_size += strlen(argv[i]) + 1;
+
+  // We allocate enough space for a null terminated array and all the strings.
+  void *dev_argv = alloc(argv_size + str_size);
+  if (!dev_argv)
+    return nullptr;
+
+  // Store the strings linearly in the same memory buffer.
+  void *dev_str = reinterpret_cast<uint8_t *>(dev_argv) + argv_size;
+  for (int i = 0; i < argc; ++i) {
+    size_t size = strlen(argv[i]) + 1;
+    std::memcpy(dev_str, argv[i], size);
+    static_cast<void **>(dev_argv)[i] = dev_str;
+    dev_str = reinterpret_cast<uint8_t *>(dev_str) + size;
+  }
+
+  // Ensure the vector is null terminated.
+  reinterpret_cast<void **>(dev_argv)[argc] = nullptr;
+  return dev_argv;
+}
+
+/// Copy the system's environment to GPU memory allocated using \p alloc.
+template <typename Allocator>
+void *copy_environment(const char **envp, Allocator alloc) {
+  int envc = 0;
+  for (const char **env = envp; *env != 0; ++env)
+    ++envc;
+
+  return copy_argument_vector(envc, envp, alloc);
+}
+
+inline void handle_error_impl(const char *file, int32_t line, const char *msg) {
+  fprintf(stderr, "%s:%d:0: Error: %s\n", file, line, msg);
+  exit(EXIT_FAILURE);
+}
+#define handle_error(X) handle_error_impl(__FILE__, __LINE__, X)
+
+#endif // LLVM_TOOLS_LLVM_GPU_LOADER_LLVM_GPU_LOADER_H
diff --git a/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp b/llvm/tools/llvm-gpu-loader/nvptx.cpp
similarity index 98%
rename from libc/utils/gpu/loader/nvptx/nvptx-loader.cpp
rename to llvm/tools/llvm-gpu-loader/nvptx.cpp
index 7d6c176c6f360..13c62d50e6077 100644
--- a/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp
+++ b/llvm/tools/llvm-gpu-loader/nvptx.cpp
@@ -13,7 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Loader.h"
+#include "llvm-gpu-loader.h"
+#include "server.h"
 
 #include "cuda.h"
 
@@ -236,9 +237,9 @@ CUresult launch_kernel(CUmodule binary, CUstream stream, rpc::Server &server,
   return CUDA_SUCCESS;
 }
 
-int load(int argc, const char **argv, const char **envp, void *image,
-         size_t size, const LaunchParameters &params,
-         bool print_resource_usage) {
+int load_nvptx(int argc, const char **argv, const char **envp, void *image,
+               size_t size, const LaunchParameters &params,
+               bool print_resource_usage) {
   if (CUresult err = cuInit(0))
     handle_error(err);
   // Obtain the first device found on the system.
diff --git a/libc/utils/gpu/loader/Loader.h b/llvm/tools/llvm-gpu-loader/server.h
similarity index 52%
rename from libc/utils/gpu/loader/Loader.h
rename to llvm/tools/llvm-gpu-loader/server.h
index ec05117a041ab..bc54b4b74915a 100644
--- a/libc/utils/gpu/loader/Loader.h
+++ b/llvm/tools/llvm-gpu-loader/server.h
@...
[truncated]

Reapply "[LLVM] Make the GPU loader utilities an LLVM tool (llvm#132096…

8f51751

…)" Summary: There were a few issues with the first one, leading to some errors and warnings. Most importantly, this was building on MSVC which isn't supported.

jhuber6 requested review from JonChesterfield, jplehr, lntue and michaelrj-google March 20, 2025 20:04

llvmbot added backend:AMDGPU libc backend:NVPTX labels Mar 20, 2025

michaelrj-google approved these changes Mar 20, 2025

View reviewed changes

jhuber6 merged commit bd6df0f into llvm:main Mar 21, 2025
21 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Reapply "[LLVM] Make the GPU loader utilities an LLVM tool (#132096)" #132277

Reapply "[LLVM] Make the GPU loader utilities an LLVM tool (#132096)" #132277

Uh oh!

jhuber6 commented Mar 20, 2025

Uh oh!

llvmbot commented Mar 20, 2025 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

Reapply "[LLVM] Make the GPU loader utilities an LLVM tool (#132096)" #132277

Reapply "[LLVM] Make the GPU loader utilities an LLVM tool (#132096)" #132277

Uh oh!

Conversation

jhuber6 commented Mar 20, 2025

Uh oh!

llvmbot commented Mar 20, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

llvmbot commented Mar 20, 2025 •

edited

Loading