-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[CUDA][HIP] Make template implicitly host device #70369
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-driver @llvm/pr-subscribers-clang Author: Yaxun (Sam) Liu (yxsamliu) Changes: Currently std::is_invocable does not work for CUDA/HIP since its implementation requires checking whether a function is invocable in the context of a synthesized host function. In general, to make <type_traits> work with CUDA/HIP, the template functions need to be defined as `__host__ __device__`. Fixes: #69956 Fixes: SWDEV-428314 Full diff: https://github.com/llvm/llvm-project/pull/70369.diff 5 Files Affected:
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 02a0c81644b6c6d..64908dcd9b2b9c4 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -283,12 +283,18 @@ set(cuda_wrapper_files
cuda_wrappers/cmath
cuda_wrappers/complex
cuda_wrappers/new
+ cuda_wrappers/type_traits
)
set(cuda_wrapper_bits_files
cuda_wrappers/bits/shared_ptr_base.h
cuda_wrappers/bits/basic_string.h
cuda_wrappers/bits/basic_string.tcc
+ cuda_wrappers/bits/move.h
+)
+
+set(cuda_wrapper_utility_files
+ cuda_wrappers/__utility/swap.h
)
set(ppc_wrapper_files
@@ -363,7 +369,7 @@ endfunction(clang_generate_header)
# Copy header files from the source directory to the build directory
foreach( f ${files} ${cuda_wrapper_files} ${cuda_wrapper_bits_files}
${ppc_wrapper_files} ${openmp_wrapper_files} ${hlsl_files}
- ${llvm_libc_wrapper_files})
+ ${llvm_libc_wrapper_files} ${cuda_wrapper_utility_files})
copy_header_to_output_dir(${CMAKE_CURRENT_SOURCE_DIR} ${f})
endforeach( f )
@@ -468,7 +474,7 @@ add_header_target("arm-common-resource-headers" "${arm_common_files};${arm_commo
# Architecture/platform specific targets
add_header_target("arm-resource-headers" "${arm_only_files};${arm_only_generated_files}")
add_header_target("aarch64-resource-headers" "${aarch64_only_files};${aarch64_only_generated_files}")
-add_header_target("cuda-resource-headers" "${cuda_files};${cuda_wrapper_files};${cuda_wrapper_bits_files}")
+add_header_target("cuda-resource-headers" "${cuda_files};${cuda_wrapper_files};${cuda_wrapper_bits_files};${cuda_wrapper_utility_files}")
add_header_target("hexagon-resource-headers" "${hexagon_files}")
add_header_target("hip-resource-headers" "${hip_files}")
add_header_target("loongarch-resource-headers" "${loongarch_files}")
@@ -561,6 +567,12 @@ install(
EXCLUDE_FROM_ALL
COMPONENT cuda-resource-headers)
+install(
+ FILES ${cuda_wrapper_utility_files}
+ DESTINATION ${header_install_dir}/cuda_wrappers/__utility
+ EXCLUDE_FROM_ALL
+ COMPONENT cuda-resource-headers)
+
install(
FILES ${cuda_files}
DESTINATION ${header_install_dir}
diff --git a/clang/lib/Headers/cuda_wrappers/__utility/swap.h b/clang/lib/Headers/cuda_wrappers/__utility/swap.h
new file mode 100644
index 000000000000000..128dc56ffc55755
--- /dev/null
+++ b/clang/lib/Headers/cuda_wrappers/__utility/swap.h
@@ -0,0 +1,3 @@
+#pragma clang force_cuda_host_device begin
+#include_next "__utility/swap.h"
+#pragma clang force_cuda_host_device end
diff --git a/clang/lib/Headers/cuda_wrappers/bits/move.h b/clang/lib/Headers/cuda_wrappers/bits/move.h
new file mode 100644
index 000000000000000..23580e36d094a16
--- /dev/null
+++ b/clang/lib/Headers/cuda_wrappers/bits/move.h
@@ -0,0 +1,3 @@
+#pragma clang force_cuda_host_device begin
+#include_next "bits/move.h"
+#pragma clang force_cuda_host_device end
diff --git a/clang/lib/Headers/cuda_wrappers/cmath b/clang/lib/Headers/cuda_wrappers/cmath
index 45f89beec9b4df4..512a422b977972f 100644
--- a/clang/lib/Headers/cuda_wrappers/cmath
+++ b/clang/lib/Headers/cuda_wrappers/cmath
@@ -27,6 +27,12 @@
#include_next <cmath>
#if defined(_LIBCPP_STD_VER)
+#if !defined(_LIBCPP_CONSTEXPR_SINCE_CXX14)
+#define _LIBCPP_CONSTEXPR_SINCE_CXX14
+#endif
+#if !defined(_LIBCPP_CONSTEXPR_SINCE_CXX20)
+#define _LIBCPP_CONSTEXPR_SINCE_CXX20
+#endif
// libc++ will need long double variants of these functions, but CUDA does not
// provide them. We'll provide their declarations, which should allow the
diff --git a/clang/lib/Headers/cuda_wrappers/type_traits b/clang/lib/Headers/cuda_wrappers/type_traits
new file mode 100644
index 000000000000000..a4e178dc2d34afc
--- /dev/null
+++ b/clang/lib/Headers/cuda_wrappers/type_traits
@@ -0,0 +1,31 @@
+/*===---- type_traits - CUDA wrapper for <type_traits> ---------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_CUDA_WRAPPERS_TYPE_TRAITS
+#define __CLANG_CUDA_WRAPPERS_TYPE_TRAITS
+
+#pragma clang force_cuda_host_device begin
+#include_next <type_traits>
+#pragma clang force_cuda_host_device end
+
+#endif // __CLANG_CUDA_WRAPPERS_TYPE_TRAITS
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks reasonable, the same approach we've always done for these things.
bf62743
to
5097f20
Compare
f68b605
to
0c44a09
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
Added option -foffload-implicit-host-device-templates which is off by default. When the option is on, template functions and specializations without host/device attributes have implicit host device attributes. They can be overridden by device template functions with the same signature. They are emitted on device side only if they are used on device side. This feature is added as an extension. `__has_extension(cuda_implicit_host_device_templates)` can be used to check whether it is enabled. This is to facilitate using standard C++ headers for device. Fixes: llvm#69956 Fixes: SWDEV-428314
0c44a09
to
5b78370
Compare
ping This patch passes our internal CI. |
Now that we're making an even larger class of functions implicitly HD, the last logical step would be to make all unattributed functions implicitly HD, too (in a separate patch). After all, a template is as GPU-portable (or not) as a regular function. Unlike constexpr or compiler-generated glue for lambdas, template functions do not confer any benefits to our assumptions about whether the code will be compileable and working on a GPU. |
Added option -foffload-implicit-host-device-templates which is off by default. When the option is on, template functions and specializations without host/device attributes have implicit host device attributes. They can be overridden by device template functions with the same signature. They are emitted on device side only if they are used on device side. This feature is added as an extension. `__has_extension(cuda_implicit_host_device_templates)` can be used to check whether it is enabled. This is to facilitate using standard C++ headers for device. Fixes: llvm#69956 Fixes: SWDEV-428314
Added option -foffload-implicit-host-device-templates which is off by default.
When the option is on, template functions and specializations without
host/device attributes have implicit host device attributes.
They can be overridden by device template functions with the same signature.
They are emitted on device side only if they are used on device side.
This feature is added as an extension.
__has_extension(cuda_implicit_host_device_templates)
can be used to check whether it is enabled.
This is to facilitate using standard C++ headers for device.
Fixes: #69956
Fixes: SWDEV-428314