-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[clang][HIP] Don't use the OpenCLKernel CC when targeting AMDGCNSPIRV #110447
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang][HIP] Don't use the OpenCLKernel CC when targeting AMDGCNSPIRV #110447
Conversation
@llvm/pr-subscribers-clang-codegen Author: Alex Voicu (AlexVlx) ChangesWhen compiling HIP source for AMDGCN flavoured SPIR-V that is expected to be consumed by the ROCm HIP RT, it's not desirable to set the OpenCL Kernel CC on Full diff: https://github.com/llvm/llvm-project/pull/110447.diff 4 Files Affected:
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index c44f38ef02a3f1..19dea3a55f28c7 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -815,7 +815,10 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
assert(!getLangOpts().CUDA || !getLangOpts().CUDAIsDevice ||
getLangOpts().GPUAllowDeviceInit);
if (getLangOpts().HIP && getLangOpts().CUDAIsDevice) {
- Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+ if (getTriple().isSPIRV())
+ Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
+ else
+ Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
Fn->addFnAttr("device-init");
}
@@ -973,7 +976,10 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
assert(!getLangOpts().CUDA || !getLangOpts().CUDAIsDevice ||
getLangOpts().GPUAllowDeviceInit);
if (getLangOpts().HIP && getLangOpts().CUDAIsDevice) {
- Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+ if (getTriple().isSPIRV())
+ Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
+ else
+ Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
Fn->addFnAttr("device-init");
}
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index a7beb9d222c3b5..0024f9d16983ed 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -3734,12 +3734,12 @@ static CallingConv getCCForDeclaratorChunk(
}
}
} else if (S.getLangOpts().CUDA) {
- // If we're compiling CUDA/HIP code and targeting SPIR-V we need to make
+ // If we're compiling CUDA/HIP code and targeting HIPSPV we need to make
// sure the kernels will be marked with the right calling convention so that
- // they will be visible by the APIs that ingest SPIR-V.
+ // they will be visible by the APIs that ingest SPIR-V. We do not do this
+ // when targeting AMDGCNSPIRV, as it does not rely on OpenCL.
llvm::Triple Triple = S.Context.getTargetInfo().getTriple();
- if (Triple.getArch() == llvm::Triple::spirv32 ||
- Triple.getArch() == llvm::Triple::spirv64) {
+ if (Triple.isSPIRV() && Triple.getVendor() != llvm::Triple::AMD) {
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
if (AL.getKind() == ParsedAttr::AT_CUDAGlobal) {
CC = CC_OpenCLKernel;
diff --git a/clang/test/CodeGenCUDA/device-init-fun.cu b/clang/test/CodeGenCUDA/device-init-fun.cu
index 4f3119a2269c61..aaf5b1be72b842 100644
--- a/clang/test/CodeGenCUDA/device-init-fun.cu
+++ b/clang/test/CodeGenCUDA/device-init-fun.cu
@@ -4,11 +4,17 @@
// RUN: -fgpu-allow-device-init -x hip \
// RUN: -fno-threadsafe-statics -emit-llvm -o - %s \
// RUN: | FileCheck %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -std=c++11 \
+// RUN: -fgpu-allow-device-init -x hip \
+// RUN: -fno-threadsafe-statics -emit-llvm -o - %s \
+// RUN: | FileCheck %s --check-prefix=CHECK-SPIRV
#include "Inputs/cuda.h"
// CHECK: define internal amdgpu_kernel void @_GLOBAL__sub_I_device_init_fun.cu() #[[ATTR:[0-9]*]]
// CHECK: attributes #[[ATTR]] = {{.*}}"device-init"
+// CHECK-SPIRV: define internal spir_kernel void @_GLOBAL__sub_I_device_init_fun.cu(){{.*}} #[[ATTR:[0-9]*]]
+// CHECK-SPIRV: attributes #[[ATTR]] = {{.*}}"device-init"
__device__ void f();
diff --git a/clang/test/CodeGenCUDA/kernel-amdgcn.cu b/clang/test/CodeGenCUDA/kernel-amdgcn.cu
index 48473b92ccff3b..8b971666990992 100644
--- a/clang/test/CodeGenCUDA/kernel-amdgcn.cu
+++ b/clang/test/CodeGenCUDA/kernel-amdgcn.cu
@@ -1,31 +1,37 @@
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
#include "Inputs/cuda.h"
// CHECK: define{{.*}} amdgpu_kernel void @_ZN1A6kernelEv
+// CHECK-SPIRV: define{{.*}} spir_kernel void @_ZN1A6kernelEv
class A {
public:
static __global__ void kernel(){}
};
// CHECK: define{{.*}} void @_Z10non_kernelv
+// CHECK-SPIRV: define{{.*}} void @_Z10non_kernelv
__device__ void non_kernel(){}
// CHECK: define{{.*}} amdgpu_kernel void @_Z6kerneli
+// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z6kerneli
__global__ void kernel(int x) {
non_kernel();
}
// CHECK: define{{.*}} amdgpu_kernel void @_Z11EmptyKernelIvEvv
+// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z11EmptyKernelIvEvv
template <typename T>
__global__ void EmptyKernel(void) {}
struct Dummy {
/// Type definition of the EmptyKernel kernel entry point
typedef void (*EmptyKernelPtr)();
- EmptyKernelPtr Empty() { return EmptyKernel<void>; }
+ EmptyKernelPtr Empty() { return EmptyKernel<void>; }
};
// CHECK: define{{.*}} amdgpu_kernel void @_Z15template_kernelI1AEvT_{{.*}} #[[ATTR:[0-9][0-9]*]]
+// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z15template_kernelI1AEvT_{{.*}} #[[ATTR:[0-9][0-9]*]]
template<class T>
__global__ void template_kernel(T x) {}
|
@llvm/pr-subscribers-clang Author: Alex Voicu (AlexVlx) ChangesWhen compiling HIP source for AMDGCN flavoured SPIR-V that is expected to be consumed by the ROCm HIP RT, it's not desirable to set the OpenCL Kernel CC on Full diff: https://github.com/llvm/llvm-project/pull/110447.diff 4 Files Affected:
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index c44f38ef02a3f1..19dea3a55f28c7 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -815,7 +815,10 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
assert(!getLangOpts().CUDA || !getLangOpts().CUDAIsDevice ||
getLangOpts().GPUAllowDeviceInit);
if (getLangOpts().HIP && getLangOpts().CUDAIsDevice) {
- Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+ if (getTriple().isSPIRV())
+ Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
+ else
+ Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
Fn->addFnAttr("device-init");
}
@@ -973,7 +976,10 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
assert(!getLangOpts().CUDA || !getLangOpts().CUDAIsDevice ||
getLangOpts().GPUAllowDeviceInit);
if (getLangOpts().HIP && getLangOpts().CUDAIsDevice) {
- Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+ if (getTriple().isSPIRV())
+ Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
+ else
+ Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
Fn->addFnAttr("device-init");
}
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index a7beb9d222c3b5..0024f9d16983ed 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -3734,12 +3734,12 @@ static CallingConv getCCForDeclaratorChunk(
}
}
} else if (S.getLangOpts().CUDA) {
- // If we're compiling CUDA/HIP code and targeting SPIR-V we need to make
+ // If we're compiling CUDA/HIP code and targeting HIPSPV we need to make
// sure the kernels will be marked with the right calling convention so that
- // they will be visible by the APIs that ingest SPIR-V.
+ // they will be visible by the APIs that ingest SPIR-V. We do not do this
+ // when targeting AMDGCNSPIRV, as it does not rely on OpenCL.
llvm::Triple Triple = S.Context.getTargetInfo().getTriple();
- if (Triple.getArch() == llvm::Triple::spirv32 ||
- Triple.getArch() == llvm::Triple::spirv64) {
+ if (Triple.isSPIRV() && Triple.getVendor() != llvm::Triple::AMD) {
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
if (AL.getKind() == ParsedAttr::AT_CUDAGlobal) {
CC = CC_OpenCLKernel;
diff --git a/clang/test/CodeGenCUDA/device-init-fun.cu b/clang/test/CodeGenCUDA/device-init-fun.cu
index 4f3119a2269c61..aaf5b1be72b842 100644
--- a/clang/test/CodeGenCUDA/device-init-fun.cu
+++ b/clang/test/CodeGenCUDA/device-init-fun.cu
@@ -4,11 +4,17 @@
// RUN: -fgpu-allow-device-init -x hip \
// RUN: -fno-threadsafe-statics -emit-llvm -o - %s \
// RUN: | FileCheck %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -std=c++11 \
+// RUN: -fgpu-allow-device-init -x hip \
+// RUN: -fno-threadsafe-statics -emit-llvm -o - %s \
+// RUN: | FileCheck %s --check-prefix=CHECK-SPIRV
#include "Inputs/cuda.h"
// CHECK: define internal amdgpu_kernel void @_GLOBAL__sub_I_device_init_fun.cu() #[[ATTR:[0-9]*]]
// CHECK: attributes #[[ATTR]] = {{.*}}"device-init"
+// CHECK-SPIRV: define internal spir_kernel void @_GLOBAL__sub_I_device_init_fun.cu(){{.*}} #[[ATTR:[0-9]*]]
+// CHECK-SPIRV: attributes #[[ATTR]] = {{.*}}"device-init"
__device__ void f();
diff --git a/clang/test/CodeGenCUDA/kernel-amdgcn.cu b/clang/test/CodeGenCUDA/kernel-amdgcn.cu
index 48473b92ccff3b..8b971666990992 100644
--- a/clang/test/CodeGenCUDA/kernel-amdgcn.cu
+++ b/clang/test/CodeGenCUDA/kernel-amdgcn.cu
@@ -1,31 +1,37 @@
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
#include "Inputs/cuda.h"
// CHECK: define{{.*}} amdgpu_kernel void @_ZN1A6kernelEv
+// CHECK-SPIRV: define{{.*}} spir_kernel void @_ZN1A6kernelEv
class A {
public:
static __global__ void kernel(){}
};
// CHECK: define{{.*}} void @_Z10non_kernelv
+// CHECK-SPIRV: define{{.*}} void @_Z10non_kernelv
__device__ void non_kernel(){}
// CHECK: define{{.*}} amdgpu_kernel void @_Z6kerneli
+// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z6kerneli
__global__ void kernel(int x) {
non_kernel();
}
// CHECK: define{{.*}} amdgpu_kernel void @_Z11EmptyKernelIvEvv
+// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z11EmptyKernelIvEvv
template <typename T>
__global__ void EmptyKernel(void) {}
struct Dummy {
/// Type definition of the EmptyKernel kernel entry point
typedef void (*EmptyKernelPtr)();
- EmptyKernelPtr Empty() { return EmptyKernel<void>; }
+ EmptyKernelPtr Empty() { return EmptyKernel<void>; }
};
// CHECK: define{{.*}} amdgpu_kernel void @_Z15template_kernelI1AEvT_{{.*}} #[[ATTR:[0-9][0-9]*]]
+// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z15template_kernelI1AEvT_{{.*}} #[[ATTR:[0-9][0-9]*]]
template<class T>
__global__ void template_kernel(T x) {}
|
…cnspirv_hip_does_not_need_ocl_cc
…cnspirv_hip_does_not_need_ocl_cc
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gentle ping.
…cnspirv_hip_does_not_need_ocl_cc
…cnspirv_hip_does_not_need_ocl_cc
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Gentle ping.
…cnspirv_hip_does_not_need_ocl_cc
Adds the following patches AMDGPU: Remove wavefrontsize64 feature from dummy target llvm#117410 [LLVM][NFC] Use used's element type if available llvm#116804 [llvm][AMDGPU] Fold llvm.amdgcn.wavefrontsize early llvm#114481 [clang][Driver][HIP] Add support for mixing AMDGCNSPIRV & concrete offload-archs. llvm#113509 [clang][llvm][SPIR-V] Explicitly encode native integer widths for SPIR-V llvm#110695 [llvm][opt][Transforms] Replacement calloc should match replaced malloc llvm#110524 [clang][HIP] Don't use the OpenCLKernel CC when targeting AMDGCNSPIRV llvm#110447 [cuda][HIP] constant should imply constant llvm#110182 [llvm][SPIRV] Expose fast popcnt support for SPIR-V targets llvm#109845 [clang][CodeGen][SPIR-V] Fix incorrect SYCL usage, implement missing interface llvm#109415 [SPIRV][RFC] Rework / extend support for memory scopes llvm#106429 [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. llvm#102776 Change-Id: I2b9ab54aba1c9345b9b0eb84409e6ed6c3cdb6cd
When compiling HIP source for AMDGCN flavoured SPIR-V that is expected to be consumed by the ROCm HIP RT, it's not desirable to set the OpenCL Kernel CC on
__global__
functions. On one hand, this is not an OpenCL RT, so it doesn't compose with e.g. OCL specific attributes. On the other it is a "noisy" CC that carries semantics, and breaks overload resolution when using generic dispatchers such as those used by RAJA.