llvm · AlexVlx · Oct 22, 2024 · Sep 30, 2024 · Oct 6, 2024 · Oct 11, 2024
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -815,7 +815,10 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
   assert(!getLangOpts().CUDA || !getLangOpts().CUDAIsDevice ||
          getLangOpts().GPUAllowDeviceInit);
   if (getLangOpts().HIP && getLangOpts().CUDAIsDevice) {
-    Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+    if (getTriple().isSPIRV())
+      Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
+    else
+      Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
     Fn->addFnAttr("device-init");
   }
 
@@ -973,7 +976,10 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
   assert(!getLangOpts().CUDA || !getLangOpts().CUDAIsDevice ||
          getLangOpts().GPUAllowDeviceInit);
   if (getLangOpts().HIP && getLangOpts().CUDAIsDevice) {
-    Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+    if (getTriple().isSPIRV())
+      Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
+    else
+      Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
     Fn->addFnAttr("device-init");
   }
 

diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
@@ -3738,12 +3738,12 @@ static CallingConv getCCForDeclaratorChunk(
       }
     }
   } else if (S.getLangOpts().CUDA) {
-    // If we're compiling CUDA/HIP code and targeting SPIR-V we need to make
+    // If we're compiling CUDA/HIP code and targeting HIPSPV we need to make
     // sure the kernels will be marked with the right calling convention so that
-    // they will be visible by the APIs that ingest SPIR-V.
+    // they will be visible to the APIs that ingest SPIR-V. We do not do this
+    // when targeting AMDGCNSPIRV, as it does not rely on OpenCL.
     llvm::Triple Triple = S.Context.getTargetInfo().getTriple();
-    if (Triple.getArch() == llvm::Triple::spirv32 ||
-        Triple.getArch() == llvm::Triple::spirv64) {
+    if (Triple.isSPIRV() && Triple.getVendor() != llvm::Triple::AMD) {
       for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
         if (AL.getKind() == ParsedAttr::AT_CUDAGlobal) {
           CC = CC_OpenCLKernel;

diff --git a/clang/test/CodeGenCUDA/device-init-fun.cu b/clang/test/CodeGenCUDA/device-init-fun.cu
@@ -4,11 +4,17 @@
 // RUN:     -fgpu-allow-device-init -x hip \
 // RUN:     -fno-threadsafe-statics -emit-llvm -o - %s \
 // RUN:     | FileCheck %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -std=c++11 \
+// RUN:     -fgpu-allow-device-init -x hip \
+// RUN:     -fno-threadsafe-statics -emit-llvm -o - %s \
+// RUN:     | FileCheck %s --check-prefix=CHECK-SPIRV
 
 #include "Inputs/cuda.h"
 
 // CHECK: define internal amdgpu_kernel void @_GLOBAL__sub_I_device_init_fun.cu() #[[ATTR:[0-9]*]]
 // CHECK: attributes #[[ATTR]] = {{.*}}"device-init"
+// CHECK-SPIRV: define internal spir_kernel void @_GLOBAL__sub_I_device_init_fun.cu(){{.*}} #[[ATTR:[0-9]*]]
+// CHECK-SPIRV: attributes #[[ATTR]] = {{.*}}"device-init"
 
 __device__ void f();
 

diff --git a/clang/test/CodeGenCUDA/kernel-amdgcn.cu b/clang/test/CodeGenCUDA/kernel-amdgcn.cu
@@ -1,31 +1,37 @@
 // RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
 #include "Inputs/cuda.h"
 
 // CHECK: define{{.*}} amdgpu_kernel void @_ZN1A6kernelEv
+// CHECK-SPIRV: define{{.*}} spir_kernel void @_ZN1A6kernelEv
 class A {
 public:
   static __global__ void kernel(){}
 };
 
 // CHECK: define{{.*}} void @_Z10non_kernelv
+// CHECK-SPIRV: define{{.*}} void @_Z10non_kernelv
 __device__ void non_kernel(){}
 
 // CHECK: define{{.*}} amdgpu_kernel void @_Z6kerneli
+// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z6kerneli
 __global__ void kernel(int x) {
   non_kernel();
 }
 
 // CHECK: define{{.*}} amdgpu_kernel void @_Z11EmptyKernelIvEvv
+// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z11EmptyKernelIvEvv
 template <typename T>
 __global__ void EmptyKernel(void) {}
 
 struct Dummy {
   /// Type definition of the EmptyKernel kernel entry point
   typedef void (*EmptyKernelPtr)();
-  EmptyKernelPtr Empty() { return EmptyKernel<void>; } 
+  EmptyKernelPtr Empty() { return EmptyKernel<void>; }
 };
 
 // CHECK: define{{.*}} amdgpu_kernel void @_Z15template_kernelI1AEvT_{{.*}} #[[ATTR:[0-9][0-9]*]]
+// CHECK-SPIRV: define{{.*}} spir_kernel void @_Z15template_kernelI1AEvT_{{.*}} #[[ATTR:[0-9][0-9]*]]
 template<class T>
 __global__ void template_kernel(T x) {}