Skip to content

Commit c16297c

Browse files
authored
[CUDA][HIP] Fix host/device attribute of builtin (llvm#138162)
When a builtin function is passed a pointer with a different address space, clang creates an overloaded builtin function but does not copy the host/device attribute. This causes error when the builtin is called by device functions since CUDA/HIP relies on the host/device attribute to treat a builtin function as callable on both host and device sides. Fixed by copying the host/device attribute of the original builtin function to the created overloaded builtin function.
1 parent b972164 commit c16297c

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed

clang/lib/Sema/SemaExpr.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6362,6 +6362,14 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
63626362
Params.push_back(Parm);
63636363
}
63646364
OverloadDecl->setParams(Params);
6365+
// We cannot merge host/device attributes of redeclarations. They have to
6366+
// be consistent when created.
6367+
if (Sema->LangOpts.CUDA) {
6368+
if (FDecl->hasAttr<CUDAHostAttr>())
6369+
OverloadDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
6370+
if (FDecl->hasAttr<CUDADeviceAttr>())
6371+
OverloadDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
6372+
}
63656373
Sema->mergeDeclAttributes(OverloadDecl, FDecl);
63666374
return OverloadDecl;
63676375
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -fsyntax-only -verify=host -xhip %s
2+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fsyntax-only -fcuda-is-device -verify=dev -xhip %s
3+
4+
// dev-no-diagnostics
5+
6+
#include "Inputs/cuda.h"
7+
8+
__global__ void kernel() {
9+
__attribute__((address_space(0))) void *mem_ptr;
10+
(void)__builtin_amdgcn_is_shared(mem_ptr);
11+
}
12+
13+
template<typename T>
14+
__global__ void template_kernel(T *p) {
15+
__attribute__((address_space(0))) void *mem_ptr;
16+
(void)__builtin_amdgcn_is_shared(mem_ptr);
17+
}
18+
19+
void hfun() {
20+
__attribute__((address_space(0))) void *mem_ptr;
21+
(void)__builtin_amdgcn_is_shared(mem_ptr); // host-error {{reference to __device__ function '__builtin_amdgcn_is_shared' in __host__ function}}
22+
}
23+
24+
template<typename T>
25+
void template_hfun(T *p) {
26+
__attribute__((address_space(0))) void *mem_ptr;
27+
(void)__builtin_amdgcn_is_shared(mem_ptr); // host-error {{reference to __device__ function '__builtin_amdgcn_is_shared' in __host__ function}}
28+
}
29+
30+
31+
int main() {
32+
int *p;
33+
kernel<<<1,1>>>();
34+
template_kernel<<<1,1>>>(p);
35+
template_hfun(p); // host-note {{called by 'main'}}
36+
}

0 commit comments

Comments
 (0)