Skip to content

[clang][OpenMP][SPIR-V] Fix AS of globals and set the default AS to 4 #135251

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions clang/lib/Basic/Targets/SPIR.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,11 @@ static const unsigned SPIRDefIsPrivMap[] = {
// Used by both the SPIR and SPIR-V targets.
static const unsigned SPIRDefIsGenMap[] = {
4, // Default
// OpenCL address space values for this map are dummy and they can't be used
0, // opencl_global
// Some OpenCL address space values for this map are dummy and they can't be
// used
1, // opencl_global
0, // opencl_local
0, // opencl_constant
2, // opencl_constant
0, // opencl_private
0, // opencl_generic
0, // opencl_global_device
Expand Down Expand Up @@ -216,7 +217,8 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo {
/*DefaultIsGeneric=*/Opts.SYCLIsDevice ||
// The address mapping from HIP/CUDA language for device code is only
// defined for SPIR-V.
(getTriple().isSPIRV() && Opts.CUDAIsDevice));
(getTriple().isSPIRV() &&
(Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice)));
}

void setSupportedOpenCLOpts() override {
Expand Down
28 changes: 22 additions & 6 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2524,6 +2524,16 @@ void CGOpenMPRuntime::emitForDispatchInit(
Args);
}

llvm::Value *CGOpenMPRuntime::createRuntimeFunctionArgAddrSpaceCast(
CodeGenFunction &CGF, llvm::FunctionCallee RuntimeFcn, size_t ArgIdx,
llvm::Value *Arg) {
llvm::Type *ParamTy = RuntimeFcn.getFunctionType()->getParamType(ArgIdx);
llvm::Type *ArgTy = Arg->getType();
if (!ParamTy->isPointerTy())
return Arg;
return CGF.Builder.CreateAddrSpaceCast(Arg, ParamTy);
}

void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
SourceLocation Loc) {
if (!CGF.HaveInsertPoint())
Expand Down Expand Up @@ -2572,12 +2582,18 @@ static void emitForStaticInitCall(
ThreadId,
CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
M2)), // Schedule type
Values.IL.emitRawPointer(CGF), // &isLastIter
Values.LB.emitRawPointer(CGF), // &LB
Values.UB.emitRawPointer(CGF), // &UB
Values.ST.emitRawPointer(CGF), // &Stride
CGF.Builder.getIntN(Values.IVSize, 1), // Incr
Chunk // Chunk
CGOpenMPRuntime::createRuntimeFunctionArgAddrSpaceCast(
CGF, ForStaticInitFunction, 3,
Values.IL.emitRawPointer(CGF)), // &isLastIter
CGOpenMPRuntime::createRuntimeFunctionArgAddrSpaceCast(
CGF, ForStaticInitFunction, 4, Values.LB.emitRawPointer(CGF)), // &LB
CGOpenMPRuntime::createRuntimeFunctionArgAddrSpaceCast(
CGF, ForStaticInitFunction, 5, Values.UB.emitRawPointer(CGF)), // &UB
CGOpenMPRuntime::createRuntimeFunctionArgAddrSpaceCast(
CGF, ForStaticInitFunction, 6,
Values.ST.emitRawPointer(CGF)), // &Stride
CGF.Builder.getIntN(Values.IVSize, 1), // Incr
Chunk // Chunk
};
CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/CodeGen/CGOpenMPRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -1657,6 +1657,11 @@ class CGOpenMPRuntime {

/// Returns true if the variable is a local variable in untied task.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const;

static llvm::Value *
createRuntimeFunctionArgAddrSpaceCast(CodeGenFunction &CGF,
llvm::FunctionCallee RuntimeFcn,
size_t ArgIdx, llvm::Value *Arg);
};

/// Class supports emissionof SIMD-only code.
Expand Down
21 changes: 12 additions & 9 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1217,11 +1217,13 @@ void CGOpenMPRuntimeGPU::emitParallelCall(CodeGenFunction &CGF,
CGBuilderTy &Bld = CGF.Builder;
llvm::Value *NumThreadsVal = NumThreads;
llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];
llvm::FunctionCallee RuntimeFn = OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_parallel_51);
llvm::Value *ID = llvm::ConstantPointerNull::get(CGM.Int8PtrTy);
if (WFn)
ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, CGM.Int8PtrTy);

llvm::Value *FnPtr = Bld.CreateAddrSpaceCast(OutlinedFn, CGM.Int8PtrTy);
FnPtr = Bld.CreateBitOrPointerCast(FnPtr, CGM.Int8PtrTy);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems spurious with opaque pointers?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah this one is annoying, so we are trying to cast a fcn ptr to int8 ptr (both just opaque now as you said), but the int8 ptr type is addrspace 4, and the function is addrspace 0, so we can't cast the function to the ptr in a single cast because we need to deal with the addrspace

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well the bitcast won't do anything in IR with opaque pointers, at this point (and would also be a NOP with untyped ptrs in SPIRV), I believe, so we could just start cleaning some up. I was not suggesting that the AS cast be removed, but rather that it be made unnecessary by emitting OutlinedFn in the "right" AS from the get-go (which, in this case, might simply boil down to defining P4 in the DataLayout string?). However, that runs afoul of something that I personally find odd in the SPIR-V spec, which we might need to discuss elsewhere / offline, which is the fact that it appears that a choice was made to put pointers to function in private (https://github.com/KhronosGroup/SPIRV-LLVM-Translator/blob/ff7db6ef1c71860772079b7c26c56b3d016e4205/lib/SPIRV/SPIRVReader.cpp#L377). That seems like a recipe for great pain, and rather quaint considering where code usually ends up being stored as well as the fact that some targets play tricks with pointers to private (e.g. making them 32-bit only).

Copy link
Member Author

@sarnex sarnex Apr 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, got it, thanks. Let me investigate more, will continue to use this PR for updates. Hopefully the root cause fix works because I definitely don't want to have to add addrcasts everywhere

// Create a private scope that will globalize the arguments
// passed from the outside of the target region.
// TODO: Is that needed?
Expand Down Expand Up @@ -1268,14 +1270,15 @@ void CGOpenMPRuntimeGPU::emitParallelCall(CodeGenFunction &CGF,
IfCondVal,
NumThreadsVal,
llvm::ConstantInt::get(CGF.Int32Ty, -1),
FnPtr,
ID,
Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
CGF.VoidPtrPtrTy),
createRuntimeFunctionArgAddrSpaceCast(CGF, RuntimeFn, 5, FnPtr),
createRuntimeFunctionArgAddrSpaceCast(CGF, RuntimeFn, 6, ID),
createRuntimeFunctionArgAddrSpaceCast(
CGF, RuntimeFn, 7,
Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
CGF.VoidPtrPtrTy)),
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_parallel_51),
Args);

CGF.EmitRuntimeCall(RuntimeFn, Args);
};

RegionCodeGenTy RCG(ParallelGen);
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,8 @@ static llvm::Function *emitOutlinedFunctionPrologue(
F->removeFnAttr(llvm::Attribute::NoInline);
F->addFnAttr(llvm::Attribute::AlwaysInline);
}
if (CGM.getTriple().isSPIRV())
F->setCallingConv(llvm::CallingConv::SPIR_FUNC);

// Generate the function.
CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
Expand Down
11 changes: 11 additions & 0 deletions clang/test/CodeGenCUDASPIRV/printf.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// RUN: %clang_cc1 -fcuda-is-device -triple spirv32 -o - -emit-llvm -x cuda %s | FileCheck --check-prefix=CHECK-SPIRV32 %s
// RUN: %clang_cc1 -fcuda-is-device -triple spirv64 -o - -emit-llvm -x cuda %s | FileCheck --check-prefix=CHECK-SPIRV64 %s

// CHECK-SPIRV32: @.str = private unnamed_addr addrspace(4) constant [13 x i8] c"Hello World\0A\00", align 1
// CHECK-SPIRV64: @.str = private unnamed_addr addrspace(1) constant [13 x i8] c"Hello World\0A\00", align 1

extern "C" __attribute__((device)) int printf(const char* format, ...);

__attribute__((global)) void printf_kernel() {
printf("Hello World\n");
}
21 changes: 21 additions & 0 deletions clang/test/OpenMP/spirv_target_addrspace.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=spirv64 -emit-llvm-bc %s -o %t-host.bc
// RUN: %clang_cc1 -fopenmp -fopenmp-targets=spirv64 -fopenmp-is-target-device -triple spirv64 -fopenmp-host-ir-file-path %t-host.bc -emit-llvm %s -o - | FileCheck %s

extern int fcn(const char[]);

#pragma omp declare target
// CHECK: @global = addrspace(1) global i32 0, align 4
// CHECK: @.str = private unnamed_addr addrspace(1) constant [4 x i8] c"foo\00", align 1
int global = 0;
#pragma omp end declare target
int main() {
// CHECK: = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @__omp_offloading_{{.*}}_kernel_environment to ptr), ptr %{{.*}})
#pragma omp target
{
for(int i = 0; i < 1024; i++)
global++;
fcn("foo");
}
return global;
}

23 changes: 23 additions & 0 deletions clang/test/OpenMP/spirv_target_addrspace_simd.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=spirv64 -emit-llvm-bc %s -o %t-host.bc
// RUN: %clang_cc1 -fopenmp -fopenmp-targets=spirv64 -fopenmp-is-target-device -triple spirv64 -fopenmp-host-ir-file-path %t-host.bc -emit-llvm %s -o - | FileCheck %s

int main() {
int x = 0;

#pragma omp target teams distribute parallel for simd
for(int i = 0; i < 1024; i++)
x+=i;
return x;
}

// CHECK: @[[#STRLOC:]] = private unnamed_addr addrspace(1) constant [{{.*}} x i8] c{{.*}}, align 1
// CHECK: @[[#IDENT:]] = private unnamed_addr addrspace(1) constant %struct.ident_t { i32 {{.*}}, i32 2050, i32 {{.*}}, i32 {{.*}}, ptr addrspacecast (ptr addrspace(1) @[[#STRLOC]] to ptr) }, align 8
// CHECK: define internal spir_func void @__omp_offloading_{{.*}}_omp_outlined(ptr addrspace(4) noalias noundef {{.*}}., ptr addrspace(4) noalias noundef {{.*}}, i64 noundef {{.*}}) #{{.*}} {
// CHECK: = load ptr addrspace(4), ptr addrspace(4) %{{.*}}, align 8
// CHECK: = load i32, ptr addrspace(4) %{{.*}}, align 4
// CHECK: = addrspacecast ptr addrspace(4) %{{.*}} to ptr
// CHECK: = addrspacecast ptr addrspace(4) %{{.*}} to ptr
// CHECK: = addrspacecast ptr addrspace(4) %{{.*}} to ptr
// CHECK: = addrspacecast ptr addrspace(4) %{{.*}} to ptr
// CHECK: call spir_func void @__kmpc_distribute_static_init{{.*}}(ptr addrspacecast (ptr addrspace(1) @[[#IDENT]] to ptr), i32 %{{.*}}, i32 {{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, i32 {{.*}}, i32 %{{.*}})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks wrong. Casts from global to private are not allowed in SPIR and SPIR-V.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussing this offline

// CHECK: call spir_func void @__kmpc_distribute_static_fini{{.*}}(ptr addrspacecast (ptr addrspace(1) @[[#IDENT]] to ptr), i32 %{{.*}})
13 changes: 11 additions & 2 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,14 @@ Constant *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
ConstantInt::get(Int32, uint32_t(LocFlags)),
ConstantInt::get(Int32, Reserve2Flags),
ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};

size_t SrcLocStrArgIdx = 4;
if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
->getPointerAddressSpace() !=
IdentData[SrcLocStrArgIdx]->getType()->getPointerAddressSpace())
IdentData[SrcLocStrArgIdx] = ConstantExpr::getAddrSpaceCast(
SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));

Constant *Initializer =
ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData);

Expand Down Expand Up @@ -950,8 +958,9 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr,
GV.getInitializer() == Initializer)
return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);

SrcLocStr = Builder.CreateGlobalString(LocStr, /* Name */ "",
/* AddressSpace */ 0, &M);
SrcLocStr = Builder.CreateGlobalString(
LocStr, /* Name */ "",
M.getDataLayout().getDefaultGlobalsAddressSpace(), &M);
}
return SrcLocStr;
}
Expand Down
Loading