Skip to content

Commit f443fbc

Browse files
[Flang][OpenMP][MLIR] Add support for -nogpulib option (#71045)
If the -nogpulib option is passed by the user, the OpenMP device runtime is not used, so we should not emit the globals that configure debugging at compile time for the device runtime. Link to the -nogpulib flag implementation for Clang: https://reviews.llvm.org/D125314
1 parent 8f78dd4 commit f443fbc

File tree

14 files changed

+62
-10
lines changed

14 files changed

+62
-10
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5197,7 +5197,7 @@ def nohipwrapperinc : Flag<["-"], "nohipwrapperinc">, Group<IncludePath_Group>,
51975197
HelpText<"Do not include the default HIP wrapper headers and include paths">;
51985198
def : Flag<["-"], "nocudainc">, Alias<nogpuinc>;
51995199
def nogpulib : Flag<["-"], "nogpulib">, MarshallingInfoFlag<LangOpts<"NoGPULib">>,
5200-
Visibility<[ClangOption, CC1Option]>,
5200+
Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
52015201
HelpText<"Do not link device library for CUDA/HIP device compilation">;
52025202
def : Flag<["-"], "nocudalib">, Alias<nogpulib>;
52035203
def gpulibc : Flag<["-"], "gpulibc">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,

clang/lib/Driver/ToolChains/Flang.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,8 @@ void Flang::addOffloadOptions(Compilation &C, const InputInfoList &Inputs,
428428
CmdArgs.push_back("-fopenmp-assume-no-thread-state");
429429
if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism))
430430
CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
431+
if (Args.hasArg(options::OPT_nogpulib))
432+
CmdArgs.push_back("-nogpulib");
431433
}
432434
}
433435

flang/include/flang/Frontend/LangOptions.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ LANGOPT(Name, Bits, Default)
2121

2222
ENUM_LANGOPT(FPContractMode, FPModeKind, 2, FPM_Fast) ///< FP Contract Mode (off/fast)
2323

24+
/// Indicate a build without the standard GPU libraries.
25+
LANGOPT(NoGPULib , 1, false)
2426
/// Permit floating point optimization without regard to infinities
2527
LANGOPT(NoHonorInfs, 1, false)
2628
/// Permit floating point optimization without regard to NaN

flang/include/flang/Tools/CrossToolHelpers.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,16 @@ struct OffloadModuleOpts {
5656
OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription,
5757
bool OpenMPThreadSubscription, bool OpenMPNoThreadState,
5858
bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice,
59-
bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {})
59+
bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {},
60+
bool NoGPULib = false)
6061
: OpenMPTargetDebug(OpenMPTargetDebug),
6162
OpenMPTeamSubscription(OpenMPTeamSubscription),
6263
OpenMPThreadSubscription(OpenMPThreadSubscription),
6364
OpenMPNoThreadState(OpenMPNoThreadState),
6465
OpenMPNoNestedParallelism(OpenMPNoNestedParallelism),
6566
OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU),
66-
OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile) {}
67+
OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile),
68+
NoGPULib(NoGPULib) {}
6769

6870
OffloadModuleOpts(Fortran::frontend::LangOptions &Opts)
6971
: OpenMPTargetDebug(Opts.OpenMPTargetDebug),
@@ -73,7 +75,7 @@ struct OffloadModuleOpts {
7375
OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism),
7476
OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice),
7577
OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion),
76-
OMPHostIRFile(Opts.OMPHostIRFile) {}
78+
OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(Opts.NoGPULib) {}
7779

7880
uint32_t OpenMPTargetDebug = 0;
7981
bool OpenMPTeamSubscription = false;
@@ -84,6 +86,7 @@ struct OffloadModuleOpts {
8486
bool OpenMPIsGPU = false;
8587
uint32_t OpenMPVersion = 11;
8688
std::string OMPHostIRFile = {};
89+
bool NoGPULib = false;
8790
};
8891

8992
// Shares assigning of the OpenMP OffloadModuleInterface and its assorted
@@ -98,7 +101,7 @@ void setOffloadModuleInterfaceAttributes(
98101
if (Opts.OpenMPIsTargetDevice) {
99102
offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription,
100103
Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState,
101-
Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion);
104+
Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion, Opts.NoGPULib);
102105

103106
if (!Opts.OMPHostIRFile.empty())
104107
offloadMod.setHostIRFilePath(Opts.OMPHostIRFile);

flang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,8 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
935935
args.hasArg(clang::driver::options::OPT_fopenmp_target_debug))
936936
res.getLangOpts().OpenMPTargetDebug = 1;
937937
}
938+
if (args.hasArg(clang::driver::options::OPT_nogpulib))
939+
res.getLangOpts().NoGPULib = 1;
938940
}
939941

940942
switch (llvm::Triple(res.getTargetOpts().triple).getArch()) {

flang/test/Driver/driver-help-hidden.f90

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@
127127
! CHECK-NEXT: --no-offload-arch=<value>
128128
! CHECK-NEXT: Remove CUDA/HIP offloading device architecture (e.g. sm_35, gfx906) from the list of devices to compile for. 'all' resets the list to its default value.
129129
! CHECK-NEXT: -nocpp Disable predefined and command line preprocessor macros
130+
! CHECK-NEXT: -nogpulib Do not link device library for CUDA/HIP device compilation
130131
! CHECK-NEXT: --offload-arch=<value> Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). If 'native' is used the compiler will detect locally installed architectures. For HIP offloading, the device architecture can be followed by target ID features delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once.
131132
! CHECK-NEXT: --offload-device-only Only compile for the offloading device.
132133
! CHECK-NEXT: --offload-host-device Compile for both the offloading host and device (default).

flang/test/Driver/driver-help.f90

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@
113113
! HELP-NEXT: --no-offload-arch=<value>
114114
! HELP-NEXT: Remove CUDA/HIP offloading device architecture (e.g. sm_35, gfx906) from the list of devices to compile for. 'all' resets the list to its default value.
115115
! HELP-NEXT: -nocpp Disable predefined and command line preprocessor macros
116+
! HELP-NEXT: -nogpulib Do not link device library for CUDA/HIP device compilation
116117
! HELP-NEXT: --offload-arch=<value> Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). If 'native' is used the compiler will detect locally installed architectures. For HIP offloading, the device architecture can be followed by target ID features delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once.
117118
! HELP-NEXT: --offload-device-only Only compile for the offloading device.
118119
! HELP-NEXT: --offload-host-device Compile for both the offloading host and device (default).
@@ -249,6 +250,7 @@
249250
! HELP-FC1-NEXT: -mvscale-max=<value> Specify the vscale maximum. Defaults to the vector length agnostic value of "0". (AArch64/RISC-V only)
250251
! HELP-FC1-NEXT: -mvscale-min=<value> Specify the vscale minimum. Defaults to "1". (AArch64/RISC-V only)
251252
! HELP-FC1-NEXT: -nocpp Disable predefined and command line preprocessor macros
253+
! HELP-FC1-NEXT: -nogpulib Do not link device library for CUDA/HIP device compilation
252254
! HELP-FC1-NEXT: -opt-record-file <value>
253255
! HELP-FC1-NEXT: File name to use for YAML optimization record output
254256
! HELP-FC1-NEXT: -opt-record-format <value>

flang/test/Lower/OpenMP/nogpulib.f90

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
!REQUIRES: amdgpu-registered-target
2+
3+
!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
4+
!RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-is-gpu -emit-hlfir -o - %s | FileCheck %s
5+
!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp -fopenmp-is-target-device -nogpulib %s -o - | FileCheck %s -check-prefix=FLAG_SET
6+
!RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-is-gpu -emit-hlfir -nogpulib -o - %s | FileCheck %s -check-prefix=FLAG_SET
7+
8+
!CHECK-NOT: module attributes {{{.*}}no_gpu_lib
9+
!FLAG_SET: module attributes {{{.*}}no_gpu_lib = true
10+
subroutine omp_subroutine()
11+
end subroutine omp_subroutine
12+

flang/tools/bbc/bbc.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,12 @@ static llvm::cl::opt<bool> setOpenMPNoNestedParallelism(
181181
"a parallel region."),
182182
llvm::cl::init(false));
183183

184+
static llvm::cl::opt<bool>
185+
setNoGPULib("nogpulib",
186+
llvm::cl::desc("Do not link device library for CUDA/HIP device "
187+
"compilation"),
188+
llvm::cl::init(false));
189+
184190
static llvm::cl::opt<bool> enableOpenACC("fopenacc",
185191
llvm::cl::desc("enable openacc"),
186192
llvm::cl::init(false));
@@ -349,7 +355,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
349355
OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription,
350356
setOpenMPThreadSubscription, setOpenMPNoThreadState,
351357
setOpenMPNoNestedParallelism, enableOpenMPDevice,
352-
enableOpenMPGPU, setOpenMPVersion);
358+
enableOpenMPGPU, setOpenMPVersion, "", setNoGPULib);
353359
setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts);
354360
setOpenMPVersionAttribute(mlirModule, setOpenMPVersion);
355361
}

mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def FlagsAttr : OpenMP_Attr<"Flags", "flags"> {
6565
DefaultValuedParameter<"bool", "false">:$assume_threads_oversubscription,
6666
DefaultValuedParameter<"bool", "false">:$assume_no_thread_state,
6767
DefaultValuedParameter<"bool", "false">:$assume_no_nested_parallelism,
68+
DefaultValuedParameter<"bool", "false">:$no_gpu_lib,
6869
DefaultValuedParameter<"uint32_t", "50">:$openmp_device_version
6970
);
7071

mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,11 +198,12 @@ def OffloadModuleInterface : OpInterface<"OffloadModuleInterface"> {
198198
"bool":$assumeThreadsOversubscription,
199199
"bool":$assumeNoThreadState,
200200
"bool":$assumeNoNestedParallelism,
201-
"uint32_t":$openmpDeviceVersion), [{}], [{
201+
"uint32_t":$openmpDeviceVersion,
202+
"bool":$noGPULib), [{}], [{
202203
$_op->setAttr(("omp." + mlir::omp::FlagsAttr::getMnemonic()).str(),
203204
mlir::omp::FlagsAttr::get($_op->getContext(), debugKind,
204205
assumeTeamsOversubscription, assumeThreadsOversubscription,
205-
assumeNoThreadState, assumeNoNestedParallelism, openmpDeviceVersion));
206+
assumeNoThreadState, assumeNoNestedParallelism, noGPULib, openmpDeviceVersion));
206207
}]>,
207208
InterfaceMethod<
208209
/*description=*/[{

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2035,6 +2035,12 @@ LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
20352035

20362036
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
20372037

2038+
ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
2039+
attribute.getOpenmpDeviceVersion());
2040+
2041+
if (attribute.getNoGpuLib())
2042+
return success();
2043+
20382044
ompBuilder->createGlobalFlag(
20392045
attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
20402046
"__omp_rtl_debug_kind");
@@ -2056,8 +2062,6 @@ LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
20562062
.getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
20572063
,
20582064
"__omp_rtl_assume_no_nested_parallelism");
2059-
ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
2060-
attribute.getOpenmpDeviceVersion());
20612065
return success();
20622066
}
20632067

mlir/test/Dialect/OpenMP/attr.mlir

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@ module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true,
5454
// CHECK: module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, assume_no_thread_state = true, openmp_device_version = 51>} {
5555
module attributes {omp.flags = #omp.flags<assume_no_thread_state = true, assume_teams_oversubscription = true, openmp_device_version = 51>} {}
5656

57+
// CHECK: module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, assume_no_thread_state = true, no_gpu_lib = true, openmp_device_version = 51>} {
58+
module attributes {omp.flags = #omp.flags<assume_no_thread_state = true, assume_teams_oversubscription = true, no_gpu_lib = true, openmp_device_version = 51>} {}
59+
60+
// CHECK: module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, openmp_device_version = 51>} {
61+
module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, no_gpu_lib = false, openmp_device_version = 51>} {}
62+
5763
// CHECK: module attributes {omp.version = #omp.version<version = 51>} {
5864
module attributes {omp.version = #omp.version<version = 51>} {}
5965

mlir/test/Target/LLVMIR/openmp-llvm.mlir

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2530,6 +2530,16 @@ module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true,
25302530

25312531
// -----
25322532

2533+
// CHECK-NOT: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
2534+
// CHECK-NOT: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1
2535+
// CHECK-NOT: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
2536+
// CHECK-NOT: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1
2537+
// CHECK-NOT: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
2538+
module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, assume_no_thread_state = true,
2539+
no_gpu_lib=true>} {}
2540+
2541+
// -----
2542+
25332543
module attributes {omp.is_target_device = false} {
25342544
// CHECK: define void @filter_nohost
25352545
llvm.func @filter_nohost() -> ()

0 commit comments

Comments
 (0)