Skip to content

[Flang][OpenMP][MLIR] Add support for -nogpulib option #71045

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -5092,7 +5092,7 @@ def nohipwrapperinc : Flag<["-"], "nohipwrapperinc">, Group<IncludePath_Group>,
HelpText<"Do not include the default HIP wrapper headers and include paths">;
def : Flag<["-"], "nocudainc">, Alias<nogpuinc>;
def nogpulib : Flag<["-"], "nogpulib">, MarshallingInfoFlag<LangOpts<"NoGPULib">>,
Visibility<[ClangOption, CC1Option]>,
Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
HelpText<"Do not link device library for CUDA/HIP device compilation">;
def : Flag<["-"], "nocudalib">, Alias<nogpulib>;
def gpulibc : Flag<["-"], "gpulibc">, Visibility<[ClangOption, CC1Option]>,
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Driver/ToolChains/Flang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,8 @@ void Flang::addOffloadOptions(Compilation &C, const InputInfoList &Inputs,
CmdArgs.push_back("-fopenmp-assume-no-thread-state");
if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism))
CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
if (Args.hasArg(options::OPT_nogpulib))
CmdArgs.push_back("-nogpulib");
}
}

Expand Down
2 changes: 2 additions & 0 deletions flang/include/flang/Frontend/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ LANGOPT(Name, Bits, Default)

ENUM_LANGOPT(FPContractMode, FPModeKind, 2, FPM_Fast) ///< FP Contract Mode (off/fast)

/// Indicate a build without the standard GPU libraries.
LANGOPT(NoGPULib , 1, false)
/// Permit floating point optimization without regard to infinities
LANGOPT(NoHonorInfs, 1, false)
/// Permit floating point optimization without regard to NaN
Expand Down
11 changes: 7 additions & 4 deletions flang/include/flang/Tools/CrossToolHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,16 @@ struct OffloadModuleOpts {
OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription,
bool OpenMPThreadSubscription, bool OpenMPNoThreadState,
bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice,
bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {})
bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {},
bool NoGPULib = false)
: OpenMPTargetDebug(OpenMPTargetDebug),
OpenMPTeamSubscription(OpenMPTeamSubscription),
OpenMPThreadSubscription(OpenMPThreadSubscription),
OpenMPNoThreadState(OpenMPNoThreadState),
OpenMPNoNestedParallelism(OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU),
OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile) {}
OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile),
NoGPULib(NoGPULib) {}

OffloadModuleOpts(Fortran::frontend::LangOptions &Opts)
: OpenMPTargetDebug(Opts.OpenMPTargetDebug),
Expand All @@ -70,7 +72,7 @@ struct OffloadModuleOpts {
OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice),
OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion),
OMPHostIRFile(Opts.OMPHostIRFile) {}
OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(Opts.NoGPULib) {}

uint32_t OpenMPTargetDebug = 0;
bool OpenMPTeamSubscription = false;
Expand All @@ -81,6 +83,7 @@ struct OffloadModuleOpts {
bool OpenMPIsGPU = false;
uint32_t OpenMPVersion = 11;
std::string OMPHostIRFile = {};
bool NoGPULib = false;
};

// Shares assigning of the OpenMP OffloadModuleInterface and its assorted
Expand All @@ -95,7 +98,7 @@ void setOffloadModuleInterfaceAttributes(
if (Opts.OpenMPIsTargetDevice) {
offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription,
Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState,
Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion);
Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion, Opts.NoGPULib);

if (!Opts.OMPHostIRFile.empty())
offloadMod.setHostIRFilePath(Opts.OMPHostIRFile);
Expand Down
2 changes: 2 additions & 0 deletions flang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -881,6 +881,8 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
args.hasArg(clang::driver::options::OPT_fopenmp_target_debug))
res.getLangOpts().OpenMPTargetDebug = 1;
}
if (args.hasArg(clang::driver::options::OPT_nogpulib))
res.getLangOpts().NoGPULib = 1;
}

switch (llvm::Triple(res.getTargetOpts().triple).getArch()) {
Expand Down
1 change: 1 addition & 0 deletions flang/test/Driver/driver-help-hidden.f90
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
! CHECK-NEXT: --no-offload-arch=<value>
! CHECK-NEXT: Remove CUDA/HIP offloading device architecture (e.g. sm_35, gfx906) from the list of devices to compile for. 'all' resets the list to its default value.
! CHECK-NEXT: -nocpp Disable predefined and command line preprocessor macros
! CHECK-NEXT: -nogpulib Do not link device library for CUDA/HIP device compilation
! CHECK-NEXT: --offload-arch=<value> Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). If 'native' is used the compiler will detect locally installed architectures. For HIP offloading, the device architecture can be followed by target ID features delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once.
! CHECK-NEXT: --offload-device-only Only compile for the offloading device.
! CHECK-NEXT: --offload-host-device Compile for both the offloading host and device (default).
Expand Down
2 changes: 2 additions & 0 deletions flang/test/Driver/driver-help.f90
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@
! HELP-NEXT: --no-offload-arch=<value>
! HELP-NEXT: Remove CUDA/HIP offloading device architecture (e.g. sm_35, gfx906) from the list of devices to compile for. 'all' resets the list to its default value.
! HELP-NEXT: -nocpp Disable predefined and command line preprocessor macros
! HELP-NEXT: -nogpulib Do not link device library for CUDA/HIP device compilation
! HELP-NEXT: --offload-arch=<value> Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). If 'native' is used the compiler will detect locally installed architectures. For HIP offloading, the device architecture can be followed by target ID features delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once.
! HELP-NEXT: --offload-device-only Only compile for the offloading device.
! HELP-NEXT: --offload-host-device Compile for both the offloading host and device (default).
Expand Down Expand Up @@ -239,6 +240,7 @@
! HELP-FC1-NEXT: -mvscale-max=<value> Specify the vscale maximum. Defaults to the vector length agnostic value of "0". (AArch64/RISC-V only)
! HELP-FC1-NEXT: -mvscale-min=<value> Specify the vscale minimum. Defaults to "1". (AArch64/RISC-V only)
! HELP-FC1-NEXT: -nocpp Disable predefined and command line preprocessor macros
! HELP-FC1-NEXT: -nogpulib Do not link device library for CUDA/HIP device compilation
! HELP-FC1-NEXT: -opt-record-file <value>
! HELP-FC1-NEXT: File name to use for YAML optimization record output
! HELP-FC1-NEXT: -opt-record-format <value>
Expand Down
12 changes: 12 additions & 0 deletions flang/test/Lower/OpenMP/nogpulib.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
!REQUIRES: amdgpu-registered-target

!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
!RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-is-gpu -emit-hlfir -o - %s | FileCheck %s
!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp -fopenmp-is-target-device -nogpulib %s -o - | FileCheck %s -check-prefix=FLAG_SET
!RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-is-gpu -emit-hlfir -nogpulib -o - %s | FileCheck %s -check-prefix=FLAG_SET

!CHECK-NOT: module attributes {{{.*}}no_gpu_lib
!FLAG_SET: module attributes {{{.*}}no_gpu_lib = true
subroutine omp_subroutine()
end subroutine omp_subroutine

8 changes: 7 additions & 1 deletion flang/tools/bbc/bbc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,12 @@ static llvm::cl::opt<bool> setOpenMPNoNestedParallelism(
"a parallel region."),
llvm::cl::init(false));

// Boolean command-line option registered under the name "nogpulib";
// initialised to false when the flag is not given. Its value is passed as the
// trailing NoGPULib argument when the OffloadModuleOpts object is built in
// convertFortranSourceToMLIR.
static llvm::cl::opt<bool>
setNoGPULib("nogpulib",
llvm::cl::desc("Do not link device library for CUDA/HIP device "
"compilation"),
llvm::cl::init(false));

static llvm::cl::opt<bool> enableOpenACC("fopenacc",
llvm::cl::desc("enable openacc"),
llvm::cl::init(false));
Expand Down Expand Up @@ -316,7 +322,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription,
setOpenMPThreadSubscription, setOpenMPNoThreadState,
setOpenMPNoNestedParallelism, enableOpenMPDevice,
enableOpenMPGPU, setOpenMPVersion);
enableOpenMPGPU, setOpenMPVersion, "", setNoGPULib);
setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts);
setOpenMPVersionAttribute(mlirModule, setOpenMPVersion);
}
Expand Down
1 change: 1 addition & 0 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def FlagsAttr : OpenMP_Attr<"Flags", "flags"> {
DefaultValuedParameter<"bool", "false">:$assume_threads_oversubscription,
DefaultValuedParameter<"bool", "false">:$assume_no_thread_state,
DefaultValuedParameter<"bool", "false">:$assume_no_nested_parallelism,
DefaultValuedParameter<"bool", "false">:$no_gpu_lib,
DefaultValuedParameter<"uint32_t", "50">:$openmp_device_version
);

Expand Down
5 changes: 3 additions & 2 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
Original file line number Diff line number Diff line change
Expand Up @@ -198,11 +198,12 @@ def OffloadModuleInterface : OpInterface<"OffloadModuleInterface"> {
"bool":$assumeThreadsOversubscription,
"bool":$assumeNoThreadState,
"bool":$assumeNoNestedParallelism,
"uint32_t":$openmpDeviceVersion), [{}], [{
"uint32_t":$openmpDeviceVersion,
"bool":$noGPULib), [{}], [{
$_op->setAttr(("omp." + mlir::omp::FlagsAttr::getMnemonic()).str(),
mlir::omp::FlagsAttr::get($_op->getContext(), debugKind,
assumeTeamsOversubscription, assumeThreadsOversubscription,
assumeNoThreadState, assumeNoNestedParallelism, openmpDeviceVersion));
assumeNoThreadState, assumeNoNestedParallelism, noGPULib, openmpDeviceVersion));
}]>,
InterfaceMethod<
/*description=*/[{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2011,6 +2011,12 @@ LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,

llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
attribute.getOpenmpDeviceVersion());

if (attribute.getNoGpuLib())
return success();

ompBuilder->createGlobalFlag(
attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
"__omp_rtl_debug_kind");
Expand All @@ -2032,8 +2038,6 @@ LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
.getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
,
"__omp_rtl_assume_no_nested_parallelism");
ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
attribute.getOpenmpDeviceVersion());
return success();
}

Expand Down
6 changes: 6 additions & 0 deletions mlir/test/Dialect/OpenMP/attr.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true,
// CHECK: module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, assume_no_thread_state = true, openmp_device_version = 51>} {
module attributes {omp.flags = #omp.flags<assume_no_thread_state = true, assume_teams_oversubscription = true, openmp_device_version = 51>} {}

// CHECK: module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, assume_no_thread_state = true, no_gpu_lib = true, openmp_device_version = 51>} {
module attributes {omp.flags = #omp.flags<assume_no_thread_state = true, assume_teams_oversubscription = true, no_gpu_lib = true, openmp_device_version = 51>} {}

// CHECK: module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, openmp_device_version = 51>} {
module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, no_gpu_lib = false, openmp_device_version = 51>} {}

// CHECK: module attributes {omp.version = #omp.version<version = 51>} {
module attributes {omp.version = #omp.version<version = 51>} {}

Expand Down
10 changes: 10 additions & 0 deletions mlir/test/Target/LLVMIR/openmp-llvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -2530,6 +2530,16 @@ module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true,

// -----

// CHECK-NOT: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
// CHECK-NOT: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1
// CHECK-NOT: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
// CHECK-NOT: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1
// CHECK-NOT: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, assume_no_thread_state = true,
no_gpu_lib=true>} {}

// -----

module attributes {omp.is_target_device = false} {
// CHECK: define void @filter_nohost
llvm.func @filter_nohost() -> ()
Expand Down