Skip to content

[HIP] diagnose -mwavefrontsize64 for gfx10+ #140185

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,9 @@ def err_drv_opt_unsupported_input_type : Error<
"'%0' invalid for input of type %1">;
def err_drv_amdgpu_ieee_without_no_honor_nans : Error<
"invalid argument '-mno-amdgpu-ieee' only allowed with relaxed NaN handling">;
def err_drv_wave64_requires_force_unsafe : Error<
"'-mwavefrontsize64' is not fully supported for HIP on AMD GPU architecture "
"gfx10 and above. Use '-mforce-unsafe-wavefrontsize64' to override">;
def err_drv_argument_not_allowed_with : Error<
"invalid argument '%0' not allowed with '%1'">;
def err_drv_cannot_open_randomize_layout_seed_file : Error<
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -5257,6 +5257,8 @@ defm tgsplit : SimpleMFlag<"tgsplit", "Enable", "Disable",
defm wavefrontsize64 : SimpleMFlag<"wavefrontsize64",
"Specify wavefront size 64", "Specify wavefront size 32",
" mode (AMDGPU only)">;
defm force_unsafe_wavefrontsize64 : SimpleMFlag<"force-unsafe-wavefrontsize64",
"Force", "Do not force", " wavefront size 64 for gfx10+ GPUs (AMDGPU only)">;
defm amdgpu_precise_memory_op
: SimpleMFlag<"amdgpu-precise-memory-op", "Enable", "Disable",
" precise memory mode (AMDGPU only)">;
Expand Down
36 changes: 25 additions & 11 deletions clang/lib/Driver/ToolChains/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,7 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
// Add target ID features to -target-feature options. No diagnostics should
// be emitted here since invalid target ID is diagnosed at other places.
StringRef TargetID;
StringRef GpuArch;
if (Args.hasArg(options::OPT_mcpu_EQ))
TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
else if (Args.hasArg(options::OPT_march_EQ))
Expand All @@ -614,7 +615,7 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
llvm::StringMap<bool> FeatureMap;
auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap);
if (OptionalGpuArch) {
StringRef GpuArch = *OptionalGpuArch;
GpuArch = *OptionalGpuArch;
// Iterate through all possible target ID features for the given GPU.
// If it is mapped to true, add +feature.
// If it is mapped to false, add -feature.
Expand All @@ -628,9 +629,10 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
}
}
}

if (Args.hasFlag(options::OPT_mwavefrontsize64,
options::OPT_mno_wavefrontsize64, false))
if (toolchains::AMDGPUToolChain::isWave64(
D, Args, llvm::AMDGPU::parseArchAMDGCN(GpuArch),
/*DiagInvalid=*/false,
/*Explicit=*/true))
Features.push_back("+wavefrontsize64");

if (Args.hasFlag(options::OPT_mamdgpu_precise_memory_op,
Expand Down Expand Up @@ -767,15 +769,27 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
llvm::DenormalMode::getIEEE();
}

bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
llvm::AMDGPU::GPUKind Kind) {
bool AMDGPUToolChain::isWave64(const Driver &D,
const llvm::opt::ArgList &DriverArgs,
llvm::AMDGPU::GPUKind Kind, bool DiagInvalid,
bool Explicit) {
const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);

return !HasWave32 || DriverArgs.hasFlag(
options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
}
bool EnableWave64 = DriverArgs.hasFlag(
options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
bool ForceUnsafeWave64 =
DriverArgs.hasFlag(options::OPT_mforce_unsafe_wavefrontsize64,
options::OPT_mno_force_unsafe_wavefrontsize64, false);

if (DiagInvalid && HasWave32 && EnableWave64 && !ForceUnsafeWave64)
D.Diag(diag::err_drv_wave64_requires_force_unsafe);

if (Explicit)
return EnableWave64 || ForceUnsafeWave64;

return !HasWave32 || EnableWave64 || ForceUnsafeWave64;
}

/// ROCM Toolchain
ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
Expand Down Expand Up @@ -884,7 +898,7 @@ void ROCMToolChain::addClangTargetOptions(
ABIVer))
return;

bool Wave64 = isWave64(DriverArgs, Kind);
bool Wave64 = isWave64(getDriver(), DriverArgs, Kind);
// TODO: There are way too many flags that change this. Do we need to check
// them all?
bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
Expand Down Expand Up @@ -1012,7 +1026,7 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
bool CorrectSqrt = DriverArgs.hasFlag(
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true);
bool Wave64 = isWave64(DriverArgs, Kind);
bool Wave64 = isWave64(getDriver(), DriverArgs, Kind);

// GPU Sanitizer currently only supports ASan and is enabled through host
// ASan.
Expand Down
9 changes: 7 additions & 2 deletions clang/lib/Driver/ToolChains/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,13 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF {
const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
const llvm::fltSemantics *FPType = nullptr) const override;

static bool isWave64(const llvm::opt::ArgList &DriverArgs,
llvm::AMDGPU::GPUKind Kind);
/// Return whether the GPU of kind \p Kind has wavefront size 64 by driver
/// arguments \p DriverArgs. When \p Explicit is true, only return true
/// if wavefront size is set to 64 by arguments explicitly. When
/// \p DiagInvalid is true, diagnose invalid -mwavefrontsize64 arguments.
static bool isWave64(const Driver &D, const llvm::opt::ArgList &DriverArgs,
llvm::AMDGPU::GPUKind Kind, bool DiagInvalid = false,
bool Explicit = false);
/// Needed for using lto.
bool HasNativeLLVMSupport() const override {
return true;
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Driver/ToolChains/HIPAMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,9 @@ void HIPAMDToolChain::addClangTargetOptions(

assert(DeviceOffloadingKind == Action::OFK_HIP &&
"Only HIP offloading kinds are supported for GPUs.");
const StringRef GpuArch = getGPUArch(DriverArgs);
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
(void)isWave64(getDriver(), DriverArgs, Kind, /*DiagInvalid=*/true);

CC1Args.append({"-fcuda-is-device", "-fno-threadsafe-statics"});

Expand Down
4 changes: 2 additions & 2 deletions clang/test/Driver/hip-macros.hip
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \
// RUN: --cuda-device-only -nogpuinc -nogpulib \
// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s
// RUN: %clang -E -dM --offload-arch=gfx1010 -mwavefrontsize64 \
// RUN: %clang -E -dM --offload-arch=gfx1010 -mforce-unsafe-wavefrontsize64 \
// RUN: --cuda-device-only -nogpuinc -nogpulib \
// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s
// RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \
Expand All @@ -16,7 +16,7 @@
// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s
// RUN: %clang -E -dM --offload-arch=gfx1010 -mno-wavefrontsize64 \
// RUN: --cuda-device-only -nogpuinc -nogpulib \
// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s
// RUN: -mforce-unsafe-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s
// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE__ 64
// WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE__ 32
// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64
Expand Down
8 changes: 7 additions & 1 deletion clang/test/Driver/hip-toolchain-features.hip
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,15 @@
// DUP-NOT: "-target-feature" "{{.*}}wavefrontsize64"
// DUP: {{.*}}lld{{.*}} "-plugin-opt=-mattr=+cumode"

// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
// RUN: -nogpuinc --offload-arch=gfx1010 --no-offload-new-driver %s \
// RUN: -mno-wavefrontsize64 -mwavefrontsize64 2>&1 \
// RUN: | FileCheck %s -check-prefix=WAVE64-ERR
// WAVE64-ERR: error: '-mwavefrontsize64' is not fully supported for HIP on AMD GPU architecture gfx10 and above. Use '-mforce-unsafe-wavefrontsize64' to override

// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
// RUN: -nogpuinc --offload-arch=gfx1010 --no-offload-new-driver %s \
// RUN: -mno-wavefrontsize64 -mforce-unsafe-wavefrontsize64 2>&1 \
// RUN: | FileCheck %s -check-prefix=WAVE64
// WAVE64: {{.*}}clang{{.*}} "-target-feature" "+wavefrontsize64"
// WAVE64-NOT: "-target-feature" "{{.*}}wavefrontsize16"
Expand Down
Loading