Skip to content

Commit 0a6f6df

Browse files
authored
[clang] Introduce SemaCUDA (#88559)
This patch moves CUDA-related `Sema` function into new `SemaCUDA` class, following the recent example of SYCL, OpenACC, and HLSL. This is a part of the effort to split Sema. Additional context can be found in #82217, #84184, #87634.
1 parent 94b3c19 commit 0a6f6df

24 files changed

+604
-539
lines changed

clang/include/clang/Basic/Cuda.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,14 @@ enum class CudaArch {
126126
HIPDefault = CudaArch::GFX906,
127127
};
128128

129+
enum class CUDAFunctionTarget {
130+
Device,
131+
Global,
132+
Host,
133+
HostDevice,
134+
InvalidTarget
135+
};
136+
129137
static inline bool IsNVIDIAGpuArch(CudaArch A) {
130138
return A >= CudaArch::SM_20 && A < CudaArch::GFX600;
131139
}

clang/include/clang/Sema/Sema.h

Lines changed: 14 additions & 290 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "clang/AST/TypeOrdering.h"
3939
#include "clang/Basic/BitmaskEnum.h"
4040
#include "clang/Basic/Builtins.h"
41+
#include "clang/Basic/Cuda.h"
4142
#include "clang/Basic/DarwinSDKInfo.h"
4243
#include "clang/Basic/ExpressionTraits.h"
4344
#include "clang/Basic/Module.h"
@@ -183,6 +184,7 @@ class Preprocessor;
183184
class PseudoDestructorTypeStorage;
184185
class PseudoObjectExpr;
185186
class QualType;
187+
class SemaCUDA;
186188
class SemaHLSL;
187189
class SemaOpenACC;
188190
class SemaSYCL;
@@ -435,14 +437,6 @@ enum class CXXSpecialMemberKind {
435437
Invalid
436438
};
437439

438-
enum class CUDAFunctionTarget {
439-
Device,
440-
Global,
441-
Host,
442-
HostDevice,
443-
InvalidTarget
444-
};
445-
446440
/// Sema - This implements semantic analysis and AST building for C.
447441
/// \nosubgrouping
448442
class Sema final : public SemaBase {
@@ -486,8 +480,7 @@ class Sema final : public SemaBase {
486480
// 35. Code Completion (SemaCodeComplete.cpp)
487481
// 36. FixIt Helpers (SemaFixItUtils.cpp)
488482
// 37. Name Lookup for RISC-V Vector Intrinsic (SemaRISCVVectorLookup.cpp)
489-
// 38. CUDA (SemaCUDA.cpp)
490-
// 39. OpenMP Directives and Clauses (SemaOpenMP.cpp)
483+
// 38. OpenMP Directives and Clauses (SemaOpenMP.cpp)
491484

492485
/// \name Semantic Analysis
493486
/// Implementations are in Sema.cpp
@@ -981,9 +974,19 @@ class Sema final : public SemaBase {
981974
return DelayedDiagnostics.push(pool);
982975
}
983976

977+
/// Diagnostics that are emitted only if we discover that the given function
978+
/// must be codegen'ed. Because handling these correctly adds overhead to
979+
/// compilation, this is currently only enabled for CUDA compilations.
980+
SemaDiagnosticBuilder::DeferredDiagnosticsType DeviceDeferredDiags;
981+
984982
/// CurContext - This is the current declaration context of parsing.
985983
DeclContext *CurContext;
986984

985+
SemaCUDA &CUDA() {
986+
assert(CUDAPtr);
987+
return *CUDAPtr;
988+
}
989+
987990
SemaHLSL &HLSL() {
988991
assert(HLSLPtr);
989992
return *HLSLPtr;
@@ -1029,6 +1032,7 @@ class Sema final : public SemaBase {
10291032

10301033
mutable IdentifierInfo *Ident_super;
10311034

1035+
std::unique_ptr<SemaCUDA> CUDAPtr;
10321036
std::unique_ptr<SemaHLSL> HLSLPtr;
10331037
std::unique_ptr<SemaOpenACC> OpenACCPtr;
10341038
std::unique_ptr<SemaSYCL> SYCLPtr;
@@ -12908,258 +12912,6 @@ class Sema final : public SemaBase {
1290812912
//
1290912913
//
1291012914

12911-
/// \name CUDA
12912-
/// Implementations are in SemaCUDA.cpp
12913-
///@{
12914-
12915-
public:
12916-
/// Increments our count of the number of times we've seen a pragma forcing
12917-
/// functions to be __host__ __device__. So long as this count is greater
12918-
/// than zero, all functions encountered will be __host__ __device__.
12919-
void PushForceCUDAHostDevice();
12920-
12921-
/// Decrements our count of the number of times we've seen a pragma forcing
12922-
/// functions to be __host__ __device__. Returns false if the count is 0
12923-
/// before incrementing, so you can emit an error.
12924-
bool PopForceCUDAHostDevice();
12925-
12926-
ExprResult ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
12927-
MultiExprArg ExecConfig,
12928-
SourceLocation GGGLoc);
12929-
12930-
/// Diagnostics that are emitted only if we discover that the given function
12931-
/// must be codegen'ed. Because handling these correctly adds overhead to
12932-
/// compilation, this is currently only enabled for CUDA compilations.
12933-
SemaDiagnosticBuilder::DeferredDiagnosticsType DeviceDeferredDiags;
12934-
12935-
/// A pair of a canonical FunctionDecl and a SourceLocation. When used as the
12936-
/// key in a hashtable, both the FD and location are hashed.
12937-
struct FunctionDeclAndLoc {
12938-
CanonicalDeclPtr<const FunctionDecl> FD;
12939-
SourceLocation Loc;
12940-
};
12941-
12942-
/// FunctionDecls and SourceLocations for which CheckCUDACall has emitted a
12943-
/// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the
12944-
/// same deferred diag twice.
12945-
llvm::DenseSet<FunctionDeclAndLoc> LocsWithCUDACallDiags;
12946-
12947-
/// An inverse call graph, mapping known-emitted functions to one of their
12948-
/// known-emitted callers (plus the location of the call).
12949-
///
12950-
/// Functions that we can tell a priori must be emitted aren't added to this
12951-
/// map.
12952-
llvm::DenseMap</* Callee = */ CanonicalDeclPtr<const FunctionDecl>,
12953-
/* Caller = */ FunctionDeclAndLoc>
12954-
DeviceKnownEmittedFns;
12955-
12956-
/// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
12957-
/// context is "used as device code".
12958-
///
12959-
/// - If CurContext is a __host__ function, does not emit any diagnostics
12960-
/// unless \p EmitOnBothSides is true.
12961-
/// - If CurContext is a __device__ or __global__ function, emits the
12962-
/// diagnostics immediately.
12963-
/// - If CurContext is a __host__ __device__ function and we are compiling for
12964-
/// the device, creates a diagnostic which is emitted if and when we realize
12965-
/// that the function will be codegen'ed.
12966-
///
12967-
/// Example usage:
12968-
///
12969-
/// // Variable-length arrays are not allowed in CUDA device code.
12970-
/// if (CUDADiagIfDeviceCode(Loc, diag::err_cuda_vla)
12971-
/// << llvm::to_underlying(CurrentCUDATarget()))
12972-
/// return ExprError();
12973-
/// // Otherwise, continue parsing as normal.
12974-
SemaDiagnosticBuilder CUDADiagIfDeviceCode(SourceLocation Loc,
12975-
unsigned DiagID);
12976-
12977-
/// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
12978-
/// context is "used as host code".
12979-
///
12980-
/// Same as CUDADiagIfDeviceCode, with "host" and "device" switched.
12981-
SemaDiagnosticBuilder CUDADiagIfHostCode(SourceLocation Loc, unsigned DiagID);
12982-
12983-
/// Determines whether the given function is a CUDA device/host/kernel/etc.
12984-
/// function.
12985-
///
12986-
/// Use this rather than examining the function's attributes yourself -- you
12987-
/// will get it wrong. Returns CUDAFunctionTarget::Host if D is null.
12988-
CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D,
12989-
bool IgnoreImplicitHDAttr = false);
12990-
CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs);
12991-
12992-
enum CUDAVariableTarget {
12993-
CVT_Device, /// Emitted on device side with a shadow variable on host side
12994-
CVT_Host, /// Emitted on host side only
12995-
CVT_Both, /// Emitted on both sides with different addresses
12996-
CVT_Unified, /// Emitted as a unified address, e.g. managed variables
12997-
};
12998-
/// Determines whether the given variable is emitted on host or device side.
12999-
CUDAVariableTarget IdentifyCUDATarget(const VarDecl *D);
13000-
13001-
/// Defines kinds of CUDA global host/device context where a function may be
13002-
/// called.
13003-
enum CUDATargetContextKind {
13004-
CTCK_Unknown, /// Unknown context
13005-
CTCK_InitGlobalVar, /// Function called during global variable
13006-
/// initialization
13007-
};
13008-
13009-
/// Define the current global CUDA host/device context where a function may be
13010-
/// called. Only used when a function is called outside of any functions.
13011-
struct CUDATargetContext {
13012-
CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
13013-
CUDATargetContextKind Kind = CTCK_Unknown;
13014-
Decl *D = nullptr;
13015-
} CurCUDATargetCtx;
13016-
13017-
struct CUDATargetContextRAII {
13018-
Sema &S;
13019-
CUDATargetContext SavedCtx;
13020-
CUDATargetContextRAII(Sema &S_, CUDATargetContextKind K, Decl *D);
13021-
~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; }
13022-
};
13023-
13024-
/// Gets the CUDA target for the current context.
13025-
CUDAFunctionTarget CurrentCUDATarget() {
13026-
return IdentifyCUDATarget(dyn_cast<FunctionDecl>(CurContext));
13027-
}
13028-
13029-
static bool isCUDAImplicitHostDeviceFunction(const FunctionDecl *D);
13030-
13031-
// CUDA function call preference. Must be ordered numerically from
13032-
// worst to best.
13033-
enum CUDAFunctionPreference {
13034-
CFP_Never, // Invalid caller/callee combination.
13035-
CFP_WrongSide, // Calls from host-device to host or device
13036-
// function that do not match current compilation
13037-
// mode.
13038-
CFP_HostDevice, // Any calls to host/device functions.
13039-
CFP_SameSide, // Calls from host-device to host or device
13040-
// function matching current compilation mode.
13041-
CFP_Native, // host-to-host or device-to-device calls.
13042-
};
13043-
13044-
/// Identifies relative preference of a given Caller/Callee
13045-
/// combination, based on their host/device attributes.
13046-
/// \param Caller function which needs address of \p Callee.
13047-
/// nullptr in case of global context.
13048-
/// \param Callee target function
13049-
///
13050-
/// \returns preference value for particular Caller/Callee combination.
13051-
CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller,
13052-
const FunctionDecl *Callee);
13053-
13054-
/// Determines whether Caller may invoke Callee, based on their CUDA
13055-
/// host/device attributes. Returns false if the call is not allowed.
13056-
///
13057-
/// Note: Will return true for CFP_WrongSide calls. These may appear in
13058-
/// semantically correct CUDA programs, but only if they're never codegen'ed.
13059-
bool IsAllowedCUDACall(const FunctionDecl *Caller,
13060-
const FunctionDecl *Callee) {
13061-
return IdentifyCUDAPreference(Caller, Callee) != CFP_Never;
13062-
}
13063-
13064-
/// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD,
13065-
/// depending on FD and the current compilation settings.
13066-
void maybeAddCUDAHostDeviceAttrs(FunctionDecl *FD,
13067-
const LookupResult &Previous);
13068-
13069-
/// May add implicit CUDAConstantAttr attribute to VD, depending on VD
13070-
/// and current compilation settings.
13071-
void MaybeAddCUDAConstantAttr(VarDecl *VD);
13072-
13073-
/// Check whether we're allowed to call Callee from the current context.
13074-
///
13075-
/// - If the call is never allowed in a semantically-correct program
13076-
/// (CFP_Never), emits an error and returns false.
13077-
///
13078-
/// - If the call is allowed in semantically-correct programs, but only if
13079-
/// it's never codegen'ed (CFP_WrongSide), creates a deferred diagnostic to
13080-
/// be emitted if and when the caller is codegen'ed, and returns true.
13081-
///
13082-
/// Will only create deferred diagnostics for a given SourceLocation once,
13083-
/// so you can safely call this multiple times without generating duplicate
13084-
/// deferred errors.
13085-
///
13086-
/// - Otherwise, returns true without emitting any diagnostics.
13087-
bool CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee);
13088-
13089-
void CUDACheckLambdaCapture(CXXMethodDecl *D, const sema::Capture &Capture);
13090-
13091-
/// Set __device__ or __host__ __device__ attributes on the given lambda
13092-
/// operator() method.
13093-
///
13094-
/// CUDA lambdas by default is host device function unless it has explicit
13095-
/// host or device attribute.
13096-
void CUDASetLambdaAttrs(CXXMethodDecl *Method);
13097-
13098-
/// Record \p FD if it is a CUDA/HIP implicit host device function used on
13099-
/// device side in device compilation.
13100-
void CUDARecordImplicitHostDeviceFuncUsedByDevice(const FunctionDecl *FD);
13101-
13102-
/// Finds a function in \p Matches with highest calling priority
13103-
/// from \p Caller context and erases all functions with lower
13104-
/// calling priority.
13105-
void EraseUnwantedCUDAMatches(
13106-
const FunctionDecl *Caller,
13107-
SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches);
13108-
13109-
/// Given a implicit special member, infer its CUDA target from the
13110-
/// calls it needs to make to underlying base/field special members.
13111-
/// \param ClassDecl the class for which the member is being created.
13112-
/// \param CSM the kind of special member.
13113-
/// \param MemberDecl the special member itself.
13114-
/// \param ConstRHS true if this is a copy operation with a const object on
13115-
/// its RHS.
13116-
/// \param Diagnose true if this call should emit diagnostics.
13117-
/// \return true if there was an error inferring.
13118-
/// The result of this call is implicit CUDA target attribute(s) attached to
13119-
/// the member declaration.
13120-
bool inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
13121-
CXXSpecialMemberKind CSM,
13122-
CXXMethodDecl *MemberDecl,
13123-
bool ConstRHS, bool Diagnose);
13124-
13125-
/// \return true if \p CD can be considered empty according to CUDA
13126-
/// (E.2.3.1 in CUDA 7.5 Programming guide).
13127-
bool isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD);
13128-
bool isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *CD);
13129-
13130-
// \brief Checks that initializers of \p Var satisfy CUDA restrictions. In
13131-
// case of error emits appropriate diagnostic and invalidates \p Var.
13132-
//
13133-
// \details CUDA allows only empty constructors as initializers for global
13134-
// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
13135-
// __shared__ variables whether they are local or not (they all are implicitly
13136-
// static in CUDA). One exception is that CUDA allows constant initializers
13137-
// for __constant__ and __device__ variables.
13138-
void checkAllowedCUDAInitializer(VarDecl *VD);
13139-
13140-
/// Check whether NewFD is a valid overload for CUDA. Emits
13141-
/// diagnostics and invalidates NewFD if not.
13142-
void checkCUDATargetOverload(FunctionDecl *NewFD,
13143-
const LookupResult &Previous);
13144-
/// Copies target attributes from the template TD to the function FD.
13145-
void inheritCUDATargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD);
13146-
13147-
/// Returns the name of the launch configuration function. This is the name
13148-
/// of the function that will be called to configure kernel call, with the
13149-
/// parameters specified via <<<>>>.
13150-
std::string getCudaConfigureFuncName() const;
13151-
13152-
private:
13153-
unsigned ForceCUDAHostDeviceDepth = 0;
13154-
13155-
///@}
13156-
13157-
//
13158-
//
13159-
// -------------------------------------------------------------------------
13160-
//
13161-
//
13162-
1316312915
/// \name OpenMP Directives and Clauses
1316412916
/// Implementations are in SemaOpenMP.cpp
1316512917
///@{
@@ -14546,32 +14298,4 @@ std::unique_ptr<sema::RISCVIntrinsicManager>
1454614298
CreateRISCVIntrinsicManager(Sema &S);
1454714299
} // end namespace clang
1454814300

14549-
namespace llvm {
14550-
// Hash a FunctionDeclAndLoc by looking at both its FunctionDecl and its
14551-
// SourceLocation.
14552-
template <> struct DenseMapInfo<clang::Sema::FunctionDeclAndLoc> {
14553-
using FunctionDeclAndLoc = clang::Sema::FunctionDeclAndLoc;
14554-
using FDBaseInfo =
14555-
DenseMapInfo<clang::CanonicalDeclPtr<const clang::FunctionDecl>>;
14556-
14557-
static FunctionDeclAndLoc getEmptyKey() {
14558-
return {FDBaseInfo::getEmptyKey(), clang::SourceLocation()};
14559-
}
14560-
14561-
static FunctionDeclAndLoc getTombstoneKey() {
14562-
return {FDBaseInfo::getTombstoneKey(), clang::SourceLocation()};
14563-
}
14564-
14565-
static unsigned getHashValue(const FunctionDeclAndLoc &FDL) {
14566-
return hash_combine(FDBaseInfo::getHashValue(FDL.FD),
14567-
FDL.Loc.getHashValue());
14568-
}
14569-
14570-
static bool isEqual(const FunctionDeclAndLoc &LHS,
14571-
const FunctionDeclAndLoc &RHS) {
14572-
return LHS.FD == RHS.FD && LHS.Loc == RHS.Loc;
14573-
}
14574-
};
14575-
} // namespace llvm
14576-
1457714301
#endif

clang/include/clang/Sema/SemaBase.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ class SemaBase {
146146
/// if (SemaDiagnosticBuilder(...) << foo << bar)
147147
/// return ExprError();
148148
///
149-
/// But see CUDADiagIfDeviceCode() and CUDADiagIfHostCode() -- you probably
149+
/// But see DiagIfDeviceCode() and DiagIfHostCode() -- you probably
150150
/// want to use these instead of creating a SemaDiagnosticBuilder yourself.
151151
operator bool() const { return isImmediate(); }
152152

0 commit comments

Comments
 (0)