|
38 | 38 | #include "clang/AST/TypeOrdering.h"
|
39 | 39 | #include "clang/Basic/BitmaskEnum.h"
|
40 | 40 | #include "clang/Basic/Builtins.h"
|
| 41 | +#include "clang/Basic/Cuda.h" |
41 | 42 | #include "clang/Basic/DarwinSDKInfo.h"
|
42 | 43 | #include "clang/Basic/ExpressionTraits.h"
|
43 | 44 | #include "clang/Basic/Module.h"
|
@@ -183,6 +184,7 @@ class Preprocessor;
|
183 | 184 | class PseudoDestructorTypeStorage;
|
184 | 185 | class PseudoObjectExpr;
|
185 | 186 | class QualType;
|
| 187 | +class SemaCUDA; |
186 | 188 | class SemaHLSL;
|
187 | 189 | class SemaOpenACC;
|
188 | 190 | class SemaSYCL;
|
@@ -435,14 +437,6 @@ enum class CXXSpecialMemberKind {
|
435 | 437 | Invalid
|
436 | 438 | };
|
437 | 439 |
|
438 |
| -enum class CUDAFunctionTarget { |
439 |
| - Device, |
440 |
| - Global, |
441 |
| - Host, |
442 |
| - HostDevice, |
443 |
| - InvalidTarget |
444 |
| -}; |
445 |
| - |
446 | 440 | /// Sema - This implements semantic analysis and AST building for C.
|
447 | 441 | /// \nosubgrouping
|
448 | 442 | class Sema final : public SemaBase {
|
@@ -486,8 +480,7 @@ class Sema final : public SemaBase {
|
486 | 480 | // 35. Code Completion (SemaCodeComplete.cpp)
|
487 | 481 | // 36. FixIt Helpers (SemaFixItUtils.cpp)
|
488 | 482 | // 37. Name Lookup for RISC-V Vector Intrinsic (SemaRISCVVectorLookup.cpp)
|
489 |
| - // 38. CUDA (SemaCUDA.cpp) |
490 |
| - // 39. OpenMP Directives and Clauses (SemaOpenMP.cpp) |
| 483 | + // 38. OpenMP Directives and Clauses (SemaOpenMP.cpp) |
491 | 484 |
|
492 | 485 | /// \name Semantic Analysis
|
493 | 486 | /// Implementations are in Sema.cpp
|
@@ -981,9 +974,19 @@ class Sema final : public SemaBase {
|
981 | 974 | return DelayedDiagnostics.push(pool);
|
982 | 975 | }
|
983 | 976 |
|
| 977 | + /// Diagnostics that are emitted only if we discover that the given function |
| 978 | + /// must be codegen'ed. Because handling these correctly adds overhead to |
| 979 | + /// compilation, this is currently only enabled for CUDA compilations. |
| 980 | + SemaDiagnosticBuilder::DeferredDiagnosticsType DeviceDeferredDiags; |
| 981 | + |
984 | 982 | /// CurContext - This is the current declaration context of parsing.
|
985 | 983 | DeclContext *CurContext;
|
986 | 984 |
|
| 985 | + SemaCUDA &CUDA() { |
| 986 | + assert(CUDAPtr); |
| 987 | + return *CUDAPtr; |
| 988 | + } |
| 989 | + |
987 | 990 | SemaHLSL &HLSL() {
|
988 | 991 | assert(HLSLPtr);
|
989 | 992 | return *HLSLPtr;
|
@@ -1029,6 +1032,7 @@ class Sema final : public SemaBase {
|
1029 | 1032 |
|
1030 | 1033 | mutable IdentifierInfo *Ident_super;
|
1031 | 1034 |
|
| 1035 | + std::unique_ptr<SemaCUDA> CUDAPtr; |
1032 | 1036 | std::unique_ptr<SemaHLSL> HLSLPtr;
|
1033 | 1037 | std::unique_ptr<SemaOpenACC> OpenACCPtr;
|
1034 | 1038 | std::unique_ptr<SemaSYCL> SYCLPtr;
|
@@ -12908,258 +12912,6 @@ class Sema final : public SemaBase {
|
12908 | 12912 | //
|
12909 | 12913 | //
|
12910 | 12914 |
|
12911 |
| - /// \name CUDA |
12912 |
| - /// Implementations are in SemaCUDA.cpp |
12913 |
| - ///@{ |
12914 |
| - |
12915 |
| -public: |
12916 |
| - /// Increments our count of the number of times we've seen a pragma forcing |
12917 |
| - /// functions to be __host__ __device__. So long as this count is greater |
12918 |
| - /// than zero, all functions encountered will be __host__ __device__. |
12919 |
| - void PushForceCUDAHostDevice(); |
12920 |
| - |
12921 |
| - /// Decrements our count of the number of times we've seen a pragma forcing |
12922 |
| - /// functions to be __host__ __device__. Returns false if the count is 0 |
12923 |
| - /// before incrementing, so you can emit an error. |
12924 |
| - bool PopForceCUDAHostDevice(); |
12925 |
| - |
12926 |
| - ExprResult ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, |
12927 |
| - MultiExprArg ExecConfig, |
12928 |
| - SourceLocation GGGLoc); |
12929 |
| - |
12930 |
| - /// Diagnostics that are emitted only if we discover that the given function |
12931 |
| - /// must be codegen'ed. Because handling these correctly adds overhead to |
12932 |
| - /// compilation, this is currently only enabled for CUDA compilations. |
12933 |
| - SemaDiagnosticBuilder::DeferredDiagnosticsType DeviceDeferredDiags; |
12934 |
| - |
12935 |
| - /// A pair of a canonical FunctionDecl and a SourceLocation. When used as the |
12936 |
| - /// key in a hashtable, both the FD and location are hashed. |
12937 |
| - struct FunctionDeclAndLoc { |
12938 |
| - CanonicalDeclPtr<const FunctionDecl> FD; |
12939 |
| - SourceLocation Loc; |
12940 |
| - }; |
12941 |
| - |
12942 |
| - /// FunctionDecls and SourceLocations for which CheckCUDACall has emitted a |
12943 |
| - /// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the |
12944 |
| - /// same deferred diag twice. |
12945 |
| - llvm::DenseSet<FunctionDeclAndLoc> LocsWithCUDACallDiags; |
12946 |
| - |
12947 |
| - /// An inverse call graph, mapping known-emitted functions to one of their |
12948 |
| - /// known-emitted callers (plus the location of the call). |
12949 |
| - /// |
12950 |
| - /// Functions that we can tell a priori must be emitted aren't added to this |
12951 |
| - /// map. |
12952 |
| - llvm::DenseMap</* Callee = */ CanonicalDeclPtr<const FunctionDecl>, |
12953 |
| - /* Caller = */ FunctionDeclAndLoc> |
12954 |
| - DeviceKnownEmittedFns; |
12955 |
| - |
12956 |
| - /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current |
12957 |
| - /// context is "used as device code". |
12958 |
| - /// |
12959 |
| - /// - If CurContext is a __host__ function, does not emit any diagnostics |
12960 |
| - /// unless \p EmitOnBothSides is true. |
12961 |
| - /// - If CurContext is a __device__ or __global__ function, emits the |
12962 |
| - /// diagnostics immediately. |
12963 |
| - /// - If CurContext is a __host__ __device__ function and we are compiling for |
12964 |
| - /// the device, creates a diagnostic which is emitted if and when we realize |
12965 |
| - /// that the function will be codegen'ed. |
12966 |
| - /// |
12967 |
| - /// Example usage: |
12968 |
| - /// |
12969 |
| - /// // Variable-length arrays are not allowed in CUDA device code. |
12970 |
| - /// if (CUDADiagIfDeviceCode(Loc, diag::err_cuda_vla) |
12971 |
| - /// << llvm::to_underlying(CurrentCUDATarget())) |
12972 |
| - /// return ExprError(); |
12973 |
| - /// // Otherwise, continue parsing as normal. |
12974 |
| - SemaDiagnosticBuilder CUDADiagIfDeviceCode(SourceLocation Loc, |
12975 |
| - unsigned DiagID); |
12976 |
| - |
12977 |
| - /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current |
12978 |
| - /// context is "used as host code". |
12979 |
| - /// |
12980 |
| - /// Same as CUDADiagIfDeviceCode, with "host" and "device" switched. |
12981 |
| - SemaDiagnosticBuilder CUDADiagIfHostCode(SourceLocation Loc, unsigned DiagID); |
12982 |
| - |
12983 |
| - /// Determines whether the given function is a CUDA device/host/kernel/etc. |
12984 |
| - /// function. |
12985 |
| - /// |
12986 |
| - /// Use this rather than examining the function's attributes yourself -- you |
12987 |
| - /// will get it wrong. Returns CUDAFunctionTarget::Host if D is null. |
12988 |
| - CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D, |
12989 |
| - bool IgnoreImplicitHDAttr = false); |
12990 |
| - CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs); |
12991 |
| - |
12992 |
| - enum CUDAVariableTarget { |
12993 |
| - CVT_Device, /// Emitted on device side with a shadow variable on host side |
12994 |
| - CVT_Host, /// Emitted on host side only |
12995 |
| - CVT_Both, /// Emitted on both sides with different addresses |
12996 |
| - CVT_Unified, /// Emitted as a unified address, e.g. managed variables |
12997 |
| - }; |
12998 |
| - /// Determines whether the given variable is emitted on host or device side. |
12999 |
| - CUDAVariableTarget IdentifyCUDATarget(const VarDecl *D); |
13000 |
| - |
13001 |
| - /// Defines kinds of CUDA global host/device context where a function may be |
13002 |
| - /// called. |
13003 |
| - enum CUDATargetContextKind { |
13004 |
| - CTCK_Unknown, /// Unknown context |
13005 |
| - CTCK_InitGlobalVar, /// Function called during global variable |
13006 |
| - /// initialization |
13007 |
| - }; |
13008 |
| - |
13009 |
| - /// Define the current global CUDA host/device context where a function may be |
13010 |
| - /// called. Only used when a function is called outside of any functions. |
13011 |
| - struct CUDATargetContext { |
13012 |
| - CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice; |
13013 |
| - CUDATargetContextKind Kind = CTCK_Unknown; |
13014 |
| - Decl *D = nullptr; |
13015 |
| - } CurCUDATargetCtx; |
13016 |
| - |
13017 |
| - struct CUDATargetContextRAII { |
13018 |
| - Sema &S; |
13019 |
| - CUDATargetContext SavedCtx; |
13020 |
| - CUDATargetContextRAII(Sema &S_, CUDATargetContextKind K, Decl *D); |
13021 |
| - ~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; } |
13022 |
| - }; |
13023 |
| - |
13024 |
| - /// Gets the CUDA target for the current context. |
13025 |
| - CUDAFunctionTarget CurrentCUDATarget() { |
13026 |
| - return IdentifyCUDATarget(dyn_cast<FunctionDecl>(CurContext)); |
13027 |
| - } |
13028 |
| - |
13029 |
| - static bool isCUDAImplicitHostDeviceFunction(const FunctionDecl *D); |
13030 |
| - |
13031 |
| - // CUDA function call preference. Must be ordered numerically from |
13032 |
| - // worst to best. |
13033 |
| - enum CUDAFunctionPreference { |
13034 |
| - CFP_Never, // Invalid caller/callee combination. |
13035 |
| - CFP_WrongSide, // Calls from host-device to host or device |
13036 |
| - // function that do not match current compilation |
13037 |
| - // mode. |
13038 |
| - CFP_HostDevice, // Any calls to host/device functions. |
13039 |
| - CFP_SameSide, // Calls from host-device to host or device |
13040 |
| - // function matching current compilation mode. |
13041 |
| - CFP_Native, // host-to-host or device-to-device calls. |
13042 |
| - }; |
13043 |
| - |
13044 |
| - /// Identifies relative preference of a given Caller/Callee |
13045 |
| - /// combination, based on their host/device attributes. |
13046 |
| - /// \param Caller function which needs address of \p Callee. |
13047 |
| - /// nullptr in case of global context. |
13048 |
| - /// \param Callee target function |
13049 |
| - /// |
13050 |
| - /// \returns preference value for particular Caller/Callee combination. |
13051 |
| - CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller, |
13052 |
| - const FunctionDecl *Callee); |
13053 |
| - |
13054 |
| - /// Determines whether Caller may invoke Callee, based on their CUDA |
13055 |
| - /// host/device attributes. Returns false if the call is not allowed. |
13056 |
| - /// |
13057 |
| - /// Note: Will return true for CFP_WrongSide calls. These may appear in |
13058 |
| - /// semantically correct CUDA programs, but only if they're never codegen'ed. |
13059 |
| - bool IsAllowedCUDACall(const FunctionDecl *Caller, |
13060 |
| - const FunctionDecl *Callee) { |
13061 |
| - return IdentifyCUDAPreference(Caller, Callee) != CFP_Never; |
13062 |
| - } |
13063 |
| - |
13064 |
| - /// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD, |
13065 |
| - /// depending on FD and the current compilation settings. |
13066 |
| - void maybeAddCUDAHostDeviceAttrs(FunctionDecl *FD, |
13067 |
| - const LookupResult &Previous); |
13068 |
| - |
13069 |
| - /// May add implicit CUDAConstantAttr attribute to VD, depending on VD |
13070 |
| - /// and current compilation settings. |
13071 |
| - void MaybeAddCUDAConstantAttr(VarDecl *VD); |
13072 |
| - |
13073 |
| - /// Check whether we're allowed to call Callee from the current context. |
13074 |
| - /// |
13075 |
| - /// - If the call is never allowed in a semantically-correct program |
13076 |
| - /// (CFP_Never), emits an error and returns false. |
13077 |
| - /// |
13078 |
| - /// - If the call is allowed in semantically-correct programs, but only if |
13079 |
| - /// it's never codegen'ed (CFP_WrongSide), creates a deferred diagnostic to |
13080 |
| - /// be emitted if and when the caller is codegen'ed, and returns true. |
13081 |
| - /// |
13082 |
| - /// Will only create deferred diagnostics for a given SourceLocation once, |
13083 |
| - /// so you can safely call this multiple times without generating duplicate |
13084 |
| - /// deferred errors. |
13085 |
| - /// |
13086 |
| - /// - Otherwise, returns true without emitting any diagnostics. |
13087 |
| - bool CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee); |
13088 |
| - |
13089 |
| - void CUDACheckLambdaCapture(CXXMethodDecl *D, const sema::Capture &Capture); |
13090 |
| - |
13091 |
| - /// Set __device__ or __host__ __device__ attributes on the given lambda |
13092 |
| - /// operator() method. |
13093 |
| - /// |
13094 |
| - /// CUDA lambdas by default is host device function unless it has explicit |
13095 |
| - /// host or device attribute. |
13096 |
| - void CUDASetLambdaAttrs(CXXMethodDecl *Method); |
13097 |
| - |
13098 |
| - /// Record \p FD if it is a CUDA/HIP implicit host device function used on |
13099 |
| - /// device side in device compilation. |
13100 |
| - void CUDARecordImplicitHostDeviceFuncUsedByDevice(const FunctionDecl *FD); |
13101 |
| - |
13102 |
| - /// Finds a function in \p Matches with highest calling priority |
13103 |
| - /// from \p Caller context and erases all functions with lower |
13104 |
| - /// calling priority. |
13105 |
| - void EraseUnwantedCUDAMatches( |
13106 |
| - const FunctionDecl *Caller, |
13107 |
| - SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches); |
13108 |
| - |
13109 |
| - /// Given a implicit special member, infer its CUDA target from the |
13110 |
| - /// calls it needs to make to underlying base/field special members. |
13111 |
| - /// \param ClassDecl the class for which the member is being created. |
13112 |
| - /// \param CSM the kind of special member. |
13113 |
| - /// \param MemberDecl the special member itself. |
13114 |
| - /// \param ConstRHS true if this is a copy operation with a const object on |
13115 |
| - /// its RHS. |
13116 |
| - /// \param Diagnose true if this call should emit diagnostics. |
13117 |
| - /// \return true if there was an error inferring. |
13118 |
| - /// The result of this call is implicit CUDA target attribute(s) attached to |
13119 |
| - /// the member declaration. |
13120 |
| - bool inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, |
13121 |
| - CXXSpecialMemberKind CSM, |
13122 |
| - CXXMethodDecl *MemberDecl, |
13123 |
| - bool ConstRHS, bool Diagnose); |
13124 |
| - |
13125 |
| - /// \return true if \p CD can be considered empty according to CUDA |
13126 |
| - /// (E.2.3.1 in CUDA 7.5 Programming guide). |
13127 |
| - bool isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD); |
13128 |
| - bool isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *CD); |
13129 |
| - |
13130 |
| - // \brief Checks that initializers of \p Var satisfy CUDA restrictions. In |
13131 |
| - // case of error emits appropriate diagnostic and invalidates \p Var. |
13132 |
| - // |
13133 |
| - // \details CUDA allows only empty constructors as initializers for global |
13134 |
| - // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all |
13135 |
| - // __shared__ variables whether they are local or not (they all are implicitly |
13136 |
| - // static in CUDA). One exception is that CUDA allows constant initializers |
13137 |
| - // for __constant__ and __device__ variables. |
13138 |
| - void checkAllowedCUDAInitializer(VarDecl *VD); |
13139 |
| - |
13140 |
| - /// Check whether NewFD is a valid overload for CUDA. Emits |
13141 |
| - /// diagnostics and invalidates NewFD if not. |
13142 |
| - void checkCUDATargetOverload(FunctionDecl *NewFD, |
13143 |
| - const LookupResult &Previous); |
13144 |
| - /// Copies target attributes from the template TD to the function FD. |
13145 |
| - void inheritCUDATargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD); |
13146 |
| - |
13147 |
| - /// Returns the name of the launch configuration function. This is the name |
13148 |
| - /// of the function that will be called to configure kernel call, with the |
13149 |
| - /// parameters specified via <<<>>>. |
13150 |
| - std::string getCudaConfigureFuncName() const; |
13151 |
| - |
13152 |
| -private: |
13153 |
| - unsigned ForceCUDAHostDeviceDepth = 0; |
13154 |
| - |
13155 |
| - ///@} |
13156 |
| - |
13157 |
| - // |
13158 |
| - // |
13159 |
| - // ------------------------------------------------------------------------- |
13160 |
| - // |
13161 |
| - // |
13162 |
| - |
13163 | 12915 | /// \name OpenMP Directives and Clauses
|
13164 | 12916 | /// Implementations are in SemaOpenMP.cpp
|
13165 | 12917 | ///@{
|
@@ -14546,32 +14298,4 @@ std::unique_ptr<sema::RISCVIntrinsicManager>
|
14546 | 14298 | CreateRISCVIntrinsicManager(Sema &S);
|
14547 | 14299 | } // end namespace clang
|
14548 | 14300 |
|
14549 |
| -namespace llvm { |
14550 |
| -// Hash a FunctionDeclAndLoc by looking at both its FunctionDecl and its |
14551 |
| -// SourceLocation. |
14552 |
| -template <> struct DenseMapInfo<clang::Sema::FunctionDeclAndLoc> { |
14553 |
| - using FunctionDeclAndLoc = clang::Sema::FunctionDeclAndLoc; |
14554 |
| - using FDBaseInfo = |
14555 |
| - DenseMapInfo<clang::CanonicalDeclPtr<const clang::FunctionDecl>>; |
14556 |
| - |
14557 |
| - static FunctionDeclAndLoc getEmptyKey() { |
14558 |
| - return {FDBaseInfo::getEmptyKey(), clang::SourceLocation()}; |
14559 |
| - } |
14560 |
| - |
14561 |
| - static FunctionDeclAndLoc getTombstoneKey() { |
14562 |
| - return {FDBaseInfo::getTombstoneKey(), clang::SourceLocation()}; |
14563 |
| - } |
14564 |
| - |
14565 |
| - static unsigned getHashValue(const FunctionDeclAndLoc &FDL) { |
14566 |
| - return hash_combine(FDBaseInfo::getHashValue(FDL.FD), |
14567 |
| - FDL.Loc.getHashValue()); |
14568 |
| - } |
14569 |
| - |
14570 |
| - static bool isEqual(const FunctionDeclAndLoc &LHS, |
14571 |
| - const FunctionDeclAndLoc &RHS) { |
14572 |
| - return LHS.FD == RHS.FD && LHS.Loc == RHS.Loc; |
14573 |
| - } |
14574 |
| -}; |
14575 |
| -} // namespace llvm |
14576 |
| - |
14577 | 14301 | #endif
|
0 commit comments