//===----- SemaCUDA.h ----- Semantic Analysis for CUDA constructs ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file declares semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_SEMA_SEMACUDA_H
#define LLVM_CLANG_SEMA_SEMACUDA_H

#include "clang/AST/ASTFwd.h"
#include "clang/AST/DeclAccessPair.h"
#include "clang/AST/Redeclarable.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Ownership.h"
#include "clang/Sema/SemaBase.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include <string>
#include <utility>

namespace clang {
namespace sema {
class Capture;
} // namespace sema

class ASTReader;
class ASTWriter;
enum class CUDAFunctionTarget;
enum class CXXSpecialMemberKind;
class ParsedAttributesView;
class Scope;

class SemaCUDA : public SemaBase {
public:
  SemaCUDA(Sema &S);

  /// Increments our count of the number of times we've seen a pragma forcing
  /// functions to be __host__ __device__. So long as this count is greater
  /// than zero, all functions encountered will be __host__ __device__.
  void PushForceHostDevice();

  /// Decrements our count of the number of times we've seen a pragma forcing
  /// functions to be __host__ __device__. Returns false if the count is 0
  /// before decrementing, so you can emit an error.
  bool PopForceHostDevice();

  ExprResult ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                 MultiExprArg ExecConfig,
                                 SourceLocation GGGLoc);
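
  // For illustration only (the identifiers below are placeholders):
  // ActOnExecConfigExpr handles the execution-configuration portion of a
  // CUDA kernel launch such as
  //
  //   kernel<<<NumBlocks, ThreadsPerBlock, SharedMemBytes, Stream>>>(args);
  //
  // where LLLLoc and GGGLoc are the source locations of "<<<" and ">>>".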

  /// A pair of a canonical FunctionDecl and a SourceLocation. When used as the
  /// key in a hashtable, both the FD and location are hashed.
  struct FunctionDeclAndLoc {
    CanonicalDeclPtr<const FunctionDecl> FD;
    SourceLocation Loc;
  };

  /// FunctionDecls and SourceLocations for which CheckCall has emitted a
  /// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the
  /// same deferred diag twice.
  llvm::DenseSet<FunctionDeclAndLoc> LocsWithCUDACallDiags;

  /// An inverse call graph, mapping known-emitted functions to one of their
  /// known-emitted callers (plus the location of the call).
  ///
  /// Functions that we can tell a priori must be emitted aren't added to this
  /// map.
  llvm::DenseMap</* Callee = */ CanonicalDeclPtr<const FunctionDecl>,
                 /* Caller = */ FunctionDeclAndLoc>
      DeviceKnownEmittedFns;

  /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
  /// context is "used as device code".
  ///
  /// - If CurContext is a __host__ function, does not emit any diagnostics
  ///   unless \p EmitOnBothSides is true.
  /// - If CurContext is a __device__ or __global__ function, emits the
  ///   diagnostics immediately.
  /// - If CurContext is a __host__ __device__ function and we are compiling
  ///   for the device, creates a diagnostic which is emitted if and when we
  ///   realize that the function will be codegen'ed.
  ///
  /// Example usage:
  ///
  ///  // Variable-length arrays are not allowed in CUDA device code.
  ///  if (DiagIfDeviceCode(Loc, diag::err_cuda_vla) << CurrentTarget())
  ///    return ExprError();
  ///  // Otherwise, continue parsing as normal.
  SemaDiagnosticBuilder DiagIfDeviceCode(SourceLocation Loc, unsigned DiagID);

  /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
  /// context is "used as host code".
  ///
  /// Same as DiagIfDeviceCode, with "host" and "device" switched.
  SemaDiagnosticBuilder DiagIfHostCode(SourceLocation Loc, unsigned DiagID);

  /// Determines whether the given function is a CUDA device/host/kernel/etc.
  /// function.
  ///
  /// Use this rather than examining the function's attributes yourself -- you
  /// will get it wrong. Returns CUDAFunctionTarget::Host if D is null.
  CUDAFunctionTarget IdentifyTarget(const FunctionDecl *D,
                                    bool IgnoreImplicitHDAttr = false);
  CUDAFunctionTarget IdentifyTarget(const ParsedAttributesView &Attrs);

  enum CUDAVariableTarget {
    CVT_Device,  /// Emitted on device side with a shadow variable on host side
    CVT_Host,    /// Emitted on host side only
    CVT_Both,    /// Emitted on both sides with different addresses
    CVT_Unified, /// Emitted as a unified address, e.g. managed variables
  };
  /// Determines whether the given variable is emitted on host or device side.
  CUDAVariableTarget IdentifyTarget(const VarDecl *D);

  /// Defines kinds of CUDA global host/device context where a function may be
  /// called.
  enum CUDATargetContextKind {
    CTCK_Unknown,       /// Unknown context
    CTCK_InitGlobalVar, /// Function called during global variable
                        /// initialization
  };

  /// Define the current global CUDA host/device context where a function may
  /// be called. Only used when a function is called outside of any functions.
  struct CUDATargetContext {
    CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
    CUDATargetContextKind Kind = CTCK_Unknown;
    Decl *D = nullptr;
  } CurCUDATargetCtx;

  struct CUDATargetContextRAII {
    SemaCUDA &S;
    SemaCUDA::CUDATargetContext SavedCtx;
    CUDATargetContextRAII(SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K,
                          Decl *D);
    ~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; }
  };

  /// Gets the CUDA target for the current context.
  CUDAFunctionTarget CurrentTarget() {
    return IdentifyTarget(dyn_cast<FunctionDecl>(SemaRef.CurContext));
  }

  static bool isImplicitHostDeviceFunction(const FunctionDecl *D);

  // CUDA function call preference. Must be ordered numerically from
  // worst to best.
  enum CUDAFunctionPreference {
    CFP_Never,      // Invalid caller/callee combination.
    CFP_WrongSide,  // Calls from host-device to host or device
                    // function that do not match current compilation
                    // mode.
    CFP_HostDevice, // Any calls to host/device functions.
    CFP_SameSide,   // Calls from host-device to host or device
                    // function matching current compilation mode.
    CFP_Native,     // host-to-host or device-to-device calls.
  };
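
  // A rough illustration of how these preferences apply (placeholder
  // declarations, and assuming device-side compilation):
  //
  //   __device__ void d();  __host__ void h();  __host__ __device__ void hd();
  //
  //   d()  called from a __device__ function -> CFP_Native
  //   d()  called from hd()                  -> CFP_SameSide
  //   hd() called from d()                   -> CFP_HostDevice
  //   h()  called from hd()                  -> CFP_WrongSide
  //   d()  called from h()                   -> CFP_Never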

  /// Identifies relative preference of a given Caller/Callee
  /// combination, based on their host/device attributes.
  /// \param Caller function which needs address of \p Callee.
  ///               nullptr in case of global context.
  /// \param Callee target function
  ///
  /// \returns preference value for particular Caller/Callee combination.
  CUDAFunctionPreference IdentifyPreference(const FunctionDecl *Caller,
                                            const FunctionDecl *Callee);

  /// Determines whether Caller may invoke Callee, based on their CUDA
  /// host/device attributes. Returns false if the call is not allowed.
  ///
  /// Note: Will return true for CFP_WrongSide calls. These may appear in
  /// semantically correct CUDA programs, but only if they're never codegen'ed.
  bool IsAllowedCall(const FunctionDecl *Caller, const FunctionDecl *Callee) {
    return IdentifyPreference(Caller, Callee) != CFP_Never;
  }

  /// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD,
  /// depending on FD and the current compilation settings.
  void maybeAddHostDeviceAttrs(FunctionDecl *FD, const LookupResult &Previous);

  /// May add implicit CUDAConstantAttr attribute to VD, depending on VD
  /// and current compilation settings.
  void MaybeAddConstantAttr(VarDecl *VD);

  /// Check whether we're allowed to call Callee from the current context.
  ///
  /// - If the call is never allowed in a semantically-correct program
  ///   (CFP_Never), emits an error and returns false.
  ///
  /// - If the call is allowed in semantically-correct programs, but only if
  ///   it's never codegen'ed (CFP_WrongSide), creates a deferred diagnostic to
  ///   be emitted if and when the caller is codegen'ed, and returns true.
  ///
  ///   Will only create deferred diagnostics for a given SourceLocation once,
  ///   so you can safely call this multiple times without generating duplicate
  ///   deferred errors.
  ///
  /// - Otherwise, returns true without emitting any diagnostics.
  bool CheckCall(SourceLocation Loc, FunctionDecl *Callee);

  void CheckLambdaCapture(CXXMethodDecl *D, const sema::Capture &Capture);

  /// Set __device__ or __host__ __device__ attributes on the given lambda
  /// operator() method.
  ///
  /// A CUDA lambda is by default a host device function unless it has an
  /// explicit host or device attribute.
  void SetLambdaAttrs(CXXMethodDecl *Method);

  /// Record \p FD if it is a CUDA/HIP implicit host device function used on
  /// device side in device compilation.
  void RecordImplicitHostDeviceFuncUsedByDevice(const FunctionDecl *FD);

  /// Finds a function in \p Matches with highest calling priority
  /// from \p Caller context and erases all functions with lower
  /// calling priority.
  void EraseUnwantedMatches(
      const FunctionDecl *Caller,
      llvm::SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>>
          &Matches);

  /// Given an implicit special member, infer its CUDA target from the
  /// calls it needs to make to underlying base/field special members.
  /// \param ClassDecl the class for which the member is being created.
  /// \param CSM the kind of special member.
  /// \param MemberDecl the special member itself.
  /// \param ConstRHS true if this is a copy operation with a const object on
  ///        its RHS.
  /// \param Diagnose true if this call should emit diagnostics.
  /// \return true if there was an error inferring.
  /// The result of this call is implicit CUDA target attribute(s) attached to
  /// the member declaration.
  bool inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                           CXXSpecialMemberKind CSM,
                                           CXXMethodDecl *MemberDecl,
                                           bool ConstRHS, bool Diagnose);

  /// \return true if \p CD can be considered empty according to CUDA
  /// (E.2.3.1 in CUDA 7.5 Programming guide).
  bool isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD);
  bool isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *CD);

  // \brief Checks that initializers of \p Var satisfy CUDA restrictions. In
  // case of error emits appropriate diagnostic and invalidates \p Var.
  //
  // \details CUDA allows only empty constructors as initializers for global
  // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to
  // all __shared__ variables whether they are local or not (they all are
  // implicitly static in CUDA). One exception is that CUDA allows constant
  // initializers for __constant__ and __device__ variables.
  void checkAllowedInitializer(VarDecl *VD);
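
  // A rough sketch of what these rules permit (placeholder names, device
  // compilation assumed):
  //
  //   __device__ int X = 42;   // OK: constant initializer.
  //   struct W { W() { v = 1; } int v; };
  //   __device__ W BadW;       // Rejected: non-empty constructor.
  //
  //   __global__ void kernel() {
  //     __shared__ int S;      // OK: no initializer.
  //     __shared__ int T = 1;  // Rejected: __shared__ variables may not be
  //                            // initialized.
  //   }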

  /// Check whether NewFD is a valid overload for CUDA. Emits
  /// diagnostics and invalidates NewFD if not.
  void checkTargetOverload(FunctionDecl *NewFD, const LookupResult &Previous);
  /// Copies target attributes from the template TD to the function FD.
  void inheritTargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD);

  /// Returns the name of the launch configuration function. This is the name
  /// of the function that will be called to configure a kernel call, with the
  /// parameters specified via <<<>>>.
  std::string getConfigureFuncName() const;

private:
  unsigned ForceHostDeviceDepth = 0;

  friend class ASTReader;
  friend class ASTWriter;
};

} // namespace clang

namespace llvm {
// Hash a FunctionDeclAndLoc by looking at both its FunctionDecl and its
// SourceLocation.
template <> struct DenseMapInfo<clang::SemaCUDA::FunctionDeclAndLoc> {
  using FunctionDeclAndLoc = clang::SemaCUDA::FunctionDeclAndLoc;
  using FDBaseInfo =
      DenseMapInfo<clang::CanonicalDeclPtr<const clang::FunctionDecl>>;

  static FunctionDeclAndLoc getEmptyKey() {
    return {FDBaseInfo::getEmptyKey(), clang::SourceLocation()};
  }

  static FunctionDeclAndLoc getTombstoneKey() {
    return {FDBaseInfo::getTombstoneKey(), clang::SourceLocation()};
  }

  static unsigned getHashValue(const FunctionDeclAndLoc &FDL) {
    return hash_combine(FDBaseInfo::getHashValue(FDL.FD),
                        FDL.Loc.getHashValue());
  }

  static bool isEqual(const FunctionDeclAndLoc &LHS,
                      const FunctionDeclAndLoc &RHS) {
    return LHS.FD == RHS.FD && LHS.Loc == RHS.Loc;
  }
};
} // namespace llvm

#endif // LLVM_CLANG_SEMA_SEMACUDA_H