//===----- SemaCUDA.h ----- Semantic Analysis for CUDA constructs ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file declares semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_SEMA_SEMACUDA_H
#define LLVM_CLANG_SEMA_SEMACUDA_H

#include "clang/AST/ASTFwd.h"
#include "clang/AST/DeclAccessPair.h"
#include "clang/AST/Redeclarable.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Ownership.h"
#include "clang/Sema/SemaBase.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include <string>
#include <utility>

namespace clang {
namespace sema {
class Capture;
} // namespace sema

class ASTReader;
class ASTWriter;
enum class CUDAFunctionTarget;
enum class CXXSpecialMemberKind;
class ParsedAttributesView;
class Scope;

class SemaCUDA : public SemaBase {
public:
  SemaCUDA(Sema &S);
  /// Increments our count of the number of times we've seen a pragma forcing
  /// functions to be __host__ __device__.  So long as this count is greater
  /// than zero, all functions encountered will be __host__ __device__.
  void PushForceHostDevice();

  /// Decrements our count of the number of times we've seen a pragma forcing
  /// functions to be __host__ __device__.  Returns false if the count is 0
  /// before decrementing, so you can emit an error.
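  ///
  /// Example usage (illustrative; these entry points are driven by the
  /// force_cuda_host_device pragma):
  ///
  ///  #pragma clang force_cuda_host_device begin  // PushForceHostDevice()
  ///  void ImplicitlyHD();                        // treated as __host__ __device__
  ///  #pragma clang force_cuda_host_device end    // PopForceHostDevice()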
  bool PopForceHostDevice();

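  /// Builds the execution configuration expression for a CUDA kernel call
  /// written with the <<<...>>> syntax; \p LLLLoc and \p GGGLoc are the
  /// locations of the "<<<" and ">>>" tokens.
  ///
  /// Example usage (illustrative source being parsed):
  ///
  ///  kernel<<<numBlocks, threadsPerBlock>>>(args);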
  ExprResult ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                 MultiExprArg ExecConfig,
                                 SourceLocation GGGLoc);

  /// A pair of a canonical FunctionDecl and a SourceLocation.  When used as the
  /// key in a hashtable, both the FD and location are hashed.
  struct FunctionDeclAndLoc {
    CanonicalDeclPtr<const FunctionDecl> FD;
    SourceLocation Loc;
  };

  /// FunctionDecls and SourceLocations for which CheckCall has emitted a
  /// (maybe deferred) "bad call" diagnostic.  We use this to avoid emitting the
  /// same deferred diag twice.
  llvm::DenseSet<FunctionDeclAndLoc> LocsWithCUDACallDiags;

  /// An inverse call graph, mapping known-emitted functions to one of their
  /// known-emitted callers (plus the location of the call).
  ///
  /// Functions that we can tell a priori must be emitted aren't added to this
  /// map.
  llvm::DenseMap</* Callee = */ CanonicalDeclPtr<const FunctionDecl>,
                 /* Caller = */ FunctionDeclAndLoc>
      DeviceKnownEmittedFns;

  /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
  /// context is "used as device code".
  ///
  /// - If CurContext is a __host__ function, does not emit any diagnostics.
  /// - If CurContext is a __device__ or __global__ function, emits the
  ///   diagnostics immediately.
  /// - If CurContext is a __host__ __device__ function and we are compiling for
  ///   the device, creates a diagnostic which is emitted if and when we realize
  ///   that the function will be codegen'ed.
  ///
  /// Example usage:
  ///
  ///  // Variable-length arrays are not allowed in CUDA device code.
  ///  if (DiagIfDeviceCode(Loc, diag::err_cuda_vla) << CurrentTarget())
  ///    return ExprError();
  ///  // Otherwise, continue parsing as normal.
  SemaDiagnosticBuilder DiagIfDeviceCode(SourceLocation Loc, unsigned DiagID);

  /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
  /// context is "used as host code".
  ///
  /// Same as DiagIfDeviceCode, with "host" and "device" switched.
  SemaDiagnosticBuilder DiagIfHostCode(SourceLocation Loc, unsigned DiagID);

  /// Determines whether the given function is a CUDA device/host/kernel/etc.
  /// function.
  ///
  /// Use this rather than examining the function's attributes yourself -- you
  /// will get it wrong.  Returns CUDAFunctionTarget::Host if D is null.
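  ///
  /// Example usage:
  ///
  ///  // Illustrative: is FD a __global__ kernel?
  ///  bool IsKernel = IdentifyTarget(FD) == CUDAFunctionTarget::Global;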
  CUDAFunctionTarget IdentifyTarget(const FunctionDecl *D,
                                    bool IgnoreImplicitHDAttr = false);
  CUDAFunctionTarget IdentifyTarget(const ParsedAttributesView &Attrs);

  enum CUDAVariableTarget {
    CVT_Device,  ///< Emitted on device side with a shadow variable on host side
    CVT_Host,    ///< Emitted on host side only
    CVT_Both,    ///< Emitted on both sides with different addresses
    CVT_Unified, ///< Emitted as a unified address, e.g. managed variables
  };
  /// Determines whether the given variable is emitted on host or device side.
  CUDAVariableTarget IdentifyTarget(const VarDecl *D);

  /// Defines kinds of CUDA global host/device context where a function may be
  /// called.
  enum CUDATargetContextKind {
    CTCK_Unknown,       ///< Unknown context
    CTCK_InitGlobalVar, ///< Function called during global variable
                        ///< initialization
  };

  /// Defines the current global CUDA host/device context where a function may
  /// be called. Only used when a function is called outside of any function.
  struct CUDATargetContext {
    CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
    CUDATargetContextKind Kind = CTCK_Unknown;
    Decl *D = nullptr;
  } CurCUDATargetCtx;

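  /// RAII object that installs a new \c CurCUDATargetCtx on construction and
  /// restores the previous one on destruction.
  ///
  /// Example usage (a sketch; \c VD stands for a global variable whose
  /// initializer is being checked):
  ///
  ///  SemaCUDA::CUDATargetContextRAII Ctx(SemaRef.CUDA(),
  ///                                      SemaCUDA::CTCK_InitGlobalVar, VD);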
  struct CUDATargetContextRAII {
    SemaCUDA &S;
    SemaCUDA::CUDATargetContext SavedCtx;
    CUDATargetContextRAII(SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K,
                          Decl *D);
    ~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; }
  };

  /// Gets the CUDA target for the current context.
  CUDAFunctionTarget CurrentTarget() {
    return IdentifyTarget(dyn_cast<FunctionDecl>(SemaRef.CurContext));
  }

  static bool isImplicitHostDeviceFunction(const FunctionDecl *D);

  // CUDA function call preference. Must be ordered numerically from
  // worst to best.
  enum CUDAFunctionPreference {
    CFP_Never,      // Invalid caller/callee combination.
    CFP_WrongSide,  // Calls from host-device to host or device
                    // function that do not match current compilation
                    // mode.
    CFP_HostDevice, // Any calls to host/device functions.
    CFP_SameSide,   // Calls from host-device to host or device
                    // function matching current compilation mode.
    CFP_Native,     // host-to-host or device-to-device calls.
  };

  /// Identifies relative preference of a given Caller/Callee
  /// combination, based on their host/device attributes.
  /// \param Caller function which needs address of \p Callee.
  ///               nullptr in case of global context.
  /// \param Callee target function
  ///
  /// \returns the preference value for the given Caller/Callee combination.
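  ///
  /// Example usage:
  ///
  ///  // Illustrative: prefer FD1 over FD2 as a call target from Caller.
  ///  if (IdentifyPreference(Caller, FD1) > IdentifyPreference(Caller, FD2))
  ///    PickCandidate(FD1); // PickCandidate is a hypothetical helper.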
  CUDAFunctionPreference IdentifyPreference(const FunctionDecl *Caller,
                                            const FunctionDecl *Callee);

  /// Determines whether Caller may invoke Callee, based on their CUDA
  /// host/device attributes.  Returns false if the call is not allowed.
  ///
  /// Note: Will return true for CFP_WrongSide calls.  These may appear in
  /// semantically correct CUDA programs, but only if they're never codegen'ed.
  bool IsAllowedCall(const FunctionDecl *Caller, const FunctionDecl *Callee) {
    return IdentifyPreference(Caller, Callee) != CFP_Never;
  }

  /// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD,
  /// depending on FD and the current compilation settings.
  void maybeAddHostDeviceAttrs(FunctionDecl *FD, const LookupResult &Previous);

  /// May add implicit CUDAConstantAttr attribute to VD, depending on VD
  /// and current compilation settings.
  void MaybeAddConstantAttr(VarDecl *VD);

  /// Check whether we're allowed to call Callee from the current context.
  ///
  /// - If the call is never allowed in a semantically-correct program
  ///   (CFP_Never), emits an error and returns false.
  ///
  /// - If the call is allowed in semantically-correct programs, but only if
  ///   it's never codegen'ed (CFP_WrongSide), creates a deferred diagnostic to
  ///   be emitted if and when the caller is codegen'ed, and returns true.
  ///
  ///   Will only create deferred diagnostics for a given SourceLocation once,
  ///   so you can safely call this multiple times without generating duplicate
  ///   deferred errors.
  ///
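  /// - Otherwise, returns true without emitting any diagnostics.
  ///
  /// Example usage:
  ///
  ///  // Illustrative: diagnose the call before building the call expression.
  ///  if (!CheckCall(Loc, Callee))
  ///    return ExprError();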
212   bool CheckCall(SourceLocation Loc, FunctionDecl *Callee);
213 
214   void CheckLambdaCapture(CXXMethodDecl *D, const sema::Capture &Capture);
215 
  /// Set __device__ or __host__ __device__ attributes on the given lambda
  /// operator() method.
  ///
  /// By default, a CUDA lambda is a host device function unless it has an
  /// explicit host or device attribute.
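  ///
  /// Example (illustrative; clang allows target attributes on lambdas):
  ///
  ///  auto HD = [] { /* ... */ };              // implicitly __host__ __device__
  ///  auto D  = [] __device__ () { /* ... */ }; // explicitly device-only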
  void SetLambdaAttrs(CXXMethodDecl *Method);

  /// Record \p FD if it is a CUDA/HIP implicit host device function used on
  /// device side in device compilation.
  void RecordImplicitHostDeviceFuncUsedByDevice(const FunctionDecl *FD);

  /// Finds a function in \p Matches with the highest calling priority
  /// from the \p Caller context and erases all functions with a lower
  /// calling priority.
  void EraseUnwantedMatches(
      const FunctionDecl *Caller,
      llvm::SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>>
          &Matches);

  /// Given an implicit special member, infer its CUDA target from the
  /// calls it needs to make to underlying base/field special members.
  /// On success, the inferred target is attached to the member declaration
  /// as implicit CUDA target attribute(s).
  /// \param ClassDecl the class for which the member is being created.
  /// \param CSM the kind of special member.
  /// \param MemberDecl the special member itself.
  /// \param ConstRHS true if this is a copy operation with a const object on
  ///        its RHS.
  /// \param Diagnose true if this call should emit diagnostics.
  /// \return true if there was an error inferring.
  bool inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                           CXXSpecialMemberKind CSM,
                                           CXXMethodDecl *MemberDecl,
                                           bool ConstRHS, bool Diagnose);

  /// \return true if \p CD can be considered empty according to CUDA
  /// (E.2.3.1 in CUDA 7.5 Programming guide).
  bool isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD);
  bool isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *CD);

  /// Checks that the initializer of \p VD satisfies CUDA restrictions. In
  /// case of error, emits an appropriate diagnostic and invalidates \p VD.
  ///
  /// CUDA allows only empty constructors as initializers for global
  /// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to
  /// all __shared__ variables, whether they are local or not (they all are
  /// implicitly static in CUDA). One exception is that CUDA allows constant
  /// initializers for __constant__ and __device__ variables.
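  ///
  /// For example (illustrative):
  ///
  ///  __device__ int D = 42;  // OK: constant initializer.
  ///  __shared__ SomeClass S; // OK only if SomeClass's default constructor
  ///                          // is "empty" per isEmptyConstructor.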
  void checkAllowedInitializer(VarDecl *VD);

  /// Check whether NewFD is a valid overload for CUDA. Emits
  /// diagnostics and invalidates NewFD if not.
  void checkTargetOverload(FunctionDecl *NewFD, const LookupResult &Previous);
  /// Copies target attributes from the template TD to the function FD.
  void inheritTargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD);

  /// Returns the name of the launch configuration function.  This is the name
  /// of the function that will be called to configure a kernel call, with the
  /// parameters specified via <<<>>> (e.g. "cudaConfigureCall" for the legacy
  /// CUDA launch API).
  std::string getConfigureFuncName() const;

private:
  unsigned ForceHostDeviceDepth = 0;

  friend class ASTReader;
  friend class ASTWriter;
};

} // namespace clang

namespace llvm {
// Hash a FunctionDeclAndLoc by looking at both its FunctionDecl and its
// SourceLocation.
template <> struct DenseMapInfo<clang::SemaCUDA::FunctionDeclAndLoc> {
  using FunctionDeclAndLoc = clang::SemaCUDA::FunctionDeclAndLoc;
  using FDBaseInfo =
      DenseMapInfo<clang::CanonicalDeclPtr<const clang::FunctionDecl>>;

  static FunctionDeclAndLoc getEmptyKey() {
    return {FDBaseInfo::getEmptyKey(), clang::SourceLocation()};
  }

  static FunctionDeclAndLoc getTombstoneKey() {
    return {FDBaseInfo::getTombstoneKey(), clang::SourceLocation()};
  }

  static unsigned getHashValue(const FunctionDeclAndLoc &FDL) {
    return hash_combine(FDBaseInfo::getHashValue(FDL.FD),
                        FDL.Loc.getHashValue());
  }

  static bool isEqual(const FunctionDeclAndLoc &LHS,
                      const FunctionDeclAndLoc &RHS) {
    return LHS.FD == RHS.FD && LHS.Loc == RHS.Loc;
  }
};
} // namespace llvm

#endif // LLVM_CLANG_SEMA_SEMACUDA_H