//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/Sema/SemaCUDA.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/SmallVector.h"
#include <optional>
using namespace clang;

SemaCUDA::SemaCUDA(Sema &S) : SemaBase(S) {}

template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return !A->isImplicit();
  return false;
}

void SemaCUDA::PushForceHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceHostDeviceDepth++;
}

bool SemaCUDA::PopForceHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceHostDeviceDepth == 0)
    return false;
  ForceHostDeviceDepth--;
  return true;
}

ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = getASTContext().getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (getASTContext()) DeclRefExpr(
      getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  SemaRef.MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                               /*IsExecConfig=*/true);
}
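
// Illustrative sketch (not part of the original file): given hypothetical
// code such as
//
//   __global__ void kern(int n);
//   void launch() { kern<<<dim3(8), dim3(256)>>>(42); }
//
// the <<<...>>> arguments arrive here as ExecConfig, and this function builds
// a call to the configuration function named by getConfigureFuncName()
// (e.g. cudaConfigureCall or __cudaPushCallConfiguration, depending on the
// SDK and language mode). The names kern/launch above are placeholders.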

CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CUDAFunctionTarget::InvalidTarget;

  if (HasGlobalAttr)
    return CUDAFunctionTarget::Global;

  if (HasHostAttr && HasDeviceAttr)
    return CUDAFunctionTarget::HostDevice;

  if (HasDeviceAttr)
    return CUDAFunctionTarget::Device;

  return CUDAFunctionTarget::Host;
}
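
// For reference, how source-level attributes map to the targets computed
// above (hypothetical declarations; __global__ etc. expand to these attrs):
//
//   __global__ void k();           // CUDAFunctionTarget::Global
//   __host__ __device__ void hd(); // CUDAFunctionTarget::HostDevice
//   __device__ void d();           // CUDAFunctionTarget::Device
//   void h();                      // CUDAFunctionTarget::Host (the default)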

template <typename A>
static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
    SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
    : S(S_) {
  SavedCtx = S.CurCUDATargetCtx;
  assert(K == SemaCUDA::CTCK_InitGlobalVar);
  auto *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
    auto Target = CUDAFunctionTarget::Host;
    if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
         !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
        hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
        hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
      Target = CUDAFunctionTarget::Device;
    S.CurCUDATargetCtx = {Target, K, VD};
  }
}

/// IdentifyTarget - Determine the CUDA compilation target for this function.
CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
                                            bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function gets the target from CurCUDATargetCtx.
  if (D == nullptr)
    return CurCUDATargetCtx.Target;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CUDAFunctionTarget::InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CUDAFunctionTarget::Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CUDAFunctionTarget::HostDevice;
    return CUDAFunctionTarget::Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CUDAFunctionTarget::Host;
  } else if ((D->isImplicit() || !D->isUserProvided()) &&
             !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CUDAFunctionTarget::HostDevice;
  }

  return CUDAFunctionTarget::Host;
}

/// IdentifyTarget - Determine the CUDA compilation target for this variable.
SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
  if (Var->hasAttr<HIPManagedAttr>())
    return CVT_Unified;
  // Only constexpr and const variables with an implicit constant attribute
  // are emitted on both sides. Such variables are promoted to the device side
  // only if they have static constant initializers on the device side.
  if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
      Var->hasAttr<CUDAConstantAttr>() &&
      !hasExplicitAttr<CUDAConstantAttr>(Var))
    return CVT_Both;
  if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
      Var->hasAttr<CUDASharedAttr>() ||
      Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
      Var->getType()->isCUDADeviceBuiltinTextureType())
    return CVT_Device;
  // Function-scope static variables without an explicit device or constant
  // attribute are emitted
  //  - on both sides in host device functions
  //  - on device side in device or global functions
  if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
    switch (IdentifyTarget(FD)) {
    case CUDAFunctionTarget::HostDevice:
      return CVT_Both;
    case CUDAFunctionTarget::Device:
    case CUDAFunctionTarget::Global:
      return CVT_Device;
    default:
      return CVT_Host;
    }
  }
  return CVT_Host;
}
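
// Illustrative classification under the rules above (hypothetical variables):
//
//   __managed__ int m;     // CVT_Unified (HIP managed memory)
//   constexpr int ce = 1;  // CVT_Both, if the constant attr is implicit
//   __device__ int d;      // CVT_Device (likewise __constant__/__shared__)
//   __host__ __device__ void hd() { static int s; /* s: CVT_Both */ }
//   int h;                 // CVT_Host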

// * CUDA Call preference table
//
// F - from,
// T - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

SemaCUDA::CUDAFunctionPreference
SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");

  // Treat ctor/dtor as host device function in device var initializer to allow
  // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
  // will be diagnosed by checkAllowedInitializer.
  if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
      CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
      (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
    return CFP_HostDevice;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyTarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CUDAFunctionTarget::InvalidTarget ||
      CalleeTarget == CUDAFunctionTarget::InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CUDAFunctionTarget::Global &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CUDAFunctionTarget::HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Global) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Device))
    return CFP_Native;

  // HipStdPar mode is special: assessing whether a device-side call to a host
  // target is valid is deferred to a subsequent pass and cannot unambiguously
  // be adjudicated in the AST, so we optimistically allow such calls to pass
  // here.
  if (getLangOpts().HIPStdPar &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device ||
       CallerTarget == CUDAFunctionTarget::HostDevice) &&
      CalleeTarget == CUDAFunctionTarget::Host)
    return CFP_HostDevice;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CUDAFunctionTarget::HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice &&
         CalleeTarget == CUDAFunctionTarget::Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CUDAFunctionTarget::Host ||
          CalleeTarget == CUDAFunctionTarget::Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened.  TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Device) ||
      (CallerTarget == CUDAFunctionTarget::Device &&
       CalleeTarget == CUDAFunctionTarget::Host) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}
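
// A worked example of the (d) rows in the table above (hypothetical
// functions):
//
//   __host__ void h();
//   __device__ void d();
//   __host__ __device__ void hd() {
//     d(); // CFP_SameSide in device mode, CFP_WrongSide in host mode
//     h(); // CFP_SameSide in host mode, CFP_WrongSide in device mode
//   }
//
// WrongSide calls survive overload resolution but are rejected later if the
// caller is actually emitted for the mismatching side.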

template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return A->isImplicit();
  return D->isImplicit();
}

bool SemaCUDA::isImplicitHostDeviceFunction(const FunctionDecl *D) {
  bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
  bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
  return IsImplicitDevAttr && IsImplicitHostAttr;
}

void SemaCUDA::EraseUnwantedMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl*>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}
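
// Sketch of the pruning above on a hypothetical overload set: if an HD caller
// in device mode has candidates { __device__ f(int), __host__ f(int) }, the
// __device__ candidate scores CFP_SameSide while the __host__ one scores only
// CFP_WrongSide, so the __host__ match is erased here.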

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1,
                                CUDAFunctionTarget Target2,
                                CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != CUDAFunctionTarget::Global);
  assert(Target2 != CUDAFunctionTarget::Global);

  if (Target1 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}
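
// For example (hypothetical member targets): HostDevice combined with Device
// resolves to Device; Device with Device stays Device; Host with Device is a
// conflict, and the caller below then marks the special member with
// CUDAInvalidTargetAttr.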

bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMemberKind CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  std::optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  Sema::ContextRAII MethodContext(SemaRef, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(BaseClassDecl, CSM,
                                    /* ConstArg */ ConstRHS,
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyTarget(SMOR.getMethod());
    if (!InferredTarget) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, BaseMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
              << llvm::to_underlying(BaseMethodTarget);
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        getASTContext().getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(FieldRecDecl, CSM,
                                    /* ConstArg */ ConstRHS && !F->isMutable(),
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget = IdentifyTarget(SMOR.getMethod());
    if (!InferredTarget) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, FieldMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
              << llvm::to_underlying(FieldMethodTarget);
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget) {
    if (*InferredTarget == CUDAFunctionTarget::Device)
      NeedsH = false;
    else if (*InferredTarget == CUDAFunctionTarget::Host)
      NeedsD = false;
  }

  // We are either setting the attributes for the first time, or the inferred
  // ones must match the previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));

  return false;
}

bool SemaCUDA::isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // Union ctor does not call ctors of its data members.
  if (CD->getParent()->isUnion())
    return true;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}
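
// Illustrative cases for the rules above (hypothetical types):
//
//   struct A { A() {} };                 // empty: defined, no params, {} body
//   struct B { int x; B() { x = 1; } };  // not empty: body has a statement
//   struct C { virtual void f(); };      // not empty: dynamic class
//   struct D { A a; D() : a() {} };      // empty iff A::A() is empty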

bool SemaCUDA::isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined.
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Union does not have base class and union dtor does not call dtors of its
  // data members.
  if (DD->getParent()->isUnion())
    return true;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

namespace {
enum CUDAInitializerCheckKind {
  CICK_DeviceOrConstant, // Check initializer for device/constant variable
  CICK_Shared,           // Check initializer for shared variable
};

bool IsDependentVar(VarDecl *VD) {
  if (VD->getType()->isDependentType())
    return true;
  if (const auto *Init = VD->getInit())
    return Init->isValueDependent();
  return false;
}

// Check whether a variable has an allowed initializer for a CUDA device side
// variable with global storage. \p VD may be a host variable to be checked for
// potential promotion to device side variable.
//
// CUDA/HIP allows only empty constructors as initializers for global
// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
// __shared__ variables whether they are local or not (they all are implicitly
// static in CUDA). One exception is that CUDA allows constant initializers
// for __constant__ and __device__ variables.
bool HasAllowedCUDADeviceStaticInitializer(SemaCUDA &S, VarDecl *VD,
                                           CUDAInitializerCheckKind CheckKind) {
  assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
  assert(!IsDependentVar(VD) && "do not check dependent var");
  const Expr *Init = VD->getInit();
  auto IsEmptyInit = [&](const Expr *Init) {
    if (!Init)
      return true;
    if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
      return S.isEmptyConstructor(VD->getLocation(), CE->getConstructor());
    }
    return false;
  };
  auto IsConstantInit = [&](const Expr *Init) {
    assert(Init);
    ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.getASTContext(),
                                                    /*NoWrongSidedVars=*/true);
    return Init->isConstantInitializer(S.getASTContext(),
                                       VD->getType()->isReferenceType());
  };
  auto HasEmptyDtor = [&](VarDecl *VD) {
    if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
      return S.isEmptyDestructor(VD->getLocation(), RD->getDestructor());
    return true;
  };
  if (CheckKind == CICK_Shared)
    return IsEmptyInit(Init) && HasEmptyDtor(VD);
  return S.getLangOpts().GPUAllowDeviceInit ||
         ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
}
} // namespace
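
// Putting the helpers above together (hypothetical declarations; the flag
// spelling assumes HIP's -fgpu-allow-device-init):
//
//   __device__ int a;          // OK: no initializer
//   __device__ int b = 1;      // OK: constant initializer
//   __shared__ int c = 1;      // rejected: __shared__ allows only empty init
//   __device__ std::string s;  // rejected unless ctor/dtor are empty or
//                              // -fgpu-allow-device-init is in effect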

void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
  // Return early if VD is inside a non-instantiated template function since
  // the implicit constructor is not defined yet.
  if (const FunctionDecl *FD =
          dyn_cast_or_null<FunctionDecl>(VD->getDeclContext());
      FD && FD->isDependentContext())
    return;

  // Do not check dependent variables since the ctor/dtor/initializer are not
  // determined. Do it after instantiation.
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
      IsDependentVar(VD))
    return;
  const Expr *Init = VD->getInit();
  bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
  bool IsDeviceOrConstantVar =
      !IsSharedVar &&
      (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
  if (IsDeviceOrConstantVar || IsSharedVar) {
    if (HasAllowedCUDADeviceStaticInitializer(
            *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
      return;
    Diag(VD->getLocation(),
         IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
        << Init->getSourceRange();
    VD->setInvalidDecl();
  } else {
    // This is a host-side global variable.  Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyTarget(InitFn);
      if (InitFnTarget != CUDAFunctionTarget::Host &&
          InitFnTarget != CUDAFunctionTarget::HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << llvm::to_underlying(InitFnTarget) << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}

void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
    const FunctionDecl *Callee) {
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  if (!isImplicitHostDeviceFunction(Callee))
    return;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);

  // Record whether an implicit host device function is used on device side.
  if (CallerTarget != CUDAFunctionTarget::Device &&
      CallerTarget != CUDAFunctionTarget::Global &&
      (CallerTarget != CUDAFunctionTarget::HostDevice ||
       (isImplicitHostDeviceFunction(Caller) &&
        !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
    return;

  getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void SemaCUDA::maybeAddHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  // If a template function has no host/device/global attributes,
  // make it an implicit host device function.
  if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
      !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
      !NewD->hasAttr<CUDAGlobalAttr>() &&
      (NewD->getDescribedFunctionTemplate() ||
       NewD->isFunctionTemplateSpecialization())) {
    NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !SemaRef.IsOverload(NewD, OldD,
                               /* UseMemberUsingDeclRules = */ false,
                               /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs).  This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!SemaRef.getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
}
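
// Example of the constexpr rule above (hypothetical code compiled with
// -fcuda-host-device-constexpr):
//
//   constexpr int sq(int x) { return x * x; }    // becomes implicit
//                                                // __host__ __device__
//   __device__ int cube(int x);                  // same-signature __device__
//   constexpr int cube(int x) { return x*x*x; }  // error, unless the decl
//                                                // above is in a system header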

// TODO: `__constant__` memory may be a limited resource for certain targets.
// A safeguard may be needed at the end of the compilation pipeline if
// `__constant__` memory usage goes beyond the limit.
void SemaCUDA::MaybeAddConstantAttr(VarDecl *VD) {
  // Do not promote dependent variables since the ctor/dtor/initializer are
  // not determined. Do it after instantiation.
  if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
      !VD->hasAttr<CUDASharedAttr>() &&
      (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
      !IsDependentVar(VD) &&
      ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
       HasAllowedCUDADeviceStaticInitializer(*this, VD,
                                             CICK_DeviceOrConstant))) {
    VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
  }
}
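
// For instance (hypothetical file-scope variable, device compilation):
// "constexpr float kPi = 3.14159f;" has no explicit attribute, a constant
// initializer, and a trivial dtor, so it receives an implicit __constant__
// attribute and becomes usable from device code.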

SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc,
                                                           unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Global:
    case CUDAFunctionTarget::Device:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device.  Defer any errors in device
      // mode until the function is known-emitted.
      if (!getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}

Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc,
                                                     unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Host:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device.  Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}

bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  const auto &ExprEvalCtx = SemaRef.currentEvaluationContext();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here?  Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted = SemaRef.getEmissionStatus(Caller) ==
                            Sema::FunctionEmissionStatus::Emitted;
  SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
                                          CallerKnownEmitted] {
    switch (IdentifyPreference(Caller, Callee)) {
    case CFP_Never:
    case CFP_WrongSide:
      assert(Caller && "Never/wrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error.  Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();

  if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
    // For -fgpu-rdc, keep track of external kernels used by host functions.
    if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode &&
        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
        (!Caller || (!Caller->getDescribedFunctionTemplate() &&
                     getASTContext().GetGVALinkageForFunction(Caller) ==
                         GVA_StrongExternal)))
      getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
    return true;
  }

  // Avoid emitting this error twice for the same location.  Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller,
                        SemaRef)
      << llvm::to_underlying(IdentifyTarget(Callee)) << /*function*/ 0 << Callee
      << llvm::to_underlying(IdentifyTarget(Caller));
  if (!Callee->getBuiltinID())
    SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
                          diag::note_previous_decl, Caller, SemaRef)
        << Callee;
  return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
         DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
}
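
// Example of the deferral above (hypothetical functions, device compilation):
//
//   __host__ void h();
//   __host__ __device__ void hd() { h(); } // CFP_WrongSide: error surfaces
//                                          // only if hd() is emitted for
//                                          // the device
//   __device__ void d() { h(); }           // CFP_Never: likewise deferred
//                                          // until d() is known-emitted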

// Check the wrong-sided reference capture of lambda for CUDA/HIP.
// A lambda function may capture a stack variable by reference when it is
// defined and use the capture by reference when the lambda is called. When
// the capture and use happen on different sides, the capture is invalid and
// should be diagnosed.
void SemaCUDA::CheckLambdaCapture(CXXMethodDecl *Callee,
                                  const sema::Capture &Capture) {
  // In host compilation we only need to check lambda functions emitted on host
  // side. In such lambda functions, a reference capture is invalid only
  // if the lambda structure is populated by a device function or kernel and
  // then passed to and called by a host function. However, that is impossible,
  // since a device function or kernel can only call a device function; also, a
  // kernel cannot pass a lambda back to a host function since we cannot
  // define a kernel argument type which can hold the lambda before the lambda
  // itself is defined.
  if (!getLangOpts().CUDAIsDevice)
    return;

  // A file-scope lambda can only do init captures for global variables, which
  // results in passing by value for these global variables.
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  // In device compilation, we only need to check lambda functions which are
  // emitted on device side. For such lambdas, a reference capture is invalid
  // only if the lambda structure is populated by a host function then passed
  // to and called in a device function or kernel.
  bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
  bool CallerIsHost =
      !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
  bool ShouldCheck = CalleeIsDevice && CallerIsHost;
  if (!ShouldCheck || !Capture.isReferenceCapture())
    return;
  auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
  if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target, Callee, SemaRef)
        << Capture.getVariable();
  } else if (Capture.isThisCapture()) {
    // Capture of the this pointer is allowed, since it may be pointing to
    // managed memory which is accessible on both device and host sides. It
    // only results in an invalid memory access if the this pointer points to
    // memory not accessible on the device side.
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::warn_maybe_capture_bad_target_this_ptr, Callee,
                          SemaRef);
  }
}
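
// Illustrative diagnosis (hypothetical code, device compilation):
//
//   void f() {            // host function
//     int x = 0;
//     auto l = [&x] __device__ () { return x; }; // bad: x lives on the host
//                                                // stack, diagnosed above
//   }
//
// A 'this' capture in the same position only draws a warning, since 'this'
// may point to managed memory that is accessible from both sides.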

void SemaCUDA::SetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  Method->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  Method->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
}

void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyTarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyTarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature.  We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device, so
    // should have the same implementation on both sides.
    if (NewTarget != OldTarget &&
        !SemaRef.IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                            /* ConsiderCudaAttrs = */ false)) {
      if ((NewTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(NewFD) &&
             OldTarget == CUDAFunctionTarget::Device)) ||
          (OldTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(OldFD) &&
             NewTarget == CUDAFunctionTarget::Device)) ||
          (NewTarget == CUDAFunctionTarget::Global) ||
          (OldTarget == CUDAFunctionTarget::Global)) {
        Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
            << llvm::to_underlying(NewTarget) << NewFD->getDeclName()
            << llvm::to_underlying(OldTarget) << OldFD;
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
        NewFD->setInvalidDecl();
        break;
      }
      if ((NewTarget == CUDAFunctionTarget::Host &&
           OldTarget == CUDAFunctionTarget::Device) ||
          (NewTarget == CUDAFunctionTarget::Device &&
           OldTarget == CUDAFunctionTarget::Host)) {
        Diag(NewFD->getLocation(), diag::warn_offload_incompatible_redeclare)
            << llvm::to_underlying(NewTarget) << llvm::to_underlying(OldTarget);
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
      }
    }
  }
}
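
// Examples of the constraints above (hypothetical declarations):
//
//   __host__ void f(); __device__ void f();          // accepted target-based
//                                                    // overload (may draw the
//                                                    // redeclare warning)
//   __host__ void g(); __host__ __device__ void g(); // error: HD collides
//                                                    // with same signature
//   __global__ void k(); void k();                   // error: global collides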

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void SemaCUDA::inheritTargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(SemaRef, FD, TemplateFD);
}

std::string SemaCUDA::getConfigureFuncName() const {
  if (getLangOpts().OffloadViaLLVM)
    return "__llvmPushCallConfiguration";

  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(getASTContext().getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call
  return "cudaConfigureCall";
}