xref: /openbsd-src/gnu/llvm/clang/lib/Sema/SemaCUDA.cpp (revision e5dd70708596ae51455a0ffa086a00c5b29f8583)
1*e5dd7070Spatrick //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
2*e5dd7070Spatrick //
3*e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information.
5*e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*e5dd7070Spatrick //
7*e5dd7070Spatrick //===----------------------------------------------------------------------===//
8*e5dd7070Spatrick /// \file
9*e5dd7070Spatrick /// This file implements semantic analysis for CUDA constructs.
10*e5dd7070Spatrick ///
11*e5dd7070Spatrick //===----------------------------------------------------------------------===//
12*e5dd7070Spatrick 
13*e5dd7070Spatrick #include "clang/AST/ASTContext.h"
14*e5dd7070Spatrick #include "clang/AST/Decl.h"
15*e5dd7070Spatrick #include "clang/AST/ExprCXX.h"
16*e5dd7070Spatrick #include "clang/Basic/Cuda.h"
17*e5dd7070Spatrick #include "clang/Lex/Preprocessor.h"
18*e5dd7070Spatrick #include "clang/Sema/Lookup.h"
19*e5dd7070Spatrick #include "clang/Sema/Sema.h"
20*e5dd7070Spatrick #include "clang/Sema/SemaDiagnostic.h"
21*e5dd7070Spatrick #include "clang/Sema/SemaInternal.h"
22*e5dd7070Spatrick #include "clang/Sema/Template.h"
23*e5dd7070Spatrick #include "llvm/ADT/Optional.h"
24*e5dd7070Spatrick #include "llvm/ADT/SmallVector.h"
25*e5dd7070Spatrick using namespace clang;
26*e5dd7070Spatrick 
27*e5dd7070Spatrick void Sema::PushForceCUDAHostDevice() {
28*e5dd7070Spatrick   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
29*e5dd7070Spatrick   ForceCUDAHostDeviceDepth++;
30*e5dd7070Spatrick }
31*e5dd7070Spatrick 
32*e5dd7070Spatrick bool Sema::PopForceCUDAHostDevice() {
33*e5dd7070Spatrick   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
34*e5dd7070Spatrick   if (ForceCUDAHostDeviceDepth == 0)
35*e5dd7070Spatrick     return false;
36*e5dd7070Spatrick   ForceCUDAHostDeviceDepth--;
37*e5dd7070Spatrick   return true;
38*e5dd7070Spatrick }
39*e5dd7070Spatrick 
40*e5dd7070Spatrick ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
41*e5dd7070Spatrick                                          MultiExprArg ExecConfig,
42*e5dd7070Spatrick                                          SourceLocation GGGLoc) {
43*e5dd7070Spatrick   FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
44*e5dd7070Spatrick   if (!ConfigDecl)
45*e5dd7070Spatrick     return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
46*e5dd7070Spatrick                      << getCudaConfigureFuncName());
47*e5dd7070Spatrick   QualType ConfigQTy = ConfigDecl->getType();
48*e5dd7070Spatrick 
49*e5dd7070Spatrick   DeclRefExpr *ConfigDR = new (Context)
50*e5dd7070Spatrick       DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
51*e5dd7070Spatrick   MarkFunctionReferenced(LLLLoc, ConfigDecl);
52*e5dd7070Spatrick 
53*e5dd7070Spatrick   return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
54*e5dd7070Spatrick                        /*IsExecConfig=*/true);
55*e5dd7070Spatrick }
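
// Illustrative sketch (not part of Sema's logic; `kernel`, `grid`, and `block`
// are hypothetical names): the execution configuration handled above comes
// from a user-side kernel launch and is lowered into a call to the function
// named by getCudaConfigureFuncName().
//
//   __global__ void kernel(int *p);
//
//   void launch(int *p) {
//     dim3 grid(16), block(256);
//     kernel<<<grid, block>>>(p);   // ExecConfig is {grid, block}
//   }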
56*e5dd7070Spatrick 
57*e5dd7070Spatrick Sema::CUDAFunctionTarget
58*e5dd7070Spatrick Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
59*e5dd7070Spatrick   bool HasHostAttr = false;
60*e5dd7070Spatrick   bool HasDeviceAttr = false;
61*e5dd7070Spatrick   bool HasGlobalAttr = false;
62*e5dd7070Spatrick   bool HasInvalidTargetAttr = false;
63*e5dd7070Spatrick   for (const ParsedAttr &AL : Attrs) {
64*e5dd7070Spatrick     switch (AL.getKind()) {
65*e5dd7070Spatrick     case ParsedAttr::AT_CUDAGlobal:
66*e5dd7070Spatrick       HasGlobalAttr = true;
67*e5dd7070Spatrick       break;
68*e5dd7070Spatrick     case ParsedAttr::AT_CUDAHost:
69*e5dd7070Spatrick       HasHostAttr = true;
70*e5dd7070Spatrick       break;
71*e5dd7070Spatrick     case ParsedAttr::AT_CUDADevice:
72*e5dd7070Spatrick       HasDeviceAttr = true;
73*e5dd7070Spatrick       break;
74*e5dd7070Spatrick     case ParsedAttr::AT_CUDAInvalidTarget:
75*e5dd7070Spatrick       HasInvalidTargetAttr = true;
76*e5dd7070Spatrick       break;
77*e5dd7070Spatrick     default:
78*e5dd7070Spatrick       break;
79*e5dd7070Spatrick     }
80*e5dd7070Spatrick   }
81*e5dd7070Spatrick 
82*e5dd7070Spatrick   if (HasInvalidTargetAttr)
83*e5dd7070Spatrick     return CFT_InvalidTarget;
84*e5dd7070Spatrick 
85*e5dd7070Spatrick   if (HasGlobalAttr)
86*e5dd7070Spatrick     return CFT_Global;
87*e5dd7070Spatrick 
88*e5dd7070Spatrick   if (HasHostAttr && HasDeviceAttr)
89*e5dd7070Spatrick     return CFT_HostDevice;
90*e5dd7070Spatrick 
91*e5dd7070Spatrick   if (HasDeviceAttr)
92*e5dd7070Spatrick     return CFT_Device;
93*e5dd7070Spatrick 
94*e5dd7070Spatrick   return CFT_Host;
95*e5dd7070Spatrick }
96*e5dd7070Spatrick 
97*e5dd7070Spatrick template <typename A>
98*e5dd7070Spatrick static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
99*e5dd7070Spatrick   return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
100*e5dd7070Spatrick            return isa<A>(Attribute) &&
101*e5dd7070Spatrick                   !(IgnoreImplicitAttr && Attribute->isImplicit());
102*e5dd7070Spatrick          });
103*e5dd7070Spatrick }
104*e5dd7070Spatrick 
105*e5dd7070Spatrick /// IdentifyCUDATarget - Determine the CUDA compilation target for this function
106*e5dd7070Spatrick Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
107*e5dd7070Spatrick                                                   bool IgnoreImplicitHDAttr) {
108*e5dd7070Spatrick   // Code that lives outside a function is run on the host.
109*e5dd7070Spatrick   if (D == nullptr)
110*e5dd7070Spatrick     return CFT_Host;
111*e5dd7070Spatrick 
112*e5dd7070Spatrick   if (D->hasAttr<CUDAInvalidTargetAttr>())
113*e5dd7070Spatrick     return CFT_InvalidTarget;
114*e5dd7070Spatrick 
115*e5dd7070Spatrick   if (D->hasAttr<CUDAGlobalAttr>())
116*e5dd7070Spatrick     return CFT_Global;
117*e5dd7070Spatrick 
118*e5dd7070Spatrick   if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
119*e5dd7070Spatrick     if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
120*e5dd7070Spatrick       return CFT_HostDevice;
121*e5dd7070Spatrick     return CFT_Device;
122*e5dd7070Spatrick   } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
123*e5dd7070Spatrick     return CFT_Host;
124*e5dd7070Spatrick   } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
125*e5dd7070Spatrick     // Some implicit declarations (like intrinsic functions) are not marked.
126*e5dd7070Spatrick     // Set the most lenient target on them for maximal flexibility.
127*e5dd7070Spatrick     return CFT_HostDevice;
128*e5dd7070Spatrick   }
129*e5dd7070Spatrick 
130*e5dd7070Spatrick   return CFT_Host;
131*e5dd7070Spatrick }
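
// Illustrative sketch (hypothetical declarations): how the rules above map
// source-level attributes to targets.
//
//   __global__ void kernel();           // CFT_Global
//   __device__ int on_device();         // CFT_Device
//   __host__ int on_host();             // CFT_Host
//   __host__ __device__ int on_both();  // CFT_HostDevice
//   int unattributed();                 // CFT_Host (default)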
132*e5dd7070Spatrick 
133*e5dd7070Spatrick // * CUDA Call preference table
134*e5dd7070Spatrick //
135*e5dd7070Spatrick // F - from,
136*e5dd7070Spatrick // T - to
137*e5dd7070Spatrick // Ph - preference in host mode
138*e5dd7070Spatrick // Pd - preference in device mode
139*e5dd7070Spatrick // H  - handled in (x)
140*e5dd7070Spatrick // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
141*e5dd7070Spatrick //
142*e5dd7070Spatrick // | F  | T  | Ph  | Pd  |  H  |
143*e5dd7070Spatrick // |----+----+-----+-----+-----+
144*e5dd7070Spatrick // | d  | d  | N   | N   | (c) |
145*e5dd7070Spatrick // | d  | g  | --  | --  | (a) |
146*e5dd7070Spatrick // | d  | h  | --  | --  | (e) |
147*e5dd7070Spatrick // | d  | hd | HD  | HD  | (b) |
148*e5dd7070Spatrick // | g  | d  | N   | N   | (c) |
149*e5dd7070Spatrick // | g  | g  | --  | --  | (a) |
150*e5dd7070Spatrick // | g  | h  | --  | --  | (e) |
151*e5dd7070Spatrick // | g  | hd | HD  | HD  | (b) |
152*e5dd7070Spatrick // | h  | d  | --  | --  | (e) |
153*e5dd7070Spatrick // | h  | g  | N   | N   | (c) |
154*e5dd7070Spatrick // | h  | h  | N   | N   | (c) |
155*e5dd7070Spatrick // | h  | hd | HD  | HD  | (b) |
156*e5dd7070Spatrick // | hd | d  | WS  | SS  | (d) |
157*e5dd7070Spatrick // | hd | g  | SS  | --  |(d/a)|
158*e5dd7070Spatrick // | hd | h  | SS  | WS  | (d) |
159*e5dd7070Spatrick // | hd | hd | HD  | HD  | (b) |
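//
// Illustrative sketch of the two "hd" rows above (hypothetical functions):
//
//   __device__ int dev();
//   __host__ int host();
//   __host__ __device__ int hd() {
//     dev();   // hd -> d: CFP_SameSide in device mode, CFP_WrongSide in host mode
//     host();  // hd -> h: CFP_SameSide in host mode, CFP_WrongSide in device mode
//     return 0;
//   }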
160*e5dd7070Spatrick 
161*e5dd7070Spatrick Sema::CUDAFunctionPreference
162*e5dd7070Spatrick Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
163*e5dd7070Spatrick                              const FunctionDecl *Callee) {
164*e5dd7070Spatrick   assert(Callee && "Callee must be valid.");
165*e5dd7070Spatrick   CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
166*e5dd7070Spatrick   CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
167*e5dd7070Spatrick 
168*e5dd7070Spatrick   // If one of the targets is invalid, the check always fails, no matter what
169*e5dd7070Spatrick   // the other target is.
170*e5dd7070Spatrick   if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
171*e5dd7070Spatrick     return CFP_Never;
172*e5dd7070Spatrick 
173*e5dd7070Spatrick   // (a) Can't call global from some contexts until we support CUDA's
174*e5dd7070Spatrick   // dynamic parallelism.
175*e5dd7070Spatrick   if (CalleeTarget == CFT_Global &&
176*e5dd7070Spatrick       (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
177*e5dd7070Spatrick     return CFP_Never;
178*e5dd7070Spatrick 
179*e5dd7070Spatrick   // (b) Calling HostDevice is OK for everyone.
180*e5dd7070Spatrick   if (CalleeTarget == CFT_HostDevice)
181*e5dd7070Spatrick     return CFP_HostDevice;
182*e5dd7070Spatrick 
183*e5dd7070Spatrick   // (c) Best case scenarios
184*e5dd7070Spatrick   if (CalleeTarget == CallerTarget ||
185*e5dd7070Spatrick       (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
186*e5dd7070Spatrick       (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
187*e5dd7070Spatrick     return CFP_Native;
188*e5dd7070Spatrick 
189*e5dd7070Spatrick   // (d) HostDevice behavior depends on compilation mode.
190*e5dd7070Spatrick   if (CallerTarget == CFT_HostDevice) {
191*e5dd7070Spatrick     // It's OK to call a compilation-mode matching function from an HD one.
192*e5dd7070Spatrick     if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
193*e5dd7070Spatrick         (!getLangOpts().CUDAIsDevice &&
194*e5dd7070Spatrick          (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
195*e5dd7070Spatrick       return CFP_SameSide;
196*e5dd7070Spatrick 
197*e5dd7070Spatrick     // Calls from HD to non-mode-matching functions (i.e., to host functions
198*e5dd7070Spatrick     // when compiling in device mode or to device functions when compiling in
199*e5dd7070Spatrick     // host mode) are allowed at the sema level, but eventually rejected if
200*e5dd7070Spatrick     // they're ever codegened.  TODO: Reject said calls earlier.
201*e5dd7070Spatrick     return CFP_WrongSide;
202*e5dd7070Spatrick   }
203*e5dd7070Spatrick 
204*e5dd7070Spatrick   // (e) Calling across device/host boundary is not something you should do.
205*e5dd7070Spatrick   if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
206*e5dd7070Spatrick       (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
207*e5dd7070Spatrick       (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
208*e5dd7070Spatrick     return CFP_Never;
209*e5dd7070Spatrick 
210*e5dd7070Spatrick   llvm_unreachable("All cases should've been handled by now.");
211*e5dd7070Spatrick }
212*e5dd7070Spatrick 
213*e5dd7070Spatrick void Sema::EraseUnwantedCUDAMatches(
214*e5dd7070Spatrick     const FunctionDecl *Caller,
215*e5dd7070Spatrick     SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
216*e5dd7070Spatrick   if (Matches.size() <= 1)
217*e5dd7070Spatrick     return;
218*e5dd7070Spatrick 
219*e5dd7070Spatrick   using Pair = std::pair<DeclAccessPair, FunctionDecl*>;
220*e5dd7070Spatrick 
221*e5dd7070Spatrick   // Gets the CUDA function preference for a call from Caller to Match.
222*e5dd7070Spatrick   auto GetCFP = [&](const Pair &Match) {
223*e5dd7070Spatrick     return IdentifyCUDAPreference(Caller, Match.second);
224*e5dd7070Spatrick   };
225*e5dd7070Spatrick 
226*e5dd7070Spatrick   // Find the best call preference among the functions in Matches.
227*e5dd7070Spatrick   CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
228*e5dd7070Spatrick       Matches.begin(), Matches.end(),
229*e5dd7070Spatrick       [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));
230*e5dd7070Spatrick 
231*e5dd7070Spatrick   // Erase all functions with lower priority.
232*e5dd7070Spatrick   llvm::erase_if(Matches,
233*e5dd7070Spatrick                  [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
234*e5dd7070Spatrick }
235*e5dd7070Spatrick 
236*e5dd7070Spatrick /// When an implicitly-declared special member has to invoke more than one
237*e5dd7070Spatrick /// base/field special member, conflicts may occur in the targets of these
238*e5dd7070Spatrick /// members. For example, if one base's member is __host__ and another's is
239*e5dd7070Spatrick /// __device__, it's a conflict.
240*e5dd7070Spatrick /// This function figures out if the given targets \param Target1 and
241*e5dd7070Spatrick /// \param Target2 conflict, and if they do not it fills in
242*e5dd7070Spatrick /// \param ResolvedTarget with a target that resolves for both calls.
243*e5dd7070Spatrick /// \return true if there's a conflict, false otherwise.
244*e5dd7070Spatrick static bool
245*e5dd7070Spatrick resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
246*e5dd7070Spatrick                                 Sema::CUDAFunctionTarget Target2,
247*e5dd7070Spatrick                                 Sema::CUDAFunctionTarget *ResolvedTarget) {
248*e5dd7070Spatrick   // Only free functions and static member functions may be global.
249*e5dd7070Spatrick   assert(Target1 != Sema::CFT_Global);
250*e5dd7070Spatrick   assert(Target2 != Sema::CFT_Global);
251*e5dd7070Spatrick 
252*e5dd7070Spatrick   if (Target1 == Sema::CFT_HostDevice) {
253*e5dd7070Spatrick     *ResolvedTarget = Target2;
254*e5dd7070Spatrick   } else if (Target2 == Sema::CFT_HostDevice) {
255*e5dd7070Spatrick     *ResolvedTarget = Target1;
256*e5dd7070Spatrick   } else if (Target1 != Target2) {
257*e5dd7070Spatrick     return true;
258*e5dd7070Spatrick   } else {
259*e5dd7070Spatrick     *ResolvedTarget = Target1;
260*e5dd7070Spatrick   }
261*e5dd7070Spatrick 
262*e5dd7070Spatrick   return false;
263*e5dd7070Spatrick }
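
// Illustrative sketch (hypothetical classes): a conflict this helper reports.
// Base1's copy constructor is host-only and Base2's is device-only, so an
// implicitly-declared copy constructor for Derived has no target that works
// for both, and the caller below marks it with CUDAInvalidTargetAttr.
//
//   struct Base1 { __host__ Base1(const Base1 &); };
//   struct Base2 { __device__ Base2(const Base2 &); };
//   struct Derived : Base1, Base2 {};   // implicit copy ctor: target conflict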
264*e5dd7070Spatrick 
265*e5dd7070Spatrick bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
266*e5dd7070Spatrick                                                    CXXSpecialMember CSM,
267*e5dd7070Spatrick                                                    CXXMethodDecl *MemberDecl,
268*e5dd7070Spatrick                                                    bool ConstRHS,
269*e5dd7070Spatrick                                                    bool Diagnose) {
270*e5dd7070Spatrick   // If the defaulted special member is defined lexically outside of its
271*e5dd7070Spatrick   // owning class, or the special member already has explicit device or host
272*e5dd7070Spatrick   // attributes, do not infer.
273*e5dd7070Spatrick   bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
274*e5dd7070Spatrick   bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
275*e5dd7070Spatrick   bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
276*e5dd7070Spatrick   bool HasExplicitAttr =
277*e5dd7070Spatrick       (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
278*e5dd7070Spatrick       (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
279*e5dd7070Spatrick   if (!InClass || HasExplicitAttr)
280*e5dd7070Spatrick     return false;
281*e5dd7070Spatrick 
282*e5dd7070Spatrick   llvm::Optional<CUDAFunctionTarget> InferredTarget;
283*e5dd7070Spatrick 
284*e5dd7070Spatrick   // We're going to invoke special member lookup; mark that these special
285*e5dd7070Spatrick   // members are called from this one, and not from its caller.
286*e5dd7070Spatrick   ContextRAII MethodContext(*this, MemberDecl);
287*e5dd7070Spatrick 
288*e5dd7070Spatrick   // Look for special members in base classes that should be invoked from here.
289*e5dd7070Spatrick   // Infer the target of this member based on the ones it should call.
290*e5dd7070Spatrick   // Skip direct and indirect virtual bases for abstract classes.
291*e5dd7070Spatrick   llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
292*e5dd7070Spatrick   for (const auto &B : ClassDecl->bases()) {
293*e5dd7070Spatrick     if (!B.isVirtual()) {
294*e5dd7070Spatrick       Bases.push_back(&B);
295*e5dd7070Spatrick     }
296*e5dd7070Spatrick   }
297*e5dd7070Spatrick 
298*e5dd7070Spatrick   if (!ClassDecl->isAbstract()) {
299*e5dd7070Spatrick     for (const auto &VB : ClassDecl->vbases()) {
300*e5dd7070Spatrick       Bases.push_back(&VB);
301*e5dd7070Spatrick     }
302*e5dd7070Spatrick   }
303*e5dd7070Spatrick 
304*e5dd7070Spatrick   for (const auto *B : Bases) {
305*e5dd7070Spatrick     const RecordType *BaseType = B->getType()->getAs<RecordType>();
306*e5dd7070Spatrick     if (!BaseType) {
307*e5dd7070Spatrick       continue;
308*e5dd7070Spatrick     }
309*e5dd7070Spatrick 
310*e5dd7070Spatrick     CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
311*e5dd7070Spatrick     Sema::SpecialMemberOverloadResult SMOR =
312*e5dd7070Spatrick         LookupSpecialMember(BaseClassDecl, CSM,
313*e5dd7070Spatrick                             /* ConstArg */ ConstRHS,
314*e5dd7070Spatrick                             /* VolatileArg */ false,
315*e5dd7070Spatrick                             /* RValueThis */ false,
316*e5dd7070Spatrick                             /* ConstThis */ false,
317*e5dd7070Spatrick                             /* VolatileThis */ false);
318*e5dd7070Spatrick 
319*e5dd7070Spatrick     if (!SMOR.getMethod())
320*e5dd7070Spatrick       continue;
321*e5dd7070Spatrick 
322*e5dd7070Spatrick     CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
323*e5dd7070Spatrick     if (!InferredTarget.hasValue()) {
324*e5dd7070Spatrick       InferredTarget = BaseMethodTarget;
325*e5dd7070Spatrick     } else {
326*e5dd7070Spatrick       bool ResolutionError = resolveCalleeCUDATargetConflict(
327*e5dd7070Spatrick           InferredTarget.getValue(), BaseMethodTarget,
328*e5dd7070Spatrick           InferredTarget.getPointer());
329*e5dd7070Spatrick       if (ResolutionError) {
330*e5dd7070Spatrick         if (Diagnose) {
331*e5dd7070Spatrick           Diag(ClassDecl->getLocation(),
332*e5dd7070Spatrick                diag::note_implicit_member_target_infer_collision)
333*e5dd7070Spatrick               << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
334*e5dd7070Spatrick         }
335*e5dd7070Spatrick         MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
336*e5dd7070Spatrick         return true;
337*e5dd7070Spatrick       }
338*e5dd7070Spatrick     }
339*e5dd7070Spatrick   }
340*e5dd7070Spatrick 
341*e5dd7070Spatrick   // Same as for bases, but now for special members of fields.
342*e5dd7070Spatrick   for (const auto *F : ClassDecl->fields()) {
343*e5dd7070Spatrick     if (F->isInvalidDecl()) {
344*e5dd7070Spatrick       continue;
345*e5dd7070Spatrick     }
346*e5dd7070Spatrick 
347*e5dd7070Spatrick     const RecordType *FieldType =
348*e5dd7070Spatrick         Context.getBaseElementType(F->getType())->getAs<RecordType>();
349*e5dd7070Spatrick     if (!FieldType) {
350*e5dd7070Spatrick       continue;
351*e5dd7070Spatrick     }
352*e5dd7070Spatrick 
353*e5dd7070Spatrick     CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
354*e5dd7070Spatrick     Sema::SpecialMemberOverloadResult SMOR =
355*e5dd7070Spatrick         LookupSpecialMember(FieldRecDecl, CSM,
356*e5dd7070Spatrick                             /* ConstArg */ ConstRHS && !F->isMutable(),
357*e5dd7070Spatrick                             /* VolatileArg */ false,
358*e5dd7070Spatrick                             /* RValueThis */ false,
359*e5dd7070Spatrick                             /* ConstThis */ false,
360*e5dd7070Spatrick                             /* VolatileThis */ false);
361*e5dd7070Spatrick 
362*e5dd7070Spatrick     if (!SMOR.getMethod())
363*e5dd7070Spatrick       continue;
364*e5dd7070Spatrick 
365*e5dd7070Spatrick     CUDAFunctionTarget FieldMethodTarget =
366*e5dd7070Spatrick         IdentifyCUDATarget(SMOR.getMethod());
367*e5dd7070Spatrick     if (!InferredTarget.hasValue()) {
368*e5dd7070Spatrick       InferredTarget = FieldMethodTarget;
369*e5dd7070Spatrick     } else {
370*e5dd7070Spatrick       bool ResolutionError = resolveCalleeCUDATargetConflict(
371*e5dd7070Spatrick           InferredTarget.getValue(), FieldMethodTarget,
372*e5dd7070Spatrick           InferredTarget.getPointer());
373*e5dd7070Spatrick       if (ResolutionError) {
374*e5dd7070Spatrick         if (Diagnose) {
375*e5dd7070Spatrick           Diag(ClassDecl->getLocation(),
376*e5dd7070Spatrick                diag::note_implicit_member_target_infer_collision)
377*e5dd7070Spatrick               << (unsigned)CSM << InferredTarget.getValue()
378*e5dd7070Spatrick               << FieldMethodTarget;
379*e5dd7070Spatrick         }
380*e5dd7070Spatrick         MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
381*e5dd7070Spatrick         return true;
382*e5dd7070Spatrick       }
383*e5dd7070Spatrick     }
384*e5dd7070Spatrick   }
385*e5dd7070Spatrick 
386*e5dd7070Spatrick 
387*e5dd7070Spatrick   // If no target was inferred, mark this member as __host__ __device__;
388*e5dd7070Spatrick   // it's the least restrictive option that can be invoked from any target.
389*e5dd7070Spatrick   bool NeedsH = true, NeedsD = true;
390*e5dd7070Spatrick   if (InferredTarget.hasValue()) {
391*e5dd7070Spatrick     if (InferredTarget.getValue() == CFT_Device)
392*e5dd7070Spatrick       NeedsH = false;
393*e5dd7070Spatrick     else if (InferredTarget.getValue() == CFT_Host)
394*e5dd7070Spatrick       NeedsD = false;
395*e5dd7070Spatrick   }
396*e5dd7070Spatrick 
397*e5dd7070Spatrick   // We're either setting attributes for the first time, or the inferred ones
398*e5dd7070Spatrick   // must match previously set ones.
399*e5dd7070Spatrick   if (NeedsD && !HasD)
400*e5dd7070Spatrick     MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
401*e5dd7070Spatrick   if (NeedsH && !HasH)
402*e5dd7070Spatrick     MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
403*e5dd7070Spatrick 
404*e5dd7070Spatrick   return false;
405*e5dd7070Spatrick }
406*e5dd7070Spatrick 
407*e5dd7070Spatrick bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
408*e5dd7070Spatrick   if (!CD->isDefined() && CD->isTemplateInstantiation())
409*e5dd7070Spatrick     InstantiateFunctionDefinition(Loc, CD->getFirstDecl());
410*e5dd7070Spatrick 
411*e5dd7070Spatrick   // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
412*e5dd7070Spatrick   // empty at a point in the translation unit, if it is either a
413*e5dd7070Spatrick   // trivial constructor
414*e5dd7070Spatrick   if (CD->isTrivial())
415*e5dd7070Spatrick     return true;
416*e5dd7070Spatrick 
417*e5dd7070Spatrick   // ... or it satisfies all of the following conditions:
418*e5dd7070Spatrick   // The constructor function has been defined.
419*e5dd7070Spatrick   // The constructor function has no parameters,
420*e5dd7070Spatrick   // and the function body is an empty compound statement.
421*e5dd7070Spatrick   if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
422*e5dd7070Spatrick     return false;
423*e5dd7070Spatrick 
424*e5dd7070Spatrick   // Its class has no virtual functions and no virtual base classes.
425*e5dd7070Spatrick   if (CD->getParent()->isDynamicClass())
426*e5dd7070Spatrick     return false;
427*e5dd7070Spatrick 
428*e5dd7070Spatrick   // The only form of initializer allowed is an empty constructor.
429*e5dd7070Spatrick   // This will recursively check all base classes and member initializers
430*e5dd7070Spatrick   if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
431*e5dd7070Spatrick         if (const CXXConstructExpr *CE =
432*e5dd7070Spatrick                 dyn_cast<CXXConstructExpr>(CI->getInit()))
433*e5dd7070Spatrick           return isEmptyCudaConstructor(Loc, CE->getConstructor());
434*e5dd7070Spatrick         return false;
435*e5dd7070Spatrick       }))
436*e5dd7070Spatrick     return false;
437*e5dd7070Spatrick 
438*e5dd7070Spatrick   return true;
439*e5dd7070Spatrick }
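
// Illustrative sketch (hypothetical types): constructors that do and do not
// count as empty under the rules above.
//
//   struct Empty {
//     __device__ Empty() {}            // empty: no parameters, empty body
//   };
//   struct NonEmpty {
//     int x;
//     __device__ NonEmpty() : x(42) {} // not empty: has a member initializer
//   };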
440*e5dd7070Spatrick 
441*e5dd7070Spatrick bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
442*e5dd7070Spatrick   // No destructor -> no problem.
443*e5dd7070Spatrick   if (!DD)
444*e5dd7070Spatrick     return true;
445*e5dd7070Spatrick 
446*e5dd7070Spatrick   if (!DD->isDefined() && DD->isTemplateInstantiation())
447*e5dd7070Spatrick     InstantiateFunctionDefinition(Loc, DD->getFirstDecl());
448*e5dd7070Spatrick 
449*e5dd7070Spatrick   // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
450*e5dd7070Spatrick   // empty at a point in the translation unit, if it is either a
451*e5dd7070Spatrick   // trivial destructor
452*e5dd7070Spatrick   if (DD->isTrivial())
453*e5dd7070Spatrick     return true;
454*e5dd7070Spatrick 
455*e5dd7070Spatrick   // ... or it satisfies all of the following conditions:
456*e5dd7070Spatrick   // The destructor function has been defined,
457*e5dd7070Spatrick   // and the function body is an empty compound statement.
458*e5dd7070Spatrick   if (!DD->hasTrivialBody())
459*e5dd7070Spatrick     return false;
460*e5dd7070Spatrick 
461*e5dd7070Spatrick   const CXXRecordDecl *ClassDecl = DD->getParent();
462*e5dd7070Spatrick 
463*e5dd7070Spatrick   // Its class has no virtual functions and no virtual base classes.
464*e5dd7070Spatrick   if (ClassDecl->isDynamicClass())
465*e5dd7070Spatrick     return false;
466*e5dd7070Spatrick 
467*e5dd7070Spatrick   // Only empty destructors are allowed. This will recursively check
468*e5dd7070Spatrick   // destructors for all base classes...
469*e5dd7070Spatrick   if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
470*e5dd7070Spatrick         if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
471*e5dd7070Spatrick           return isEmptyCudaDestructor(Loc, RD->getDestructor());
472*e5dd7070Spatrick         return true;
473*e5dd7070Spatrick       }))
474*e5dd7070Spatrick     return false;
475*e5dd7070Spatrick 
476*e5dd7070Spatrick   // ... and member fields.
477*e5dd7070Spatrick   if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
478*e5dd7070Spatrick         if (CXXRecordDecl *RD = Field->getType()
479*e5dd7070Spatrick                                     ->getBaseElementTypeUnsafe()
480*e5dd7070Spatrick                                     ->getAsCXXRecordDecl())
481*e5dd7070Spatrick           return isEmptyCudaDestructor(Loc, RD->getDestructor());
482*e5dd7070Spatrick         return true;
483*e5dd7070Spatrick       }))
484*e5dd7070Spatrick     return false;
485*e5dd7070Spatrick 
486*e5dd7070Spatrick   return true;
487*e5dd7070Spatrick }
488*e5dd7070Spatrick 
489*e5dd7070Spatrick void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
490*e5dd7070Spatrick   if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
491*e5dd7070Spatrick     return;
492*e5dd7070Spatrick   const Expr *Init = VD->getInit();
493*e5dd7070Spatrick   if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
494*e5dd7070Spatrick       VD->hasAttr<CUDASharedAttr>()) {
495*e5dd7070Spatrick     if (LangOpts.GPUAllowDeviceInit)
496*e5dd7070Spatrick       return;
497*e5dd7070Spatrick     assert(!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>());
498*e5dd7070Spatrick     bool AllowedInit = false;
499*e5dd7070Spatrick     if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
500*e5dd7070Spatrick       AllowedInit =
501*e5dd7070Spatrick           isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
502*e5dd7070Spatrick     // We'll allow constant initializers even if the constructor is non-empty
503*e5dd7070Spatrick     // according to CUDA rules. This deviates from NVCC, but allows us to
504*e5dd7070Spatrick     // handle things like constexpr constructors.
505*e5dd7070Spatrick     if (!AllowedInit &&
506*e5dd7070Spatrick         (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>()))
507*e5dd7070Spatrick       AllowedInit = VD->getInit()->isConstantInitializer(
508*e5dd7070Spatrick           Context, VD->getType()->isReferenceType());
509*e5dd7070Spatrick 
510*e5dd7070Spatrick     // Also make sure that the destructor, if there is one, is empty.
511*e5dd7070Spatrick     if (AllowedInit)
512*e5dd7070Spatrick       if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
513*e5dd7070Spatrick         AllowedInit =
514*e5dd7070Spatrick             isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());
515*e5dd7070Spatrick 
516*e5dd7070Spatrick     if (!AllowedInit) {
517*e5dd7070Spatrick       Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
518*e5dd7070Spatrick                                   ? diag::err_shared_var_init
519*e5dd7070Spatrick                                   : diag::err_dynamic_var_init)
520*e5dd7070Spatrick           << Init->getSourceRange();
521*e5dd7070Spatrick       VD->setInvalidDecl();
522*e5dd7070Spatrick     }
523*e5dd7070Spatrick   } else {
524*e5dd7070Spatrick     // This is a host-side global variable.  Check that the initializer is
525*e5dd7070Spatrick     // callable from the host side.
526*e5dd7070Spatrick     const FunctionDecl *InitFn = nullptr;
527*e5dd7070Spatrick     if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
528*e5dd7070Spatrick       InitFn = CE->getConstructor();
529*e5dd7070Spatrick     } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
530*e5dd7070Spatrick       InitFn = CE->getDirectCallee();
531*e5dd7070Spatrick     }
532*e5dd7070Spatrick     if (InitFn) {
533*e5dd7070Spatrick       CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
534*e5dd7070Spatrick       if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
535*e5dd7070Spatrick         Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
536*e5dd7070Spatrick             << InitFnTarget << InitFn;
537*e5dd7070Spatrick         Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
538*e5dd7070Spatrick         VD->setInvalidDecl();
539*e5dd7070Spatrick       }
540*e5dd7070Spatrick     }
541*e5dd7070Spatrick   }
542*e5dd7070Spatrick }
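
// Illustrative sketch (hypothetical variables; do_work() is a placeholder):
// initializers this check accepts or rejects for device-side globals.
//
//   struct Trivial { };
//   struct Dynamic { __device__ Dynamic() { do_work(); } };
//
//   __device__ Trivial ok1;      // empty constructor: allowed
//   __device__ int ok2 = 42;     // constant initializer: allowed
//   __device__ Dynamic bad1;     // dynamic initialization: err_dynamic_var_init
//   __shared__ int bad2 = 1;     // initialized __shared__: err_shared_var_init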
543*e5dd7070Spatrick 
544*e5dd7070Spatrick // With -fcuda-host-device-constexpr, an unattributed constexpr function is
545*e5dd7070Spatrick // treated as implicitly __host__ __device__, unless:
546*e5dd7070Spatrick //  * it is a variadic function (device-side variadic functions are not
547*e5dd7070Spatrick //    allowed), or
548*e5dd7070Spatrick //  * a __device__ function with this signature was already declared, in which
549*e5dd7070Spatrick //    case we output an error, unless the __device__ decl is in a system
550*e5dd7070Spatrick //    header, in which case we leave the constexpr function unattributed.
551*e5dd7070Spatrick //
552*e5dd7070Spatrick // In addition, all function decls are treated as __host__ __device__ when
553*e5dd7070Spatrick // ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
554*e5dd7070Spatrick //   #pragma clang force_cuda_host_device_begin/end
555*e5dd7070Spatrick // pair).
556*e5dd7070Spatrick void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
557*e5dd7070Spatrick                                        const LookupResult &Previous) {
558*e5dd7070Spatrick   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
559*e5dd7070Spatrick 
560*e5dd7070Spatrick   if (ForceCUDAHostDeviceDepth > 0) {
561*e5dd7070Spatrick     if (!NewD->hasAttr<CUDAHostAttr>())
562*e5dd7070Spatrick       NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
563*e5dd7070Spatrick     if (!NewD->hasAttr<CUDADeviceAttr>())
564*e5dd7070Spatrick       NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
565*e5dd7070Spatrick     return;
566*e5dd7070Spatrick   }
567*e5dd7070Spatrick 
568*e5dd7070Spatrick   if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
569*e5dd7070Spatrick       NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
570*e5dd7070Spatrick       NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
571*e5dd7070Spatrick     return;
572*e5dd7070Spatrick 
573*e5dd7070Spatrick   // Is D a __device__ function with the same signature as NewD, ignoring CUDA
574*e5dd7070Spatrick   // attributes?
575*e5dd7070Spatrick   auto IsMatchingDeviceFn = [&](NamedDecl *D) {
576*e5dd7070Spatrick     if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
577*e5dd7070Spatrick       D = Using->getTargetDecl();
578*e5dd7070Spatrick     FunctionDecl *OldD = D->getAsFunction();
579*e5dd7070Spatrick     return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
580*e5dd7070Spatrick            !OldD->hasAttr<CUDAHostAttr>() &&
581*e5dd7070Spatrick            !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
582*e5dd7070Spatrick                        /* ConsiderCudaAttrs = */ false);
583*e5dd7070Spatrick   };
584*e5dd7070Spatrick   auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
585*e5dd7070Spatrick   if (It != Previous.end()) {
586*e5dd7070Spatrick     // We found a __device__ function with the same name and signature as NewD
587*e5dd7070Spatrick     // (ignoring CUDA attrs).  This is an error unless that function is defined
588*e5dd7070Spatrick     // in a system header, in which case we simply return without making NewD
589*e5dd7070Spatrick     // host+device.
590*e5dd7070Spatrick     NamedDecl *Match = *It;
591*e5dd7070Spatrick     if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
592*e5dd7070Spatrick       Diag(NewD->getLocation(),
593*e5dd7070Spatrick            diag::err_cuda_unattributed_constexpr_cannot_overload_device)
594*e5dd7070Spatrick           << NewD;
595*e5dd7070Spatrick       Diag(Match->getLocation(),
596*e5dd7070Spatrick            diag::note_cuda_conflicting_device_function_declared_here);
597*e5dd7070Spatrick     }
598*e5dd7070Spatrick     return;
599*e5dd7070Spatrick   }
600*e5dd7070Spatrick 
601*e5dd7070Spatrick   NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
602*e5dd7070Spatrick   NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
603*e5dd7070Spatrick }
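
// Illustrative sketch (hypothetical declarations): effects of the rules above.
// With -fcuda-host-device-constexpr, `twice` is implicitly __host__ __device__;
// inside the pragma, `helper` is as well.
//
//   constexpr int twice(int x) { return 2 * x; }  // implicit __host__ __device__
//
//   #pragma clang force_cuda_host_device begin
//   int helper();                                 // implicit __host__ __device__
//   #pragma clang force_cuda_host_device end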
604*e5dd7070Spatrick 
605*e5dd7070Spatrick Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
606*e5dd7070Spatrick                                                    unsigned DiagID) {
607*e5dd7070Spatrick   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
608*e5dd7070Spatrick   DeviceDiagBuilder::Kind DiagKind = [this] {
609*e5dd7070Spatrick     switch (CurrentCUDATarget()) {
610*e5dd7070Spatrick     case CFT_Global:
611*e5dd7070Spatrick     case CFT_Device:
612*e5dd7070Spatrick       return DeviceDiagBuilder::K_Immediate;
613*e5dd7070Spatrick     case CFT_HostDevice:
614*e5dd7070Spatrick       // An HD function counts as host code if we're compiling for host, and
615*e5dd7070Spatrick       // device code if we're compiling for device.  Defer any errors in device
616*e5dd7070Spatrick       // mode until the function is known-emitted.
617*e5dd7070Spatrick       if (getLangOpts().CUDAIsDevice) {
618*e5dd7070Spatrick         return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
619*e5dd7070Spatrick                 FunctionEmissionStatus::Emitted)
620*e5dd7070Spatrick                    ? DeviceDiagBuilder::K_ImmediateWithCallStack
621*e5dd7070Spatrick                    : DeviceDiagBuilder::K_Deferred;
622*e5dd7070Spatrick       }
623*e5dd7070Spatrick       return DeviceDiagBuilder::K_Nop;
624*e5dd7070Spatrick 
625*e5dd7070Spatrick     default:
626*e5dd7070Spatrick       return DeviceDiagBuilder::K_Nop;
627*e5dd7070Spatrick     }
628*e5dd7070Spatrick   }();
629*e5dd7070Spatrick   return DeviceDiagBuilder(DiagKind, Loc, DiagID,
630*e5dd7070Spatrick                            dyn_cast<FunctionDecl>(CurContext), *this);
631*e5dd7070Spatrick }
632*e5dd7070Spatrick 
633*e5dd7070Spatrick Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
634*e5dd7070Spatrick                                                  unsigned DiagID) {
635*e5dd7070Spatrick   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
636*e5dd7070Spatrick   DeviceDiagBuilder::Kind DiagKind = [this] {
637*e5dd7070Spatrick     switch (CurrentCUDATarget()) {
638*e5dd7070Spatrick     case CFT_Host:
639*e5dd7070Spatrick       return DeviceDiagBuilder::K_Immediate;
640*e5dd7070Spatrick     case CFT_HostDevice:
641*e5dd7070Spatrick       // An HD function counts as host code if we're compiling for host, and
642*e5dd7070Spatrick       // device code if we're compiling for device.  Defer any errors in device
643*e5dd7070Spatrick       // mode until the function is known-emitted.
644*e5dd7070Spatrick       if (getLangOpts().CUDAIsDevice)
645*e5dd7070Spatrick         return DeviceDiagBuilder::K_Nop;
646*e5dd7070Spatrick 
647*e5dd7070Spatrick       return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
648*e5dd7070Spatrick               FunctionEmissionStatus::Emitted)
649*e5dd7070Spatrick                  ? DeviceDiagBuilder::K_ImmediateWithCallStack
650*e5dd7070Spatrick                  : DeviceDiagBuilder::K_Deferred;
651*e5dd7070Spatrick     default:
652*e5dd7070Spatrick       return DeviceDiagBuilder::K_Nop;
653*e5dd7070Spatrick     }
654*e5dd7070Spatrick   }();
655*e5dd7070Spatrick   return DeviceDiagBuilder(DiagKind, Loc, DiagID,
656*e5dd7070Spatrick                            dyn_cast<FunctionDecl>(CurContext), *this);
657*e5dd7070Spatrick }
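
// Illustrative sketch (hypothetical call site; the predicate and diagnostic ID
// are placeholders): how a check elsewhere in Sema might use these builders.
// The diagnostic fires immediately in device code, is deferred in HD code
// compiled for device until the function is known to be emitted, and is
// dropped in host code.
//
//   if (isUnsupportedOnDevice(E))
//     CUDADiagIfDeviceCode(E->getBeginLoc(), diag::err_some_device_restriction)
//         << E->getSourceRange();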
658*e5dd7070Spatrick 
659*e5dd7070Spatrick bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
660*e5dd7070Spatrick   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
661*e5dd7070Spatrick   assert(Callee && "Callee may not be null.");
662*e5dd7070Spatrick 
663*e5dd7070Spatrick   auto &ExprEvalCtx = ExprEvalContexts.back();
664*e5dd7070Spatrick   if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
665*e5dd7070Spatrick     return true;
666*e5dd7070Spatrick 
667*e5dd7070Spatrick   // FIXME: Is bailing out early correct here?  Should we instead assume that
668*e5dd7070Spatrick   // the caller is a global initializer?
669*e5dd7070Spatrick   FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
670*e5dd7070Spatrick   if (!Caller)
671*e5dd7070Spatrick     return true;
672*e5dd7070Spatrick 
673*e5dd7070Spatrick   // If the caller is known-emitted, mark the callee as known-emitted.
674*e5dd7070Spatrick   // Otherwise, mark the call in our call graph so we can traverse it later.
675*e5dd7070Spatrick   bool CallerKnownEmitted =
676*e5dd7070Spatrick       getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
677*e5dd7070Spatrick   if (CallerKnownEmitted) {
678*e5dd7070Spatrick     // Host-side references to a __global__ function refer to the stub, so the
679*e5dd7070Spatrick     // function itself is never emitted and therefore should not be marked.
680*e5dd7070Spatrick     if (!shouldIgnoreInHostDeviceCheck(Callee))
681*e5dd7070Spatrick       markKnownEmitted(
682*e5dd7070Spatrick           *this, Caller, Callee, Loc, [](Sema &S, FunctionDecl *FD) {
683*e5dd7070Spatrick             return S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted;
684*e5dd7070Spatrick           });
685*e5dd7070Spatrick   } else {
686*e5dd7070Spatrick     // If we have
687*e5dd7070Spatrick     //   host fn calls kernel fn calls host+device,
688*e5dd7070Spatrick     // the HD function does not get instantiated on the host.  We model this by
689*e5dd7070Spatrick     // omitting the call to the kernel from the callgraph.  This ensures
690*e5dd7070Spatrick     // that, when compiling for host, only HD functions actually called from the
691*e5dd7070Spatrick     // host get marked as known-emitted.
692*e5dd7070Spatrick     if (!shouldIgnoreInHostDeviceCheck(Callee))
693*e5dd7070Spatrick       DeviceCallGraph[Caller].insert({Callee, Loc});
694*e5dd7070Spatrick   }
695*e5dd7070Spatrick 
696*e5dd7070Spatrick   DeviceDiagBuilder::Kind DiagKind = [this, Caller, Callee,
697*e5dd7070Spatrick                                       CallerKnownEmitted] {
698*e5dd7070Spatrick     switch (IdentifyCUDAPreference(Caller, Callee)) {
699*e5dd7070Spatrick     case CFP_Never:
700*e5dd7070Spatrick       return DeviceDiagBuilder::K_Immediate;
701*e5dd7070Spatrick     case CFP_WrongSide:
702*e5dd7070Spatrick       assert(Caller && "WrongSide calls require a non-null caller");
703*e5dd7070Spatrick       // If we know the caller will be emitted, we know this wrong-side call
704*e5dd7070Spatrick       // will be emitted, so it's an immediate error.  Otherwise, defer the
705*e5dd7070Spatrick       // error until we know the caller is emitted.
706*e5dd7070Spatrick       return CallerKnownEmitted ? DeviceDiagBuilder::K_ImmediateWithCallStack
707*e5dd7070Spatrick                                 : DeviceDiagBuilder::K_Deferred;
708*e5dd7070Spatrick     default:
709*e5dd7070Spatrick       return DeviceDiagBuilder::K_Nop;
710*e5dd7070Spatrick     }
711*e5dd7070Spatrick   }();
712*e5dd7070Spatrick 
713*e5dd7070Spatrick   if (DiagKind == DeviceDiagBuilder::K_Nop)
714*e5dd7070Spatrick     return true;
715*e5dd7070Spatrick 
716*e5dd7070Spatrick   // Avoid emitting this error twice for the same location.  Using a hashtable
717*e5dd7070Spatrick   // like this is unfortunate, but because we must continue parsing as normal
718*e5dd7070Spatrick   // after encountering a deferred error, it's otherwise very tricky for us to
719*e5dd7070Spatrick   // ensure that we only emit this deferred error once.
720*e5dd7070Spatrick   if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
721*e5dd7070Spatrick     return true;
722*e5dd7070Spatrick 
723*e5dd7070Spatrick   DeviceDiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
724*e5dd7070Spatrick       << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
725*e5dd7070Spatrick   DeviceDiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
726*e5dd7070Spatrick                     Caller, *this)
727*e5dd7070Spatrick       << Callee;
728*e5dd7070Spatrick   return DiagKind != DeviceDiagBuilder::K_Immediate &&
729*e5dd7070Spatrick          DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack;
730*e5dd7070Spatrick }
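
// Illustrative sketch (hypothetical functions): calls the check above
// diagnoses.
//
//   __host__ int host_only();
//
//   __device__ int device_fn() {
//     return host_only();   // CFP_Never: immediate err_ref_bad_target
//   }
//
//   __host__ __device__ int hd_fn() {
//     return host_only();   // CFP_WrongSide when compiling for device: deferred
//                           // until hd_fn() is known to be emitted
//   }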
731*e5dd7070Spatrick 
732*e5dd7070Spatrick void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
733*e5dd7070Spatrick   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
734*e5dd7070Spatrick   if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
735*e5dd7070Spatrick     return;
736*e5dd7070Spatrick   FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext);
737*e5dd7070Spatrick   if (!CurFn)
738*e5dd7070Spatrick     return;
739*e5dd7070Spatrick   CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn);
740*e5dd7070Spatrick   if (Target == CFT_Global || Target == CFT_Device) {
741*e5dd7070Spatrick     Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
742*e5dd7070Spatrick   } else if (Target == CFT_HostDevice) {
743*e5dd7070Spatrick     Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
744*e5dd7070Spatrick     Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
745*e5dd7070Spatrick   }
746*e5dd7070Spatrick }
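
// Illustrative sketch (hypothetical code): a lambda defined inside a
// __device__ function implicitly becomes __device__, so calling it there is a
// same-side call.
//
//   __device__ int use_lambda() {
//     auto add = [](int a, int b) { return a + b; };  // implicitly __device__
//     return add(1, 2);
//   }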
747*e5dd7070Spatrick 
748*e5dd7070Spatrick void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
749*e5dd7070Spatrick                                    const LookupResult &Previous) {
750*e5dd7070Spatrick   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
751*e5dd7070Spatrick   CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
752*e5dd7070Spatrick   for (NamedDecl *OldND : Previous) {
753*e5dd7070Spatrick     FunctionDecl *OldFD = OldND->getAsFunction();
754*e5dd7070Spatrick     if (!OldFD)
755*e5dd7070Spatrick       continue;
756*e5dd7070Spatrick 
757*e5dd7070Spatrick     CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
758*e5dd7070Spatrick     // Don't allow HD and global functions to overload other functions with the
759*e5dd7070Spatrick     // same signature.  We allow overloading based on CUDA attributes so that
760*e5dd7070Spatrick     // functions can have different implementations on the host and device, but
761*e5dd7070Spatrick     // HD/global functions "exist" in some sense on both the host and device, so
762*e5dd7070Spatrick     // should have the same implementation on both sides.
763*e5dd7070Spatrick     if (NewTarget != OldTarget &&
764*e5dd7070Spatrick         ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
765*e5dd7070Spatrick          (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
766*e5dd7070Spatrick         !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
767*e5dd7070Spatrick                     /* ConsiderCudaAttrs = */ false)) {
768*e5dd7070Spatrick       Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
769*e5dd7070Spatrick           << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
770*e5dd7070Spatrick       Diag(OldFD->getLocation(), diag::note_previous_declaration);
771*e5dd7070Spatrick       NewFD->setInvalidDecl();
772*e5dd7070Spatrick       break;
773*e5dd7070Spatrick     }
774*e5dd7070Spatrick   }
775*e5dd7070Spatrick }
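
// Illustrative sketch (hypothetical functions): overloading purely on
// host/device attributes is allowed, but an HD or __global__ function may not
// overload an existing function with the same signature.
//
//   __host__ int f(int);               // OK
//   __device__ int f(int);             // OK: host/device overload pair
//
//   __host__ int g(int);
//   __host__ __device__ int g(int);    // error: err_cuda_ovl_target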
776*e5dd7070Spatrick 
777*e5dd7070Spatrick template <typename AttrTy>
778*e5dd7070Spatrick static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
779*e5dd7070Spatrick                               const FunctionDecl &TemplateFD) {
780*e5dd7070Spatrick   if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
781*e5dd7070Spatrick     AttrTy *Clone = Attribute->clone(S.Context);
782*e5dd7070Spatrick     Clone->setInherited(true);
783*e5dd7070Spatrick     FD->addAttr(Clone);
784*e5dd7070Spatrick   }
785*e5dd7070Spatrick }
786*e5dd7070Spatrick 
787*e5dd7070Spatrick void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
788*e5dd7070Spatrick                                   const FunctionTemplateDecl &TD) {
789*e5dd7070Spatrick   const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
790*e5dd7070Spatrick   copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
791*e5dd7070Spatrick   copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
792*e5dd7070Spatrick   copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
793*e5dd7070Spatrick }
794*e5dd7070Spatrick 
795*e5dd7070Spatrick std::string Sema::getCudaConfigureFuncName() const {
796*e5dd7070Spatrick   if (getLangOpts().HIP)
797*e5dd7070Spatrick     return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
798*e5dd7070Spatrick                                             : "hipConfigureCall";
799*e5dd7070Spatrick 
800*e5dd7070Spatrick   // New CUDA kernel launch sequence.
801*e5dd7070Spatrick   if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
802*e5dd7070Spatrick                          CudaFeature::CUDA_USES_NEW_LAUNCH))
803*e5dd7070Spatrick     return "__cudaPushCallConfiguration";
804*e5dd7070Spatrick 
805*e5dd7070Spatrick   // Legacy CUDA kernel configuration call
806*e5dd7070Spatrick   return "cudaConfigureCall";
807*e5dd7070Spatrick }
808