//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

void Sema::PushForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceCUDAHostDeviceDepth++;
}

bool Sema::PopForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceCUDAHostDeviceDepth == 0)
    return false;
  ForceCUDAHostDeviceDepth--;
  return true;
}

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getCudaConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}

Sema::CUDAFunctionTarget
Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CFT_InvalidTarget;

  if (HasGlobalAttr)
    return CFT_Global;

  if (HasHostAttr && HasDeviceAttr)
    return CFT_HostDevice;

  if (HasDeviceAttr)
    return CFT_Device;

  return CFT_Host;
}

template <typename A>
static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
                                                  bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function is run on the host.
  if (D == nullptr)
    return CFT_Host;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CFT_HostDevice;
    return CFT_Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CFT_Host;
  } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}

// * CUDA Call preference table
//
// F  - from,
// T  - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened.  TODO: Reject said calls earlier.
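    // Illustrative sketch (hypothetical user code, not part of this file):
    // when compiling for the device, the call below is classified as
    // CFP_WrongSide and only rejected if hd() is ever codegened.
    //
    //   void host_only();                          // implicitly __host__
    //   __host__ __device__ void hd() { host_only(); }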
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyCUDAPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device)
      NeedsH = false;
    else if (InferredTarget.getValue() == CFT_Host)
      NeedsD = false;
  }

  // We are either setting attributes for the first time, or the inferred ones
  // must match previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));

  return false;
}

bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined.
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
    return;
  const Expr *Init = VD->getInit();
  if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
      VD->hasAttr<CUDASharedAttr>()) {
    if (LangOpts.GPUAllowDeviceInit)
      return;
    assert(!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>());
    bool AllowedInit = false;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
      AllowedInit =
          isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
    // We'll allow constant initializers even if it's a non-empty
    // constructor according to CUDA rules. This deviates from NVCC,
    // but allows us to handle things like constexpr constructors.
    if (!AllowedInit &&
        (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>()))
      AllowedInit = VD->getInit()->isConstantInitializer(
          Context, VD->getType()->isReferenceType());

    // Also make sure that destructor, if there is one, is empty.
    if (AllowedInit)
      if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
        AllowedInit =
            isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());

    if (!AllowedInit) {
      Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
                                  ? diag::err_shared_var_init
                                  : diag::err_dynamic_var_init)
          << Init->getSourceRange();
      VD->setInvalidDecl();
    }
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
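    // Illustrative sketch (hypothetical user code, not part of this file):
    // the check below rejects a host-side global whose initializer can only
    // run on the device, e.g.
    //
    //   __device__ int make_val();
    //   int GlobalVal = make_val();  // error: initializer not callable on host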
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
      if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceCUDAHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
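  // Illustrative sketch (hypothetical user code, not part of this file):
  // under -fcuda-host-device-constexpr the lambda below would flag
  //
  //   __device__ int twice(int x);                   // device-only decl
  //   constexpr int twice(int x) { return 2 * x; }   // same signature
  //
  // because the unattributed constexpr function cannot become implicitly
  // __host__ __device__ without clashing with the device-only declaration.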
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}

Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                   unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  DeviceDiagBuilder::Kind DiagKind = [this] {
    switch (CurrentCUDATarget()) {
    case CFT_Global:
    case CFT_Device:
      return DeviceDiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice) {
        return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
                FunctionEmissionStatus::Emitted)
                   ? DeviceDiagBuilder::K_ImmediateWithCallStack
                   : DeviceDiagBuilder::K_Deferred;
      }
      return DeviceDiagBuilder::K_Nop;

    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();
  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
                           dyn_cast<FunctionDecl>(CurContext), *this);
}

Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                                 unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  DeviceDiagBuilder::Kind DiagKind = [this] {
    switch (CurrentCUDATarget()) {
    case CFT_Host:
      return DeviceDiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return DeviceDiagBuilder::K_Nop;

      return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
              FunctionEmissionStatus::Emitted)
                 ? DeviceDiagBuilder::K_ImmediateWithCallStack
                 : DeviceDiagBuilder::K_Deferred;
    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();
  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
                           dyn_cast<FunctionDecl>(CurContext), *this);
}

bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  auto &ExprEvalCtx = ExprEvalContexts.back();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted =
      getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
  if (CallerKnownEmitted) {
    // Host-side references to a __global__ function refer to the stub, so the
    // function itself is never emitted and therefore should not be marked.
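    // Illustrative sketch (hypothetical user code, not part of this file):
    //
    //   __global__ void kernel();
    //   void host_fn() { kernel<<<1, 1>>>(); }  // refers to the host-side stub
    //
    // so the kernel itself must not be marked known-emitted for the host.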
    if (!shouldIgnoreInHostDeviceCheck(Callee))
      markKnownEmitted(
          *this, Caller, Callee, Loc, [](Sema &S, FunctionDecl *FD) {
            return S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted;
          });
  } else {
    // If we have
    //   host fn calls kernel fn calls host+device,
    // the HD function does not get instantiated on the host. We model this by
    // omitting the call to the kernel from the callgraph. This ensures that,
    // when compiling for host, only HD functions actually called from the
    // host get marked as known-emitted.
    if (!shouldIgnoreInHostDeviceCheck(Callee))
      DeviceCallGraph[Caller].insert({Callee, Loc});
  }

  DeviceDiagBuilder::Kind DiagKind = [this, Caller, Callee,
                                      CallerKnownEmitted] {
    switch (IdentifyCUDAPreference(Caller, Callee)) {
    case CFP_Never:
      return DeviceDiagBuilder::K_Immediate;
    case CFP_WrongSide:
      assert(Caller && "WrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted ? DeviceDiagBuilder::K_ImmediateWithCallStack
                                : DeviceDiagBuilder::K_Deferred;
    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();

  if (DiagKind == DeviceDiagBuilder::K_Nop)
    return true;

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  DeviceDiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
      << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
  DeviceDiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
                    Caller, *this)
      << Callee;
  return DiagKind != DeviceDiagBuilder::K_Immediate &&
         DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack;
}

void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext);
  if (!CurFn)
    return;
  CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn);
  if (Target == CFT_Global || Target == CFT_Device) {
    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  } else if (Target == CFT_HostDevice) {
    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
  }
}

void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature. We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device, so
    // should have the same implementation on both sides.
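    // Illustrative sketch (hypothetical user code, not part of this file):
    // overloading purely on __host__ vs. __device__ is allowed,
    //
    //   __host__ int pick();
    //   __device__ int pick();            // OK, distinct implementations
    //
    // but adding  __host__ __device__ int pick();  (or a __global__ overload
    // with the same signature) is rejected by the check below.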
    if (NewTarget != OldTarget &&
        ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                    /* ConsiderCudaAttrs = */ false)) {
      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
      Diag(OldFD->getLocation(), diag::note_previous_declaration);
      NewFD->setInvalidDecl();
      break;
    }
  }
}

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
}

std::string Sema::getCudaConfigureFuncName() const {
  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call
  return "cudaConfigureCall";
}