//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

void Sema::PushForceCUDAHostDevice() {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  ForceCUDAHostDeviceDepth++;
}

bool Sema::PopForceCUDAHostDevice() {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  if (ForceCUDAHostDeviceDepth == 0)
    return false;
  ForceCUDAHostDeviceDepth--;
  return true;
}

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getCudaConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}

Sema::CUDAFunctionTarget
Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CFT_InvalidTarget;

  if (HasGlobalAttr)
    return CFT_Global;

  if (HasHostAttr && HasDeviceAttr)
    return CFT_HostDevice;

  if (HasDeviceAttr)
    return CFT_Device;

  return CFT_Host;
}

template <typename A>
static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this
/// function.
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
                                                  bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function is run on the host.
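  // For everything else, the target is determined by the function's CUDA
  // attributes, e.g. (illustrative sketch):
  //   __global__ void kern();              // CFT_Global
  //   __device__ int dev();                // CFT_Device
  //   __host__ __device__ int both();      // CFT_HostDevice
  //   int plain();                         // CFT_Host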
  if (D == nullptr)
    return CFT_Host;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CFT_HostDevice;
    return CFT_Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CFT_Host;
  } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}

// * CUDA Call preference table
//
// F  - from,
// T  - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode-matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened.  TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across the device/host boundary is not something you should do.
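  // For example (illustrative sketch), a plain host function calling a
  // __device__ function falls into this bucket and is never allowed:
  //   __device__ int dev();
  //   int host_fn() { return dev(); }  // h -> d, CFP_Never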
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyCUDAPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
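  // For example (illustrative sketch), inference would hit a conflict for:
  //   struct A { __host__ A(); };
  //   struct B { __device__ B(); };
  //   struct C : A, B {};  // implicit C() would have to call both targets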
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device)
      NeedsH = false;
    else if (InferredTarget.getValue() == CFT_Host)
      NeedsD = false;
  }

  // We're either setting the attributes for the first time, or the inferred
  // ones must match the ones previously set.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));

  return false;
}

bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined.
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
    return;
  const Expr *Init = VD->getInit();
  if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
      VD->hasAttr<CUDASharedAttr>()) {
    assert(!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>());
    bool AllowedInit = false;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
      AllowedInit =
          isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
    // We'll allow constant initializers even if it's a non-empty
    // constructor according to CUDA rules. This deviates from NVCC,
    // but allows us to handle things like constexpr constructors.
    if (!AllowedInit &&
        (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>()))
      AllowedInit = VD->getInit()->isConstantInitializer(
          Context, VD->getType()->isReferenceType());

    // Also make sure that the destructor, if there is one, is empty.
    if (AllowedInit)
      if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
        AllowedInit =
            isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());

    if (!AllowedInit) {
      Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
                                  ? diag::err_shared_var_init
                                  : diag::err_dynamic_var_init)
          << Init->getSourceRange();
      VD->setInvalidDecl();
    }
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
      if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
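//
// For example (illustrative sketch), with -fcuda-host-device-constexpr a
// declaration like
//   constexpr int twice(int x) { return 2 * x; }
// is treated as if it were __host__ __device__, unless a __device__ function
// with the same signature was already declared outside a system header.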
void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");

  if (ForceCUDAHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}

Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                   unsigned DiagID) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  DeviceDiagBuilder::Kind DiagKind = [this] {
    switch (CurrentCUDATarget()) {
    case CFT_Global:
    case CFT_Device:
      return DeviceDiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device.  Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice) {
        return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
                FunctionEmissionStatus::Emitted)
                   ? DeviceDiagBuilder::K_ImmediateWithCallStack
                   : DeviceDiagBuilder::K_Deferred;
      }
      return DeviceDiagBuilder::K_Nop;

    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();
  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
                           dyn_cast<FunctionDecl>(CurContext), *this);
}

Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                                 unsigned DiagID) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  DeviceDiagBuilder::Kind DiagKind = [this] {
    switch (CurrentCUDATarget()) {
    case CFT_Host:
      return DeviceDiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device.  Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return DeviceDiagBuilder::K_Nop;

      return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
              FunctionEmissionStatus::Emitted)
                 ? DeviceDiagBuilder::K_ImmediateWithCallStack
                 : DeviceDiagBuilder::K_Deferred;
    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();
  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
                           dyn_cast<FunctionDecl>(CurContext), *this);
}

bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  auto &ExprEvalCtx = ExprEvalContexts.back();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here?  Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted =
      getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
  if (CallerKnownEmitted) {
    // Host-side references to a __global__ function refer to the stub, so the
    // function itself is never emitted and therefore should not be marked.
    if (!shouldIgnoreInHostDeviceCheck(Callee))
      markKnownEmitted(
          *this, Caller, Callee, Loc, [](Sema &S, FunctionDecl *FD) {
            return S.getEmissionStatus(FD) == FunctionEmissionStatus::Emitted;
          });
  } else {
    // If we have
    //   host fn calls kernel fn calls host+device,
    // the HD function does not get instantiated on the host.  We model this by
    // omitting the call to the kernel from the call graph.  This ensures that,
    // when compiling for host, only HD functions actually called from the host
    // get marked as known-emitted.
    if (!shouldIgnoreInHostDeviceCheck(Callee))
      DeviceCallGraph[Caller].insert({Callee, Loc});
  }

  DeviceDiagBuilder::Kind DiagKind = [this, Caller, Callee,
                                      CallerKnownEmitted] {
    switch (IdentifyCUDAPreference(Caller, Callee)) {
    case CFP_Never:
      return DeviceDiagBuilder::K_Immediate;
    case CFP_WrongSide:
      assert(Caller && "WrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error.  Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted ? DeviceDiagBuilder::K_ImmediateWithCallStack
                                : DeviceDiagBuilder::K_Deferred;
    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();

  if (DiagKind == DeviceDiagBuilder::K_Nop)
    return true;

  // Avoid emitting this error twice for the same location.  Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  DeviceDiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
      << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
  DeviceDiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
                    Caller, *this)
      << Callee;
  return DiagKind != DeviceDiagBuilder::K_Immediate &&
         DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack;
}

void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext);
  if (!CurFn)
    return;
  CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn);
  if (Target == CFT_Global || Target == CFT_Device) {
    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  } else if (Target == CFT_HostDevice) {
    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
  }
}

void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA &&
         "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature.  We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device,
    // so they should have the same implementation on both sides.
    if (NewTarget != OldTarget &&
        ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                    /* ConsiderCudaAttrs = */ false)) {
      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
      Diag(OldFD->getLocation(), diag::note_previous_declaration);
      NewFD->setInvalidDecl();
      break;
    }
  }
}

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
}

std::string Sema::getCudaConfigureFuncName() const {
  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call.
  return "cudaConfigureCall";
}