//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return !A->isImplicit();
  return false;
}

void Sema::PushForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceCUDAHostDeviceDepth++;
}

bool Sema::PopForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceCUDAHostDeviceDepth == 0)
    return false;
  ForceCUDAHostDeviceDepth--;
  return true;
}

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getCudaConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}

Sema::CUDAFunctionTarget
Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CFT_InvalidTarget;

  if (HasGlobalAttr)
    return CFT_Global;

  if (HasHostAttr && HasDeviceAttr)
    return CFT_HostDevice;

  if (HasDeviceAttr)
    return CFT_Device;

  return CFT_Host;
}

template <typename A>
static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this
/// function.
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
                                                  bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function is run on the host.
  if (D == nullptr)
    return CFT_Host;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CFT_HostDevice;
    return CFT_Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CFT_Host;
  } else if ((D->isImplicit() || !D->isUserProvided()) &&
             !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this
/// variable.
Sema::CUDAVariableTarget Sema::IdentifyCUDATarget(const VarDecl *Var) {
  if (Var->hasAttr<HIPManagedAttr>())
    return CVT_Unified;
  if (Var->isConstexpr() && !hasExplicitAttr<CUDAConstantAttr>(Var))
    return CVT_Both;
  if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
      Var->hasAttr<CUDASharedAttr>() ||
      Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
      Var->getType()->isCUDADeviceBuiltinTextureType())
    return CVT_Device;
  // Function-scope static variables without an explicit device or constant
  // attribute are emitted
  // - on both sides in host device functions
  // - on device side in device or global functions
  if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
    switch (IdentifyCUDATarget(FD)) {
    case CFT_HostDevice:
      return CVT_Both;
    case CFT_Device:
    case CFT_Global:
      return CVT_Device;
    default:
      return CVT_Host;
    }
  }
  return CVT_Host;
}
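
// Illustrative only (editorial examples, not part of the implementation; the
// declaration names are made up): how the classification above applies to some
// common CUDA/HIP variable declarations.
//
//   __device__ int DevCounter;       // CVT_Device
//   __constant__ float Coeffs[4];    // CVT_Device
//   __managed__ int ManagedFlag;     // CVT_Unified (HIP managed variable)
//   constexpr int Answer = 42;       // CVT_Both (constexpr, no explicit
//                                    //           __constant__)
//   int HostOnly;                    // CVT_Host
//
//   __host__ __device__ void hd() {
//     static int S;                  // CVT_Both: emitted on both sides
//   }
//   __device__ void d() {
//     static int S;                  // CVT_Device
//   }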

// * CUDA Call preference table
//
// F  - from,
// T  - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened. TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return A->isImplicit();
  return D->isImplicit();
}

bool Sema::isCUDAImplicitHostDeviceFunction(const FunctionDecl *D) {
  bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
  bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
  return IsImplicitDevAttr && IsImplicitHostAttr;
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyCUDAPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}
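
// Illustrative only (editorial example; the names are made up): how the
// preference table drives the pruning above when two overloads otherwise
// match equally well.
//
//   __host__ void f();
//   __device__ void f();
//   __device__ void devCaller() { f(); }
//
// For devCaller, the __device__ f() is CFP_Native and the __host__ f() is
// CFP_Never, so EraseUnwantedCUDAMatches() drops the host overload.
//
//   __host__ __device__ void hdCaller() { f(); }
//
// For hdCaller in device mode, the __device__ f() is CFP_SameSide and the
// __host__ f() is CFP_WrongSide; in host mode the preferences flip.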

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);

  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}
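
// Illustrative only: a few inputs and the resolutions computed above.
//
//   (HostDevice, Device) -> resolves to Device
//   (Host, HostDevice)   -> resolves to Host
//   (Device, Device)     -> resolves to Device
//   (Host, Device)       -> conflict (returns true)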

bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device)
      NeedsH = false;
    else if (InferredTarget.getValue() == CFT_Host)
      NeedsD = false;
  }

  // We're either setting the attributes for the first time, or the inferred
  // ones must match the previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));

  return false;
}

bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // Union ctor does not call ctors of its data members.
  if (CD->getParent()->isUnion())
    return true;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}
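
// Illustrative only (editorial examples; names are made up): constructors the
// check above does and does not treat as "empty" per E.2.3.1.
//
//   struct Empty {
//     __device__ Empty() {}            // empty: trivial body, no initializers
//     int x;
//   };
//   struct NotEmpty {
//     __device__ NotEmpty() : x(42) {} // not empty: has a member initializer
//     int x;
//   };
//   struct Dynamic {
//     __device__ Dynamic() {}          // not empty: class is dynamic
//     virtual __device__ void f();
//   };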

bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined.
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // A union has no base classes, and a union dtor does not call the dtors of
  // its data members.
  if (DD->getParent()->isUnion())
    return true;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
    return;
  const Expr *Init = VD->getInit();
  if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
      VD->hasAttr<CUDASharedAttr>()) {
    if (LangOpts.GPUAllowDeviceInit)
      return;
    bool AllowedInit = false;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
      AllowedInit =
          isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
    // We'll allow constant initializers even if it's a non-empty
    // constructor according to CUDA rules. This deviates from NVCC,
    // but allows us to handle things like constexpr constructors.
    if (!AllowedInit &&
        (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>())) {
      auto *Init = VD->getInit();
      // isConstantInitializer cannot be called with a dependent value, so we
      // skip checking dependent values here. This is OK since
      // checkAllowedCUDAInitializer is called again when the template is
      // instantiated.
      AllowedInit =
          VD->getType()->isDependentType() || Init->isValueDependent() ||
          Init->isConstantInitializer(Context,
                                      VD->getType()->isReferenceType());
    }

    // Also make sure that the destructor, if there is one, is empty.
    if (AllowedInit)
      if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
        AllowedInit =
            isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());

    if (!AllowedInit) {
      Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
                                  ? diag::err_shared_var_init
                                  : diag::err_dynamic_var_init)
          << Init->getSourceRange();
      VD->setInvalidDecl();
    }
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
      if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}
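
// Illustrative only (editorial examples; names are made up, and device-side
// init is assumed not to be allowed via -fgpu-allow-device-init): initializers
// the check above accepts or rejects for device-side globals.
//
//   __device__ int A = 1;                   // OK: constant initializer
//   __device__ int B = someHostFunction();  // error: dynamic initialization
//   struct S { constexpr S() : v(3) {} int v; };
//   __device__ S C;                         // OK per the comment above:
//                                           // constexpr ctors are accepted
//   __shared__ int D = 42;                  // error: __shared__ variables may
//                                           // not have initializers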

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceCUDAHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}
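
// Illustrative only (editorial examples; names are made up): the two ways a
// plain declaration picks up implicit __host__ __device__ attributes through
// the logic above.
//
//   // With -fcuda-host-device-constexpr (on by default in CUDA mode), and
//   // assuming no conflicting __device__ overload was declared:
//   constexpr int twice(int x) { return 2 * x; }  // implicitly HD
//
//   // Inside a force_cuda_host_device region:
//   #pragma clang force_cuda_host_device begin
//   int plainHelper(int x);                       // implicitly HD
//   #pragma clang force_cuda_host_device end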

void Sema::MaybeAddCUDAConstantAttr(VarDecl *VD) {
  if (getLangOpts().CUDAIsDevice && VD->isConstexpr() &&
      (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
      !VD->hasAttr<CUDAConstantAttr>()) {
    VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
  }
}

Sema::SemaDiagnosticBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                       unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!isa<FunctionDecl>(CurContext))
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentCUDATarget()) {
    case CFT_Global:
    case CFT_Device:
      return SemaDiagnosticBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (!getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (IsLastErrorImmediate &&
          Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
              FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID,
                               dyn_cast<FunctionDecl>(CurContext), *this);
}

Sema::SemaDiagnosticBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                                     unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!isa<FunctionDecl>(CurContext))
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentCUDATarget()) {
    case CFT_Host:
      return SemaDiagnosticBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in host
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (IsLastErrorImmediate &&
          Diags.getDiagnosticIDs()->isBuiltinNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (getEmissionStatus(cast<FunctionDecl>(CurContext)) ==
              FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID,
                               dyn_cast<FunctionDecl>(CurContext), *this);
}

bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  auto &ExprEvalCtx = ExprEvalContexts.back();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted =
      getEmissionStatus(Caller) == FunctionEmissionStatus::Emitted;
  SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
                                          CallerKnownEmitted] {
    switch (IdentifyCUDAPreference(Caller, Callee)) {
    case CFP_Never:
    case CFP_WrongSide:
      assert(Caller && "Never/wrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();

  if (DiagKind == SemaDiagnosticBuilder::K_Nop)
    return true;

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
      << IdentifyCUDATarget(Callee) << /*function*/ 0 << Callee
      << IdentifyCUDATarget(Caller);
  if (!Callee->getBuiltinID())
    SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
                          diag::note_previous_decl, Caller, *this)
        << Callee;
  return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
         DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
}
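
// Illustrative only (editorial example; names are made up): the kinds of calls
// CheckCUDACall defers or rejects.
//
//   __device__ void devOnly();
//   __host__ __device__ void hd() { devOnly(); }  // CFP_WrongSide in host
//                                                 // mode: deferred, diagnosed
//                                                 // only if hd() is emitted
//                                                 // for the host
//   void host() { devOnly(); }                    // CFP_Never: diagnosed once
//                                                 // host() is known-emitted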

// Check for a wrong-sided reference capture in a lambda for CUDA/HIP.
// A lambda may capture a stack variable by reference where it is defined and
// use that capture when the lambda is called. When the capture and the use
// happen on different sides, the capture is invalid and should be diagnosed.
void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee,
                                  const sema::Capture &Capture) {
  // In host compilation we only need to check lambda functions emitted on the
  // host side. In such lambda functions, a reference capture is invalid only
  // if the lambda structure is populated by a device function or kernel and
  // then passed to and called by a host function. However, that is impossible:
  // a device function or kernel can only call a device function, and a kernel
  // cannot pass a lambda back to a host function, since we cannot define a
  // kernel argument type which can hold the lambda before the lambda itself
  // is defined.
  if (!LangOpts.CUDAIsDevice)
    return;

  // File-scope lambdas can only do init captures of global variables, which
  // results in those global variables being passed by value.
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return;

  // In device compilation, we only need to check lambda functions which are
  // emitted on the device side. For such lambdas, a reference capture is
  // invalid only if the lambda structure is populated by a host function and
  // then passed to and called in a device function or kernel.
  bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
  bool CallerIsHost =
      !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
  bool ShouldCheck = CalleeIsDevice && CallerIsHost;
  if (!ShouldCheck || !Capture.isReferenceCapture())
    return;
  auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
  if (Capture.isVariableCapture()) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target, Callee, *this)
        << Capture.getVariable();
  } else if (Capture.isThisCapture()) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target_this_ptr, Callee, *this);
  }
  return;
}

void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
}

void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature. We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device,
    // so they should have the same implementation on both sides.
    if (NewTarget != OldTarget &&
        ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                    /* ConsiderCudaAttrs = */ false)) {
      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
      Diag(OldFD->getLocation(), diag::note_previous_declaration);
      NewFD->setInvalidDecl();
      break;
    }
  }
}
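
// Illustrative only (editorial example; names are made up): overloads the rule
// above accepts and rejects.
//
//   __host__ void f(int);
//   __device__ void f(int);          // OK: host and device may overload
//                                    //     each other
//
//   __host__ __device__ void g(int);
//   __device__ void g(int);          // error: an HD function may not overload
//                                    //        a device function with the same
//                                    //        signature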

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
}

std::string Sema::getCudaConfigureFuncName() const {
  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call.
  return "cudaConfigureCall";
}