//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/Sema/SemaCUDA.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/SmallVector.h"
#include <optional>
using namespace clang;

SemaCUDA::SemaCUDA(Sema &S) : SemaBase(S) {}

template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return !A->isImplicit();
  return false;
}

void SemaCUDA::PushForceHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceHostDeviceDepth++;
}

bool SemaCUDA::PopForceHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceHostDeviceDepth == 0)
    return false;
  ForceHostDeviceDepth--;
  return true;
}

ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = getASTContext().getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (getASTContext()) DeclRefExpr(
      getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  SemaRef.MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                               /*IsExecConfig=*/true);
}

CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CUDAFunctionTarget::InvalidTarget;

  if (HasGlobalAttr)
    return CUDAFunctionTarget::Global;

  if (HasHostAttr && HasDeviceAttr)
    return CUDAFunctionTarget::HostDevice;

  if (HasDeviceAttr)
    return CUDAFunctionTarget::Device;

  return CUDAFunctionTarget::Host;
}

template <typename A>
static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr
                    && Attribute->isImplicit());
         });
}

SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
    SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
    : S(S_) {
  SavedCtx = S.CurCUDATargetCtx;
  assert(K == SemaCUDA::CTCK_InitGlobalVar);
  auto *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
    auto Target = CUDAFunctionTarget::Host;
    if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
         !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
        hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
        hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
      Target = CUDAFunctionTarget::Device;
    S.CurCUDATargetCtx = {Target, K, VD};
  }
}

/// IdentifyTarget - Determine the CUDA compilation target for this function.
CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
                                            bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function gets the target from CurCUDATargetCtx.
  if (D == nullptr)
    return CurCUDATargetCtx.Target;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CUDAFunctionTarget::InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CUDAFunctionTarget::Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CUDAFunctionTarget::HostDevice;
    return CUDAFunctionTarget::Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CUDAFunctionTarget::Host;
  } else if ((D->isImplicit() || !D->isUserProvided()) &&
             !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CUDAFunctionTarget::HostDevice;
  }

  return CUDAFunctionTarget::Host;
}

/// IdentifyTarget - Determine the CUDA compilation target for this variable.
SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
  if (Var->hasAttr<HIPManagedAttr>())
    return CVT_Unified;
  // Only constexpr and const variables with an implicit constant attribute
  // are emitted on both sides. Such variables are promoted to the device side
  // only if they have static constant initializers on the device side.
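  // For illustration, a minimal sketch (hypothetical user code): a
  // namespace-scope
  //   constexpr int Width = 16;
  // only carries an implicit __constant__ attribute (see MaybeAddConstantAttr
  // below) and is emitted on both sides, whereas an explicit
  //   __constant__ int Table[16];
  // is handled by the device-only case that follows.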
  if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
      Var->hasAttr<CUDAConstantAttr>() &&
      !hasExplicitAttr<CUDAConstantAttr>(Var))
    return CVT_Both;
  if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
      Var->hasAttr<CUDASharedAttr>() ||
      Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
      Var->getType()->isCUDADeviceBuiltinTextureType())
    return CVT_Device;
  // Function-scope static variables without an explicit device or constant
  // attribute are emitted
  // - on both sides in host device functions
  // - on device side in device or global functions
  if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
    switch (IdentifyTarget(FD)) {
    case CUDAFunctionTarget::HostDevice:
      return CVT_Both;
    case CUDAFunctionTarget::Device:
    case CUDAFunctionTarget::Global:
      return CVT_Device;
    default:
      return CVT_Host;
    }
  }
  return CVT_Host;
}

// * CUDA Call preference table
//
// F - from,
// T - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

SemaCUDA::CUDAFunctionPreference
SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");

  // Treat ctor/dtor as host-device functions in device variable initializers,
  // to allow trivial ctors/dtors without a device attribute to be used.
  // Non-trivial ctors/dtors will be diagnosed by checkAllowedInitializer.
  if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
      CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
      (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
    return CFP_HostDevice;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyTarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CUDAFunctionTarget::InvalidTarget ||
      CalleeTarget == CUDAFunctionTarget::InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CUDAFunctionTarget::Global &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
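  // For illustration, a minimal sketch (hypothetical user code): given
  //   __host__ __device__ int HD();
  // both
  //   __device__ int D() { return HD(); }
  // and
  //   __host__ int H() { return HD(); }
  // land in this case and get CFP_HostDevice.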
  if (CalleeTarget == CUDAFunctionTarget::HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Global) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Device))
    return CFP_Native;

  // HipStdPar mode is special: deciding whether a device-side call to a host
  // target is legal is deferred to a subsequent pass and cannot be
  // unambiguously adjudicated in the AST, hence we optimistically allow such
  // calls to pass here.
  if (getLangOpts().HIPStdPar &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device ||
       CallerTarget == CUDAFunctionTarget::HostDevice) &&
      CalleeTarget == CUDAFunctionTarget::Host)
    return CFP_HostDevice;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CUDAFunctionTarget::HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice &&
         CalleeTarget == CUDAFunctionTarget::Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CUDAFunctionTarget::Host ||
          CalleeTarget == CUDAFunctionTarget::Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened. TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Device) ||
      (CallerTarget == CUDAFunctionTarget::Device &&
       CalleeTarget == CUDAFunctionTarget::Host) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return A->isImplicit();
  return D->isImplicit();
}

bool SemaCUDA::isImplicitHostDeviceFunction(const FunctionDecl *D) {
  bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
  bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
  return IsImplicitDevAttr && IsImplicitHostAttr;
}

void SemaCUDA::EraseUnwantedMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
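  // For illustration (hypothetical candidates): from a __device__ caller, a
  // __device__ candidate scores CFP_Native while a plain __host__ candidate
  // scores CFP_Never, so only the __device__ one survives the erase below.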
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1,
                                CUDAFunctionTarget Target2,
                                CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != CUDAFunctionTarget::Global);
  assert(Target2 != CUDAFunctionTarget::Global);

  if (Target1 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}

bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMemberKind CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  std::optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  Sema::ContextRAII MethodContext(SemaRef, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
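  // For illustration, a minimal sketch (hypothetical classes): given
  //   struct A { __device__ A(); };
  //   struct B : A {};
  // the implicit default constructor of B is inferred to be __device__,
  // because the only base constructor it has to call is __device__.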
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(BaseClassDecl, CSM,
                                    /* ConstArg */ ConstRHS,
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyTarget(SMOR.getMethod());
    if (!InferredTarget) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, BaseMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
              << llvm::to_underlying(BaseMethodTarget);
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        getASTContext().getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(FieldRecDecl, CSM,
                                    /* ConstArg */ ConstRHS && !F->isMutable(),
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget = IdentifyTarget(SMOR.getMethod());
    if (!InferredTarget) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, FieldMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << llvm::to_underlying(*InferredTarget)
              << llvm::to_underlying(FieldMethodTarget);
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget) {
    if (*InferredTarget == CUDAFunctionTarget::Device)
      NeedsH = false;
    else if (*InferredTarget == CUDAFunctionTarget::Host)
      NeedsD = false;
  }

  // We are either setting attributes for the first time, or the inferred ones
  // must match the previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));

  return false;
}

bool SemaCUDA::isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // Union ctor does not call ctors of its data members.
  if (CD->getParent()->isUnion())
    return true;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}

bool SemaCUDA::isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined.
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // A union has no base classes, and a union dtor does not call the dtors of
  // its data members.
  if (DD->getParent()->isUnion())
    return true;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
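  // For illustration (hypothetical class): a field whose type has a
  // non-trivial, non-empty destructor, e.g. a std::vector<int> member, makes
  // the enclosing class's destructor non-empty, so a __device__ or __shared__
  // global of that class type would be rejected by checkAllowedInitializer.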
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

namespace {
enum CUDAInitializerCheckKind {
  CICK_DeviceOrConstant, // Check initializer for device/constant variable
  CICK_Shared,           // Check initializer for shared variable
};

bool IsDependentVar(VarDecl *VD) {
  if (VD->getType()->isDependentType())
    return true;
  if (const auto *Init = VD->getInit())
    return Init->isValueDependent();
  return false;
}

// Check whether a variable has an allowed initializer for a CUDA device side
// variable with global storage. \p VD may be a host variable to be checked for
// potential promotion to device side variable.
//
// CUDA/HIP allows only empty constructors as initializers for global
// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
// __shared__ variables whether they are local or not (they all are implicitly
// static in CUDA). One exception is that CUDA allows constant initializers
// for __constant__ and __device__ variables.
bool HasAllowedCUDADeviceStaticInitializer(SemaCUDA &S, VarDecl *VD,
                                           CUDAInitializerCheckKind CheckKind) {
  assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
  assert(!IsDependentVar(VD) && "do not check dependent var");
  const Expr *Init = VD->getInit();
  auto IsEmptyInit = [&](const Expr *Init) {
    if (!Init)
      return true;
    if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
      return S.isEmptyConstructor(VD->getLocation(), CE->getConstructor());
    }
    return false;
  };
  auto IsConstantInit = [&](const Expr *Init) {
    assert(Init);
    ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.getASTContext(),
                                                    /*NoWrongSidedVars=*/true);
    return Init->isConstantInitializer(S.getASTContext(),
                                       VD->getType()->isReferenceType());
  };
  auto HasEmptyDtor = [&](VarDecl *VD) {
    if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
      return S.isEmptyDestructor(VD->getLocation(), RD->getDestructor());
    return true;
  };
  if (CheckKind == CICK_Shared)
    return IsEmptyInit(Init) && HasEmptyDtor(VD);
  return S.getLangOpts().GPUAllowDeviceInit ||
         ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
}
} // namespace

void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
  // Return early if VD is inside a non-instantiated template function since
  // the implicit constructor is not defined yet.
  if (const FunctionDecl *FD =
          dyn_cast_or_null<FunctionDecl>(VD->getDeclContext());
      FD && FD->isDependentContext())
    return;

  // Do not check dependent variables since the ctor/dtor/initializer are not
  // determined. Do it after instantiation.
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
      IsDependentVar(VD))
    return;
  const Expr *Init = VD->getInit();
  bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
  bool IsDeviceOrConstantVar =
      !IsSharedVar &&
      (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
  if (IsDeviceOrConstantVar || IsSharedVar) {
    if (HasAllowedCUDADeviceStaticInitializer(
            *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
      return;
    Diag(VD->getLocation(),
         IsSharedVar ?
             diag::err_shared_var_init : diag::err_dynamic_var_init)
        << Init->getSourceRange();
    VD->setInvalidDecl();
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyTarget(InitFn);
      if (InitFnTarget != CUDAFunctionTarget::Host &&
          InitFnTarget != CUDAFunctionTarget::HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << llvm::to_underlying(InitFnTarget) << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}

void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
    const FunctionDecl *Callee) {
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  if (!isImplicitHostDeviceFunction(Callee))
    return;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);

  // Record whether an implicit host device function is used on device side.
  if (CallerTarget != CUDAFunctionTarget::Device &&
      CallerTarget != CUDAFunctionTarget::Global &&
      (CallerTarget != CUDAFunctionTarget::HostDevice ||
       (isImplicitHostDeviceFunction(Caller) &&
        !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
    return;

  getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
// * it is a variadic function (device-side variadic functions are not
//   allowed), or
// * a __device__ function with this signature was already declared, in which
//   case we output an error, unless the __device__ decl is in a system header,
//   in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void SemaCUDA::maybeAddHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  // If a template function has no host/device/global attributes,
  // make it an implicit host device function.
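  // For illustration, a minimal sketch (hypothetical template): when
  // OffloadImplicitHostDeviceTemplates is enabled,
  //   template <typename T> T add(T A, T B) { return A + B; }
  // is treated as if it had been declared __host__ __device__.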
  if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
      !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
      !NewD->hasAttr<CUDAGlobalAttr>() &&
      (NewD->getDescribedFunctionTemplate() ||
       NewD->isFunctionTemplateSpecialization())) {
    NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !SemaRef.IsOverload(NewD, OldD,
                               /* UseMemberUsingDeclRules = */ false,
                               /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!SemaRef.getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
}

// TODO: `__constant__` memory may be a limited resource for certain targets.
// A safeguard may be needed at the end of the compilation pipeline if
// `__constant__` memory usage goes beyond the limit.
void SemaCUDA::MaybeAddConstantAttr(VarDecl *VD) {
  // Do not promote dependent variables since the ctor/dtor/initializer are
  // not determined. Do it after instantiation.
  if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
      !VD->hasAttr<CUDASharedAttr>() &&
      (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
      !IsDependentVar(VD) &&
      ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
       HasAllowedCUDADeviceStaticInitializer(*this, VD,
                                             CICK_DeviceOrConstant))) {
    VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
  }
}

SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc,
                                                           unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Global:
    case CUDAFunctionTarget::Device:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device.
      // Defer any errors in device mode until the function is known-emitted.
      if (!getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}

Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc,
                                                     unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Host:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}

bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  const auto &ExprEvalCtx = SemaRef.currentEvaluationContext();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted = SemaRef.getEmissionStatus(Caller) ==
                            Sema::FunctionEmissionStatus::Emitted;
  SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
                                          CallerKnownEmitted] {
    switch (IdentifyPreference(Caller, Callee)) {
    case CFP_Never:
    case CFP_WrongSide:
      assert(Caller && "Never/wrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted
                 ?
                 SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();

  if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
    // For -fgpu-rdc, keep track of external kernels used by host functions.
    if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode &&
        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
        (!Caller || (!Caller->getDescribedFunctionTemplate() &&
                     getASTContext().GetGVALinkageForFunction(Caller) ==
                         GVA_StrongExternal)))
      getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
    return true;
  }

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller,
                        SemaRef)
      << llvm::to_underlying(IdentifyTarget(Callee)) << /*function*/ 0 << Callee
      << llvm::to_underlying(IdentifyTarget(Caller));
  if (!Callee->getBuiltinID())
    SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
                          diag::note_previous_decl, Caller, SemaRef)
        << Callee;
  return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
         DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
}

// Check for a wrong-sided reference capture in a lambda for CUDA/HIP.
// A lambda may capture a stack variable by reference when it is defined and
// then use that capture when the lambda is called. When the capture and the
// use happen on different sides, the capture is invalid and should be
// diagnosed.
void SemaCUDA::CheckLambdaCapture(CXXMethodDecl *Callee,
                                  const sema::Capture &Capture) {
  // In host compilation we only need to check lambda functions emitted on the
  // host side. In such lambda functions, a reference capture is invalid only
  // if the lambda structure is populated by a device function or kernel and
  // then passed to and called by a host function. However, that is impossible:
  // a device function or kernel can only call a device function, and a kernel
  // cannot pass a lambda back to a host function, since we cannot define a
  // kernel argument type that can hold the lambda before the lambda itself is
  // defined.
  if (!getLangOpts().CUDAIsDevice)
    return;

  // File-scope lambdas can only do init captures for global variables, which
  // results in passing by value for these global variables.
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  // In device compilation, we only need to check lambda functions which are
  // emitted on the device side. For such lambdas, a reference capture is
  // invalid only if the lambda structure is populated by a host function and
  // then passed to and called in a device function or kernel.
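  // For illustration, a minimal sketch (hypothetical host code):
  //   void HostFn(int N) {
  //     auto L = [&N] __device__ () { return N; }; // captures a host stack slot
  //     Kernel<<<1, 1>>>(L);   // dereferencing &N on the device is invalid
  //   }
  // The by-reference capture of N is what gets diagnosed below.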
  bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
  bool CallerIsHost =
      !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
  bool ShouldCheck = CalleeIsDevice && CallerIsHost;
  if (!ShouldCheck || !Capture.isReferenceCapture())
    return;
  auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
  if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target, Callee, SemaRef)
        << Capture.getVariable();
  } else if (Capture.isThisCapture()) {
    // Capture of the this pointer is allowed, since it may point to managed
    // memory that is accessible on both the device and the host sides. It only
    // results in an invalid memory access if the this pointer points to memory
    // that is not accessible on the device side.
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::warn_maybe_capture_bad_target_this_ptr, Callee,
                          SemaRef);
  }
}

void SemaCUDA::SetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  Method->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  Method->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
}

void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyTarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyTarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature. We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device,
    // so they should have the same implementation on both sides.
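    // For illustration, a minimal sketch (hypothetical declarations): given a
    // prior
    //   __host__ __device__ void G();
    // a later, same-signature
    //   __device__ void G();
    // is rejected below, because the HD version already exists on the device
    // side.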
    if (NewTarget != OldTarget &&
        !SemaRef.IsOverload(NewFD, OldFD,
                            /* UseMemberUsingDeclRules = */ false,
                            /* ConsiderCudaAttrs = */ false)) {
      if ((NewTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(NewFD) &&
             OldTarget == CUDAFunctionTarget::Device)) ||
          (OldTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(OldFD) &&
             NewTarget == CUDAFunctionTarget::Device)) ||
          (NewTarget == CUDAFunctionTarget::Global) ||
          (OldTarget == CUDAFunctionTarget::Global)) {
        Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
            << llvm::to_underlying(NewTarget) << NewFD->getDeclName()
            << llvm::to_underlying(OldTarget) << OldFD;
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
        NewFD->setInvalidDecl();
        break;
      }
      if ((NewTarget == CUDAFunctionTarget::Host &&
           OldTarget == CUDAFunctionTarget::Device) ||
          (NewTarget == CUDAFunctionTarget::Device &&
           OldTarget == CUDAFunctionTarget::Host)) {
        Diag(NewFD->getLocation(), diag::warn_offload_incompatible_redeclare)
            << llvm::to_underlying(NewTarget) << llvm::to_underlying(OldTarget);
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
      }
    }
  }
}

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void SemaCUDA::inheritTargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(SemaRef, FD, TemplateFD);
}

std::string SemaCUDA::getConfigureFuncName() const {
  if (getLangOpts().OffloadViaLLVM)
    return "__llvmPushCallConfiguration";

  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(getASTContext().getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call.
  return "cudaConfigureCall";
}
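// For illustration, a minimal sketch (hypothetical user code): a launch such as
//   Kernel<<<Blocks, Threads>>>(Args...);
// has its execution configuration lowered by ActOnExecConfigExpr above into a
// call to the function named by getConfigureFuncName(), e.g. a call along the
// lines of __cudaPushCallConfiguration(Blocks, Threads) for the new CUDA
// launch sequence.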