//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
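        // What gets emitted is, roughly, a dispatch over the part id
        // (a sketch; block names are the ones created below):
        //   switch (*part_id) {
        //   default: goto .untied.done.;  // then branch to the return block
        //   case 0:  goto .untied.jmp.0;  // first task part
        //   case 1:  goto .untied.jmp.1;  // added later by emitUntiedSwitch
        //   ...
        //   }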
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application, so it is provided by the client, because only the
/// client has the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
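    // The new region info chains to the current CapturedStmtInfo; the
    // destructor restores it. With NoInheritance set, the lambda/block
    // capture state is stashed here as well and restored on exit.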
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerator values are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
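/// An example psource value, following the format emitted by
/// getIdentStringFromSourceLocation below (file and function names are
/// illustrative):
///   ";/path/to/file.c;foo;4;1;;"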
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;
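  // What follows emits, in essence, this loop over the flattened array
  // (a sketch; block names match the ones created below):
  //   if (dest.begin == dest.end) goto omp.arrayinit.done;
  //   omp.arrayinit.body:
  //     <initialize *dest.cur (from *src.cur for UDR initializers)>;
  //     ++dest.cur; (++src.cur;)
  //     if (dest.cur != dest.end) goto omp.arrayinit.body;
  //   omp.arrayinit.done: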
  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
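/// Rebuilds the chain of indirections described by \p BaseTy so that \p Addr
/// can be reached through the original base expression: one memory temporary
/// is created per pointer/reference level, each storing the address of the
/// next, with \p Addr stored at the innermost level. If \p BaseTy has no such
/// levels, \p Addr simply replaces the pointer of \p OriginalBaseAddress.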
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF,
                                               unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
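  // For reference, a declaration like (type and combiner are illustrative):
  //   #pragma omp declare reduction(myadd : T : omp_out += omp_in) \
  //       initializer(omp_priv = T())
  // yields a combiner that evaluates 'omp_out += omp_in' with omp_in/omp_out
  // bound to the two pointer parameters built above; the initializer is
  // emitted the same way with omp_orig/omp_priv.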
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)), which is then fed to
    // EmitBranchThroughCleanup. Furthermore, there will no longer be a need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (Elem.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
    Elem.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
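/// Returns the global thread id for the current function, either by reusing
/// the thread id parameter of an enclosing outlined region or by emitting
/// (and caching) a call to __kmpc_global_thread_num.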
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used, we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
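  // The emitted call is, in effect (a sketch of the runtime entry point used
  // below):
  //   %tid = call i32 @__kmpc_global_thread_num(ptr <ident>)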
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
  }
}

static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return
        llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  }
}

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(),
                                            PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}

ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
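  // The emitted sequence is therefore roughly (a sketch, not verbatim IR):
  //   call i32 @__kmpc_global_thread_num(ptr @loc)
  //   call void @__kmpc_threadprivate_register(ptr @loc, ptr @var,
  //                                            ptr @ctor, ptr null, ptr @dtor)
  // where @ctor and @dtor may be null constants when the variable needs no
  // construction or destruction.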
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register the constructor/destructor for the variable.
  llvm::Value *Args[] = {
      OMPLoc,
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits a destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(),
                            CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    // Copying constructor for the threadprivate variable.
    // Must be NULL: this parameter is reserved by the runtime, which
    // currently requires it to always be NULL (otherwise it fires an
    // assertion).
    CopyCtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    if (Ctor == nullptr)
      Ctor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    if (Dtor == nullptr)
      Dtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
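  // For example (an illustrative sketch, not verbatim output): for
  //   #pragma omp declare target indirect
  //   void foo();
  // the device-side compile emits a protected global holding the function's
  // address, roughly
  //   @"<offload entry name>" = protected constant ptr @foo
  // which the runtime uses to map host function pointers to their device
  // counterparts.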
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          CGF.Builder.getPtrTy(0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // The thread ID for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
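    // The serialized path thus emits, roughly (a sketch, not verbatim IR):
    //   call void @__kmpc_serialized_parallel(ptr @loc, i32 %gtid)
    //   call void @outlined(ptr %gtid.addr, ptr %zero.addr, <captured vars>)
    //   call void @__kmpc_end_serialized_parallel(ptr @loc, i32 %gtid)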
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in a regular serial code region, get the thread ID by calling
// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in
// a temporary and return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID, CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
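/// For a conditional construct such as 'master', the resulting shape is
/// roughly (an illustrative sketch):
///   %res = call i32 @__kmpc_master(ptr @loc, i32 %gtid)
///   if (%res != 0) { <body>; call void @__kmpc_end_master(ptr @loc, %gtid) }
/// Enter() emits the enter call and the branch, Exit() emits the exit call,
/// and Done() closes the conditional region.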
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ?
               OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if (__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if (__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if (__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies private values from the single region to
    // all other threads in the corresponding parallel region.
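    // For example (illustrative): for '#pragma omp single copyprivate(x)' the
    // generated copy function receives two void** arrays (destination and
    // source pointers) and performs '*(T *)Dst[0] = *(T *)Src[0];' for x; the
    // runtime then broadcasts the value via, roughly,
    //   __kmpc_copyprivate(loc, gtid, buf_size, &list, @copy_func, did_it)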
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
                                          EmitChecks));
    CGF.Builder.restoreIP(AfterIP);
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build a call to __kmpc_cancel_barrier(loc, thread_id) or
  // __kmpc_barrier(loc, thread_id).
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*EmitLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ?
                     OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect
  // is as if the monotonic modifier is specified.
  // Otherwise, unless the monotonic modifier is specified, the effect is as
  // if the nonmonotonic modifier is specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //     ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //     kmp_int[32|64] lower, kmp_int[32|64] upper,
  //     kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}

void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //     ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //     kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //     kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //     kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule ==
                OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.emitRawPointer(CGF),                    // &isLastIter
      Values.LB.emitRawPointer(CGF),                    // &LB
      Values.UB.emitRawPointer(CGF),                    // &UB
      Values.ST.emitRawPointer(CGF),                    // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind)
                             ? OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  assert((DKind == OMPD_distribute || DKind == OMPD_for ||
          DKind == OMPD_sections) &&
         "Expected distribute, for, or sections directive kind");
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind) ||
                                 (DKind == OMPD_target_teams_loop)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                 ?
                                   OMP_IDENT_WORK_LOOP
                                 : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //     ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //     kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //     kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      IL.emitRawPointer(CGF), // &isLastIter
      LB.emitRawPointer(CGF), // &Lower
      UB.emitRawPointer(CGF), // &Upper
      ST.emitRawPointer(CGF)  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
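    // In LLVM IR terms this is roughly a pointer to an 'i32 (i32, ptr)'
    // function (a sketch; with opaque pointers the stored type is simply
    // 'ptr').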
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}

static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //   /* private vars */
    // };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
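      // (For example, a private `int &R` occupies an `int *` slot in the
      // record; allocatable privates likewise get an extra indirection.)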
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //   void *              shareds;
  //   kmp_routine_entry_t routine;
  //   kmp_int32           part_id;
  //   kmp_cmplrdata_t     data1;
  //   kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //   kmp_uint64          lb;
  //   kmp_uint64          ub;
  //   kmp_int64           st;
  //   kmp_int32           liter;
  //   void *              reductions;
  // };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //   kmp_task_t       task_data;
  //   .kmp_privates_t. privates;
  // };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
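/// The runtime invokes this entry point for each task instance; it unpacks
/// the kmp_task_t descriptor and forwards the pieces to the outlined task
/// function.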
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.reductions, tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3035 } 3036 3037 llvm::Value *CommonArgs[] = { 3038 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, 3039 CGF.Builder 3040 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(), 3041 CGF.VoidPtrTy, CGF.Int8Ty) 3042 .emitRawPointer(CGF)}; 3043 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3044 std::end(CommonArgs)); 3045 if (isOpenMPTaskLoopDirective(Kind)) { 3046 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3047 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3048 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3049 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3050 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3051 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3052 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3053 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3054 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3055 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3056 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3057 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3058 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3059 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3060 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3061 CallArgs.push_back(LBParam); 3062 CallArgs.push_back(UBParam); 3063 CallArgs.push_back(StParam); 3064 CallArgs.push_back(LIParam); 3065 CallArgs.push_back(RParam); 3066 } 3067 CallArgs.push_back(SharedsParam); 3068 3069 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3070 CallArgs); 3071 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3072 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3073 CGF.FinishFunction(); 3074 return TaskEntry; 3075 } 3076 3077 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3078 SourceLocation Loc, 3079 QualType KmpInt32Ty, 3080 QualType KmpTaskTWithPrivatesPtrQTy, 3081 QualType KmpTaskTWithPrivatesQTy) { 3082 ASTContext &C = CGM.getContext(); 3083 FunctionArgList Args; 3084 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3085 ImplicitParamKind::Other); 3086 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3087 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3088 ImplicitParamKind::Other); 3089 Args.push_back(&GtidArg); 3090 Args.push_back(&TaskTypeArg); 3091 const auto &DestructorFnInfo = 3092 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3093 llvm::FunctionType *DestructorFnTy = 3094 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3095 std::string Name = 3096 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3097 auto *DestructorFn = 3098 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3099 Name, &CGM.getModule()); 3100 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3101 DestructorFnInfo); 3102 DestructorFn->setDoesNotRecurse(); 3103 CodeGenFunction CGF(CGM); 3104 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3105 Args, Loc, Loc); 3106 3107 LValue Base = CGF.EmitLoadOfPointerLValue( 3108 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3109 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3110 const auto *KmpTaskTWithPrivatesQTyRD = 3111 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3112 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3113 Base = CGF.EmitLValueForField(Base, 
*FI); 3114 for (const auto *Field : 3115 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3116 if (QualType::DestructionKind DtorKind = 3117 Field->getType().isDestructedType()) { 3118 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3119 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 3120 } 3121 } 3122 CGF.FinishFunction(); 3123 return DestructorFn; 3124 } 3125 3126 /// Emit a privates mapping function for correct handling of private and 3127 /// firstprivate variables. 3128 /// \code 3129 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3130 /// **noalias priv1,..., <tyn> **noalias privn) { 3131 /// *priv1 = &.privates.priv1; 3132 /// ...; 3133 /// *privn = &.privates.privn; 3134 /// } 3135 /// \endcode 3136 static llvm::Value * 3137 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3138 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3139 ArrayRef<PrivateDataTy> Privates) { 3140 ASTContext &C = CGM.getContext(); 3141 FunctionArgList Args; 3142 ImplicitParamDecl TaskPrivatesArg( 3143 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3144 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3145 ImplicitParamKind::Other); 3146 Args.push_back(&TaskPrivatesArg); 3147 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3148 unsigned Counter = 1; 3149 for (const Expr *E : Data.PrivateVars) { 3150 Args.push_back(ImplicitParamDecl::Create( 3151 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3152 C.getPointerType(C.getPointerType(E->getType())) 3153 .withConst() 3154 .withRestrict(), 3155 ImplicitParamKind::Other)); 3156 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3157 PrivateVarsPos[VD] = Counter; 3158 ++Counter; 3159 } 3160 for (const Expr *E : Data.FirstprivateVars) { 3161 Args.push_back(ImplicitParamDecl::Create( 3162 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3163 C.getPointerType(C.getPointerType(E->getType())) 3164 .withConst() 3165 .withRestrict(), 3166 ImplicitParamKind::Other)); 3167 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3168 PrivateVarsPos[VD] = Counter; 3169 ++Counter; 3170 } 3171 for (const Expr *E : Data.LastprivateVars) { 3172 Args.push_back(ImplicitParamDecl::Create( 3173 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3174 C.getPointerType(C.getPointerType(E->getType())) 3175 .withConst() 3176 .withRestrict(), 3177 ImplicitParamKind::Other)); 3178 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3179 PrivateVarsPos[VD] = Counter; 3180 ++Counter; 3181 } 3182 for (const VarDecl *VD : Data.PrivateLocals) { 3183 QualType Ty = VD->getType().getNonReferenceType(); 3184 if (VD->getType()->isLValueReferenceType()) 3185 Ty = C.getPointerType(Ty); 3186 if (isAllocatableDecl(VD)) 3187 Ty = C.getPointerType(Ty); 3188 Args.push_back(ImplicitParamDecl::Create( 3189 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3190 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3191 ImplicitParamKind::Other)); 3192 PrivateVarsPos[VD] = Counter; 3193 ++Counter; 3194 } 3195 const auto &TaskPrivatesMapFnInfo = 3196 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3197 llvm::FunctionType *TaskPrivatesMapTy = 3198 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3199 std::string Name = 3200 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3201 auto *TaskPrivatesMap = llvm::Function::Create( 3202 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3203 &CGM.getModule()); 3204 
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress().withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops, i.e. if any
/// private copy requires non-trivial initialization.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
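/// For every iterator in the expression the constructor/destructor pair
/// emits, roughly:
/// \code
/// Counter = 0;
/// cont:
/// if (Counter < N) goto body; else goto exit;
/// body:
/// Iter = Begin + Counter * Step;
/// <nested iterators / client code>
/// Counter = Counter + 1;
/// goto cont;
/// exit:
/// \endcode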
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of-iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
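  // (Sorted in descending alignment order below, which keeps padding in the
  // generated .kmp_privates.t record to a minimum.)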
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit the device ID if any, otherwise use the default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Field ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
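      // The element count is only known at run time, so the array type is a
      // VLA whose bound is the OpaqueValueExpr above, materialized via an
      // implicit local variable.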
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamKind::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill the array with the elements that have no iterator modifier.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlist()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
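    // Elements coming from iterator-modified clauses are appended afterwards
    // at the running index kept in PosLVal.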
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlist()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
    // kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now until the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
                                                  KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
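/// The record mirrors the runtime's dependence descriptor, schematically:
/// \code
/// struct kmp_depend_info {
///   intptr_t base_addr;
///   size_t len;
///   unsigned char flags; // kind, translated by translateDependencyKind()
/// };
/// \endcode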
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress().withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Base.getAddress(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[-1].base_addr; the element in front of the list stores the
  // number of dependencies.
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
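    // 'omp_all_memory' is encoded as an entry with a null base address and
    // zero length.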
4074 if (E) { 4075 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4076 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy); 4077 } else { 4078 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4079 Size = llvm::ConstantInt::get(CGF.SizeTy, 0); 4080 } 4081 LValue Base; 4082 if (unsigned *P = dyn_cast<unsigned *>(Pos)) { 4083 Base = CGF.MakeAddrLValue( 4084 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4085 } else { 4086 assert(E && "Expected a non-null expression"); 4087 LValue &PosLVal = *cast<LValue *>(Pos); 4088 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4089 Base = CGF.MakeAddrLValue( 4090 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy); 4091 } 4092 // deps[i].base_addr = &<Dependencies[i].second>; 4093 LValue BaseAddrLVal = CGF.EmitLValueForField( 4094 Base, 4095 *std::next(KmpDependInfoRD->field_begin(), 4096 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); 4097 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal); 4098 // deps[i].len = sizeof(<Dependencies[i].second>); 4099 LValue LenLVal = CGF.EmitLValueForField( 4100 Base, *std::next(KmpDependInfoRD->field_begin(), 4101 static_cast<unsigned int>(RTLDependInfoFields::Len))); 4102 CGF.EmitStoreOfScalar(Size, LenLVal); 4103 // deps[i].flags = <Dependencies[i].first>; 4104 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4105 LValue FlagsLVal = CGF.EmitLValueForField( 4106 Base, 4107 *std::next(KmpDependInfoRD->field_begin(), 4108 static_cast<unsigned int>(RTLDependInfoFields::Flags))); 4109 CGF.EmitStoreOfScalar( 4110 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)), 4111 FlagsLVal); 4112 if (unsigned *P = dyn_cast<unsigned *>(Pos)) { 4113 ++(*P); 4114 } else { 4115 LValue &PosLVal = *cast<LValue *>(Pos); 4116 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4117 Idx = CGF.Builder.CreateNUWAdd(Idx, 4118 llvm::ConstantInt::get(Idx->getType(), 1)); 4119 CGF.EmitStoreOfScalar(Idx, PosLVal); 4120 } 4121 } 4122 } 4123 4124 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes( 4125 CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4126 const OMPTaskDataTy::DependData &Data) { 4127 assert(Data.DepKind == OMPC_DEPEND_depobj && 4128 "Expected depobj dependency kind."); 4129 SmallVector<llvm::Value *, 4> Sizes; 4130 SmallVector<LValue, 4> SizeLVals; 4131 ASTContext &C = CGF.getContext(); 4132 { 4133 OMPIteratorGeneratorScope IteratorScope( 4134 CGF, cast_or_null<OMPIteratorExpr>( 4135 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4136 : nullptr)); 4137 for (const Expr *E : Data.DepExprs) { 4138 llvm::Value *NumDeps; 4139 LValue Base; 4140 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4141 std::tie(NumDeps, Base) = 4142 getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); 4143 LValue NumLVal = CGF.MakeAddrLValue( 4144 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4145 C.getUIntPtrType()); 4146 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), 4147 NumLVal.getAddress()); 4148 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4149 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4150 CGF.EmitStoreOfScalar(Add, NumLVal); 4151 SizeLVals.push_back(NumLVal); 4152 } 4153 } 4154 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4155 llvm::Value *Size = 4156 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4157 Sizes.push_back(Size); 4158 } 4159 return Sizes; 4160 } 4161 4162 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF, 4163 QualType &KmpDependInfoTy, 4164 LValue PosLVal, 4165 const OMPTaskDataTy::DependData &Data, 4166 Address DependenciesArray) { 4167 assert(Data.DepKind == OMPC_DEPEND_depobj && 4168 "Expected depobj dependency kind."); 4169 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4170 { 4171 OMPIteratorGeneratorScope IteratorScope( 4172 CGF, cast_or_null<OMPIteratorExpr>( 4173 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4174 : nullptr)); 4175 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4176 const Expr *E = Data.DepExprs[I]; 4177 llvm::Value *NumDeps; 4178 LValue Base; 4179 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4180 std::tie(NumDeps, Base) = 4181 getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); 4182 4183 // memcopy dependency data. 4184 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4185 ElSize, 4186 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4187 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4188 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos); 4189 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size); 4190 4191 // Increase pos. 4192 // pos += size; 4193 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4194 CGF.EmitStoreOfScalar(Add, PosLVal); 4195 } 4196 } 4197 } 4198 4199 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4200 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4201 SourceLocation Loc) { 4202 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4203 return D.DepExprs.empty(); 4204 })) 4205 return std::make_pair(nullptr, Address::invalid()); 4206 // Process list of dependencies. 4207 ASTContext &C = CGM.getContext(); 4208 Address DependenciesArray = Address::invalid(); 4209 llvm::Value *NumOfElements = nullptr; 4210 unsigned NumDependencies = std::accumulate( 4211 Dependencies.begin(), Dependencies.end(), 0, 4212 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4213 return D.DepKind == OMPC_DEPEND_depobj 4214 ? V 4215 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4216 }); 4217 QualType FlagsTy; 4218 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4219 bool HasDepobjDeps = false; 4220 bool HasRegularWithIterators = false; 4221 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4222 llvm::Value *NumOfRegularWithIterators = 4223 llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4224 // Calculate number of depobj dependencies and regular deps with the 4225 // iterators. 4226 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4227 if (D.DepKind == OMPC_DEPEND_depobj) { 4228 SmallVector<llvm::Value *, 4> Sizes = 4229 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4230 for (llvm::Value *Size : Sizes) { 4231 NumOfDepobjElements = 4232 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4233 } 4234 HasDepobjDeps = true; 4235 continue; 4236 } 4237 // Include number of iterations, if any. 4238 4239 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4240 llvm::Value *ClauseIteratorSpace = 4241 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4242 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4243 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4244 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4245 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace); 4246 } 4247 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( 4248 ClauseIteratorSpace, 4249 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); 4250 NumOfRegularWithIterators = 4251 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); 4252 HasRegularWithIterators = true; 4253 continue; 4254 } 4255 } 4256 4257 QualType KmpDependInfoArrayTy; 4258 if (HasDepobjDeps || HasRegularWithIterators) { 4259 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4260 /*isSigned=*/false); 4261 if (HasDepobjDeps) { 4262 NumOfElements = 4263 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4264 } 4265 if (HasRegularWithIterators) { 4266 NumOfElements = 4267 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4268 } 4269 auto *OVE = new (C) OpaqueValueExpr( 4270 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4271 VK_PRValue); 4272 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, 4273 RValue::get(NumOfElements)); 4274 KmpDependInfoArrayTy = 4275 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal, 4276 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4277 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4278 // Properly emit variable-sized array. 
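// In effect (an illustrative sketch, not emitted verbatim; 'n' stands for
// the dynamically computed NumOfElements):
//   kmp_depend_info deps[n]; // VLA-style stack allocation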
4279 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4280 ImplicitParamKind::Other); 4281 CGF.EmitVarDecl(*PD); 4282 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4283 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4284 /*isSigned=*/false); 4285 } else { 4286 KmpDependInfoArrayTy = C.getConstantArrayType( 4287 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4288 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); 4289 DependenciesArray = 4290 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4291 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4292 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4293 /*isSigned=*/false); 4294 } 4295 unsigned Pos = 0; 4296 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4297 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4298 Dependencies[I].IteratorExpr) 4299 continue; 4300 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4301 DependenciesArray); 4302 } 4303 // Copy regular dependencies with iterators. 4304 LValue PosLVal = CGF.MakeAddrLValue( 4305 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4306 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4307 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4308 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4309 !Dependencies[I].IteratorExpr) 4310 continue; 4311 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4312 DependenciesArray); 4313 } 4314 // Copy final depobj arrays without iterators. 4315 if (HasDepobjDeps) { 4316 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4317 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4318 continue; 4319 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4320 DependenciesArray); 4321 } 4322 } 4323 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4324 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty); 4325 return std::make_pair(NumOfElements, DependenciesArray); 4326 } 4327 4328 Address CGOpenMPRuntime::emitDepobjDependClause( 4329 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4330 SourceLocation Loc) { 4331 if (Dependencies.DepExprs.empty()) 4332 return Address::invalid(); 4333 // Process list of dependencies. 4334 ASTContext &C = CGM.getContext(); 4335 Address DependenciesArray = Address::invalid(); 4336 unsigned NumDependencies = Dependencies.DepExprs.size(); 4337 QualType FlagsTy; 4338 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4339 RecordDecl *KmpDependInfoRD = 4340 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4341 4342 llvm::Value *Size; 4343 // Define type kmp_depend_info[<Dependencies.size()>]; 4344 // For depobj reserve one extra element to store the number of elements. 4345 // It is required to handle depobj(x) update(in) construct. 
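// (update(in) has to rewrite the flags of every stored element, so the
// element count must be recoverable from the depobj pointer itself.)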
4346 // kmp_depend_info[<Dependencies.size()>] deps; 4347 llvm::Value *NumDepsVal; 4348 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4349 if (const auto *IE = 4350 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4351 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4352 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4353 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4354 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4355 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4356 } 4357 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4358 NumDepsVal); 4359 CharUnits SizeInBytes = 4360 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4361 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4362 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4363 NumDepsVal = 4364 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4365 } else { 4366 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4367 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4368 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); 4369 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4370 Size = CGM.getSize(Sz.alignTo(Align)); 4371 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4372 } 4373 // Need to allocate on the dynamic memory. 4374 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4375 // Use default allocator. 4376 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4377 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4378 4379 llvm::Value *Addr = 4380 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4381 CGM.getModule(), OMPRTL___kmpc_alloc), 4382 Args, ".dep.arr.addr"); 4383 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy); 4384 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4385 Addr, CGF.Builder.getPtrTy(0)); 4386 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align); 4387 // Write number of elements in the first element of array for depobj. 
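// Resulting layout (illustrative):
//   deps[0].base_addr = <number of dependency records> // bookkeeping slot
//   deps[1..n]        = the actual dependency records
// The pointer returned below is advanced past deps[0], so consumers only
// ever see the records themselves.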
4388 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4389 // deps[i].base_addr = NumDependencies;
4390 LValue BaseAddrLVal = CGF.EmitLValueForField(
4391 Base,
4392 *std::next(KmpDependInfoRD->field_begin(),
4393 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4394 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4395 llvm::PointerUnion<unsigned *, LValue *> Pos;
4396 unsigned Idx = 1;
4397 LValue PosLVal;
4398 if (Dependencies.IteratorExpr) {
4399 PosLVal = CGF.MakeAddrLValue(
4400 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4401 C.getSizeType());
4402 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4403 /*IsInit=*/true);
4404 Pos = &PosLVal;
4405 } else {
4406 Pos = &Idx;
4407 }
4408 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4409 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4410 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4411 CGF.Int8Ty);
4412 return DependenciesArray;
4413 }
4414
4415 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4416 SourceLocation Loc) {
4417 ASTContext &C = CGM.getContext();
4418 QualType FlagsTy;
4419 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4420 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4421 C.VoidPtrTy.castAs<PointerType>());
4422 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4423 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4424 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4425 CGF.ConvertTypeForMem(KmpDependInfoTy));
4426 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4427 Addr.getElementType(), Addr.emitRawPointer(CGF),
4428 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4429 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4430 CGF.VoidPtrTy);
4431 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4432 // Use default allocator.
4433 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4434 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4435
4436 // __kmpc_free(gtid, addr, nullptr);
4437 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4438 CGM.getModule(), OMPRTL___kmpc_free),
4439 Args);
4440 }
4441
4442 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4443 OpenMPDependClauseKind NewDepKind,
4444 SourceLocation Loc) {
4445 ASTContext &C = CGM.getContext();
4446 QualType FlagsTy;
4447 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4448 RecordDecl *KmpDependInfoRD =
4449 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4450 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4451 llvm::Value *NumDeps;
4452 LValue Base;
4453 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4454
4455 Address Begin = Base.getAddress();
4456 // Cast from pointer to array type to pointer to single element.
4457 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4458 Begin.emitRawPointer(CGF), NumDeps);
4459 // The basic structure here is a while-do loop.
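// Sketch of the emitted control flow (block names match the
// createBasicBlock calls below):
//   entry    -> omp.body
//   omp.body -> rewrite the flags of the current element, advance the
//               element pointer, then branch to omp.body or omp.done
//   omp.done -> continue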
4460 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 4461 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 4462 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 4463 CGF.EmitBlock(BodyBB); 4464 llvm::PHINode *ElementPHI = 4465 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 4466 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB); 4467 Begin = Begin.withPointer(ElementPHI, KnownNonNull); 4468 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 4469 Base.getTBAAInfo()); 4470 // deps[i].flags = NewDepKind; 4471 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 4472 LValue FlagsLVal = CGF.EmitLValueForField( 4473 Base, *std::next(KmpDependInfoRD->field_begin(), 4474 static_cast<unsigned int>(RTLDependInfoFields::Flags))); 4475 CGF.EmitStoreOfScalar( 4476 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)), 4477 FlagsLVal); 4478 4479 // Shift the address forward by one element. 4480 llvm::Value *ElementNext = 4481 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext") 4482 .emitRawPointer(CGF); 4483 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock()); 4484 llvm::Value *IsEmpty = 4485 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty"); 4486 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 4487 // Done. 4488 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 4489 } 4490 4491 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 4492 const OMPExecutableDirective &D, 4493 llvm::Function *TaskFunction, 4494 QualType SharedsTy, Address Shareds, 4495 const Expr *IfCond, 4496 const OMPTaskDataTy &Data) { 4497 if (!CGF.HaveInsertPoint()) 4498 return; 4499 4500 TaskResultTy Result = 4501 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4502 llvm::Value *NewTask = Result.NewTask; 4503 llvm::Function *TaskEntry = Result.TaskEntry; 4504 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 4505 LValue TDBase = Result.TDBase; 4506 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 4507 // Process list of dependences. 4508 Address DependenciesArray = Address::invalid(); 4509 llvm::Value *NumOfElements; 4510 std::tie(NumOfElements, DependenciesArray) = 4511 emitDependClause(CGF, Data.Dependences, Loc); 4512 4513 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 4514 // libcall. 
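// Overall emission scheme for the task call (simplified sketch):
//   if (<IfCond>) {                        // ThenCodeGen
//     __kmpc_omp_task{_with_deps}(...);    // enqueue a deferred task
//   } else {                               // ElseCodeGen
//     __kmpc_omp_taskwait_deps_51(...);    // wait on dependences, if any
//     __kmpc_omp_task_begin_if0(...);
//     proxy_task_entry(gtid, new_task);    // run the task body inline
//     __kmpc_omp_task_complete_if0(...);
//   }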
4515 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4516 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4517 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
4518 // dependence list is not empty.
4519 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4520 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4521 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4522 llvm::Value *DepTaskArgs[7];
4523 if (!Data.Dependences.empty()) {
4524 DepTaskArgs[0] = UpLoc;
4525 DepTaskArgs[1] = ThreadID;
4526 DepTaskArgs[2] = NewTask;
4527 DepTaskArgs[3] = NumOfElements;
4528 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4529 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4530 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4531 }
4532 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4533 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4534 if (!Data.Tied) {
4535 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4536 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4537 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4538 }
4539 if (!Data.Dependences.empty()) {
4540 CGF.EmitRuntimeCall(
4541 OMPBuilder.getOrCreateRuntimeFunction(
4542 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4543 DepTaskArgs);
4544 } else {
4545 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4546 CGM.getModule(), OMPRTL___kmpc_omp_task),
4547 TaskArgs);
4548 }
4549 // Check if the parent region is untied and build the return for an untied task.
4550 if (auto *Region =
4551 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4552 Region->emitUntiedSwitch(CGF);
4553 };
4554
4555 llvm::Value *DepWaitTaskArgs[7];
4556 if (!Data.Dependences.empty()) {
4557 DepWaitTaskArgs[0] = UpLoc;
4558 DepWaitTaskArgs[1] = ThreadID;
4559 DepWaitTaskArgs[2] = NumOfElements;
4560 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4561 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4562 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4563 DepWaitTaskArgs[6] =
4564 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4565 }
4566 auto &M = CGM.getModule();
4567 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4568 TaskEntry, &Data, &DepWaitTaskArgs,
4569 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4570 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4571 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
4572 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
4573 // kmp_depend_info_t *noalias_dep_list, kmp_int32 has_no_wait); if
4574 // dependence info is specified.
4575 if (!Data.Dependences.empty()) 4576 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4577 M, OMPRTL___kmpc_omp_taskwait_deps_51), 4578 DepWaitTaskArgs); 4579 // Call proxy_task_entry(gtid, new_task); 4580 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 4581 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 4582 Action.Enter(CGF); 4583 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 4584 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 4585 OutlinedFnArgs); 4586 }; 4587 4588 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 4589 // kmp_task_t *new_task); 4590 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 4591 // kmp_task_t *new_task); 4592 RegionCodeGenTy RCG(CodeGen); 4593 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 4594 M, OMPRTL___kmpc_omp_task_begin_if0), 4595 TaskArgs, 4596 OMPBuilder.getOrCreateRuntimeFunction( 4597 M, OMPRTL___kmpc_omp_task_complete_if0), 4598 TaskArgs); 4599 RCG.setAction(Action); 4600 RCG(CGF); 4601 }; 4602 4603 if (IfCond) { 4604 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 4605 } else { 4606 RegionCodeGenTy ThenRCG(ThenCodeGen); 4607 ThenRCG(CGF); 4608 } 4609 } 4610 4611 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 4612 const OMPLoopDirective &D, 4613 llvm::Function *TaskFunction, 4614 QualType SharedsTy, Address Shareds, 4615 const Expr *IfCond, 4616 const OMPTaskDataTy &Data) { 4617 if (!CGF.HaveInsertPoint()) 4618 return; 4619 TaskResultTy Result = 4620 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 4621 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 4622 // libcall. 4623 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 4624 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 4625 // sched, kmp_uint64 grainsize, void *task_dup); 4626 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4627 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 4628 llvm::Value *IfVal; 4629 if (IfCond) { 4630 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 4631 /*isSigned=*/true); 4632 } else { 4633 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 4634 } 4635 4636 LValue LBLVal = CGF.EmitLValueForField( 4637 Result.TDBase, 4638 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 4639 const auto *LBVar = 4640 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 4641 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), 4642 /*IsInitializer=*/true); 4643 LValue UBLVal = CGF.EmitLValueForField( 4644 Result.TDBase, 4645 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 4646 const auto *UBVar = 4647 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 4648 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), 4649 /*IsInitializer=*/true); 4650 LValue StLVal = CGF.EmitLValueForField( 4651 Result.TDBase, 4652 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 4653 const auto *StVar = 4654 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 4655 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), 4656 /*IsInitializer=*/true); 4657 // Store reductions address. 
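// (A null pointer is stored when the taskloop carries no reductions.)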
4658 LValue RedLVal = CGF.EmitLValueForField( 4659 Result.TDBase, 4660 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 4661 if (Data.Reductions) { 4662 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 4663 } else { 4664 CGF.EmitNullInitialization(RedLVal.getAddress(), 4665 CGF.getContext().VoidPtrTy); 4666 } 4667 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 4668 llvm::SmallVector<llvm::Value *, 12> TaskArgs{ 4669 UpLoc, 4670 ThreadID, 4671 Result.NewTask, 4672 IfVal, 4673 LBLVal.getPointer(CGF), 4674 UBLVal.getPointer(CGF), 4675 CGF.EmitLoadOfScalar(StLVal, Loc), 4676 llvm::ConstantInt::getSigned( 4677 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 4678 llvm::ConstantInt::getSigned( 4679 CGF.IntTy, Data.Schedule.getPointer() 4680 ? Data.Schedule.getInt() ? NumTasks : Grainsize 4681 : NoSchedule), 4682 Data.Schedule.getPointer() 4683 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 4684 /*isSigned=*/false) 4685 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)}; 4686 if (Data.HasModifier) 4687 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1)); 4688 4689 TaskArgs.push_back(Result.TaskDupFn 4690 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4691 Result.TaskDupFn, CGF.VoidPtrTy) 4692 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)); 4693 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4694 CGM.getModule(), Data.HasModifier 4695 ? OMPRTL___kmpc_taskloop_5 4696 : OMPRTL___kmpc_taskloop), 4697 TaskArgs); 4698 } 4699 4700 /// Emit reduction operation for each element of array (required for 4701 /// array sections) LHS op = RHS. 4702 /// \param Type Type of array. 4703 /// \param LHSVar Variable on the left side of the reduction operation 4704 /// (references element of array in original variable). 4705 /// \param RHSVar Variable on the right side of the reduction operation 4706 /// (references element of array in original variable). 4707 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 4708 /// RHSVar. 4709 static void EmitOMPAggregateReduction( 4710 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 4711 const VarDecl *RHSVar, 4712 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 4713 const Expr *, const Expr *)> &RedOpGen, 4714 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 4715 const Expr *UpExpr = nullptr) { 4716 // Perform element-by-element initialization. 4717 QualType ElementTy; 4718 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 4719 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 4720 4721 // Drill down to the base element type on both arrays. 4722 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 4723 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 4724 4725 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF); 4726 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF); 4727 // Cast from pointer to array type to pointer to single element. 4728 llvm::Value *LHSEnd = 4729 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements); 4730 // The basic structure here is a while-do loop. 4731 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 4732 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 4733 llvm::Value *IsEmpty = 4734 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 4735 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 4736 4737 // Enter the loop body, making that address the current address. 
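// Each iteration privatizes LHSVar/RHSVar to the current pair of elements,
// runs the reduction operation on them, and advances both cursors until
// LHSEnd is reached.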
4738 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4739 CGF.EmitBlock(BodyBB);
4740
4741 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4742
4743 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4744 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4745 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4746 Address RHSElementCurrent(
4747 RHSElementPHI, RHSAddr.getElementType(),
4748 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4749
4750 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4751 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4752 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4753 Address LHSElementCurrent(
4754 LHSElementPHI, LHSAddr.getElementType(),
4755 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4756
4757 // Emit copy.
4758 CodeGenFunction::OMPPrivateScope Scope(CGF);
4759 Scope.addPrivate(LHSVar, LHSElementCurrent);
4760 Scope.addPrivate(RHSVar, RHSElementCurrent);
4761 Scope.Privatize();
4762 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4763 Scope.ForceCleanup();
4764
4765 // Shift the address forward by one element.
4766 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4767 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4768 "omp.arraycpy.dest.element");
4769 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4770 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4771 "omp.arraycpy.src.element");
4772 // Check whether we've reached the end.
4773 llvm::Value *Done =
4774 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4775 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4776 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4777 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4778
4779 // Done.
4780 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4781 }
4782
4783 /// Emit reduction combiner. If the combiner is a simple expression, emit it
4784 /// as is; otherwise treat it as the combiner of a UDR decl and emit it as a
4785 /// call to the UDR combiner function.
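/// For instance (illustrative), given
/// \code
/// #pragma omp declare reduction(merge : int : omp_out += omp_in)
/// \endcode
/// the reduction op for a 'reduction(merge : x)' clause is a call expression
/// whose callee ultimately refers to the OMPDeclareReductionDecl, and it is
/// lowered to a call of the emitted UDR combiner function.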
4786 static void emitReductionCombiner(CodeGenFunction &CGF, 4787 const Expr *ReductionOp) { 4788 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 4789 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 4790 if (const auto *DRE = 4791 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 4792 if (const auto *DRD = 4793 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 4794 std::pair<llvm::Function *, llvm::Function *> Reduction = 4795 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 4796 RValue Func = RValue::get(Reduction.first); 4797 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 4798 CGF.EmitIgnoredExpr(ReductionOp); 4799 return; 4800 } 4801 CGF.EmitIgnoredExpr(ReductionOp); 4802 } 4803 4804 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 4805 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, 4806 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, 4807 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { 4808 ASTContext &C = CGM.getContext(); 4809 4810 // void reduction_func(void *LHSArg, void *RHSArg); 4811 FunctionArgList Args; 4812 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 4813 ImplicitParamKind::Other); 4814 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 4815 ImplicitParamKind::Other); 4816 Args.push_back(&LHSArg); 4817 Args.push_back(&RHSArg); 4818 const auto &CGFI = 4819 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4820 std::string Name = getReductionFuncName(ReducerName); 4821 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 4822 llvm::GlobalValue::InternalLinkage, Name, 4823 &CGM.getModule()); 4824 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 4825 Fn->setDoesNotRecurse(); 4826 CodeGenFunction CGF(CGM); 4827 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 4828 4829 // Dst = (void*[n])(LHSArg); 4830 // Src = (void*[n])(RHSArg); 4831 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4832 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 4833 CGF.Builder.getPtrTy(0)), 4834 ArgsElemType, CGF.getPointerAlign()); 4835 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4836 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 4837 CGF.Builder.getPtrTy(0)), 4838 ArgsElemType, CGF.getPointerAlign()); 4839 4840 // ... 4841 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 4842 // ... 4843 CodeGenFunction::OMPPrivateScope Scope(CGF); 4844 const auto *IPriv = Privates.begin(); 4845 unsigned Idx = 0; 4846 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 4847 const auto *RHSVar = 4848 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 4849 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar)); 4850 const auto *LHSVar = 4851 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 4852 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar)); 4853 QualType PrivTy = (*IPriv)->getType(); 4854 if (PrivTy->isVariablyModifiedType()) { 4855 // Get array size and emit VLA type. 
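// The size travels in the next void* slot of the argument array; it is
// reinterpreted as an integer and bound to the VLA size expression so that
// the variably modified type can be re-emitted here.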
4856 ++Idx; 4857 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 4858 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 4859 const VariableArrayType *VLA = 4860 CGF.getContext().getAsVariableArrayType(PrivTy); 4861 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 4862 CodeGenFunction::OpaqueValueMapping OpaqueMap( 4863 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 4864 CGF.EmitVariablyModifiedType(PrivTy); 4865 } 4866 } 4867 Scope.Privatize(); 4868 IPriv = Privates.begin(); 4869 const auto *ILHS = LHSExprs.begin(); 4870 const auto *IRHS = RHSExprs.begin(); 4871 for (const Expr *E : ReductionOps) { 4872 if ((*IPriv)->getType()->isArrayType()) { 4873 // Emit reduction for array section. 4874 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 4875 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 4876 EmitOMPAggregateReduction( 4877 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 4878 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4879 emitReductionCombiner(CGF, E); 4880 }); 4881 } else { 4882 // Emit reduction for array subscript or single variable. 4883 emitReductionCombiner(CGF, E); 4884 } 4885 ++IPriv; 4886 ++ILHS; 4887 ++IRHS; 4888 } 4889 Scope.ForceCleanup(); 4890 CGF.FinishFunction(); 4891 return Fn; 4892 } 4893 4894 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 4895 const Expr *ReductionOp, 4896 const Expr *PrivateRef, 4897 const DeclRefExpr *LHS, 4898 const DeclRefExpr *RHS) { 4899 if (PrivateRef->getType()->isArrayType()) { 4900 // Emit reduction for array section. 4901 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 4902 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 4903 EmitOMPAggregateReduction( 4904 CGF, PrivateRef->getType(), LHSVar, RHSVar, 4905 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 4906 emitReductionCombiner(CGF, ReductionOp); 4907 }); 4908 } else { 4909 // Emit reduction for array subscript or single variable. 4910 emitReductionCombiner(CGF, ReductionOp); 4911 } 4912 } 4913 4914 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 4915 ArrayRef<const Expr *> Privates, 4916 ArrayRef<const Expr *> LHSExprs, 4917 ArrayRef<const Expr *> RHSExprs, 4918 ArrayRef<const Expr *> ReductionOps, 4919 ReductionOptionsTy Options) { 4920 if (!CGF.HaveInsertPoint()) 4921 return; 4922 4923 bool WithNowait = Options.WithNowait; 4924 bool SimpleReduction = Options.SimpleReduction; 4925 4926 // Next code should be emitted for reduction: 4927 // 4928 // static kmp_critical_name lock = { 0 }; 4929 // 4930 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 4931 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 4932 // ... 4933 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 4934 // *(Type<n>-1*)rhs[<n>-1]); 4935 // } 4936 // 4937 // ... 4938 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 4939 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 4940 // RedList, reduce_func, &<lock>)) { 4941 // case 1: 4942 // ... 4943 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4944 // ... 4945 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 4946 // break; 4947 // case 2: 4948 // ... 4949 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 4950 // ... 
4951 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 4952 // break; 4953 // default:; 4954 // } 4955 // 4956 // if SimpleReduction is true, only the next code is generated: 4957 // ... 4958 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 4959 // ... 4960 4961 ASTContext &C = CGM.getContext(); 4962 4963 if (SimpleReduction) { 4964 CodeGenFunction::RunCleanupsScope Scope(CGF); 4965 const auto *IPriv = Privates.begin(); 4966 const auto *ILHS = LHSExprs.begin(); 4967 const auto *IRHS = RHSExprs.begin(); 4968 for (const Expr *E : ReductionOps) { 4969 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 4970 cast<DeclRefExpr>(*IRHS)); 4971 ++IPriv; 4972 ++ILHS; 4973 ++IRHS; 4974 } 4975 return; 4976 } 4977 4978 // 1. Build a list of reduction variables. 4979 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 4980 auto Size = RHSExprs.size(); 4981 for (const Expr *E : Privates) { 4982 if (E->getType()->isVariablyModifiedType()) 4983 // Reserve place for array size. 4984 ++Size; 4985 } 4986 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 4987 QualType ReductionArrayTy = C.getConstantArrayType( 4988 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal, 4989 /*IndexTypeQuals=*/0); 4990 RawAddress ReductionList = 4991 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 4992 const auto *IPriv = Privates.begin(); 4993 unsigned Idx = 0; 4994 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 4995 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 4996 CGF.Builder.CreateStore( 4997 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4998 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 4999 Elem); 5000 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5001 // Store array size. 5002 ++Idx; 5003 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5004 llvm::Value *Size = CGF.Builder.CreateIntCast( 5005 CGF.getVLASize( 5006 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5007 .NumElts, 5008 CGF.SizeTy, /*isSigned=*/false); 5009 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5010 Elem); 5011 } 5012 } 5013 5014 // 2. Emit reduce_func(). 5015 llvm::Function *ReductionFn = emitReductionFunction( 5016 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy), 5017 Privates, LHSExprs, RHSExprs, ReductionOps); 5018 5019 // 3. Create static kmp_critical_name lock = { 0 }; 5020 std::string Name = getName({"reduction"}); 5021 llvm::Value *Lock = getCriticalRegionLock(Name); 5022 5023 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5024 // RedList, reduce_func, &<lock>); 5025 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5026 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5027 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5028 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5029 ReductionList.getPointer(), CGF.VoidPtrTy); 5030 llvm::Value *Args[] = { 5031 IdentTLoc, // ident_t *<loc> 5032 ThreadId, // i32 <gtid> 5033 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5034 ReductionArrayTySize, // size_type sizeof(RedList) 5035 RL, // void *RedList 5036 ReductionFn, // void (*) (void *, void *) <reduce_func> 5037 Lock // kmp_critical_name *&<lock> 5038 }; 5039 llvm::Value *Res = CGF.EmitRuntimeCall( 5040 OMPBuilder.getOrCreateRuntimeFunction( 5041 CGM.getModule(), 5042 WithNowait ? 
OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5043 Args); 5044 5045 // 5. Build switch(res) 5046 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5047 llvm::SwitchInst *SwInst = 5048 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5049 5050 // 6. Build case 1: 5051 // ... 5052 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5053 // ... 5054 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5055 // break; 5056 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5057 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5058 CGF.EmitBlock(Case1BB); 5059 5060 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5061 llvm::Value *EndArgs[] = { 5062 IdentTLoc, // ident_t *<loc> 5063 ThreadId, // i32 <gtid> 5064 Lock // kmp_critical_name *&<lock> 5065 }; 5066 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5067 CodeGenFunction &CGF, PrePostActionTy &Action) { 5068 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5069 const auto *IPriv = Privates.begin(); 5070 const auto *ILHS = LHSExprs.begin(); 5071 const auto *IRHS = RHSExprs.begin(); 5072 for (const Expr *E : ReductionOps) { 5073 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5074 cast<DeclRefExpr>(*IRHS)); 5075 ++IPriv; 5076 ++ILHS; 5077 ++IRHS; 5078 } 5079 }; 5080 RegionCodeGenTy RCG(CodeGen); 5081 CommonActionTy Action( 5082 nullptr, {}, 5083 OMPBuilder.getOrCreateRuntimeFunction( 5084 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5085 : OMPRTL___kmpc_end_reduce), 5086 EndArgs); 5087 RCG.setAction(Action); 5088 RCG(CGF); 5089 5090 CGF.EmitBranch(DefaultBB); 5091 5092 // 7. Build case 2: 5093 // ... 5094 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5095 // ... 5096 // break; 5097 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5098 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5099 CGF.EmitBlock(Case2BB); 5100 5101 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5102 CodeGenFunction &CGF, PrePostActionTy &Action) { 5103 const auto *ILHS = LHSExprs.begin(); 5104 const auto *IRHS = RHSExprs.begin(); 5105 const auto *IPriv = Privates.begin(); 5106 for (const Expr *E : ReductionOps) { 5107 const Expr *XExpr = nullptr; 5108 const Expr *EExpr = nullptr; 5109 const Expr *UpExpr = nullptr; 5110 BinaryOperatorKind BO = BO_Comma; 5111 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5112 if (BO->getOpcode() == BO_Assign) { 5113 XExpr = BO->getLHS(); 5114 UpExpr = BO->getRHS(); 5115 } 5116 } 5117 // Try to emit update expression as a simple atomic. 5118 const Expr *RHSExpr = UpExpr; 5119 if (RHSExpr) { 5120 // Analyze RHS part of the whole expression. 5121 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5122 RHSExpr->IgnoreParenImpCasts())) { 5123 // If this is a conditional operator, analyze its condition for 5124 // min/max reduction operator. 
5125 RHSExpr = ACO->getCond(); 5126 } 5127 if (const auto *BORHS = 5128 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5129 EExpr = BORHS->getRHS(); 5130 BO = BORHS->getOpcode(); 5131 } 5132 } 5133 if (XExpr) { 5134 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5135 auto &&AtomicRedGen = [BO, VD, 5136 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5137 const Expr *EExpr, const Expr *UpExpr) { 5138 LValue X = CGF.EmitLValue(XExpr); 5139 RValue E; 5140 if (EExpr) 5141 E = CGF.EmitAnyExpr(EExpr); 5142 CGF.EmitOMPAtomicSimpleUpdateExpr( 5143 X, E, BO, /*IsXLHSInRHSPart=*/true, 5144 llvm::AtomicOrdering::Monotonic, Loc, 5145 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5146 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5147 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5148 CGF.emitOMPSimpleStore( 5149 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5150 VD->getType().getNonReferenceType(), Loc); 5151 PrivateScope.addPrivate(VD, LHSTemp); 5152 (void)PrivateScope.Privatize(); 5153 return CGF.EmitAnyExpr(UpExpr); 5154 }); 5155 }; 5156 if ((*IPriv)->getType()->isArrayType()) { 5157 // Emit atomic reduction for array section. 5158 const auto *RHSVar = 5159 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5160 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5161 AtomicRedGen, XExpr, EExpr, UpExpr); 5162 } else { 5163 // Emit atomic reduction for array subscript or single variable. 5164 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5165 } 5166 } else { 5167 // Emit as a critical region. 5168 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5169 const Expr *, const Expr *) { 5170 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5171 std::string Name = RT.getName({"atomic_reduction"}); 5172 RT.emitCriticalRegion( 5173 CGF, Name, 5174 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5175 Action.Enter(CGF); 5176 emitReductionCombiner(CGF, E); 5177 }, 5178 Loc); 5179 }; 5180 if ((*IPriv)->getType()->isArrayType()) { 5181 const auto *LHSVar = 5182 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5183 const auto *RHSVar = 5184 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5185 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5186 CritRedGen); 5187 } else { 5188 CritRedGen(CGF, nullptr, nullptr, nullptr); 5189 } 5190 } 5191 ++ILHS; 5192 ++IRHS; 5193 ++IPriv; 5194 } 5195 }; 5196 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5197 if (!WithNowait) { 5198 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5199 llvm::Value *EndArgs[] = { 5200 IdentTLoc, // ident_t *<loc> 5201 ThreadId, // i32 <gtid> 5202 Lock // kmp_critical_name *&<lock> 5203 }; 5204 CommonActionTy Action(nullptr, {}, 5205 OMPBuilder.getOrCreateRuntimeFunction( 5206 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5207 EndArgs); 5208 AtomicRCG.setAction(Action); 5209 AtomicRCG(CGF); 5210 } else { 5211 AtomicRCG(CGF); 5212 } 5213 5214 CGF.EmitBranch(DefaultBB); 5215 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5216 } 5217 5218 /// Generates unique name for artificial threadprivate variables. 5219 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5220 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5221 const Expr *Ref) { 5222 SmallString<256> Buffer; 5223 llvm::raw_svector_ostream Out(Buffer); 5224 const clang::DeclRefExpr *DE; 5225 const VarDecl *D = ::getBaseDecl(Ref, DE); 5226 if (!D) 5227 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5228 D = D->getCanonicalDecl(); 5229 std::string Name = CGM.getOpenMPRuntime().getName( 5230 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5231 Out << Prefix << Name << "_" 5232 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5233 return std::string(Out.str()); 5234 } 5235 5236 /// Emits reduction initializer function: 5237 /// \code 5238 /// void @.red_init(void* %arg, void* %orig) { 5239 /// %0 = bitcast void* %arg to <type>* 5240 /// store <type> <init>, <type>* %0 5241 /// ret void 5242 /// } 5243 /// \endcode 5244 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5245 SourceLocation Loc, 5246 ReductionCodeGen &RCG, unsigned N) { 5247 ASTContext &C = CGM.getContext(); 5248 QualType VoidPtrTy = C.VoidPtrTy; 5249 VoidPtrTy.addRestrict(); 5250 FunctionArgList Args; 5251 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5252 ImplicitParamKind::Other); 5253 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5254 ImplicitParamKind::Other); 5255 Args.emplace_back(&Param); 5256 Args.emplace_back(&ParamOrig); 5257 const auto &FnInfo = 5258 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5259 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5260 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5261 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5262 Name, &CGM.getModule()); 5263 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5264 Fn->setDoesNotRecurse(); 5265 CodeGenFunction CGF(CGM); 5266 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5267 QualType PrivateType = RCG.getPrivateType(N); 5268 Address PrivateAddr = CGF.EmitLoadOfPointer( 5269 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)), 5270 C.getPointerType(PrivateType)->castAs<PointerType>()); 5271 llvm::Value *Size = nullptr; 5272 // If the size of the reduction item is non-constant, load it from global 5273 // threadprivate variable. 
5274 if (RCG.getSizes(N).second) {
5275 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5276 CGF, CGM.getContext().getSizeType(),
5277 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5278 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5279 CGM.getContext().getSizeType(), Loc);
5280 }
5281 RCG.emitAggregateType(CGF, N, Size);
5282 Address OrigAddr = Address::invalid();
5283 // If the initializer uses the initializer from the declare reduction
5284 // construct, emit a pointer to the address of the original reduction item
5285 // (required by the reduction initializer).
5286 if (RCG.usesReductionInitializer(N)) {
5287 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5288 OrigAddr = CGF.EmitLoadOfPointer(
5289 SharedAddr,
5290 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5291 }
5292 // Emit the initializer:
5293 // %0 = bitcast void* %arg to <type>*
5294 // store <type> <init>, <type>* %0
5295 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5296 [](CodeGenFunction &) { return false; });
5297 CGF.FinishFunction();
5298 return Fn;
5299 }
5300
5301 /// Emits reduction combiner function:
5302 /// \code
5303 /// void @.red_comb(void* %arg0, void* %arg1) {
5304 /// %lhs = bitcast void* %arg0 to <type>*
5305 /// %rhs = bitcast void* %arg1 to <type>*
5306 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5307 /// store <type> %2, <type>* %lhs
5308 /// ret void
5309 /// }
5310 /// \endcode
5311 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5312 SourceLocation Loc,
5313 ReductionCodeGen &RCG, unsigned N,
5314 const Expr *ReductionOp,
5315 const Expr *LHS, const Expr *RHS,
5316 const Expr *PrivateRef) {
5317 ASTContext &C = CGM.getContext();
5318 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5319 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5320 FunctionArgList Args;
5321 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5322 C.VoidPtrTy, ImplicitParamKind::Other);
5323 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5324 ImplicitParamKind::Other);
5325 Args.emplace_back(&ParamInOut);
5326 Args.emplace_back(&ParamIn);
5327 const auto &FnInfo =
5328 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5329 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5330 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5331 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5332 Name, &CGM.getModule());
5333 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5334 Fn->setDoesNotRecurse();
5335 CodeGenFunction CGF(CGM);
5336 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5337 llvm::Value *Size = nullptr;
5338 // If the size of the reduction item is non-constant, load it from the global
5339 // threadprivate variable.
5340 if (RCG.getSizes(N).second) {
5341 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5342 CGF, CGM.getContext().getSizeType(),
5343 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5344 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5345 CGM.getContext().getSizeType(), Loc);
5346 }
5347 RCG.emitAggregateType(CGF, N, Size);
5348 // Remap lhs and rhs variables to the addresses of the function arguments.
5349 // %lhs = bitcast void* %arg0 to <type>* 5350 // %rhs = bitcast void* %arg1 to <type>* 5351 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5352 PrivateScope.addPrivate( 5353 LHSVD, 5354 // Pull out the pointer to the variable. 5355 CGF.EmitLoadOfPointer( 5356 CGF.GetAddrOfLocalVar(&ParamInOut) 5357 .withElementType(CGF.Builder.getPtrTy(0)), 5358 C.getPointerType(LHSVD->getType())->castAs<PointerType>())); 5359 PrivateScope.addPrivate( 5360 RHSVD, 5361 // Pull out the pointer to the variable. 5362 CGF.EmitLoadOfPointer( 5363 CGF.GetAddrOfLocalVar(&ParamIn).withElementType( 5364 CGF.Builder.getPtrTy(0)), 5365 C.getPointerType(RHSVD->getType())->castAs<PointerType>())); 5366 PrivateScope.Privatize(); 5367 // Emit the combiner body: 5368 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5369 // store <type> %2, <type>* %lhs 5370 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5371 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5372 cast<DeclRefExpr>(RHS)); 5373 CGF.FinishFunction(); 5374 return Fn; 5375 } 5376 5377 /// Emits reduction finalizer function: 5378 /// \code 5379 /// void @.red_fini(void* %arg) { 5380 /// %0 = bitcast void* %arg to <type>* 5381 /// <destroy>(<type>* %0) 5382 /// ret void 5383 /// } 5384 /// \endcode 5385 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5386 SourceLocation Loc, 5387 ReductionCodeGen &RCG, unsigned N) { 5388 if (!RCG.needCleanups(N)) 5389 return nullptr; 5390 ASTContext &C = CGM.getContext(); 5391 FunctionArgList Args; 5392 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5393 ImplicitParamKind::Other); 5394 Args.emplace_back(&Param); 5395 const auto &FnInfo = 5396 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5397 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5398 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5399 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5400 Name, &CGM.getModule()); 5401 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5402 Fn->setDoesNotRecurse(); 5403 CodeGenFunction CGF(CGM); 5404 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5405 Address PrivateAddr = CGF.EmitLoadOfPointer( 5406 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>()); 5407 llvm::Value *Size = nullptr; 5408 // If the size of the reduction item is non-constant, load it from global 5409 // threadprivate variable. 
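// (The matching store into that threadprivate variable is emitted in
// emitTaskReductionFixups.)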
5410 if (RCG.getSizes(N).second) { 5411 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5412 CGF, CGM.getContext().getSizeType(), 5413 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5414 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5415 CGM.getContext().getSizeType(), Loc); 5416 } 5417 RCG.emitAggregateType(CGF, N, Size); 5418 // Emit the finalizer body: 5419 // <destroy>(<type>* %0) 5420 RCG.emitCleanups(CGF, N, PrivateAddr); 5421 CGF.FinishFunction(Loc); 5422 return Fn; 5423 } 5424 5425 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 5426 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 5427 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 5428 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 5429 return nullptr; 5430 5431 // Build typedef struct: 5432 // kmp_taskred_input { 5433 // void *reduce_shar; // shared reduction item 5434 // void *reduce_orig; // original reduction item used for initialization 5435 // size_t reduce_size; // size of data item 5436 // void *reduce_init; // data initialization routine 5437 // void *reduce_fini; // data finalization routine 5438 // void *reduce_comb; // data combiner routine 5439 // kmp_task_red_flags_t flags; // flags for additional info from compiler 5440 // } kmp_taskred_input_t; 5441 ASTContext &C = CGM.getContext(); 5442 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 5443 RD->startDefinition(); 5444 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5445 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5446 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 5447 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5448 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5449 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 5450 const FieldDecl *FlagsFD = addFieldToRecordDecl( 5451 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 5452 RD->completeDefinition(); 5453 QualType RDType = C.getRecordType(RD); 5454 unsigned Size = Data.ReductionVars.size(); 5455 llvm::APInt ArraySize(/*numBits=*/64, Size); 5456 QualType ArrayRDType = 5457 C.getConstantArrayType(RDType, ArraySize, nullptr, 5458 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); 5459 // kmp_task_red_input_t .rd_input.[Size]; 5460 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 5461 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 5462 Data.ReductionCopies, Data.ReductionOps); 5463 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 5464 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 5465 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 5466 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 5467 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 5468 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, 5469 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 5470 ".rd_input.gep."); 5471 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType); 5472 // ElemLVal.reduce_shar = &Shareds[Cnt]; 5473 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 5474 RCG.emitSharedOrigLValue(CGF, Cnt); 5475 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF); 5476 CGF.EmitStoreOfScalar(Shared, SharedLVal); 5477 // ElemLVal.reduce_orig = &Origs[Cnt]; 5478 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 5479 llvm::Value *Orig = 
RCG.getOrigLValue(Cnt).getPointer(CGF);
5480 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5481 RCG.emitAggregateType(CGF, Cnt);
5482 llvm::Value *SizeValInChars;
5483 llvm::Value *SizeVal;
5484 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5485 // We use delayed creation/initialization for VLAs and array sections. It is
5486 // required because the runtime provides no way to pass the sizes of
5487 // VLAs/array sections to the initializer/combiner/finalizer functions.
5488 // Instead, threadprivate global variables store these values and the
5489 // functions read them from there.
5490 bool DelayedCreation = !!SizeVal;
5491 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5492 /*isSigned=*/false);
5493 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5494 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5495 // ElemLVal.reduce_init = init;
5496 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5497 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5498 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5499 // ElemLVal.reduce_fini = fini;
5500 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5501 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5502 llvm::Value *FiniAddr =
5503 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5504 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5505 // ElemLVal.reduce_comb = comb;
5506 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5507 llvm::Value *CombAddr = emitReduceCombFunction(
5508 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5509 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5510 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5511 // ElemLVal.flags = DelayedCreation ? 1 : 0; // 1 requests lazy creation.
5512 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5513 if (DelayedCreation) {
5514 CGF.EmitStoreOfScalar(
5515 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5516 FlagsLVal);
5517 } else
5518 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5519 }
5520 if (Data.IsReductionWithTaskMod) {
5521 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5522 // is_ws, int num, void *data);
5523 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5524 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5525 CGM.IntTy, /*isSigned=*/true);
5526 llvm::Value *Args[] = {
5527 IdentTLoc, GTid,
5528 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ?
1 : 0,
5529 /*isSigned=*/true),
5530 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5531 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5532 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5533 return CGF.EmitRuntimeCall(
5534 OMPBuilder.getOrCreateRuntimeFunction(
5535 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5536 Args);
5537 }
5538 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5539 llvm::Value *Args[] = {
5540 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5541 /*isSigned=*/true),
5542 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5543 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5544 CGM.VoidPtrTy)};
5545 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5546 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5547 Args);
5548 }
5549
5550 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5551 SourceLocation Loc,
5552 bool IsWorksharingReduction) {
5553 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
5554 // gtid, int is_ws);
5555 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5556 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5557 CGM.IntTy, /*isSigned=*/true);
5558 llvm::Value *Args[] = {IdentTLoc, GTid,
5559 llvm::ConstantInt::get(CGM.IntTy,
5560 IsWorksharingReduction ? 1 : 0,
5561 /*isSigned=*/true)};
5562 (void)CGF.EmitRuntimeCall(
5563 OMPBuilder.getOrCreateRuntimeFunction(
5564 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5565 Args);
5566 }
5567
5568 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5569 SourceLocation Loc,
5570 ReductionCodeGen &RCG,
5571 unsigned N) {
5572 auto Sizes = RCG.getSizes(N);
5573 // Emit threadprivate global variable if the type is non-constant
5574 // (Sizes.second != nullptr).
5575 if (Sizes.second) {
5576 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5577 /*isSigned=*/false);
5578 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5579 CGF, CGM.getContext().getSizeType(),
5580 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5581 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5582 }
5583 }
5584
5585 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5586 SourceLocation Loc,
5587 llvm::Value *ReductionsPtr,
5588 LValue SharedLVal) {
5589 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5590 // *d);
5591 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5592 CGM.IntTy,
5593 /*isSigned=*/true),
5594 ReductionsPtr,
5595 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5596 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5597 return Address(
5598 CGF.EmitRuntimeCall(
5599 OMPBuilder.getOrCreateRuntimeFunction(
5600 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5601 Args),
5602 CGF.Int8Ty, SharedLVal.getAlignment());
5603 }
5604
5605 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5606 const OMPTaskDataTy &Data) {
5607 if (!CGF.HaveInsertPoint())
5608 return;
5609
5610 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5611 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
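// The dependence-free case lowers to a plain __kmpc_omp_taskwait, which the
// OMPBuilder can already emit itself.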
5612 OMPBuilder.createTaskwait(CGF.Builder); 5613 } else { 5614 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5615 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5616 auto &M = CGM.getModule(); 5617 Address DependenciesArray = Address::invalid(); 5618 llvm::Value *NumOfElements; 5619 std::tie(NumOfElements, DependenciesArray) = 5620 emitDependClause(CGF, Data.Dependences, Loc); 5621 if (!Data.Dependences.empty()) { 5622 llvm::Value *DepWaitTaskArgs[7]; 5623 DepWaitTaskArgs[0] = UpLoc; 5624 DepWaitTaskArgs[1] = ThreadID; 5625 DepWaitTaskArgs[2] = NumOfElements; 5626 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF); 5627 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5628 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5629 DepWaitTaskArgs[6] = 5630 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); 5631 5632 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5633 5634 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid, 5635 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5636 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list, 5637 // kmp_int32 has_no_wait); if dependence info is specified. 5638 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5639 M, OMPRTL___kmpc_omp_taskwait_deps_51), 5640 DepWaitTaskArgs); 5641 5642 } else { 5643 5644 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 5645 // global_tid); 5646 llvm::Value *Args[] = {UpLoc, ThreadID}; 5647 // Ignore return result until untied tasks are supported. 5648 CGF.EmitRuntimeCall( 5649 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 5650 Args); 5651 } 5652 } 5653 5654 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5655 Region->emitUntiedSwitch(CGF); 5656 } 5657 5658 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 5659 OpenMPDirectiveKind InnerKind, 5660 const RegionCodeGenTy &CodeGen, 5661 bool HasCancel) { 5662 if (!CGF.HaveInsertPoint()) 5663 return; 5664 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 5665 InnerKind != OMPD_critical && 5666 InnerKind != OMPD_master && 5667 InnerKind != OMPD_masked); 5668 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 5669 } 5670 5671 namespace { 5672 enum RTCancelKind { 5673 CancelNoreq = 0, 5674 CancelParallel = 1, 5675 CancelLoop = 2, 5676 CancelSections = 3, 5677 CancelTaskgroup = 4 5678 }; 5679 } // anonymous namespace 5680 5681 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 5682 RTCancelKind CancelKind = CancelNoreq; 5683 if (CancelRegion == OMPD_parallel) 5684 CancelKind = CancelParallel; 5685 else if (CancelRegion == OMPD_for) 5686 CancelKind = CancelLoop; 5687 else if (CancelRegion == OMPD_sections) 5688 CancelKind = CancelSections; 5689 else { 5690 assert(CancelRegion == OMPD_taskgroup); 5691 CancelKind = CancelTaskgroup; 5692 } 5693 return CancelKind; 5694 } 5695 5696 void CGOpenMPRuntime::emitCancellationPointCall( 5697 CodeGenFunction &CGF, SourceLocation Loc, 5698 OpenMPDirectiveKind CancelRegion) { 5699 if (!CGF.HaveInsertPoint()) 5700 return; 5701 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 5702 // global_tid, kmp_int32 cncl_kind); 5703 if (auto *OMPRegionInfo = 5704 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 5705 // For 'cancellation point taskgroup', the task region info may not have a 5706 // cancel. This may instead happen in another adjacent task. 
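// E.g. (illustrative user code, not taken from a test):
//   #pragma omp taskgroup
//   {
//   #pragma omp task
//     { /* ... #pragma omp cancel taskgroup ... */ }            // requested here
//   #pragma omp task
//     { /* ... #pragma omp cancellation point taskgroup ... */ } // observed here
//   }
// The cancel and the cancellation point can live in sibling tasks, so the
// check below cannot rely on the current region's HasCancel flag alone.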
5707 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5708 llvm::Value *Args[] = {
5709 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5710 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5711 // A non-zero result means that cancellation has been activated.
5712 llvm::Value *Result = CGF.EmitRuntimeCall(
5713 OMPBuilder.getOrCreateRuntimeFunction(
5714 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5715 Args);
5716 // if (__kmpc_cancellationpoint()) {
5717 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5718 // exit from construct;
5719 // }
5720 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5721 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5722 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5723 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5724 CGF.EmitBlock(ExitBB);
5725 if (CancelRegion == OMPD_parallel)
5726 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5727 // exit from construct;
5728 CodeGenFunction::JumpDest CancelDest =
5729 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5730 CGF.EmitBranchThroughCleanup(CancelDest);
5731 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5732 }
5733 }
5734 }
5735
5736 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5737 const Expr *IfCond,
5738 OpenMPDirectiveKind CancelRegion) {
5739 if (!CGF.HaveInsertPoint())
5740 return;
5741 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5742 // kmp_int32 cncl_kind);
5743 auto &M = CGM.getModule();
5744 if (auto *OMPRegionInfo =
5745 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5746 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5747 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5748 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5749 llvm::Value *Args[] = {
5750 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5751 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5752 // A non-zero result means that cancellation has been activated.
5753 llvm::Value *Result = CGF.EmitRuntimeCall(
5754 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5755 // if (__kmpc_cancel()) {
5756 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5757 // exit from construct;
5758 // }
5759 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5760 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5761 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5762 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5763 CGF.EmitBlock(ExitBB);
5764 if (CancelRegion == OMPD_parallel)
5765 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5766 // exit from construct;
5767 CodeGenFunction::JumpDest CancelDest =
5768 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5769 CGF.EmitBranchThroughCleanup(CancelDest);
5770 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5771 };
5772 if (IfCond) {
5773 emitIfClause(CGF, IfCond, ThenGen,
5774 [](CodeGenFunction &, PrePostActionTy &) {});
5775 } else {
5776 RegionCodeGenTy ThenRCG(ThenGen);
5777 ThenRCG(CGF);
5778 }
5779 }
5780 }
5781
5782 namespace {
5783 /// Cleanup action for uses_allocators support.
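// For example (illustrative), given
//   #pragma omp target uses_allocators(my_alloc(my_traits))
// Enter() emits the __kmpc_init_allocator call before the region body and
// Exit() emits the matching __kmpc_destroy_allocator call after it. Note
// that only allocators with a traits expression reach this action (see
// emitTargetOutlinedFunction below).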
5784 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 5785 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 5786 5787 public: 5788 OMPUsesAllocatorsActionTy( 5789 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 5790 : Allocators(Allocators) {} 5791 void Enter(CodeGenFunction &CGF) override { 5792 if (!CGF.HaveInsertPoint()) 5793 return; 5794 for (const auto &AllocatorData : Allocators) { 5795 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 5796 CGF, AllocatorData.first, AllocatorData.second); 5797 } 5798 } 5799 void Exit(CodeGenFunction &CGF) override { 5800 if (!CGF.HaveInsertPoint()) 5801 return; 5802 for (const auto &AllocatorData : Allocators) { 5803 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 5804 AllocatorData.first); 5805 } 5806 } 5807 }; 5808 } // namespace 5809 5810 void CGOpenMPRuntime::emitTargetOutlinedFunction( 5811 const OMPExecutableDirective &D, StringRef ParentName, 5812 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 5813 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 5814 assert(!ParentName.empty() && "Invalid target entry parent name!"); 5815 HasEmittedTargetRegion = true; 5816 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 5817 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 5818 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 5819 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 5820 if (!D.AllocatorTraits) 5821 continue; 5822 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 5823 } 5824 } 5825 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 5826 CodeGen.setAction(UsesAllocatorAction); 5827 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 5828 IsOffloadEntry, CodeGen); 5829 } 5830 5831 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 5832 const Expr *Allocator, 5833 const Expr *AllocatorTraits) { 5834 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 5835 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 5836 // Use default memspace handle. 5837 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5838 llvm::Value *NumTraits = llvm::ConstantInt::get( 5839 CGF.IntTy, cast<ConstantArrayType>( 5840 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 5841 ->getSize() 5842 .getLimitedValue()); 5843 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 5844 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5845 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy); 5846 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 5847 AllocatorTraitsLVal.getBaseInfo(), 5848 AllocatorTraitsLVal.getTBAAInfo()); 5849 llvm::Value *Traits = Addr.emitRawPointer(CGF); 5850 5851 llvm::Value *AllocatorVal = 5852 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5853 CGM.getModule(), OMPRTL___kmpc_init_allocator), 5854 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 5855 // Store to allocator. 
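// (Illustrative) For 'uses_allocators(my_alloc(my_traits))' this stores the
// handle returned by __kmpc_init_allocator into the privatized 'my_alloc'
// variable allocated just below.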
5856 CGF.EmitAutoVarAlloca(*cast<VarDecl>( 5857 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 5858 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 5859 AllocatorVal = 5860 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 5861 Allocator->getType(), Allocator->getExprLoc()); 5862 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 5863 } 5864 5865 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 5866 const Expr *Allocator) { 5867 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 5868 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 5869 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 5870 llvm::Value *AllocatorVal = 5871 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 5872 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 5873 CGF.getContext().VoidPtrTy, 5874 Allocator->getExprLoc()); 5875 (void)CGF.EmitRuntimeCall( 5876 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 5877 OMPRTL___kmpc_destroy_allocator), 5878 {ThreadId, AllocatorVal}); 5879 } 5880 5881 void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams( 5882 const OMPExecutableDirective &D, CodeGenFunction &CGF, 5883 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) { 5884 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 && 5885 "invalid default attrs structure"); 5886 int32_t &MaxTeamsVal = Attrs.MaxTeams.front(); 5887 int32_t &MaxThreadsVal = Attrs.MaxThreads.front(); 5888 5889 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal); 5890 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal, 5891 /*UpperBoundOnly=*/true); 5892 5893 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) { 5894 for (auto *A : C->getAttrs()) { 5895 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1; 5896 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1; 5897 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A)) 5898 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal, 5899 &AttrMinBlocksVal, &AttrMaxBlocksVal); 5900 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A)) 5901 CGM.handleAMDGPUFlatWorkGroupSizeAttr( 5902 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal, 5903 &AttrMaxThreadsVal); 5904 else 5905 continue; 5906 5907 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal); 5908 if (AttrMaxThreadsVal > 0) 5909 MaxThreadsVal = MaxThreadsVal > 0 5910 ? std::min(MaxThreadsVal, AttrMaxThreadsVal) 5911 : AttrMaxThreadsVal; 5912 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal); 5913 if (AttrMaxBlocksVal > 0) 5914 MaxTeamsVal = MaxTeamsVal > 0 ? 
std::min(MaxTeamsVal, AttrMaxBlocksVal) 5915 : AttrMaxBlocksVal; 5916 } 5917 } 5918 } 5919 5920 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 5921 const OMPExecutableDirective &D, StringRef ParentName, 5922 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 5923 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 5924 5925 llvm::TargetRegionEntryInfo EntryInfo = 5926 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName); 5927 5928 CodeGenFunction CGF(CGM, true); 5929 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction = 5930 [&CGF, &D, &CodeGen](StringRef EntryFnName) { 5931 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 5932 5933 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 5934 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 5935 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 5936 }; 5937 5938 cantFail(OMPBuilder.emitTargetRegionFunction( 5939 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn, 5940 OutlinedFnID)); 5941 5942 if (!OutlinedFn) 5943 return; 5944 5945 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); 5946 5947 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) { 5948 for (auto *A : C->getAttrs()) { 5949 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A)) 5950 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr); 5951 } 5952 } 5953 } 5954 5955 /// Checks if the expression is constant or does not have non-trivial function 5956 /// calls. 5957 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 5958 // We can skip constant expressions. 5959 // We can skip expressions with trivial calls or simple expressions. 5960 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 5961 !E->hasNonTrivialCall(Ctx)) && 5962 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 5963 } 5964 5965 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 5966 const Stmt *Body) { 5967 const Stmt *Child = Body->IgnoreContainers(); 5968 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 5969 Child = nullptr; 5970 for (const Stmt *S : C->body()) { 5971 if (const auto *E = dyn_cast<Expr>(S)) { 5972 if (isTrivial(Ctx, E)) 5973 continue; 5974 } 5975 // Some of the statements can be ignored. 5976 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 5977 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 5978 continue; 5979 // Analyze declarations. 5980 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 5981 if (llvm::all_of(DS->decls(), [](const Decl *D) { 5982 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 5983 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 5984 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 5985 isa<UsingDirectiveDecl>(D) || 5986 isa<OMPDeclareReductionDecl>(D) || 5987 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 5988 return true; 5989 const auto *VD = dyn_cast<VarDecl>(D); 5990 if (!VD) 5991 return false; 5992 return VD->hasGlobalStorage() || !VD->isUsed(); 5993 })) 5994 continue; 5995 } 5996 // Found multiple children - cannot get the one child only. 
5997 if (Child) 5998 return nullptr; 5999 Child = S; 6000 } 6001 if (Child) 6002 Child = Child->IgnoreContainers(); 6003 } 6004 return Child; 6005 } 6006 6007 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6008 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, 6009 int32_t &MaxTeamsVal) { 6010 6011 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6012 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6013 "Expected target-based executable directive."); 6014 switch (DirectiveKind) { 6015 case OMPD_target: { 6016 const auto *CS = D.getInnermostCapturedStmt(); 6017 const auto *Body = 6018 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6019 const Stmt *ChildStmt = 6020 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6021 if (const auto *NestedDir = 6022 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6023 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6024 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6025 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>() 6026 ->getNumTeams() 6027 .front(); 6028 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6029 if (auto Constant = 6030 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6031 MinTeamsVal = MaxTeamsVal = Constant->getExtValue(); 6032 return NumTeams; 6033 } 6034 MinTeamsVal = MaxTeamsVal = 0; 6035 return nullptr; 6036 } 6037 MinTeamsVal = MaxTeamsVal = 1; 6038 return nullptr; 6039 } 6040 // A value of -1 is used to check if we need to emit no teams region 6041 MinTeamsVal = MaxTeamsVal = -1; 6042 return nullptr; 6043 } 6044 case OMPD_target_teams_loop: 6045 case OMPD_target_teams: 6046 case OMPD_target_teams_distribute: 6047 case OMPD_target_teams_distribute_simd: 6048 case OMPD_target_teams_distribute_parallel_for: 6049 case OMPD_target_teams_distribute_parallel_for_simd: { 6050 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6051 const Expr *NumTeams = 6052 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front(); 6053 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6054 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6055 MinTeamsVal = MaxTeamsVal = Constant->getExtValue(); 6056 return NumTeams; 6057 } 6058 MinTeamsVal = MaxTeamsVal = 0; 6059 return nullptr; 6060 } 6061 case OMPD_target_parallel: 6062 case OMPD_target_parallel_for: 6063 case OMPD_target_parallel_for_simd: 6064 case OMPD_target_parallel_loop: 6065 case OMPD_target_simd: 6066 MinTeamsVal = MaxTeamsVal = 1; 6067 return nullptr; 6068 case OMPD_parallel: 6069 case OMPD_for: 6070 case OMPD_parallel_for: 6071 case OMPD_parallel_loop: 6072 case OMPD_parallel_master: 6073 case OMPD_parallel_sections: 6074 case OMPD_for_simd: 6075 case OMPD_parallel_for_simd: 6076 case OMPD_cancel: 6077 case OMPD_cancellation_point: 6078 case OMPD_ordered: 6079 case OMPD_threadprivate: 6080 case OMPD_allocate: 6081 case OMPD_task: 6082 case OMPD_simd: 6083 case OMPD_tile: 6084 case OMPD_unroll: 6085 case OMPD_sections: 6086 case OMPD_section: 6087 case OMPD_single: 6088 case OMPD_master: 6089 case OMPD_critical: 6090 case OMPD_taskyield: 6091 case OMPD_barrier: 6092 case OMPD_taskwait: 6093 case OMPD_taskgroup: 6094 case OMPD_atomic: 6095 case OMPD_flush: 6096 case OMPD_depobj: 6097 case OMPD_scan: 6098 case OMPD_teams: 6099 case OMPD_target_data: 6100 case OMPD_target_exit_data: 6101 case OMPD_target_enter_data: 6102 case OMPD_distribute: 6103 case OMPD_distribute_simd: 6104 case OMPD_distribute_parallel_for: 
6105 case OMPD_distribute_parallel_for_simd:
6106 case OMPD_teams_distribute:
6107 case OMPD_teams_distribute_simd:
6108 case OMPD_teams_distribute_parallel_for:
6109 case OMPD_teams_distribute_parallel_for_simd:
6110 case OMPD_target_update:
6111 case OMPD_declare_simd:
6112 case OMPD_declare_variant:
6113 case OMPD_begin_declare_variant:
6114 case OMPD_end_declare_variant:
6115 case OMPD_declare_target:
6116 case OMPD_end_declare_target:
6117 case OMPD_declare_reduction:
6118 case OMPD_declare_mapper:
6119 case OMPD_taskloop:
6120 case OMPD_taskloop_simd:
6121 case OMPD_master_taskloop:
6122 case OMPD_master_taskloop_simd:
6123 case OMPD_parallel_master_taskloop:
6124 case OMPD_parallel_master_taskloop_simd:
6125 case OMPD_requires:
6126 case OMPD_metadirective:
6127 case OMPD_unknown:
6128 break;
6129 default:
6130 break;
6131 }
6132 llvm_unreachable("Unexpected directive kind.");
6133 }
6134
6135 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6136 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6137 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6138 "Clauses associated with the teams directive expected to be emitted "
6139 "only for the host!");
6140 CGBuilderTy &Bld = CGF.Builder;
6141 int32_t MinNT = -1, MaxNT = -1;
6142 const Expr *NumTeams =
6143 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6144 if (NumTeams != nullptr) {
6145 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6146
6147 switch (DirectiveKind) {
6148 case OMPD_target: {
6149 const auto *CS = D.getInnermostCapturedStmt();
6150 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6151 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6152 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6153 /*IgnoreResultAssign*/ true);
6154 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6155 /*isSigned=*/true);
6156 }
6157 case OMPD_target_teams:
6158 case OMPD_target_teams_distribute:
6159 case OMPD_target_teams_distribute_simd:
6160 case OMPD_target_teams_distribute_parallel_for:
6161 case OMPD_target_teams_distribute_parallel_for_simd: {
6162 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6163 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6164 /*IgnoreResultAssign*/ true);
6165 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6166 /*isSigned=*/true);
6167 }
6168 default:
6169 break;
6170 }
6171 }
6172
6173 assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6174 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6175 }
6176
6177 /// Check for a num threads constant value (stored in \p UpperBound), or
6178 /// expression (stored in \p E). If the value is conditional (via an if
6179 /// clause), store the condition in \p CondVal. If \p E or \p CondVal is
6180 /// nullptr, the corresponding expression evaluation is not performed.
6181 static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6182 const Expr **E, int32_t &UpperBound,
6183 bool UpperBoundOnly, llvm::Value **CondVal) {
6184 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6185 CGF.getContext(), CS->getCapturedStmt());
6186 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6187 if (!Dir)
6188 return;
6189
6190 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6191 // Handle the if clause. If an if clause is present, the number of threads
6192 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
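// E.g. (illustrative):
//   #pragma omp target
//   #pragma omp parallel num_threads(4) if(c)
// yields an upper bound of 4 when c folds to true, an upper bound of 1 when
// c folds to false, and otherwise records the runtime condition in *CondVal.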
6193 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6194 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6195 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6196 const OMPIfClause *IfClause = nullptr;
6197 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6198 if (C->getNameModifier() == OMPD_unknown ||
6199 C->getNameModifier() == OMPD_parallel) {
6200 IfClause = C;
6201 break;
6202 }
6203 }
6204 if (IfClause) {
6205 const Expr *CondExpr = IfClause->getCondition();
6206 bool Result;
6207 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6208 if (!Result) {
6209 UpperBound = 1;
6210 return;
6211 }
6212 } else {
6213 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6214 if (const auto *PreInit =
6215 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6216 for (const auto *I : PreInit->decls()) {
6217 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6218 CGF.EmitVarDecl(cast<VarDecl>(*I));
6219 } else {
6220 CodeGenFunction::AutoVarEmission Emission =
6221 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6222 CGF.EmitAutoVarCleanups(Emission);
6223 }
6224 }
6225 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6226 }
6227 }
6228 }
6229 }
6230 // Check the value of the num_threads clause iff the if clause was not
6231 // specified or does not evaluate to false.
6232 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6233 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6234 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6235 const auto *NumThreadsClause =
6236 Dir->getSingleClause<OMPNumThreadsClause>();
6237 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6238 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6239 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6240 UpperBound =
6241 UpperBound == -1
6242 ? static_cast<int32_t>(Constant->getZExtValue())
6243 : std::min(UpperBound,
6244 static_cast<int32_t>(Constant->getZExtValue()));
6245 // If we haven't found an upper bound, remember we saw a thread limiting
6246 // clause.
6247 if (UpperBound == -1)
6248 UpperBound = 0;
6249 if (!E)
6250 return;
6251 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6252 if (const auto *PreInit =
6253 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6254 for (const auto *I : PreInit->decls()) {
6255 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6256 CGF.EmitVarDecl(cast<VarDecl>(*I));
6257 } else {
6258 CodeGenFunction::AutoVarEmission Emission =
6259 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6260 CGF.EmitAutoVarCleanups(Emission);
6261 }
6262 }
6263 }
6264 *E = NTExpr;
6265 }
6266 return;
6267 }
6268 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6269 UpperBound = 1;
6270 }
6271
6272 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6273 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6274 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6275 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6276 "Clauses associated with the teams directive expected to be emitted "
6277 "only for the host!");
6278 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6279 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6280 "Expected target-based executable directive.");
6281
6282 const Expr *NT = nullptr;
6283 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6284
6285 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6286 if (E->isIntegerConstantExpr(CGF.getContext())) {
6287 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6288 UpperBound = UpperBound == -1 ? int32_t(Constant->getZExtValue())
6289 : std::min(UpperBound,
6290 int32_t(Constant->getZExtValue()));
6291 }
6292 // If we haven't found an upper bound, remember we saw a thread limiting
6293 // clause.
6294 if (UpperBound == -1)
6295 UpperBound = 0;
6296 if (EPtr)
6297 *EPtr = E;
6298 };
6299
6300 auto ReturnSequential = [&]() {
6301 UpperBound = 1;
6302 return NT;
6303 };
6304
6305 switch (DirectiveKind) {
6306 case OMPD_target: {
6307 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6308 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6309 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6310 CGF.getContext(), CS->getCapturedStmt());
6311 // TODO: The standard is not clear how to resolve two thread limit clauses;
6312 // let's pick the teams one if it's present, otherwise the target one.
6313 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6314 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6315 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6316 ThreadLimitClause = TLC;
6317 if (ThreadLimitExpr) {
6318 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6319 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6320 CodeGenFunction::LexicalScope Scope(
6321 CGF,
6322 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6323 if (const auto *PreInit =
6324 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6325 for (const auto *I : PreInit->decls()) {
6326 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6327 CGF.EmitVarDecl(cast<VarDecl>(*I));
6328 } else {
6329 CodeGenFunction::AutoVarEmission Emission =
6330 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6331 CGF.EmitAutoVarCleanups(Emission);
6332 }
6333 }
6334 }
6335 }
6336 }
6337 }
6338 if (ThreadLimitClause)
6339 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6340 ThreadLimitExpr);
6341 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6342 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6343 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6344 CS = Dir->getInnermostCapturedStmt();
6345 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6346 CGF.getContext(), CS->getCapturedStmt());
6347 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6348 }
6349 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6350 CS = Dir->getInnermostCapturedStmt();
6351 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6352 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6353 return ReturnSequential();
6354 }
6355 return NT;
6356 }
6357 case OMPD_target_teams: {
6358 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6359 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6360 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6361 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6362 ThreadLimitExpr);
6363 }
6364 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6365 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6366 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6367 CGF.getContext(), CS->getCapturedStmt());
6368 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6369 if (Dir->getDirectiveKind() == OMPD_distribute) {
6370 CS = Dir->getInnermostCapturedStmt();
6371 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6372 }
6373 }
6374 return NT;
6375 }
6376 case OMPD_target_teams_distribute:
6377 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6378 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6379 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6380 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6381 ThreadLimitExpr);
6382 }
6383 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6384 UpperBoundOnly, CondVal);
6385 return NT;
6386 case OMPD_target_teams_loop:
6387 case OMPD_target_parallel_loop:
6388 case OMPD_target_parallel:
6389 case OMPD_target_parallel_for:
6390 case OMPD_target_parallel_for_simd:
6391 case OMPD_target_teams_distribute_parallel_for:
6392 case OMPD_target_teams_distribute_parallel_for_simd: {
6393 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6394 const OMPIfClause *IfClause = nullptr;
6395 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6396 if (C->getNameModifier() == OMPD_unknown ||
6397 C->getNameModifier() == OMPD_parallel) {
6398 IfClause = C;
6399 break;
6400 }
6401 }
6402 if (IfClause) {
6403 const Expr *Cond = IfClause->getCondition();
6404 bool Result;
6405 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6406 if (!Result)
6407 return ReturnSequential();
6408 } else {
6409 CodeGenFunction::RunCleanupsScope Scope(CGF);
6410 *CondVal = CGF.EvaluateExprAsBool(Cond);
6411 }
6412 }
6413 }
6414 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6415 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6416 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6417 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6418 ThreadLimitExpr);
6419 }
6420 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6421 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6422 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6423 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6424 return NumThreadsClause->getNumThreads();
6425 }
6426 return NT;
6427 }
6428 case OMPD_target_teams_distribute_simd:
6429 case OMPD_target_simd:
6430 return ReturnSequential();
6431 default:
6432 break;
6433 }
6434 llvm_unreachable("Unsupported directive kind.");
6435 }
6436
6437 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6438 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6439 llvm::Value *NumThreadsVal = nullptr;
6440 llvm::Value *CondVal = nullptr;
6441 llvm::Value *ThreadLimitVal = nullptr;
6442 const Expr *ThreadLimitExpr = nullptr;
6443 int32_t UpperBound = -1;
6444
6445 const Expr *NT = getNumThreadsExprForTargetDirective(
6446 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6447 &ThreadLimitExpr);
6448
6449 // Thread limit expressions are used below, emit them.
6450 if (ThreadLimitExpr) {
6451 ThreadLimitVal =
6452 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6453 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6454 /*isSigned=*/false);
6455 }
6456
6457 // Generate the num threads expression.
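// (Illustrative summary of the selection below:) a provably sequential
// region pins the value to 1; otherwise an explicit num_threads expression
// wins; otherwise a thread_limit expression is reused; otherwise 0 is
// emitted so the runtime picks the number of threads.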
6458 if (UpperBound == 1) {
6459 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6460 } else if (NT) {
6461 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6462 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6463 /*isSigned=*/false);
6464 } else if (ThreadLimitVal) {
6465 // If we do not have a num threads value but a thread limit, replace the
6466 // former with the latter. The thread limit expression was handled above.
6467 NumThreadsVal = ThreadLimitVal;
6468 ThreadLimitVal = nullptr;
6469 } else {
6470 // Default to "0" which means runtime choice.
6471 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6472 NumThreadsVal = CGF.Builder.getInt32(0);
6473 }
6474
6475 // Handle the if clause. If an if clause is present, the number of threads is
6476 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6477 if (CondVal) {
6478 CodeGenFunction::RunCleanupsScope Scope(CGF);
6479 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6480 CGF.Builder.getInt32(1));
6481 }
6482
6483 // If the thread limit and num threads expressions were both present, take
6484 // the minimum.
6485 if (ThreadLimitVal) {
6486 NumThreadsVal = CGF.Builder.CreateSelect(
6487 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6488 ThreadLimitVal, NumThreadsVal);
6489 }
6490
6491 return NumThreadsVal;
6492 }
6493
6494 namespace {
6495 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6496
6497 // Utility to handle information from clauses associated with a given
6498 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6499 // It provides a convenient interface to obtain the information and generate
6500 // code for that information.
6501 class MappableExprsHandler {
6502 public:
6503 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6504 static unsigned getFlagMemberOffset() {
6505 unsigned Offset = 0;
6506 for (uint64_t Remain =
6507 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6508 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6509 !(Remain & 1); Remain = Remain >> 1)
6510 Offset++;
6511 return Offset;
6512 }
6513
6514 /// Class that holds debugging information for a data mapping to be passed to
6515 /// the runtime library.
6516 class MappingExprInfo {
6517 /// The variable declaration used for the data mapping.
6518 const ValueDecl *MapDecl = nullptr;
6519 /// The original expression used in the map clause, or null if there is
6520 /// none.
6521 const Expr *MapExpr = nullptr;
6522
6523 public:
6524 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6525 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6526
6527 const ValueDecl *getMapDecl() const { return MapDecl; }
6528 const Expr *getMapExpr() const { return MapExpr; }
6529 };
6530
6531 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6532 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6533 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6534 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6535 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6536 using MapNonContiguousArrayTy =
6537 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6538 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6539 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6540
6541 /// This structure contains combined information generated for mappable
6542 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6543 /// mappers, and non-contiguous information.
6544 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6545 MapExprsArrayTy Exprs;
6546 MapValueDeclsArrayTy Mappers;
6547 MapValueDeclsArrayTy DevicePtrDecls;
6548
6549 /// Append arrays in \a CurInfo.
6550 void append(MapCombinedInfoTy &CurInfo) {
6551 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6552 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6553 CurInfo.DevicePtrDecls.end());
6554 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6555 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6556 }
6557 };
6558
6559 /// Map between a struct and its lowest & highest elements which have been
6560 /// mapped.
6561 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6562 /// HE(FieldIndex, Pointer)}
6563 struct StructRangeInfoTy {
6564 MapCombinedInfoTy PreliminaryMapData;
6565 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6566 0, Address::invalid()};
6567 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6568 0, Address::invalid()};
6569 Address Base = Address::invalid();
6570 Address LB = Address::invalid();
6571 bool IsArraySection = false;
6572 bool HasCompleteRecord = false;
6573 };
6574
6575 private:
6576 /// Information gathered for a single component list of a map-like clause.
6577 struct MapInfo {
6578 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6579 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6580 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6581 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6582 bool ReturnDevicePointer = false;
6583 bool IsImplicit = false;
6584 const ValueDecl *Mapper = nullptr;
6585 const Expr *VarRef = nullptr;
6586 bool ForDeviceAddr = false;
6587
6588 MapInfo() = default;
6589 MapInfo(
6590 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6591 OpenMPMapClauseKind MapType,
6592 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6593 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6594 bool ReturnDevicePointer, bool IsImplicit,
6595 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6596 bool ForDeviceAddr = false)
6597 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6598 MotionModifiers(MotionModifiers),
6599 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6600 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6601 };
6602
6603 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6604 /// member and there is no map information about it, then emission of that
6605 /// entry is deferred until the whole struct has been processed.
6606 struct DeferredDevicePtrEntryTy {
6607 const Expr *IE = nullptr;
6608 const ValueDecl *VD = nullptr;
6609 bool ForDeviceAddr = false;
6610
6611 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6612 bool ForDeviceAddr)
6613 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6614 };
6615
6616 /// The target directive from where the mappable clauses were extracted. It
6617 /// is either an executable directive or a user-defined mapper directive.
6618 llvm::PointerUnion<const OMPExecutableDirective *,
6619 const OMPDeclareMapperDecl *>
6620 CurDir;
6621
6622 /// Function the directive is being generated for.
6623 CodeGenFunction &CGF;
6624
6625 /// Set of all firstprivate variables in the current directive.
6626 /// The bool data is set to true if the variable is implicitly marked as
6627 /// firstprivate, false otherwise.
6628 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6629
6630 /// Map between device pointer declarations and their expression components.
6631 /// The key value for declarations in 'this' is null.
6632 llvm::DenseMap<
6633 const ValueDecl *,
6634 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6635 DevPointersMap;
6636
6637 /// Map between device addr declarations and their expression components.
6638 /// The key value for declarations in 'this' is null.
6639 llvm::DenseMap<
6640 const ValueDecl *,
6641 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6642 HasDevAddrsMap;
6643
6644 /// Map between lambda declarations and their map clause.
6645 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6646
6647 llvm::Value *getExprTypeSize(const Expr *E) const {
6648 QualType ExprTy = E->getType().getCanonicalType();
6649
6650 // Calculate the size for an array shaping expression.
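// E.g. (illustrative) for '#pragma omp target update to(([3][4])p)' the
// mapped size is 3 * 4 * sizeof(*p), accumulated by the NUW multiplies
// below.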
6651 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 6652 llvm::Value *Size = 6653 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 6654 for (const Expr *SE : OAE->getDimensions()) { 6655 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 6656 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 6657 CGF.getContext().getSizeType(), 6658 SE->getExprLoc()); 6659 Size = CGF.Builder.CreateNUWMul(Size, Sz); 6660 } 6661 return Size; 6662 } 6663 6664 // Reference types are ignored for mapping purposes. 6665 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 6666 ExprTy = RefTy->getPointeeType().getCanonicalType(); 6667 6668 // Given that an array section is considered a built-in type, we need to 6669 // do the calculation based on the length of the section instead of relying 6670 // on CGF.getTypeSize(E->getType()). 6671 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) { 6672 QualType BaseTy = ArraySectionExpr::getBaseOriginalType( 6673 OAE->getBase()->IgnoreParenImpCasts()) 6674 .getCanonicalType(); 6675 6676 // If there is no length associated with the expression and lower bound is 6677 // not specified too, that means we are using the whole length of the 6678 // base. 6679 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 6680 !OAE->getLowerBound()) 6681 return CGF.getTypeSize(BaseTy); 6682 6683 llvm::Value *ElemSize; 6684 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 6685 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 6686 } else { 6687 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 6688 assert(ATy && "Expecting array type if not a pointer type."); 6689 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 6690 } 6691 6692 // If we don't have a length at this point, that is because we have an 6693 // array section with a single element. 6694 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 6695 return ElemSize; 6696 6697 if (const Expr *LenExpr = OAE->getLength()) { 6698 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 6699 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 6700 CGF.getContext().getSizeType(), 6701 LenExpr->getExprLoc()); 6702 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 6703 } 6704 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 6705 OAE->getLowerBound() && "expected array_section[lb:]."); 6706 // Size = sizetype - lb * elemtype; 6707 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 6708 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 6709 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 6710 CGF.getContext().getSizeType(), 6711 OAE->getLowerBound()->getExprLoc()); 6712 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 6713 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 6714 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 6715 LengthVal = CGF.Builder.CreateSelect( 6716 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 6717 return LengthVal; 6718 } 6719 return CGF.getTypeSize(ExprTy); 6720 } 6721 6722 /// Return the corresponding bits for a given map clause modifier. Add 6723 /// a flag marking the map as a pointer if requested. Add a flag marking the 6724 /// map as the first one of a series of maps that relate to the same map 6725 /// expression. 
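/// E.g. (illustrative) 'map(always, to: x)' yields OMP_MAP_TO |
/// OMP_MAP_ALWAYS, plus OMP_MAP_TARGET_PARAM when \p AddIsTargetParamFlag
/// is set.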
6726 OpenMPOffloadMappingFlags getMapTypeBits(
6727 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6728 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6729 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6730 OpenMPOffloadMappingFlags Bits =
6731 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6732 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6733 switch (MapType) {
6734 case OMPC_MAP_alloc:
6735 case OMPC_MAP_release:
6736 // alloc and release are the default behavior in the runtime library: if
6737 // we don't pass any bits, alloc/release is what the runtime is going to
6738 // do. Therefore, we don't need to signal anything for these two map
6739 // types.
6740 break;
6741 case OMPC_MAP_to:
6742 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6743 break;
6744 case OMPC_MAP_from:
6745 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6746 break;
6747 case OMPC_MAP_tofrom:
6748 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6749 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6750 break;
6751 case OMPC_MAP_delete:
6752 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6753 break;
6754 case OMPC_MAP_unknown:
6755 llvm_unreachable("Unexpected map type!");
6756 }
6757 if (AddPtrFlag)
6758 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6759 if (AddIsTargetParamFlag)
6760 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6761 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6762 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6763 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6764 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6765 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6766 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6767 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6768 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6769 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6770 if (IsNonContiguous)
6771 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6772 return Bits;
6773 }
6774
6775 /// Return true if the provided expression is a final array section. A
6776 /// final array section is one whose length can't be proved to be one.
6777 bool isFinalArraySectionExpression(const Expr *E) const {
6778 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
6779
6780 // It is not an array section and therefore not a unity-size one.
6781 if (!OASE)
6782 return false;
6783
6784 // An array section with no colon always refers to a single element.
6785 if (OASE->getColonLocFirst().isInvalid())
6786 return false;
6787
6788 const Expr *Length = OASE->getLength();
6789
6790 // If we don't have a length we have to check if the array has size 1
6791 // for this dimension. Also, we should always expect a length if the
6792 // base type is a pointer.
6793 if (!Length) {
6794 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
6795 OASE->getBase()->IgnoreParenImpCasts())
6796 .getCanonicalType();
6797 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6798 return ATy->getSExtSize() != 1;
6799 // If we don't have a constant dimension length, we have to consider
6800 // the current section as having any size, so it is not necessarily
6801 // unitary. If it happens to be unity size, that's the user's fault.
6802 return true;
6803 }
6804
6805 // Check if the length evaluates to 1.
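// E.g. (illustrative) 'a[i:1]' has a provably unit length and is not
// final, whereas 'a[i:n]' is final because n is not known at compile
// time.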
6806 Expr::EvalResult Result; 6807 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 6808 return true; // Can have more that size 1. 6809 6810 llvm::APSInt ConstLength = Result.Val.getInt(); 6811 return ConstLength.getSExtValue() != 1; 6812 } 6813 6814 /// Generate the base pointers, section pointers, sizes, map type bits, and 6815 /// user-defined mappers (all included in \a CombinedInfo) for the provided 6816 /// map type, map or motion modifiers, and expression components. 6817 /// \a IsFirstComponent should be set to true if the provided set of 6818 /// components is the first associated with a capture. 6819 void generateInfoForComponentList( 6820 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 6821 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 6822 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 6823 MapCombinedInfoTy &CombinedInfo, 6824 MapCombinedInfoTy &StructBaseCombinedInfo, 6825 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 6826 bool IsImplicit, bool GenerateAllInfoForClauses, 6827 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 6828 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 6829 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 6830 OverlappedElements = {}, 6831 bool AreBothBasePtrAndPteeMapped = false) const { 6832 // The following summarizes what has to be generated for each map and the 6833 // types below. The generated information is expressed in this order: 6834 // base pointer, section pointer, size, flags 6835 // (to add to the ones that come from the map type and modifier). 6836 // 6837 // double d; 6838 // int i[100]; 6839 // float *p; 6840 // int **a = &i; 6841 // 6842 // struct S1 { 6843 // int i; 6844 // float f[50]; 6845 // } 6846 // struct S2 { 6847 // int i; 6848 // float f[50]; 6849 // S1 s; 6850 // double *p; 6851 // struct S2 *ps; 6852 // int &ref; 6853 // } 6854 // S2 s; 6855 // S2 *ps; 6856 // 6857 // map(d) 6858 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 6859 // 6860 // map(i) 6861 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 6862 // 6863 // map(i[1:23]) 6864 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 6865 // 6866 // map(p) 6867 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 6868 // 6869 // map(p[1:24]) 6870 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 6871 // in unified shared memory mode or for local pointers 6872 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 6873 // 6874 // map((*a)[0:3]) 6875 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM 6876 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM 6877 // 6878 // map(**a) 6879 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM 6880 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM 6881 // 6882 // map(s) 6883 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 6884 // 6885 // map(s.i) 6886 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 6887 // 6888 // map(s.s.f) 6889 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 6890 // 6891 // map(s.p) 6892 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 6893 // 6894 // map(to: s.p[:22]) 6895 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 6896 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 6897 // &(s.p), &(s.p[0]), 22*sizeof(double), 6898 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 6899 // (*) alloc space for struct members, only this is a target parameter 6900 // (**) map the pointer (nothing to be 
mapped in this example) (the compiler 6901 // optimizes this entry out, same in the examples below) 6902 // (***) map the pointee (map: to) 6903 // 6904 // map(to: s.ref) 6905 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) 6906 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 6907 // (*) alloc space for struct members, only this is a target parameter 6908 // (**) map the pointer (nothing to be mapped in this example) (the compiler 6909 // optimizes this entry out, same in the examples below) 6910 // (***) map the pointee (map: to) 6911 // 6912 // map(s.ps) 6913 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 6914 // 6915 // map(from: s.ps->s.i) 6916 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 6917 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 6918 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 6919 // 6920 // map(to: s.ps->ps) 6921 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 6922 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 6923 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 6924 // 6925 // map(s.ps->ps->ps) 6926 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 6927 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 6928 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 6929 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 6930 // 6931 // map(to: s.ps->ps->s.f[:22]) 6932 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 6933 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 6934 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 6935 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 6936 // 6937 // map(ps) 6938 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 6939 // 6940 // map(ps->i) 6941 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 6942 // 6943 // map(ps->s.f) 6944 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 6945 // 6946 // map(from: ps->p) 6947 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 6948 // 6949 // map(to: ps->p[:22]) 6950 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 6951 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 6952 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 6953 // 6954 // map(ps->ps) 6955 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 6956 // 6957 // map(from: ps->ps->s.i) 6958 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 6959 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 6960 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 6961 // 6962 // map(from: ps->ps->ps) 6963 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 6964 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 6965 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 6966 // 6967 // map(ps->ps->ps->ps) 6968 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 6969 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 6970 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 6971 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 6972 // 6973 // map(to: ps->ps->ps->s.f[:22]) 6974 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 6975 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 6976 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 6977 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 6978 // 6979 // map(to: s.f[:22]) map(from: s.p[:33]) 6980 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 6981 // sizeof(double*) (**), TARGET_PARAM 6982 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 6983 // &s, &(s.p), 
sizeof(double*), MEMBER_OF(1) 6984 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 6985 // (*) allocate contiguous space needed to fit all mapped members even if 6986 // we allocate space for members not mapped (in this example, 6987 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 6988 // them as well because they fall between &s.f[0] and &s.p) 6989 // 6990 // map(from: s.f[:22]) map(to: ps->p[:33]) 6991 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 6992 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 6993 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 6994 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 6995 // (*) the struct this entry pertains to is the 2nd element in the list of 6996 // arguments, hence MEMBER_OF(2) 6997 // 6998 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 6999 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7000 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7001 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7002 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7003 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7004 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7005 // (*) the struct this entry pertains to is the 4th element in the list 7006 // of arguments, hence MEMBER_OF(4) 7007 // 7008 // map(p, p[:100]) 7009 // ===> map(p[:100]) 7010 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM 7011 7012 // Track if the map information being generated is the first for a capture. 7013 bool IsCaptureFirstInfo = IsFirstComponentList; 7014 // When the variable is on a declare target link or in a to clause with 7015 // unified memory, a reference is needed to hold the host/device address 7016 // of the variable. 7017 bool RequiresReference = false; 7018 7019 // Scan the components from the base to the complete expression. 7020 auto CI = Components.rbegin(); 7021 auto CE = Components.rend(); 7022 auto I = CI; 7023 7024 // Track if the map information being generated is the first for a list of 7025 // components. 7026 bool IsExpressionFirstInfo = true; 7027 bool FirstPointerInComplexData = false; 7028 Address BP = Address::invalid(); 7029 const Expr *AssocExpr = I->getAssociatedExpression(); 7030 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7031 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr); 7032 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7033 7034 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE) 7035 return; 7036 if (isa<MemberExpr>(AssocExpr)) { 7037 // The base is the 'this' pointer. The content of the pointer is going 7038 // to be the base of the field being mapped. 7039 BP = CGF.LoadCXXThisAddress(); 7040 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7041 (OASE && 7042 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7043 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7044 } else if (OAShE && 7045 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7046 BP = Address( 7047 CGF.EmitScalarExpr(OAShE->getBase()), 7048 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()), 7049 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7050 } else { 7051 // The base is the reference to the variable. 7052 // BP = &Var. 
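// E.g. (illustrative) for 'map(tofrom: arr[0:n])' on a plain local array,
// BP starts out as &arr here; it is only replaced for declare target
// 'link'/'to' variables or dereferenced pointer bases below.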
7053       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7054       if (const auto *VD =
7055               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7056         if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7057                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7058           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7059               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7060                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7061                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7062             RequiresReference = true;
7063             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7064           }
7065         }
7066       }
7067
7068       // If the variable is a pointer and is being dereferenced (i.e. is not
7069       // the last component), the base has to be the pointer itself, not its
7070       // reference. References are ignored for mapping purposes.
7071       QualType Ty =
7072           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7073       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7074         // No need to generate individual map information for the pointer; it
7075         // can be associated with the combined storage if shared memory mode is
7076         // active or the base declaration is not a global variable.
7077         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7078         if (!AreBothBasePtrAndPteeMapped &&
7079             (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7080              !VD || VD->hasLocalStorage()))
7081           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7082         else
7083           FirstPointerInComplexData = true;
7084         ++I;
7085       }
7086     }
7087
7088     // Track whether a component of the list should be marked as MEMBER_OF some
7089     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7090     // in a component list should be marked as MEMBER_OF; all subsequent entries
7091     // do not belong to the base struct. E.g.
7092     // struct S2 s;
7093     // s.ps->ps->ps->f[:]
7094     //   (1)  (2)  (3) (4)
7095     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7096     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7097     // is the pointee of ps(2), which is not a member of struct s, so it should
7098     // not be marked as such (it is still PTR_AND_OBJ).
7099     // The variable is initialized to false so that PTR_AND_OBJ entries which
7100     // are not struct members are not considered (e.g. an array of pointers to
7101     // data).
7102     bool ShouldBeMemberOf = false;
7103
7104     // Variable keeping track of whether or not we have encountered a component
7105     // in the component list which is a member expression. Useful when we have a
7106     // pointer or a final array section, in which case it is the previous
7107     // component in the list which tells us whether we have a member expression.
7108     // E.g. X.f[:]
7109     // While processing the final array section "[:]" it is "f" which tells us
7110     // whether we are dealing with a member of a declared struct.
7111     const MemberExpr *EncounteredME = nullptr;
7112
7113     // Track the total number of dimensions. Start from one for the dummy
7114     // dimension.
7115     uint64_t DimSize = 1;
7116
7117     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7118     bool IsPrevMemberReference = false;
7119
7120     bool IsPartialMapped =
7121         !PartialStruct.PreliminaryMapData.BasePointers.empty();
7122
7123     // We need to check if we will be encountering any MEs. If we do not
7124     // encounter any ME expression, it means we will be mapping the whole struct.
7125     // In that case we need to skip adding an entry for the struct to the
7126     // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7127     // list only when generating all info for clauses.
7128     bool IsMappingWholeStruct = true;
7129     if (!GenerateAllInfoForClauses) {
7130       IsMappingWholeStruct = false;
7131     } else {
7132       for (auto TempI = I; TempI != CE; ++TempI) {
7133         const MemberExpr *PossibleME =
7134             dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7135         if (PossibleME) {
7136           IsMappingWholeStruct = false;
7137           break;
7138         }
7139       }
7140     }
7141
7142     for (; I != CE; ++I) {
7143       // If the current component is a member of a struct (the parent struct), mark it.
7144       if (!EncounteredME) {
7145         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7146         // If we encounter a PTR_AND_OBJ entry from now on, it should be marked
7147         // as MEMBER_OF the parent struct.
7148         if (EncounteredME) {
7149           ShouldBeMemberOf = true;
7150           // Do not emit as a complex pointer if this is actually not an
7151           // array-like expression.
7152           if (FirstPointerInComplexData) {
7153             QualType Ty = std::prev(I)
7154                               ->getAssociatedDeclaration()
7155                               ->getType()
7156                               .getNonReferenceType();
7157             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7158             FirstPointerInComplexData = false;
7159           }
7160         }
7161       }
7162
7163       auto Next = std::next(I);
7164
7165       // We need to generate the addresses and sizes if this is the last
7166       // component, if the component is a pointer, or if it is an array section
7167       // whose length can't be proved to be one. If this is a pointer, it
7168       // becomes the base address for the following components.
7169
7170       // A final array section is one whose length can't be proved to be one.
7171       // If the map item is non-contiguous then we don't treat any array section
7172       // as a final array section.
7173       bool IsFinalArraySection =
7174           !IsNonContiguous &&
7175           isFinalArraySectionExpression(I->getAssociatedExpression());
7176
7177       // If we have a declaration for the mapping, use that; otherwise use
7178       // the base declaration of the map clause.
7179       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7180                                      ? I->getAssociatedDeclaration()
7181                                      : BaseDecl;
7182       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7183                                                : MapExpr;
7184
7185       // Get information on whether the element is a pointer. We have to treat
7186       // array sections specially, given that they are built-in
7187       // types.
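// For a sketch of why the distinction matters (hypothetical declarations):
// given `int *p; int a[10];`, the section p[0:n] has a pointer base type
// while a[0:n] has an array base type, so only the former sets IsPointer
// through getBaseOriginalType below.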
7188 const auto *OASE = 7189 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression()); 7190 const auto *OAShE = 7191 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7192 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7193 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7194 bool IsPointer = 7195 OAShE || 7196 (OASE && ArraySectionExpr::getBaseOriginalType(OASE) 7197 .getCanonicalType() 7198 ->isAnyPointerType()) || 7199 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7200 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && 7201 MapDecl && 7202 MapDecl->getType()->isLValueReferenceType(); 7203 bool IsNonDerefPointer = IsPointer && 7204 !(UO && UO->getOpcode() != UO_Deref) && !BO && 7205 !IsNonContiguous; 7206 7207 if (OASE) 7208 ++DimSize; 7209 7210 if (Next == CE || IsMemberReference || IsNonDerefPointer || 7211 IsFinalArraySection) { 7212 // If this is not the last component, we expect the pointer to be 7213 // associated with an array expression or member expression. 7214 assert((Next == CE || 7215 isa<MemberExpr>(Next->getAssociatedExpression()) || 7216 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7217 isa<ArraySectionExpr>(Next->getAssociatedExpression()) || 7218 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7219 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7220 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7221 "Unexpected expression"); 7222 7223 Address LB = Address::invalid(); 7224 Address LowestElem = Address::invalid(); 7225 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, 7226 const MemberExpr *E) { 7227 const Expr *BaseExpr = E->getBase(); 7228 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a 7229 // scalar. 7230 LValue BaseLV; 7231 if (E->isArrow()) { 7232 LValueBaseInfo BaseInfo; 7233 TBAAAccessInfo TBAAInfo; 7234 Address Addr = 7235 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); 7236 QualType PtrTy = BaseExpr->getType()->getPointeeType(); 7237 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); 7238 } else { 7239 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); 7240 } 7241 return BaseLV; 7242 }; 7243 if (OAShE) { 7244 LowestElem = LB = 7245 Address(CGF.EmitScalarExpr(OAShE->getBase()), 7246 CGF.ConvertTypeForMem( 7247 OAShE->getBase()->getType()->getPointeeType()), 7248 CGF.getContext().getTypeAlignInChars( 7249 OAShE->getBase()->getType())); 7250 } else if (IsMemberReference) { 7251 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); 7252 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7253 LowestElem = CGF.EmitLValueForFieldInitialization( 7254 BaseLVal, cast<FieldDecl>(MapDecl)) 7255 .getAddress(); 7256 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) 7257 .getAddress(); 7258 } else { 7259 LowestElem = LB = 7260 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7261 .getAddress(); 7262 } 7263 7264 // If this component is a pointer inside the base struct then we don't 7265 // need to create any entry for it - it will be combined with the object 7266 // it is pointing to into a single PTR_AND_OBJ entry. 
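// E.g. while walking map(to: s.ps->ps) from the examples above, the
// component for the member pointer s.ps produces no entry of its own at
// this point; it only becomes the PTR half of the later PTR_AND_OBJ entry
//   &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO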
7267 bool IsMemberPointerOrAddr = 7268 EncounteredME && 7269 (((IsPointer || ForDeviceAddr) && 7270 I->getAssociatedExpression() == EncounteredME) || 7271 (IsPrevMemberReference && !IsPointer) || 7272 (IsMemberReference && Next != CE && 7273 !Next->getAssociatedExpression()->getType()->isPointerType())); 7274 if (!OverlappedElements.empty() && Next == CE) { 7275 // Handle base element with the info for overlapped elements. 7276 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7277 assert(!IsPointer && 7278 "Unexpected base element with the pointer type."); 7279 // Mark the whole struct as the struct that requires allocation on the 7280 // device. 7281 PartialStruct.LowestElem = {0, LowestElem}; 7282 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7283 I->getAssociatedExpression()->getType()); 7284 Address HB = CGF.Builder.CreateConstGEP( 7285 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 7286 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), 7287 TypeSize.getQuantity() - 1); 7288 PartialStruct.HighestElem = { 7289 std::numeric_limits<decltype( 7290 PartialStruct.HighestElem.first)>::max(), 7291 HB}; 7292 PartialStruct.Base = BP; 7293 PartialStruct.LB = LB; 7294 assert( 7295 PartialStruct.PreliminaryMapData.BasePointers.empty() && 7296 "Overlapped elements must be used only once for the variable."); 7297 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); 7298 // Emit data for non-overlapped data. 7299 OpenMPOffloadMappingFlags Flags = 7300 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | 7301 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7302 /*AddPtrFlag=*/false, 7303 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7304 llvm::Value *Size = nullptr; 7305 // Do bitcopy of all non-overlapped structure elements. 7306 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7307 Component : OverlappedElements) { 7308 Address ComponentLB = Address::invalid(); 7309 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7310 Component) { 7311 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { 7312 const auto *FD = dyn_cast<FieldDecl>(VD); 7313 if (FD && FD->getType()->isLValueReferenceType()) { 7314 const auto *ME = 7315 cast<MemberExpr>(MC.getAssociatedExpression()); 7316 LValue BaseLVal = EmitMemberExprBase(CGF, ME); 7317 ComponentLB = 7318 CGF.EmitLValueForFieldInitialization(BaseLVal, FD) 7319 .getAddress(); 7320 } else { 7321 ComponentLB = 7322 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7323 .getAddress(); 7324 } 7325 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF); 7326 llvm::Value *LBPtr = LB.emitRawPointer(CGF); 7327 Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, 7328 LBPtr); 7329 break; 7330 } 7331 } 7332 assert(Size && "Failed to determine structure size"); 7333 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7334 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF)); 7335 CombinedInfo.DevicePtrDecls.push_back(nullptr); 7336 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7337 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF)); 7338 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7339 Size, CGF.Int64Ty, /*isSigned=*/true)); 7340 CombinedInfo.Types.push_back(Flags); 7341 CombinedInfo.Mappers.push_back(nullptr); 7342 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 7343 : 1); 7344 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7345 } 7346 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7347 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF)); 7348 CombinedInfo.DevicePtrDecls.push_back(nullptr); 7349 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7350 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF)); 7351 llvm::Value *LBPtr = LB.emitRawPointer(CGF); 7352 Size = CGF.Builder.CreatePtrDiff( 7353 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF), 7354 LBPtr); 7355 CombinedInfo.Sizes.push_back( 7356 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7357 CombinedInfo.Types.push_back(Flags); 7358 CombinedInfo.Mappers.push_back(nullptr); 7359 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7360 : 1); 7361 break; 7362 } 7363 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7364 // Skip adding an entry in the CurInfo of this combined entry if the 7365 // whole struct is currently being mapped. The struct needs to be added 7366 // in the first position before any data internal to the struct is being 7367 // mapped. 7368 // Skip adding an entry in the CurInfo of this combined entry if the 7369 // PartialStruct.PreliminaryMapData.BasePointers has been mapped. 7370 if ((!IsMemberPointerOrAddr && !IsPartialMapped) || 7371 (Next == CE && MapType != OMPC_MAP_unknown)) { 7372 if (!IsMappingWholeStruct) { 7373 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7374 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF)); 7375 CombinedInfo.DevicePtrDecls.push_back(nullptr); 7376 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7377 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF)); 7378 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7379 Size, CGF.Int64Ty, /*isSigned=*/true)); 7380 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7381 : 1); 7382 } else { 7383 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7384 StructBaseCombinedInfo.BasePointers.push_back( 7385 BP.emitRawPointer(CGF)); 7386 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr); 7387 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 7388 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF)); 7389 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7390 Size, CGF.Int64Ty, /*isSigned=*/true)); 7391 StructBaseCombinedInfo.NonContigInfo.Dims.push_back( 7392 IsNonContiguous ? DimSize : 1); 7393 } 7394 7395 // If Mapper is valid, the last component inherits the mapper. 7396 bool HasMapper = Mapper && Next == CE; 7397 if (!IsMappingWholeStruct) 7398 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7399 else 7400 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper 7401 : nullptr); 7402 7403 // We need to add a pointer flag for each map that comes from the 7404 // same expression except for the first one. We also need to signal 7405 // this map is the first one that relates with the current capture 7406 // (there is a set of entries for each capture). 
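// E.g. in map(to: ps->p[:22]) from the examples above, the entry for `ps`
// is the first one for the capture and receives TARGET_PARAM, while the
// pointee entry generated afterwards for the same expression receives
// PTR_AND_OBJ instead.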
7407 OpenMPOffloadMappingFlags Flags = 7408 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7409 !IsExpressionFirstInfo || RequiresReference || 7410 FirstPointerInComplexData || IsMemberReference, 7411 AreBothBasePtrAndPteeMapped || 7412 (IsCaptureFirstInfo && !RequiresReference), 7413 IsNonContiguous); 7414 7415 if (!IsExpressionFirstInfo || IsMemberReference) { 7416 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7417 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7418 if (IsPointer || (IsMemberReference && Next != CE)) 7419 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO | 7420 OpenMPOffloadMappingFlags::OMP_MAP_FROM | 7421 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS | 7422 OpenMPOffloadMappingFlags::OMP_MAP_DELETE | 7423 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE); 7424 7425 if (ShouldBeMemberOf) { 7426 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7427 // should be later updated with the correct value of MEMBER_OF. 7428 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; 7429 // From now on, all subsequent PTR_AND_OBJ entries should not be 7430 // marked as MEMBER_OF. 7431 ShouldBeMemberOf = false; 7432 } 7433 } 7434 7435 if (!IsMappingWholeStruct) 7436 CombinedInfo.Types.push_back(Flags); 7437 else 7438 StructBaseCombinedInfo.Types.push_back(Flags); 7439 } 7440 7441 // If we have encountered a member expression so far, keep track of the 7442 // mapped member. If the parent is "*this", then the value declaration 7443 // is nullptr. 7444 if (EncounteredME) { 7445 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7446 unsigned FieldIndex = FD->getFieldIndex(); 7447 7448 // Update info about the lowest and highest elements for this struct 7449 if (!PartialStruct.Base.isValid()) { 7450 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 7451 if (IsFinalArraySection) { 7452 Address HB = 7453 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false) 7454 .getAddress(); 7455 PartialStruct.HighestElem = {FieldIndex, HB}; 7456 } else { 7457 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 7458 } 7459 PartialStruct.Base = BP; 7460 PartialStruct.LB = BP; 7461 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7462 PartialStruct.LowestElem = {FieldIndex, LowestElem}; 7463 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7464 if (IsFinalArraySection) { 7465 Address HB = 7466 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false) 7467 .getAddress(); 7468 PartialStruct.HighestElem = {FieldIndex, HB}; 7469 } else { 7470 PartialStruct.HighestElem = {FieldIndex, LowestElem}; 7471 } 7472 } 7473 } 7474 7475 // Need to emit combined struct for array sections. 7476 if (IsFinalArraySection || IsNonContiguous) 7477 PartialStruct.IsArraySection = true; 7478 7479 // If we have a final array section, we are done with this expression. 7480 if (IsFinalArraySection) 7481 break; 7482 7483 // The pointer becomes the base for the next element. 7484 if (Next != CE) 7485 BP = IsMemberReference ? 
LowestElem : LB;
7486         if (!IsPartialMapped)
7487           IsExpressionFirstInfo = false;
7488         IsCaptureFirstInfo = false;
7489         FirstPointerInComplexData = false;
7490         IsPrevMemberReference = IsMemberReference;
7491       } else if (FirstPointerInComplexData) {
7492         QualType Ty = Components.rbegin()
7493                           ->getAssociatedDeclaration()
7494                           ->getType()
7495                           .getNonReferenceType();
7496         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7497         FirstPointerInComplexData = false;
7498       }
7499     }
7500     // If we ran into the whole component, allocate space for the whole
7501     // record.
7502     if (!EncounteredME)
7503       PartialStruct.HasCompleteRecord = true;
7504
7505     if (!IsNonContiguous)
7506       return;
7507
7508     const ASTContext &Context = CGF.getContext();
7509
7510     // To support strides in array sections, we need to initialize the first
7511     // dimension size as 1, the first offset as 0, and the first count as 1.
7512     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7513     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7514     MapValuesArrayTy CurStrides;
7515     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7516     uint64_t ElementTypeSize;
7517
7518     // Collect Size information for each dimension and get the element size as
7519     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7520     // should be [10, 10] and the first stride is 4 bytes.
7521     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7522          Components) {
7523       const Expr *AssocExpr = Component.getAssociatedExpression();
7524       const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7525
7526       if (!OASE)
7527         continue;
7528
7529       QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7530       auto *CAT = Context.getAsConstantArrayType(Ty);
7531       auto *VAT = Context.getAsVariableArrayType(Ty);
7532
7533       // We need all the dimension sizes except for the last dimension.
7534       assert((VAT || CAT || &Component == &*Components.begin()) &&
7535              "Should be either ConstantArray or VariableArray if not the "
7536              "first Component");
7537
7538       // Get element size if CurStrides is empty.
7539       if (CurStrides.empty()) {
7540         const Type *ElementType = nullptr;
7541         if (CAT)
7542           ElementType = CAT->getElementType().getTypePtr();
7543         else if (VAT)
7544           ElementType = VAT->getElementType().getTypePtr();
7545         else
7546           assert(&Component == &*Components.begin() &&
7547                  "Only expect pointer (non CAT or VAT) when this is the "
7548                  "first Component");
7549         // If ElementType is null, then it means the base is a pointer
7550         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7551         // for the next iteration.
7552         if (ElementType) {
7553           // For the case of having a pointer as the base, we need to remove
7554           // one level of indirection.
7555           if (&Component != &*Components.begin())
7556             ElementType = ElementType->getPointeeOrArrayElementType();
7557           ElementTypeSize =
7558               Context.getTypeSizeInChars(ElementType).getQuantity();
7559           CurStrides.push_back(
7560               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7561         }
7562       }
7563       // Get the dimension value, except for the last dimension, since we don't
7564       // need it.
7565       if (DimSizes.size() < Components.size() - 1) {
7566         if (CAT)
7567           DimSizes.push_back(
7568               llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7569         else if (VAT)
7570           DimSizes.push_back(CGF.Builder.CreateIntCast(
7571               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7572               /*IsSigned=*/false));
7573       }
7574     }
7575
7576     // Skip the dummy dimension since we already have its information.
7577     auto *DI = DimSizes.begin() + 1;
7578     // Running product of the dimension sizes.
7579     llvm::Value *DimProd =
7580         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7581
7582     // Collect info for non-contiguous maps. Note that offset, count, and
7583     // stride are only meaningful for an array section, so we insert a null for
7584     // anything other than an array section.
7585     // Also, the sizes of the offsets, counts, and strides lists are not the
7586     // same as those of pointers, base_pointers, sizes, or dims. Instead, they
7587     // match the number of non-contiguous declarations in the target update
7588     // to/from clause.
7589     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7590          Components) {
7591       const Expr *AssocExpr = Component.getAssociatedExpression();
7592
7593       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7594         llvm::Value *Offset = CGF.Builder.CreateIntCast(
7595             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7596             /*isSigned=*/false);
7597         CurOffsets.push_back(Offset);
7598         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7599         CurStrides.push_back(CurStrides.back());
7600         continue;
7601       }
7602
7603       const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7604
7605       if (!OASE)
7606         continue;
7607
7608       // Offset
7609       const Expr *OffsetExpr = OASE->getLowerBound();
7610       llvm::Value *Offset = nullptr;
7611       if (!OffsetExpr) {
7612         // If the offset is absent, we just set it to zero.
7613         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7614       } else {
7615         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7616                                            CGF.Int64Ty,
7617                                            /*isSigned=*/false);
7618       }
7619       CurOffsets.push_back(Offset);
7620
7621       // Count
7622       const Expr *CountExpr = OASE->getLength();
7623       llvm::Value *Count = nullptr;
7624       if (!CountExpr) {
7625         // In Clang, once a higher dimension is an array section, all the lower
7626         // dimensions are constructed as array sections too; however, for a
7627         // case like arr[0:2][2], Clang constructs the inner dimension as an
7628         // array section even though it is not in array section form per the spec.
7629         if (!OASE->getColonLocFirst().isValid() &&
7630             !OASE->getColonLocSecond().isValid()) {
7631           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7632         } else {
7633           // OpenMP 5.0, 2.1.5 Array Sections, Description.
7634           // When the length is absent it defaults to ⌈(size −
7635           // lower-bound)/stride⌉, where size is the size of the array
7636           // dimension.
7637           const Expr *StrideExpr = OASE->getStride();
7638           llvm::Value *Stride =
7639               StrideExpr
7640                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7641                                               CGF.Int64Ty, /*isSigned=*/false)
7642                   : nullptr;
7643           if (Stride)
7644             Count = CGF.Builder.CreateUDiv(
7645                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7646           else
7647             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7648         }
7649       } else {
7650         Count = CGF.EmitScalarExpr(CountExpr);
7651       }
7652       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7653       CurCounts.push_back(Count);
7654
7655       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7656       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7657       //        Offset  Count  Stride
7658       //    D0  0       1      4       (int)                  <- dummy dimension
7659       //    D1  0       2      8       (2 * (1) * 4)
7660       //    D2  1       2      20      (1 * (1 * 5) * 4)
7661       //    D3  0       2      200     (2 * (1 * 5 * 5) * 4)
7662       const Expr *StrideExpr = OASE->getStride();
7663       llvm::Value *Stride =
7664           StrideExpr
7665               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7666                                           CGF.Int64Ty, /*isSigned=*/false)
7667               : nullptr;
7668       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7669       if (Stride)
7670         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7671       else
7672         CurStrides.push_back(DimProd);
7673       if (DI != DimSizes.end())
7674         ++DI;
7675     }
7676
7677     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7678     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7679     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7680   }
7681
7682   /// Return the adjusted map modifiers if the declaration a capture refers to
7683   /// appears in a first-private clause. This is expected to be used only with
7684   /// directives that start with 'target'.
7685   OpenMPOffloadMappingFlags
7686   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7687     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7688
7689     // A first private variable captured by reference will use only the
7690     // 'private ptr' and 'map to' flags. Return the right flags if the captured
7691     // declaration is known as first-private in this handler.
7692     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7693       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7694         return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7695                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7696       return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7697              OpenMPOffloadMappingFlags::OMP_MAP_TO;
7698     }
7699     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7700     if (I != LambdasMap.end())
7701       // for map(to: lambda): use the user-specified map type.
7702       return getMapTypeBits(
7703           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7704           /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
7705           /*AddPtrFlag=*/false,
7706           /*AddIsTargetParamFlag=*/false,
7707           /*isNonContiguous=*/false);
7708     return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7709            OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7710   }
7711
7712   void getPlainLayout(const CXXRecordDecl *RD,
7713                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7714                       bool AsBase) const {
7715     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7716
7717     llvm::StructType *St =
7718         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7719
7720     unsigned NumElements = St->getNumElements();
7721     llvm::SmallVector<
7722         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7723         RecordLayout(NumElements);
7724
7725     // Fill bases.
7726     for (const auto &I : RD->bases()) {
7727       if (I.isVirtual())
7728         continue;
7729
7730       QualType BaseTy = I.getType();
7731       const auto *Base = BaseTy->getAsCXXRecordDecl();
7732       // Ignore empty bases.
7733       if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
7734           CGF.getContext()
7735               .getASTRecordLayout(Base)
7736               .getNonVirtualSize()
7737               .isZero())
7738         continue;
7739
7740       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7741       RecordLayout[FieldIndex] = Base;
7742     }
7743     // Fill in virtual bases.
7744     for (const auto &I : RD->vbases()) {
7745       QualType BaseTy = I.getType();
7746       // Ignore empty bases.
7747 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy)) 7748 continue; 7749 7750 const auto *Base = BaseTy->getAsCXXRecordDecl(); 7751 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7752 if (RecordLayout[FieldIndex]) 7753 continue; 7754 RecordLayout[FieldIndex] = Base; 7755 } 7756 // Fill in all the fields. 7757 assert(!RD->isUnion() && "Unexpected union."); 7758 for (const auto *Field : RD->fields()) { 7759 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7760 // will fill in later.) 7761 if (!Field->isBitField() && 7762 !isEmptyFieldForLayout(CGF.getContext(), Field)) { 7763 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7764 RecordLayout[FieldIndex] = Field; 7765 } 7766 } 7767 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7768 &Data : RecordLayout) { 7769 if (Data.isNull()) 7770 continue; 7771 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data)) 7772 getPlainLayout(Base, Layout, /*AsBase=*/true); 7773 else 7774 Layout.push_back(cast<const FieldDecl *>(Data)); 7775 } 7776 } 7777 7778 /// Generate all the base pointers, section pointers, sizes, map types, and 7779 /// mappers for the extracted mappable expressions (all included in \a 7780 /// CombinedInfo). Also, for each item that relates with a device pointer, a 7781 /// pair of the relevant declaration and index where it occurs is appended to 7782 /// the device pointers info array. 7783 void generateAllInfoForClauses( 7784 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, 7785 llvm::OpenMPIRBuilder &OMPBuilder, 7786 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = 7787 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { 7788 // We have to process the component lists that relate with the same 7789 // declaration in a single chunk so that we can generate the map flags 7790 // correctly. Therefore, we organize all lists in a map. 7791 enum MapKind { Present, Allocs, Other, Total }; 7792 llvm::MapVector<CanonicalDeclPtr<const Decl>, 7793 SmallVector<SmallVector<MapInfo, 8>, 4>> 7794 Info; 7795 7796 // Helper function to fill the information map for the different supported 7797 // clauses. 7798 auto &&InfoGen = 7799 [&Info, &SkipVarSet]( 7800 const ValueDecl *D, MapKind Kind, 7801 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7802 OpenMPMapClauseKind MapType, 7803 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7804 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7805 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, 7806 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { 7807 if (SkipVarSet.contains(D)) 7808 return; 7809 auto It = Info.try_emplace(D, Total).first; 7810 It->second[Kind].emplace_back( 7811 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, 7812 IsImplicit, Mapper, VarRef, ForDeviceAddr); 7813 }; 7814 7815 for (const auto *Cl : Clauses) { 7816 const auto *C = dyn_cast<OMPMapClause>(Cl); 7817 if (!C) 7818 continue; 7819 MapKind Kind = Other; 7820 if (llvm::is_contained(C->getMapTypeModifiers(), 7821 OMPC_MAP_MODIFIER_present)) 7822 Kind = Present; 7823 else if (C->getMapType() == OMPC_MAP_alloc) 7824 Kind = Allocs; 7825 const auto *EI = C->getVarRefs().begin(); 7826 for (const auto L : C->component_lists()) { 7827 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr;
7828         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7829                 C->getMapTypeModifiers(), {},
7830                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7831                 E);
7832         ++EI;
7833       }
7834     }
7835     for (const auto *Cl : Clauses) {
7836       const auto *C = dyn_cast<OMPToClause>(Cl);
7837       if (!C)
7838         continue;
7839       MapKind Kind = Other;
7840       if (llvm::is_contained(C->getMotionModifiers(),
7841                              OMPC_MOTION_MODIFIER_present))
7842         Kind = Present;
7843       const auto *EI = C->getVarRefs().begin();
7844       for (const auto L : C->component_lists()) {
7845         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
7846                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7847                 C->isImplicit(), std::get<2>(L), *EI);
7848         ++EI;
7849       }
7850     }
7851     for (const auto *Cl : Clauses) {
7852       const auto *C = dyn_cast<OMPFromClause>(Cl);
7853       if (!C)
7854         continue;
7855       MapKind Kind = Other;
7856       if (llvm::is_contained(C->getMotionModifiers(),
7857                              OMPC_MOTION_MODIFIER_present))
7858         Kind = Present;
7859       const auto *EI = C->getVarRefs().begin();
7860       for (const auto L : C->component_lists()) {
7861         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
7862                 C->getMotionModifiers(),
7863                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7864                 *EI);
7865         ++EI;
7866       }
7867     }
7868
7869     // Look at the use_device_ptr and use_device_addr clause information and
7870     // mark the existing map entries as such. If there is no map information for
7871     // an entry in the use_device_ptr and use_device_addr list, we create one
7872     // with map type 'alloc' and a zero size section. It is the user's fault if
7873     // that was not mapped before. If there is no map information and the
7874     // pointer is a struct member, then we defer the emission of that entry
7875     // until the whole struct has been processed.
7876     llvm::MapVector<CanonicalDeclPtr<const Decl>,
7877                     SmallVector<DeferredDevicePtrEntryTy, 4>>
7878         DeferredInfo;
7879     MapCombinedInfoTy UseDeviceDataCombinedInfo;
7880
7881     auto &&UseDeviceDataCombinedInfoGen =
7882         [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7883                                      CodeGenFunction &CGF, bool IsDevAddr) {
7884           UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7885           UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7886           UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7887           UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7888               IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7889           UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7890           UseDeviceDataCombinedInfo.Sizes.push_back(
7891               llvm::Constant::getNullValue(CGF.Int64Ty));
7892           UseDeviceDataCombinedInfo.Types.push_back(
7893               OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7894           UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7895         };
7896
7897     auto &&MapInfoGen =
7898         [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7899          &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7900                    OMPClauseMappableExprCommon::MappableExprComponentListRef
7901                        Components,
7902                    bool IsImplicit, bool IsDevAddr) {
7903           // We didn't find any match in our map information: generate a
7904           // zero-size array section; if the pointer is a struct member, we
7905           // defer this action until the whole struct has been processed.
7906           if (isa<MemberExpr>(IE)) {
7907             // Insert the pointer into Info to be processed by
7908             // generateInfoForComponentList. Because it is a member pointer
7909             // without a pointee, no entry will be generated for it; therefore
7910             // we need to generate one after the whole struct has been
7911             // processed. Nonetheless, generateInfoForComponentList must be
7912             // called to take the pointer into account for the calculation of
7913             // the range of the partial struct.
7914             InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
7915                     /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
7916                     IsDevAddr);
7917             DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7918           } else {
7919             llvm::Value *Ptr;
7920             if (IsDevAddr) {
7921               if (IE->isGLValue())
7922                 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7923               else
7924                 Ptr = CGF.EmitScalarExpr(IE);
7925             } else {
7926               Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7927             }
7928             UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7929           }
7930         };
7931
7932     auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7933                                     const Expr *IE, bool IsDevAddr) -> bool {
7934       // We potentially have map information for this declaration already.
7935       // Look for the first set of components that refer to it. If found,
7936       // return true.
7937       // If the first component is a member expression, we have to look into
7938       // 'this', which maps to null in the map of map information. Otherwise
7939       // look directly for the information.
7940       auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7941       if (It != Info.end()) {
7942         bool Found = false;
7943         for (auto &Data : It->second) {
7944           auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7945             return MI.Components.back().getAssociatedDeclaration() == VD;
7946           });
7947           // If we found a map entry, signal that the pointer has to be
7948           // returned and move on to the next declaration. Exclude cases where
7949           // the base pointer is mapped as an array subscript, array section,
7950           // or array shaping. The base address is passed as a pointer to the
7951           // base in this case and cannot be used as a base for a
7952           // use_device_ptr list item.
7953           if (CI != Data.end()) {
7954             if (IsDevAddr) {
7955               CI->ForDeviceAddr = IsDevAddr;
7956               CI->ReturnDevicePointer = true;
7957               Found = true;
7958               break;
7959             } else {
7960               auto PrevCI = std::next(CI->Components.rbegin());
7961               const auto *VarD = dyn_cast<VarDecl>(VD);
7962               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7963                   isa<MemberExpr>(IE) ||
7964                   !VD->getType().getNonReferenceType()->isPointerType() ||
7965                   PrevCI == CI->Components.rend() ||
7966                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7967                   VarD->hasLocalStorage()) {
7968                 CI->ForDeviceAddr = IsDevAddr;
7969                 CI->ReturnDevicePointer = true;
7970                 Found = true;
7971                 break;
7972               }
7973             }
7974           }
7975         }
7976         return Found;
7977       }
7978       return false;
7979     };
7980
7981     // Look at the use_device_ptr clause information and mark the existing map
7982     // entries as such. If there is no map information for an entry in the
7983     // use_device_ptr list, we create one with map type 'alloc' and a zero size
7984     // section. It is the user's fault if that was not mapped before. If there
7985     // is no map information and the pointer is a struct member, then we defer
7986     // the emission of that entry until the whole struct has been processed.
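// A sketch of the two situations (assuming a mapped pointer p):
//   #pragma omp target data map(to: p[0:n]) use_device_ptr(p)
// marks the existing entry for `p` as RETURN_PARAM, whereas
//   #pragma omp target data use_device_ptr(p)
// alone creates the zero-size 'alloc' entry and relies on `p` having been
// mapped beforehand.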
7987 for (const auto *Cl : Clauses) { 7988 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); 7989 if (!C) 7990 continue; 7991 for (const auto L : C->component_lists()) { 7992 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 7993 std::get<1>(L); 7994 assert(!Components.empty() && 7995 "Not expecting empty list of components!"); 7996 const ValueDecl *VD = Components.back().getAssociatedDeclaration(); 7997 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 7998 const Expr *IE = Components.back().getAssociatedExpression(); 7999 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false)) 8000 continue; 8001 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), 8002 /*IsDevAddr=*/false); 8003 } 8004 } 8005 8006 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; 8007 for (const auto *Cl : Clauses) { 8008 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); 8009 if (!C) 8010 continue; 8011 for (const auto L : C->component_lists()) { 8012 OMPClauseMappableExprCommon::MappableExprComponentListRef Components = 8013 std::get<1>(L); 8014 assert(!std::get<1>(L).empty() && 8015 "Not expecting empty list of components!"); 8016 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); 8017 if (!Processed.insert(VD).second) 8018 continue; 8019 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8020 const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); 8021 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true)) 8022 continue; 8023 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), 8024 /*IsDevAddr=*/true); 8025 } 8026 } 8027 8028 for (const auto &Data : Info) { 8029 StructRangeInfoTy PartialStruct; 8030 // Current struct information: 8031 MapCombinedInfoTy CurInfo; 8032 // Current struct base information: 8033 MapCombinedInfoTy StructBaseCurInfo; 8034 const Decl *D = Data.first; 8035 const ValueDecl *VD = cast_or_null<ValueDecl>(D); 8036 bool HasMapBasePtr = false; 8037 bool HasMapArraySec = false; 8038 if (VD && VD->getType()->isAnyPointerType()) { 8039 for (const auto &M : Data.second) { 8040 HasMapBasePtr = any_of(M, [](const MapInfo &L) { 8041 return isa_and_present<DeclRefExpr>(L.VarRef); 8042 }); 8043 HasMapArraySec = any_of(M, [](const MapInfo &L) { 8044 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>( 8045 L.VarRef); 8046 }); 8047 if (HasMapBasePtr && HasMapArraySec) 8048 break; 8049 } 8050 } 8051 for (const auto &M : Data.second) { 8052 for (const MapInfo &L : M) { 8053 assert(!L.Components.empty() && 8054 "Not expecting declaration with no component lists."); 8055 8056 // Remember the current base pointer index. 8057 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); 8058 unsigned StructBasePointersIdx = 8059 StructBaseCurInfo.BasePointers.size(); 8060 CurInfo.NonContigInfo.IsNonContiguous = 8061 L.Components.back().isNonContiguous(); 8062 generateInfoForComponentList( 8063 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, 8064 CurInfo, StructBaseCurInfo, PartialStruct, 8065 /*IsFirstComponentList=*/false, L.IsImplicit, 8066 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD, 8067 L.VarRef, /*OverlappedElements*/ {}, 8068 HasMapBasePtr && HasMapArraySec); 8069 8070 // If this entry relates to a device pointer, set the relevant 8071 // declaration and add the 'return pointer' flag. 
8072 if (L.ReturnDevicePointer) { 8073 // Check whether a value was added to either CurInfo or 8074 // StructBaseCurInfo and error if no value was added to either of 8075 // them: 8076 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() || 8077 StructBasePointersIdx < 8078 StructBaseCurInfo.BasePointers.size()) && 8079 "Unexpected number of mapped base pointers."); 8080 8081 // Choose a base pointer index which is always valid: 8082 const ValueDecl *RelevantVD = 8083 L.Components.back().getAssociatedDeclaration(); 8084 assert(RelevantVD && 8085 "No relevant declaration related with device pointer??"); 8086 8087 // If StructBaseCurInfo has been updated this iteration then work on 8088 // the first new entry added to it i.e. make sure that when multiple 8089 // values are added to any of the lists, the first value added is 8090 // being modified by the assignments below (not the last value 8091 // added). 8092 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) { 8093 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] = 8094 RelevantVD; 8095 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] = 8096 L.ForDeviceAddr ? DeviceInfoTy::Address 8097 : DeviceInfoTy::Pointer; 8098 StructBaseCurInfo.Types[StructBasePointersIdx] |= 8099 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; 8100 } else { 8101 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD; 8102 CurInfo.DevicePointers[CurrentBasePointersIdx] = 8103 L.ForDeviceAddr ? DeviceInfoTy::Address 8104 : DeviceInfoTy::Pointer; 8105 CurInfo.Types[CurrentBasePointersIdx] |= 8106 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; 8107 } 8108 } 8109 } 8110 } 8111 8112 // Append any pending zero-length pointers which are struct members and 8113 // used with use_device_ptr or use_device_addr. 8114 auto CI = DeferredInfo.find(Data.first); 8115 if (CI != DeferredInfo.end()) { 8116 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8117 llvm::Value *BasePtr; 8118 llvm::Value *Ptr; 8119 if (L.ForDeviceAddr) { 8120 if (L.IE->isGLValue()) 8121 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8122 else 8123 Ptr = this->CGF.EmitScalarExpr(L.IE); 8124 BasePtr = Ptr; 8125 // Entry is RETURN_PARAM. Also, set the placeholder value 8126 // MEMBER_OF=FFFF so that the entry is later updated with the 8127 // correct value of MEMBER_OF. 8128 CurInfo.Types.push_back( 8129 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | 8130 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); 8131 } else { 8132 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8133 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8134 L.IE->getExprLoc()); 8135 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the 8136 // placeholder value MEMBER_OF=FFFF so that the entry is later 8137 // updated with the correct value of MEMBER_OF. 8138 CurInfo.Types.push_back( 8139 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | 8140 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | 8141 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); 8142 } 8143 CurInfo.Exprs.push_back(L.VD); 8144 CurInfo.BasePointers.emplace_back(BasePtr); 8145 CurInfo.DevicePtrDecls.emplace_back(L.VD); 8146 CurInfo.DevicePointers.emplace_back( 8147 L.ForDeviceAddr ? 
DeviceInfoTy::Address : DeviceInfoTy::Pointer); 8148 CurInfo.Pointers.push_back(Ptr); 8149 CurInfo.Sizes.push_back( 8150 llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8151 CurInfo.Mappers.push_back(nullptr); 8152 } 8153 } 8154 8155 // Unify entries in one list making sure the struct mapping precedes the 8156 // individual fields: 8157 MapCombinedInfoTy UnionCurInfo; 8158 UnionCurInfo.append(StructBaseCurInfo); 8159 UnionCurInfo.append(CurInfo); 8160 8161 // If there is an entry in PartialStruct it means we have a struct with 8162 // individual members mapped. Emit an extra combined entry. 8163 if (PartialStruct.Base.isValid()) { 8164 UnionCurInfo.NonContigInfo.Dims.push_back(0); 8165 // Emit a combined entry: 8166 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct, 8167 /*IsMapThis*/ !VD, OMPBuilder, VD); 8168 } 8169 8170 // We need to append the results of this capture to what we already have. 8171 CombinedInfo.append(UnionCurInfo); 8172 } 8173 // Append data for use_device_ptr clauses. 8174 CombinedInfo.append(UseDeviceDataCombinedInfo); 8175 } 8176 8177 public: 8178 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8179 : CurDir(&Dir), CGF(CGF) { 8180 // Extract firstprivate clause information. 8181 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8182 for (const auto *D : C->varlist()) 8183 FirstPrivateDecls.try_emplace( 8184 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8185 // Extract implicit firstprivates from uses_allocators clauses. 8186 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8187 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8188 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8189 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8190 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8191 /*Implicit=*/true); 8192 else if (const auto *VD = dyn_cast<VarDecl>( 8193 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8194 ->getDecl())) 8195 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8196 } 8197 } 8198 // Extract device pointer clause information. 8199 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8200 for (auto L : C->component_lists()) 8201 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8202 // Extract device addr clause information. 8203 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>()) 8204 for (auto L : C->component_lists()) 8205 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L)); 8206 // Extract map information. 8207 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { 8208 if (C->getMapType() != OMPC_MAP_to) 8209 continue; 8210 for (auto L : C->component_lists()) { 8211 const ValueDecl *VD = std::get<0>(L); 8212 const auto *RD = VD ? VD->getType() 8213 .getCanonicalType() 8214 .getNonReferenceType() 8215 ->getAsCXXRecordDecl() 8216 : nullptr; 8217 if (RD && RD->isLambda()) 8218 LambdasMap.try_emplace(std::get<0>(L), C); 8219 } 8220 } 8221 } 8222 8223 /// Constructor for the declare mapper directive. 8224 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8225 : CurDir(&Dir), CGF(CGF) {} 8226 8227 /// Generate code for the combined entry if we have a partially mapped struct 8228 /// and take care of the mapping flags of the arguments corresponding to 8229 /// individual struct members. 
8230   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8231                          MapFlagsArrayTy &CurTypes,
8232                          const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8233                          llvm::OpenMPIRBuilder &OMPBuilder,
8234                          const ValueDecl *VD = nullptr,
8235                          bool NotTargetParams = true) const {
8236     if (CurTypes.size() == 1 &&
8237         ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8238          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8239         !PartialStruct.IsArraySection)
8240       return;
8241     Address LBAddr = PartialStruct.LowestElem.second;
8242     Address HBAddr = PartialStruct.HighestElem.second;
8243     if (PartialStruct.HasCompleteRecord) {
8244       LBAddr = PartialStruct.LB;
8245       HBAddr = PartialStruct.LB;
8246     }
8247     CombinedInfo.Exprs.push_back(VD);
8248     // Base is the base of the struct.
8249     CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8250     CombinedInfo.DevicePtrDecls.push_back(nullptr);
8251     CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8252     // Pointer is the address of the lowest element.
8253     llvm::Value *LB = LBAddr.emitRawPointer(CGF);
8254     const CXXMethodDecl *MD =
8255         CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8256     const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8257     bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8258     // There should not be a mapper for a combined entry.
8259     if (HasBaseClass) {
8260       // OpenMP 5.2 148:21:
8261       // If the target construct is within a class non-static member function,
8262       // and a variable is an accessible data member of the object for which the
8263       // non-static data member function is invoked, the variable is treated as
8264       // if the this[:1] expression had appeared in a map clause with a map-type
8265       // of tofrom.
8266       // Emit this[:1]
8267       CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8268       QualType Ty = MD->getFunctionObjectParameterType();
8269       llvm::Value *Size =
8270           CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8271                                     /*isSigned=*/true);
8272       CombinedInfo.Sizes.push_back(Size);
8273     } else {
8274       CombinedInfo.Pointers.push_back(LB);
8275       // Size is (addr of {highest+1} element) - (addr of lowest element).
8276       llvm::Value *HB = HBAddr.emitRawPointer(CGF);
8277       llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8278           HBAddr.getElementType(), HB, /*Idx0=*/1);
8279       llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8280       llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8281       llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8282       llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8283                                                     /*isSigned=*/false);
8284       CombinedInfo.Sizes.push_back(Size);
8285     }
8286     CombinedInfo.Mappers.push_back(nullptr);
8287     // The map type is always TARGET_PARAM when we generate info for captures.
8288     CombinedInfo.Types.push_back(
8289         NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8290         : !PartialStruct.PreliminaryMapData.BasePointers.empty()
8291             ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
8292             : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8293     // If any element has the present modifier, then make sure the runtime
8294     // doesn't attempt to allocate the struct.
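// E.g. map(present, to: s.x) propagates OMP_MAP_PRESENT to this combined
// entry, so the runtime reports an error instead of allocating the
// enclosing struct when it is absent on the device.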
8295     if (CurTypes.end() !=
8296         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8297           return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8298               Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8299         }))
8300       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8301     // Remove the TARGET_PARAM flag from the first element.
8302     (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8303     // If any element has the ompx_hold modifier, then make sure the runtime
8304     // uses the hold reference count for the struct as a whole so that it won't
8305     // be unmapped by an extra dynamic reference count decrement. Add it to all
8306     // elements as well so the runtime knows which reference count to check
8307     // when determining whether it's time for device-to-host transfers of
8308     // individual elements.
8309     if (CurTypes.end() !=
8310         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8311           return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8312               Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8313         })) {
8314       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8315       for (auto &M : CurTypes)
8316         M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8317     }
8318
8319     // All other current entries will be MEMBER_OF the combined entry
8320     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8321     // 0xFFFF in the MEMBER_OF field).
8322     OpenMPOffloadMappingFlags MemberOfFlag =
8323         OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8324     for (auto &M : CurTypes)
8325       OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8326   }
8327
8328   /// Generate all the base pointers, section pointers, sizes, map types, and
8329   /// mappers for the extracted mappable expressions (all included in \a
8330   /// CombinedInfo). Also, for each item that relates to a device pointer, a
8331   /// pair of the relevant declaration and index where it occurs is appended to
8332   /// the device pointers info array.
8333   void generateAllInfo(
8334       MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8335       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8336           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8337     assert(isa<const OMPExecutableDirective *>(CurDir) &&
8338            "Expect an executable directive");
8339     const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8340     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8341                               SkipVarSet);
8342   }
8343
8344   /// Generate all the base pointers, section pointers, sizes, map types, and
8345   /// mappers for the extracted map clauses of a user-defined mapper (all
8346   /// included in \a CombinedInfo).
8347   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8348                                 llvm::OpenMPIRBuilder &OMPBuilder) const {
8349     assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
8350            "Expect a declare mapper directive");
8351     const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
8352     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8353                               OMPBuilder);
8354   }
8355
8356   /// Emit capture info for lambdas for variables captured by reference.
8357 void generateInfoForLambdaCaptures( 8358 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8359 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8360 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType(); 8361 const auto *RD = VDType->getAsCXXRecordDecl(); 8362 if (!RD || !RD->isLambda()) 8363 return; 8364 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType), 8365 CGF.getContext().getDeclAlign(VD)); 8366 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType); 8367 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures; 8368 FieldDecl *ThisCapture = nullptr; 8369 RD->getCaptureFields(Captures, ThisCapture); 8370 if (ThisCapture) { 8371 LValue ThisLVal = 8372 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8373 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8374 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8375 VDLVal.getPointer(CGF)); 8376 CombinedInfo.Exprs.push_back(VD); 8377 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); 8378 CombinedInfo.DevicePtrDecls.push_back(nullptr); 8379 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 8380 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); 8381 CombinedInfo.Sizes.push_back( 8382 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8383 CGF.Int64Ty, /*isSigned=*/true)); 8384 CombinedInfo.Types.push_back( 8385 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | 8386 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | 8387 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | 8388 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); 8389 CombinedInfo.Mappers.push_back(nullptr); 8390 } 8391 for (const LambdaCapture &LC : RD->captures()) { 8392 if (!LC.capturesVariable()) 8393 continue; 8394 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar()); 8395 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8396 continue; 8397 auto It = Captures.find(VD); 8398 assert(It != Captures.end() && "Found lambda capture without field."); 8399 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8400 if (LC.getCaptureKind() == LCK_ByRef) { 8401 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8402 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8403 VDLVal.getPointer(CGF)); 8404 CombinedInfo.Exprs.push_back(VD); 8405 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8406 CombinedInfo.DevicePtrDecls.push_back(nullptr); 8407 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 8408 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); 8409 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8410 CGF.getTypeSize( 8411 VD->getType().getCanonicalType().getNonReferenceType()), 8412 CGF.Int64Ty, /*isSigned=*/true)); 8413 } else { 8414 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8415 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8416 VDLVal.getPointer(CGF)); 8417 CombinedInfo.Exprs.push_back(VD); 8418 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8419 CombinedInfo.DevicePtrDecls.push_back(nullptr); 8420 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 8421 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); 8422 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8423 } 8424 CombinedInfo.Types.push_back( 8425 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | 8426 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | 8427 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | 8428 
OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8429 CombinedInfo.Mappers.push_back(nullptr);
8430 }
8431 }
8432
8433 /// Set correct indices for lambda captures.
8434 void adjustMemberOfForLambdaCaptures(
8435 llvm::OpenMPIRBuilder &OMPBuilder,
8436 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8437 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8438 MapFlagsArrayTy &Types) const {
8439 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8440 // Set correct member_of idx for all implicit lambda captures.
8441 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8442 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8443 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8444 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8445 continue;
8446 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8447 assert(BasePtr && "Unable to find base lambda address.");
8448 int TgtIdx = -1;
8449 for (unsigned J = I; J > 0; --J) {
8450 unsigned Idx = J - 1;
8451 if (Pointers[Idx] != BasePtr)
8452 continue;
8453 TgtIdx = Idx;
8454 break;
8455 }
8456 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8457 // All other current entries will be MEMBER_OF the combined entry
8458 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8459 // 0xFFFF in the MEMBER_OF field).
8460 OpenMPOffloadMappingFlags MemberOfFlag =
8461 OMPBuilder.getMemberOfFlag(TgtIdx);
8462 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8463 }
8464 }
8465
8466 /// Generate the base pointers, section pointers, sizes, map types, and
8467 /// mappers associated with a given capture (all included in \a CombinedInfo).
8468 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8469 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8470 StructRangeInfoTy &PartialStruct) const {
8471 assert(!Cap->capturesVariableArrayType() &&
8472 "Not expecting to generate map info for a variable array type!");
8473
8474 // We need to know when we are generating information for the first
8475 // component.
8476 const ValueDecl *VD = Cap->capturesThis()
8477 ? nullptr
8478 : Cap->getCapturedVar()->getCanonicalDecl();
8479
8480 // For map(to: lambda): skip it here; it is processed in
8481 // generateDefaultMapInfo.
8481 if (LambdasMap.count(VD))
8482 return;
8483
8484 // If this declaration appears in an is_device_ptr clause we just have to
8485 // pass the pointer by value. If it is a reference to a declaration, we just
8486 // pass its value.
8487 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8488 CombinedInfo.Exprs.push_back(VD);
8489 CombinedInfo.BasePointers.emplace_back(Arg);
8490 CombinedInfo.DevicePtrDecls.emplace_back(VD);
8491 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8492 CombinedInfo.Pointers.push_back(Arg);
8493 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8494 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8495 /*isSigned=*/true));
8496 CombinedInfo.Types.push_back(
8497 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8498 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8499 CombinedInfo.Mappers.push_back(nullptr);
8500 return;
8501 }
8502
8503 using MapData =
8504 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8505 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8506 const ValueDecl *, const Expr *>;
8507 SmallVector<MapData, 4> DeclComponentLists;
8508 // For member fields listed in is_device_ptr, store them in
8509 // DeclComponentLists for generating component info.
8510 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8511 auto It = DevPointersMap.find(VD);
8512 if (It != DevPointersMap.end())
8513 for (const auto &MCL : It->second)
8514 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8515 /*IsImplicit = */ true, nullptr,
8516 nullptr);
8517 auto I = HasDevAddrsMap.find(VD);
8518 if (I != HasDevAddrsMap.end())
8519 for (const auto &MCL : I->second)
8520 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8521 /*IsImplicit = */ true, nullptr,
8522 nullptr);
8523 assert(isa<const OMPExecutableDirective *>(CurDir) &&
8524 "Expect an executable directive");
8525 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8526 bool HasMapBasePtr = false;
8527 bool HasMapArraySec = false;
8528 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8529 const auto *EI = C->getVarRefs().begin();
8530 for (const auto L : C->decl_component_lists(VD)) {
8531 const ValueDecl *VDecl, *Mapper;
8532 // The expression is not correct if the mapping is implicit.
8533 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8534 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8535 std::tie(VDecl, Components, Mapper) = L;
8536 assert(VDecl == VD && "We got information for the wrong declaration??");
8537 assert(!Components.empty() &&
8538 "Not expecting declaration with no component lists.");
8539 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
8540 HasMapBasePtr = true;
8541 if (VD && E && VD->getType()->isAnyPointerType() &&
8542 (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
8543 HasMapArraySec = true;
8544 DeclComponentLists.emplace_back(Components, C->getMapType(),
8545 C->getMapTypeModifiers(),
8546 C->isImplicit(), Mapper, E);
8547 ++EI;
8548 }
8549 }
8550 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8551 const MapData &RHS) {
8552 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8553 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8554 bool HasPresent =
8555 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8556 bool HasAllocs = MapType == OMPC_MAP_alloc;
8557 MapModifiers = std::get<2>(RHS);
8558 MapType = std::get<1>(LHS);
8559 bool HasPresentR =
8560 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8561 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8562 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8563 });
8564
8565 // Find overlapping elements (including the offset from the base element).
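// Illustrative example of what counts as an overlap: for
//   #pragma omp target map(tofrom : S) map(to : S.X)
// the component lists for 'S' and 'S.X' share the base 'S', so the list
// for 'S.X' is recorded below as overlapping the list for 'S'.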
8566 llvm::SmallDenseMap<
8567 const MapData *,
8568 llvm::SmallVector<
8569 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8570 4>
8571 OverlappedData;
8572 size_t Count = 0;
8573 for (const MapData &L : DeclComponentLists) {
8574 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8575 OpenMPMapClauseKind MapType;
8576 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8577 bool IsImplicit;
8578 const ValueDecl *Mapper;
8579 const Expr *VarRef;
8580 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8581 L;
8582 ++Count;
8583 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8584 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8585 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8586 VarRef) = L1;
8587 auto CI = Components.rbegin();
8588 auto CE = Components.rend();
8589 auto SI = Components1.rbegin();
8590 auto SE = Components1.rend();
8591 for (; CI != CE && SI != SE; ++CI, ++SI) {
8592 if (CI->getAssociatedExpression()->getStmtClass() !=
8593 SI->getAssociatedExpression()->getStmtClass())
8594 break;
8595 // Are we dealing with different variables/fields?
8596 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8597 break;
8598 }
8599 // Overlap is found if, for at least one of the component lists, we
8600 // reached the head of the list.
8601 if (CI == CE || SI == SE) {
8602 // Ignore it if it is the same component.
8603 if (CI == CE && SI == SE)
8604 continue;
8605 const auto It = (SI == SE) ? CI : SI;
8606 // If one component is a pointer and the other is a kind of
8607 // dereference of that pointer (array subscript, section, dereference,
8608 // etc.), they do not overlap.
8609 // Likewise if one component is a base and the other is a dereferenced
8610 // pointer MemberExpr with the same base.
8611 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8612 (std::prev(It)->getAssociatedDeclaration() &&
8613 std::prev(It)
8614 ->getAssociatedDeclaration()
8615 ->getType()
8616 ->isPointerType()) ||
8617 (It->getAssociatedDeclaration() &&
8618 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8619 std::next(It) != CE && std::next(It) != SE))
8620 continue;
8621 const MapData &BaseData = CI == CE ? L : L1;
8622 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8623 SI == SE ? Components : Components1;
8624 OverlappedData[&BaseData].push_back(SubData);
8625 }
8626 }
8627 }
8628 // Sort the overlapped elements for each item.
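// Illustrative example: if the record declares 'X' before 'Y', overlapped
// lists for 'S.Y' and 'S.X' are reordered so that 'S.X' comes first,
// following the field order in the record layout computed below.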
8629 llvm::SmallVector<const FieldDecl *, 4> Layout;
8630 if (!OverlappedData.empty()) {
8631 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8632 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8633 while (BaseType != OrigType) {
8634 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8635 OrigType = BaseType->getPointeeOrArrayElementType();
8636 }
8637
8638 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8639 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8640 else {
8641 const auto *RD = BaseType->getAsRecordDecl();
8642 Layout.append(RD->field_begin(), RD->field_end());
8643 }
8644 }
8645 for (auto &Pair : OverlappedData) {
8646 llvm::stable_sort(
8647 Pair.getSecond(),
8648 [&Layout](
8649 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8650 OMPClauseMappableExprCommon::MappableExprComponentListRef
8651 Second) {
8652 auto CI = First.rbegin();
8653 auto CE = First.rend();
8654 auto SI = Second.rbegin();
8655 auto SE = Second.rend();
8656 for (; CI != CE && SI != SE; ++CI, ++SI) {
8657 if (CI->getAssociatedExpression()->getStmtClass() !=
8658 SI->getAssociatedExpression()->getStmtClass())
8659 break;
8660 // Are we dealing with different variables/fields?
8661 if (CI->getAssociatedDeclaration() !=
8662 SI->getAssociatedDeclaration())
8663 break;
8664 }
8665
8666 // Lists contain the same elements.
8667 if (CI == CE && SI == SE)
8668 return false;
8669
8670 // A list with fewer elements is less than a list with more elements.
8671 if (CI == CE || SI == SE)
8672 return CI == CE;
8673
8674 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8675 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8676 if (FD1->getParent() == FD2->getParent())
8677 return FD1->getFieldIndex() < FD2->getFieldIndex();
8678 const auto *It =
8679 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8680 return FD == FD1 || FD == FD2;
8681 });
8682 return *It == FD1;
8683 });
8684 }
8685
8686 // These are associated with a capture, because the mapping flags depend on it.
8687 // Go through all of the elements that have overlapped elements.
8688 bool IsFirstComponentList = true;
8689 MapCombinedInfoTy StructBaseCombinedInfo;
8690 for (const auto &Pair : OverlappedData) {
8691 const MapData &L = *Pair.getFirst();
8692 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8693 OpenMPMapClauseKind MapType;
8694 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8695 bool IsImplicit;
8696 const ValueDecl *Mapper;
8697 const Expr *VarRef;
8698 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8699 L;
8700 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8701 OverlappedComponents = Pair.getSecond();
8702 generateInfoForComponentList(
8703 MapType, MapModifiers, {}, Components, CombinedInfo,
8704 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8705 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8706 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8707 IsFirstComponentList = false;
8708 }
8709 // Go through the other elements, which have no overlapped elements.
8710 for (const MapData &L : DeclComponentLists) {
8711 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8712 OpenMPMapClauseKind MapType;
8713 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8714 bool IsImplicit;
8715 const ValueDecl *Mapper;
8716 const Expr *VarRef;
8717 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8718 L;
8719 auto It = OverlappedData.find(&L);
8720 if (It == OverlappedData.end())
8721 generateInfoForComponentList(
8722 MapType, MapModifiers, {}, Components, CombinedInfo,
8723 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8724 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8725 /*ForDeviceAddr=*/false, VD, VarRef,
8726 /*OverlappedElements*/ {}, HasMapBasePtr && HasMapArraySec);
8727 IsFirstComponentList = false;
8728 }
8729 }
8730
8731 /// Generate the default map information for a given capture \a CI,
8732 /// record field declaration \a RI, and captured value \a CV.
8733 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8734 const FieldDecl &RI, llvm::Value *CV,
8735 MapCombinedInfoTy &CombinedInfo) const {
8736 bool IsImplicit = true;
8737 // Do the default mapping.
8738 if (CI.capturesThis()) {
8739 CombinedInfo.Exprs.push_back(nullptr);
8740 CombinedInfo.BasePointers.push_back(CV);
8741 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8742 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8743 CombinedInfo.Pointers.push_back(CV);
8744 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8745 CombinedInfo.Sizes.push_back(
8746 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8747 CGF.Int64Ty, /*isSigned=*/true));
8748 // Default map type.
8749 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8750 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8751 } else if (CI.capturesVariableByCopy()) {
8752 const VarDecl *VD = CI.getCapturedVar();
8753 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8754 CombinedInfo.BasePointers.push_back(CV);
8755 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8756 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8757 CombinedInfo.Pointers.push_back(CV);
8758 if (!RI.getType()->isAnyPointerType()) {
8759 // We have to signal to the runtime which captures are passed by value
8760 // and are not pointers.
8761 CombinedInfo.Types.push_back(
8762 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8763 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8764 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8765 } else {
8766 // Pointers are implicitly mapped with a zero size and no flags
8767 // (other than the first map, which is added for all implicit maps).
8768 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8769 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8770 }
8771 auto I = FirstPrivateDecls.find(VD);
8772 if (I != FirstPrivateDecls.end())
8773 IsImplicit = I->getSecond();
8774 } else {
8775 assert(CI.capturesVariable() && "Expected captured reference.");
8776 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8777 QualType ElementType = PtrTy->getPointeeType();
8778 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8779 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8780 // The default map type for a scalar/complex type is 'to' because by
8781 // default the value doesn't have to be retrieved. For an aggregate
8782 // type, the default is 'tofrom'.
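// Illustrative example: a captured 'double' is mapped 'to' by default,
// while a captured 'struct S' is mapped 'tofrom'.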
8783 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI)); 8784 const VarDecl *VD = CI.getCapturedVar(); 8785 auto I = FirstPrivateDecls.find(VD); 8786 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 8787 CombinedInfo.BasePointers.push_back(CV); 8788 CombinedInfo.DevicePtrDecls.push_back(nullptr); 8789 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); 8790 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8791 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8792 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8793 AlignmentSource::Decl)); 8794 CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF)); 8795 } else { 8796 CombinedInfo.Pointers.push_back(CV); 8797 } 8798 if (I != FirstPrivateDecls.end()) 8799 IsImplicit = I->getSecond(); 8800 } 8801 // Every default map produces a single argument which is a target parameter. 8802 CombinedInfo.Types.back() |= 8803 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; 8804 8805 // Add flag stating this is an implicit map. 8806 if (IsImplicit) 8807 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; 8808 8809 // No user-defined mapper for default mapping. 8810 CombinedInfo.Mappers.push_back(nullptr); 8811 } 8812 }; 8813 } // anonymous namespace 8814 8815 // Try to extract the base declaration from a `this->x` expression if possible. 8816 static ValueDecl *getDeclFromThisExpr(const Expr *E) { 8817 if (!E) 8818 return nullptr; 8819 8820 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts())) 8821 if (const MemberExpr *ME = 8822 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) 8823 return ME->getMemberDecl(); 8824 return nullptr; 8825 } 8826 8827 /// Emit a string constant containing the names of the values mapped to the 8828 /// offloading runtime library. 8829 static llvm::Constant * 8830 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 8831 MappableExprsHandler::MappingExprInfo &MapExprs) { 8832 8833 uint32_t SrcLocStrSize; 8834 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) 8835 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 8836 8837 SourceLocation Loc; 8838 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { 8839 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) 8840 Loc = VD->getLocation(); 8841 else 8842 Loc = MapExprs.getMapExpr()->getExprLoc(); 8843 } else { 8844 Loc = MapExprs.getMapDecl()->getLocation(); 8845 } 8846 8847 std::string ExprName; 8848 if (MapExprs.getMapExpr()) { 8849 PrintingPolicy P(CGF.getContext().getLangOpts()); 8850 llvm::raw_string_ostream OS(ExprName); 8851 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 8852 } else { 8853 ExprName = MapExprs.getMapDecl()->getNameAsString(); 8854 } 8855 8856 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 8857 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, 8858 PLoc.getLine(), PLoc.getColumn(), 8859 SrcLocStrSize); 8860 } 8861 /// Emit the arrays used to pass the captures and map information to the 8862 /// offloading runtime library. If there is no map or capture information, 8863 /// return nullptr by reference. 
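/// Conceptually, the emitted arrays have the following shape (an
/// illustrative sketch; the names are not the actual emitted symbols):
/// \code
/// void    *offload_baseptrs[N]; // base address of each mapped item
/// void    *offload_ptrs[N];     // begin of each mapped section
/// int64_t  offload_sizes[N];    // size in bytes of each section
/// // plus map-type flags and, when debug info is enabled, map-name strings
/// \endcode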
8864 static void emitOffloadingArraysAndArgs( 8865 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 8866 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 8867 bool IsNonContiguous = false, bool ForEndCall = false) { 8868 CodeGenModule &CGM = CGF.CGM; 8869 8870 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 8871 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), 8872 CGF.AllocaInsertPt->getIterator()); 8873 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), 8874 CGF.Builder.GetInsertPoint()); 8875 8876 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { 8877 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { 8878 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); 8879 } 8880 }; 8881 8882 auto CustomMapperCB = [&](unsigned int I) { 8883 llvm::Value *MFunc = nullptr; 8884 if (CombinedInfo.Mappers[I]) { 8885 Info.HasMapper = true; 8886 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 8887 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 8888 } 8889 return MFunc; 8890 }; 8891 OMPBuilder.emitOffloadingArraysAndArgs( 8892 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, IsNonContiguous, 8893 ForEndCall, DeviceAddrCB, CustomMapperCB); 8894 } 8895 8896 /// Check for inner distribute directive. 8897 static const OMPExecutableDirective * 8898 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8899 const auto *CS = D.getInnermostCapturedStmt(); 8900 const auto *Body = 8901 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8902 const Stmt *ChildStmt = 8903 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8904 8905 if (const auto *NestedDir = 8906 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8907 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8908 switch (D.getDirectiveKind()) { 8909 case OMPD_target: 8910 // For now, treat 'target' with nested 'teams loop' as if it's 8911 // distributed (target teams distribute). 
8912 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop) 8913 return NestedDir; 8914 if (DKind == OMPD_teams) { 8915 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8916 /*IgnoreCaptured=*/true); 8917 if (!Body) 8918 return nullptr; 8919 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8920 if (const auto *NND = 8921 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8922 DKind = NND->getDirectiveKind(); 8923 if (isOpenMPDistributeDirective(DKind)) 8924 return NND; 8925 } 8926 } 8927 return nullptr; 8928 case OMPD_target_teams: 8929 if (isOpenMPDistributeDirective(DKind)) 8930 return NestedDir; 8931 return nullptr; 8932 case OMPD_target_parallel: 8933 case OMPD_target_simd: 8934 case OMPD_target_parallel_for: 8935 case OMPD_target_parallel_for_simd: 8936 return nullptr; 8937 case OMPD_target_teams_distribute: 8938 case OMPD_target_teams_distribute_simd: 8939 case OMPD_target_teams_distribute_parallel_for: 8940 case OMPD_target_teams_distribute_parallel_for_simd: 8941 case OMPD_parallel: 8942 case OMPD_for: 8943 case OMPD_parallel_for: 8944 case OMPD_parallel_master: 8945 case OMPD_parallel_sections: 8946 case OMPD_for_simd: 8947 case OMPD_parallel_for_simd: 8948 case OMPD_cancel: 8949 case OMPD_cancellation_point: 8950 case OMPD_ordered: 8951 case OMPD_threadprivate: 8952 case OMPD_allocate: 8953 case OMPD_task: 8954 case OMPD_simd: 8955 case OMPD_tile: 8956 case OMPD_unroll: 8957 case OMPD_sections: 8958 case OMPD_section: 8959 case OMPD_single: 8960 case OMPD_master: 8961 case OMPD_critical: 8962 case OMPD_taskyield: 8963 case OMPD_barrier: 8964 case OMPD_taskwait: 8965 case OMPD_taskgroup: 8966 case OMPD_atomic: 8967 case OMPD_flush: 8968 case OMPD_depobj: 8969 case OMPD_scan: 8970 case OMPD_teams: 8971 case OMPD_target_data: 8972 case OMPD_target_exit_data: 8973 case OMPD_target_enter_data: 8974 case OMPD_distribute: 8975 case OMPD_distribute_simd: 8976 case OMPD_distribute_parallel_for: 8977 case OMPD_distribute_parallel_for_simd: 8978 case OMPD_teams_distribute: 8979 case OMPD_teams_distribute_simd: 8980 case OMPD_teams_distribute_parallel_for: 8981 case OMPD_teams_distribute_parallel_for_simd: 8982 case OMPD_target_update: 8983 case OMPD_declare_simd: 8984 case OMPD_declare_variant: 8985 case OMPD_begin_declare_variant: 8986 case OMPD_end_declare_variant: 8987 case OMPD_declare_target: 8988 case OMPD_end_declare_target: 8989 case OMPD_declare_reduction: 8990 case OMPD_declare_mapper: 8991 case OMPD_taskloop: 8992 case OMPD_taskloop_simd: 8993 case OMPD_master_taskloop: 8994 case OMPD_master_taskloop_simd: 8995 case OMPD_parallel_master_taskloop: 8996 case OMPD_parallel_master_taskloop_simd: 8997 case OMPD_requires: 8998 case OMPD_metadirective: 8999 case OMPD_unknown: 9000 default: 9001 llvm_unreachable("Unexpected directive."); 9002 } 9003 } 9004 9005 return nullptr; 9006 } 9007 9008 /// Emit the user-defined mapper function. The code generation follows the 9009 /// pattern in the example below. 9010 /// \code 9011 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9012 /// void *base, void *begin, 9013 /// int64_t size, int64_t type, 9014 /// void *name = nullptr) { 9015 /// // Allocate space for an array section first or add a base/begin for 9016 /// // pointer dereference. 
9017 /// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) && 9018 /// !maptype.IsDelete) 9019 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9020 /// size*sizeof(Ty), clearToFromMember(type)); 9021 /// // Map members. 9022 /// for (unsigned i = 0; i < size; i++) { 9023 /// // For each component specified by this mapper: 9024 /// for (auto c : begin[i]->all_components) { 9025 /// if (c.hasMapper()) 9026 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9027 /// c.arg_type, c.arg_name); 9028 /// else 9029 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9030 /// c.arg_begin, c.arg_size, c.arg_type, 9031 /// c.arg_name); 9032 /// } 9033 /// } 9034 /// // Delete the array section. 9035 /// if (size > 1 && maptype.IsDelete) 9036 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9037 /// size*sizeof(Ty), clearToFromMember(type)); 9038 /// } 9039 /// \endcode 9040 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9041 CodeGenFunction *CGF) { 9042 if (UDMMap.count(D) > 0) 9043 return; 9044 ASTContext &C = CGM.getContext(); 9045 QualType Ty = D->getType(); 9046 auto *MapperVarDecl = 9047 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9048 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9049 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty); 9050 9051 CodeGenFunction MapperCGF(CGM); 9052 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 9053 auto PrivatizeAndGenMapInfoCB = 9054 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI, 9055 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & { 9056 MapperCGF.Builder.restoreIP(CodeGenIP); 9057 9058 // Privatize the declared variable of mapper to be the current array 9059 // element. 9060 Address PtrCurrent( 9061 PtrPHI, ElemTy, 9062 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign()) 9063 .getAlignment() 9064 .alignmentOfArrayElement(ElementSize)); 9065 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9066 Scope.addPrivate(MapperVarDecl, PtrCurrent); 9067 (void)Scope.Privatize(); 9068 9069 // Get map clause information. 9070 MappableExprsHandler MEHandler(*D, MapperCGF); 9071 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder); 9072 9073 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9074 return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr); 9075 }; 9076 if (CGM.getCodeGenOpts().getDebugInfo() != 9077 llvm::codegenoptions::NoDebugInfo) { 9078 CombinedInfo.Names.resize(CombinedInfo.Exprs.size()); 9079 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(), 9080 FillInfoMap); 9081 } 9082 9083 return CombinedInfo; 9084 }; 9085 9086 auto CustomMapperCB = [&](unsigned I, llvm::Function **MapperFunc) { 9087 if (CombinedInfo.Mappers[I]) { 9088 // Call the corresponding mapper function. 
9089 *MapperFunc = getOrCreateUserDefinedMapperFunc(
9090 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9091 assert(*MapperFunc && "Expect a valid mapper function to be available.");
9092 return true;
9093 }
9094 return false;
9095 };
9096
9097 SmallString<64> TyStr;
9098 llvm::raw_svector_ostream Out(TyStr);
9099 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9100 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9101
9102 auto *NewFn = OMPBuilder.emitUserDefinedMapper(PrivatizeAndGenMapInfoCB,
9103 ElemTy, Name, CustomMapperCB);
9104 UDMMap.try_emplace(D, NewFn);
9105 if (CGF)
9106 FunctionUDMMap[CGF->CurFn].push_back(D);
9107 }
9108
9109 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9110 const OMPDeclareMapperDecl *D) {
9111 auto I = UDMMap.find(D);
9112 if (I != UDMMap.end())
9113 return I->second;
9114 emitUserDefinedMapper(D);
9115 return UDMMap.lookup(D);
9116 }
9117
9118 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9119 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9120 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9121 const OMPLoopDirective &D)>
9122 SizeEmitter) {
9123 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9124 const OMPExecutableDirective *TD = &D;
9125 // Get the nested teams distribute kind directive, if any. For now, treat
9126 // 'target_teams_loop' as if it's really a target_teams_distribute.
9127 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9128 Kind != OMPD_target_teams_loop)
9129 TD = getNestedDistributeDirective(CGM.getContext(), D);
9130 if (!TD)
9131 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9132
9133 const auto *LD = cast<OMPLoopDirective>(TD);
9134 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9135 return NumIterations;
9136 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9137 }
9138
9139 static void
9140 emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9141 const OMPExecutableDirective &D,
9142 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9143 bool RequiresOuterTask, const CapturedStmt &CS,
9144 bool OffloadingMandatory, CodeGenFunction &CGF) {
9145 if (OffloadingMandatory) {
9146 CGF.Builder.CreateUnreachable();
9147 } else {
9148 if (RequiresOuterTask) {
9149 CapturedVars.clear();
9150 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9151 }
9152 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9153 CapturedVars);
9154 }
9155 }
9156
9157 static llvm::Value *emitDeviceID(
9158 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9159 CodeGenFunction &CGF) {
9160 // Emit the device ID, if any.
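// Illustrative example: 'device(device_num : 2)' yields an i64 constant 2
// here, while the absence of a device clause yields OMP_DEVICEID_UNDEF.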
9161 llvm::Value *DeviceID;
9162 if (Device.getPointer()) {
9163 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9164 Device.getInt() == OMPC_DEVICE_device_num) &&
9165 "Expected device_num modifier.");
9166 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9167 DeviceID =
9168 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9169 } else {
9170 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9171 }
9172 return DeviceID;
9173 }
9174
9175 static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9176 CodeGenFunction &CGF) {
9177 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9178
9179 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9180 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9181 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9182 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9183 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9184 /*isSigned=*/false);
9185 }
9186 return DynCGroupMem;
9187 }
9188 static void genMapInfoForCaptures(
9189 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9190 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9191 llvm::OpenMPIRBuilder &OMPBuilder,
9192 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
9193 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9194
9195 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9196 auto RI = CS.getCapturedRecordDecl()->field_begin();
9197 auto *CV = CapturedVars.begin();
9198 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9199 CE = CS.capture_end();
9200 CI != CE; ++CI, ++RI, ++CV) {
9201 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9202 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9203
9204 // VLA sizes are passed to the outlined region by copy and do not have
9205 // associated map information.
9206 if (CI->capturesVariableArrayType()) {
9207 CurInfo.Exprs.push_back(nullptr);
9208 CurInfo.BasePointers.push_back(*CV);
9209 CurInfo.DevicePtrDecls.push_back(nullptr);
9210 CurInfo.DevicePointers.push_back(
9211 MappableExprsHandler::DeviceInfoTy::None);
9212 CurInfo.Pointers.push_back(*CV);
9213 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9214 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9215 // Copy to the device as an argument. No need to retrieve it.
9216 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9217 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9218 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9219 CurInfo.Mappers.push_back(nullptr);
9220 } else {
9221 // If we have any information in the map clause, we use it; otherwise,
9222 // we just do a default mapping.
9223 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9224 if (!CI->capturesThis())
9225 MappedVarSet.insert(CI->getCapturedVar());
9226 else
9227 MappedVarSet.insert(nullptr);
9228 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9229 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9230 // Generate correct mapping for variables captured by reference in
9231 // lambdas.
9232 if (CI->capturesVariable())
9233 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9234 CurInfo, LambdaPointers);
9235 }
9236 // We expect to have at least one element of information for this capture.
9237 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && 9238 "Non-existing map pointer for capture!"); 9239 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 9240 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 9241 CurInfo.BasePointers.size() == CurInfo.Types.size() && 9242 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 9243 "Inconsistent map information sizes!"); 9244 9245 // If there is an entry in PartialStruct it means we have a struct with 9246 // individual members mapped. Emit an extra combined entry. 9247 if (PartialStruct.Base.isValid()) { 9248 CombinedInfo.append(PartialStruct.PreliminaryMapData); 9249 MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, 9250 CI->capturesThis(), OMPBuilder, nullptr, 9251 /*NotTargetParams*/ false); 9252 } 9253 9254 // We need to append the results of this capture to what we already have. 9255 CombinedInfo.append(CurInfo); 9256 } 9257 // Adjust MEMBER_OF flags for the lambdas captures. 9258 MEHandler.adjustMemberOfForLambdaCaptures( 9259 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers, 9260 CombinedInfo.Pointers, CombinedInfo.Types); 9261 } 9262 static void 9263 genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, 9264 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9265 llvm::OpenMPIRBuilder &OMPBuilder, 9266 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet = 9267 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) { 9268 9269 CodeGenModule &CGM = CGF.CGM; 9270 // Map any list items in a map clause that were not captures because they 9271 // weren't referenced within the construct. 9272 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet); 9273 9274 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9275 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9276 }; 9277 if (CGM.getCodeGenOpts().getDebugInfo() != 9278 llvm::codegenoptions::NoDebugInfo) { 9279 CombinedInfo.Names.resize(CombinedInfo.Exprs.size()); 9280 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(), 9281 FillInfoMap); 9282 } 9283 } 9284 9285 static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, 9286 const CapturedStmt &CS, 9287 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, 9288 llvm::OpenMPIRBuilder &OMPBuilder, 9289 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) { 9290 // Get mappable expression information. 
9291 MappableExprsHandler MEHandler(D, CGF); 9292 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 9293 9294 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder, 9295 MappedVarSet, CombinedInfo); 9296 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet); 9297 } 9298 9299 template <typename ClauseTy> 9300 static void 9301 emitClauseForBareTargetDirective(CodeGenFunction &CGF, 9302 const OMPExecutableDirective &D, 9303 llvm::SmallVectorImpl<llvm::Value *> &Values) { 9304 const auto *C = D.getSingleClause<ClauseTy>(); 9305 assert(!C->varlist_empty() && 9306 "ompx_bare requires explicit num_teams and thread_limit"); 9307 CodeGenFunction::RunCleanupsScope Scope(CGF); 9308 for (auto *E : C->varlist()) { 9309 llvm::Value *V = CGF.EmitScalarExpr(E); 9310 Values.push_back( 9311 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true)); 9312 } 9313 } 9314 9315 static void emitTargetCallKernelLaunch( 9316 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, 9317 const OMPExecutableDirective &D, 9318 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask, 9319 const CapturedStmt &CS, bool OffloadingMandatory, 9320 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 9321 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, 9322 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, 9323 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9324 const OMPLoopDirective &D)> 9325 SizeEmitter, 9326 CodeGenFunction &CGF, CodeGenModule &CGM) { 9327 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder(); 9328 9329 // Fill up the arrays with all the captured variables. 9330 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 9331 CGOpenMPRuntime::TargetDataInfo Info; 9332 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo); 9333 9334 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder, 9335 /*IsNonContiguous=*/true, /*ForEndCall=*/false); 9336 9337 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9338 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, 9339 CGF.VoidPtrTy, CGM.getPointerAlign()); 9340 InputInfo.PointersArray = 9341 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); 9342 InputInfo.SizesArray = 9343 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); 9344 InputInfo.MappersArray = 9345 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); 9346 MapTypesArray = Info.RTArgs.MapTypesArray; 9347 MapNamesArray = Info.RTArgs.MapNamesArray; 9348 9349 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars, 9350 RequiresOuterTask, &CS, OffloadingMandatory, Device, 9351 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray, 9352 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 9353 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor; 9354 9355 if (IsReverseOffloading) { 9356 // Reverse offloading is not supported, so just execute on the host. 9357 // FIXME: This fallback solution is incorrect since it ignores the 9358 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to 9359 // assert here and ensure SEMA emits an error. 
9360 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, 9361 RequiresOuterTask, CS, OffloadingMandatory, CGF); 9362 return; 9363 } 9364 9365 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>(); 9366 unsigned NumTargetItems = InputInfo.NumberOfTargetItems; 9367 9368 llvm::Value *BasePointersArray = 9369 InputInfo.BasePointersArray.emitRawPointer(CGF); 9370 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF); 9371 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF); 9372 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF); 9373 9374 auto &&EmitTargetCallFallbackCB = 9375 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS, 9376 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) 9377 -> llvm::OpenMPIRBuilder::InsertPointTy { 9378 CGF.Builder.restoreIP(IP); 9379 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, 9380 RequiresOuterTask, CS, OffloadingMandatory, CGF); 9381 return CGF.Builder.saveIP(); 9382 }; 9383 9384 bool IsBare = D.hasClausesOfKind<OMPXBareClause>(); 9385 SmallVector<llvm::Value *, 3> NumTeams; 9386 SmallVector<llvm::Value *, 3> NumThreads; 9387 if (IsBare) { 9388 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams); 9389 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D, 9390 NumThreads); 9391 } else { 9392 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D)); 9393 NumThreads.push_back( 9394 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D)); 9395 } 9396 9397 llvm::Value *DeviceID = emitDeviceID(Device, CGF); 9398 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc()); 9399 llvm::Value *NumIterations = 9400 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter); 9401 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF); 9402 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( 9403 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); 9404 9405 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs( 9406 BasePointersArray, PointersArray, SizesArray, MapTypesArray, 9407 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray); 9408 9409 llvm::OpenMPIRBuilder::TargetKernelArgs Args( 9410 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads, 9411 DynCGGroupMem, HasNoWait); 9412 9413 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = 9414 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch( 9415 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID, 9416 RTLoc, AllocaIP)); 9417 CGF.Builder.restoreIP(AfterIP); 9418 }; 9419 9420 if (RequiresOuterTask) 9421 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9422 else 9423 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9424 } 9425 9426 static void 9427 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, 9428 const OMPExecutableDirective &D, 9429 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, 9430 bool RequiresOuterTask, const CapturedStmt &CS, 9431 bool OffloadingMandatory, CodeGenFunction &CGF) { 9432 9433 // Notify that the host version must be executed. 
9434 auto &&ElseGen =
9435 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9436 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9437 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9438 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9439 };
9440
9441 if (RequiresOuterTask) {
9442 CodeGenFunction::OMPTargetDataInfo InputInfo;
9443 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9444 } else {
9445 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9446 }
9447 }
9448
9449 void CGOpenMPRuntime::emitTargetCall(
9450 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9451 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9452 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9453 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9454 const OMPLoopDirective &D)>
9455 SizeEmitter) {
9456 if (!CGF.HaveInsertPoint())
9457 return;
9458
9459 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9460 CGM.getLangOpts().OpenMPOffloadMandatory;
9461
9462 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9463
9464 const bool RequiresOuterTask =
9465 D.hasClausesOfKind<OMPDependClause>() ||
9466 D.hasClausesOfKind<OMPNowaitClause>() ||
9467 D.hasClausesOfKind<OMPInReductionClause>() ||
9468 (CGM.getLangOpts().OpenMP >= 51 &&
9469 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9470 D.hasClausesOfKind<OMPThreadLimitClause>());
9471 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9472 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9473 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9474 PrePostActionTy &) {
9475 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9476 };
9477 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9478
9479 CodeGenFunction::OMPTargetDataInfo InputInfo;
9480 llvm::Value *MapTypesArray = nullptr;
9481 llvm::Value *MapNamesArray = nullptr;
9482
9483 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9484 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9485 OutlinedFnID, &InputInfo, &MapTypesArray,
9486 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9487 PrePostActionTy &) {
9488 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9489 RequiresOuterTask, CS, OffloadingMandatory,
9490 Device, OutlinedFnID, InputInfo, MapTypesArray,
9491 MapNamesArray, SizeEmitter, CGF, CGM);
9492 };
9493
9494 auto &&TargetElseGen =
9495 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9496 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9497 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9498 CS, OffloadingMandatory, CGF);
9499 };
9500
9501 // If we have a target function ID, it means that we need to support
9502 // offloading; otherwise, just execute on the host. We need to execute on the
9503 // host regardless of the conditional in the if clause if, e.g., the user
9504 // does not specify target triples.
9505 if (OutlinedFnID) {
9506 if (IfCond) {
9507 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9508 } else {
9509 RegionCodeGenTy ThenRCG(TargetThenGen);
9510 ThenRCG(CGF);
9511 }
9512 } else {
9513 RegionCodeGenTy ElseRCG(TargetElseGen);
9514 ElseRCG(CGF);
9515 }
9516 }
9517
9518 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9519 StringRef ParentName) {
9520 if (!S)
9521 return;
9522
9523 // Codegen OMP target directives that offload compute to the device.
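// Illustrative example: encountering '#pragma omp target teams' here leads
// to EmitOMPTargetTeamsDeviceFunction below, which emits the device kernel.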
9524 bool RequiresDeviceCodegen = 9525 isa<OMPExecutableDirective>(S) && 9526 isOpenMPTargetExecutionDirective( 9527 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9528 9529 if (RequiresDeviceCodegen) { 9530 const auto &E = *cast<OMPExecutableDirective>(S); 9531 9532 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc( 9533 CGM, OMPBuilder, E.getBeginLoc(), ParentName); 9534 9535 // Is this a target region that should not be emitted as an entry point? If 9536 // so just signal we are done with this target region. 9537 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo)) 9538 return; 9539 9540 switch (E.getDirectiveKind()) { 9541 case OMPD_target: 9542 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9543 cast<OMPTargetDirective>(E)); 9544 break; 9545 case OMPD_target_parallel: 9546 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9547 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9548 break; 9549 case OMPD_target_teams: 9550 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9551 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9552 break; 9553 case OMPD_target_teams_distribute: 9554 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9555 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9556 break; 9557 case OMPD_target_teams_distribute_simd: 9558 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9559 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9560 break; 9561 case OMPD_target_parallel_for: 9562 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9563 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9564 break; 9565 case OMPD_target_parallel_for_simd: 9566 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9567 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9568 break; 9569 case OMPD_target_simd: 9570 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9571 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9572 break; 9573 case OMPD_target_teams_distribute_parallel_for: 9574 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9575 CGM, ParentName, 9576 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9577 break; 9578 case OMPD_target_teams_distribute_parallel_for_simd: 9579 CodeGenFunction:: 9580 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9581 CGM, ParentName, 9582 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9583 break; 9584 case OMPD_target_teams_loop: 9585 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( 9586 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E)); 9587 break; 9588 case OMPD_target_parallel_loop: 9589 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( 9590 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E)); 9591 break; 9592 case OMPD_parallel: 9593 case OMPD_for: 9594 case OMPD_parallel_for: 9595 case OMPD_parallel_master: 9596 case OMPD_parallel_sections: 9597 case OMPD_for_simd: 9598 case OMPD_parallel_for_simd: 9599 case OMPD_cancel: 9600 case OMPD_cancellation_point: 9601 case OMPD_ordered: 9602 case OMPD_threadprivate: 9603 case OMPD_allocate: 9604 case OMPD_task: 9605 case OMPD_simd: 9606 case OMPD_tile: 9607 case OMPD_unroll: 9608 case OMPD_sections: 9609 case OMPD_section: 9610 case OMPD_single: 9611 case OMPD_master: 9612 case OMPD_critical: 9613 case OMPD_taskyield: 9614 case OMPD_barrier: 9615 case OMPD_taskwait: 9616 case OMPD_taskgroup: 9617 case OMPD_atomic: 9618 case OMPD_flush: 9619 
case OMPD_depobj:
9620 case OMPD_scan:
9621 case OMPD_teams:
9622 case OMPD_target_data:
9623 case OMPD_target_exit_data:
9624 case OMPD_target_enter_data:
9625 case OMPD_distribute:
9626 case OMPD_distribute_simd:
9627 case OMPD_distribute_parallel_for:
9628 case OMPD_distribute_parallel_for_simd:
9629 case OMPD_teams_distribute:
9630 case OMPD_teams_distribute_simd:
9631 case OMPD_teams_distribute_parallel_for:
9632 case OMPD_teams_distribute_parallel_for_simd:
9633 case OMPD_target_update:
9634 case OMPD_declare_simd:
9635 case OMPD_declare_variant:
9636 case OMPD_begin_declare_variant:
9637 case OMPD_end_declare_variant:
9638 case OMPD_declare_target:
9639 case OMPD_end_declare_target:
9640 case OMPD_declare_reduction:
9641 case OMPD_declare_mapper:
9642 case OMPD_taskloop:
9643 case OMPD_taskloop_simd:
9644 case OMPD_master_taskloop:
9645 case OMPD_master_taskloop_simd:
9646 case OMPD_parallel_master_taskloop:
9647 case OMPD_parallel_master_taskloop_simd:
9648 case OMPD_requires:
9649 case OMPD_metadirective:
9650 case OMPD_unknown:
9651 default:
9652 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9653 }
9654 return;
9655 }
9656
9657 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9658 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9659 return;
9660
9661 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9662 return;
9663 }
9664
9665 // If this is a lambda function, look into its body.
9666 if (const auto *L = dyn_cast<LambdaExpr>(S))
9667 S = L->getBody();
9668
9669 // Keep looking for target regions recursively.
9670 for (const Stmt *II : S->children())
9671 scanForTargetRegionsFunctions(II, ParentName);
9672 }
9673
9674 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9675 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9676 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9677 if (!DevTy)
9678 return false;
9679 // Do not emit device_type(nohost) functions for the host.
9680 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9681 return true;
9682 // Do not emit device_type(host) functions for the device.
9683 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9684 return true;
9685 return false;
9686 }
9687
9688 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9689 // If emitting code for the host, we do not process FD here. Instead we do
9690 // the normal code generation.
9691 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9692 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9693 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9694 CGM.getLangOpts().OpenMPIsTargetDevice))
9695 return true;
9696 return false;
9697 }
9698
9699 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9700 // Try to detect target regions in the function.
9701 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9702 StringRef Name = CGM.getMangledName(GD);
9703 scanForTargetRegionsFunctions(FD->getBody(), Name);
9704 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9705 CGM.getLangOpts().OpenMPIsTargetDevice))
9706 return true;
9707 }
9708
9709 // Do not emit the function if it is not marked as declare target.
9710 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9711 AlreadyEmittedTargetDecls.count(VD) == 0;
9712 }
9713
9714 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9715 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9716 CGM.getLangOpts().OpenMPIsTargetDevice))
9717 return true;
9718
9719 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9720 return false;
9721
9722 // Check if there are Ctors/Dtors in this declaration and look for target
9723 // regions in it. We use the complete variant to produce the kernel name
9724 // mangling.
9725 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9726 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9727 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9728 StringRef ParentName =
9729 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9730 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9731 }
9732 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9733 StringRef ParentName =
9734 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9735 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9736 }
9737 }
9738
9739 // Do not emit the variable if it is not marked as declare target.
9740 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9741 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9742 cast<VarDecl>(GD.getDecl()));
9743 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9744 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9745 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9746 HasRequiresUnifiedSharedMemory)) {
9747 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9748 return true;
9749 }
9750 return false;
9751 }
9752
9753 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9754 llvm::Constant *Addr) {
9755 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9756 !CGM.getLangOpts().OpenMPIsTargetDevice)
9757 return;
9758
9759 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9760 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9761
9762 // If this is an 'extern' declaration, we defer to the canonical definition
9763 // and do not emit an offloading entry.
9764 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
9765 VD->hasExternalStorage())
9766 return;
9767
9768 if (!Res) {
9769 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9770 // Register non-target variables being emitted in device code (debug info
9771 // may cause this).
9772 StringRef VarName = CGM.getMangledName(VD); 9773 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9774 } 9775 return; 9776 } 9777 9778 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); }; 9779 auto LinkageForVariable = [&VD, this]() { 9780 return CGM.getLLVMLinkageVarDefinition(VD); 9781 }; 9782 9783 std::vector<llvm::GlobalVariable *> GeneratedRefs; 9784 OMPBuilder.registerTargetGlobalVariable( 9785 convertCaptureClause(VD), convertDeviceClause(VD), 9786 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly, 9787 VD->isExternallyVisible(), 9788 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, 9789 VD->getCanonicalDecl()->getBeginLoc()), 9790 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd, 9791 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable, 9792 CGM.getTypes().ConvertTypeForMem( 9793 CGM.getContext().getPointerType(VD->getType())), 9794 Addr); 9795 9796 for (auto *ref : GeneratedRefs) 9797 CGM.addCompilerUsedGlobal(ref); 9798 } 9799 9800 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9801 if (isa<FunctionDecl>(GD.getDecl()) || 9802 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9803 return emitTargetFunctions(GD); 9804 9805 return emitTargetGlobalVariable(GD); 9806 } 9807 9808 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9809 for (const VarDecl *VD : DeferredGlobalVariables) { 9810 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9811 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9812 if (!Res) 9813 continue; 9814 if ((*Res == OMPDeclareTargetDeclAttr::MT_To || 9815 *Res == OMPDeclareTargetDeclAttr::MT_Enter) && 9816 !HasRequiresUnifiedSharedMemory) { 9817 CGM.EmitGlobal(VD); 9818 } else { 9819 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9820 ((*Res == OMPDeclareTargetDeclAttr::MT_To || 9821 *Res == OMPDeclareTargetDeclAttr::MT_Enter) && 9822 HasRequiresUnifiedSharedMemory)) && 9823 "Expected link clause or to clause with unified memory."); 9824 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9825 } 9826 } 9827 } 9828 9829 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9830 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9831 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9832 " Expected target-based directive."); 9833 } 9834 9835 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 9836 for (const OMPClause *Clause : D->clauselists()) { 9837 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9838 HasRequiresUnifiedSharedMemory = true; 9839 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true); 9840 } else if (const auto *AC = 9841 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 9842 switch (AC->getAtomicDefaultMemOrderKind()) { 9843 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 9844 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 9845 break; 9846 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 9847 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 9848 break; 9849 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 9850 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 9851 break; 9852 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 9853 break; 9854 } 9855 } 9856 } 9857 } 9858 9859 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 9860 return RequiresAtomicOrdering; 9861 } 9862 9863 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9864 LangAS &AS) { 9865 if (!VD || 
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with "
                     "the static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit).
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
                                            const Expr *ThreadLimit,
                                            SourceLocation Loc) {
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
  llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
                                    ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
                      ThreadLimitArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
10007 PrePostActionTy NoPrivAction; 10008 10009 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 10010 10011 llvm::Value *IfCondVal = nullptr; 10012 if (IfCond) 10013 IfCondVal = CGF.EvaluateExprAsBool(IfCond); 10014 10015 // Emit device ID if any. 10016 llvm::Value *DeviceID = nullptr; 10017 if (Device) { 10018 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10019 CGF.Int64Ty, /*isSigned=*/true); 10020 } else { 10021 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10022 } 10023 10024 // Fill up the arrays with all the mapped variables. 10025 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10026 auto GenMapInfoCB = 10027 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { 10028 CGF.Builder.restoreIP(CodeGenIP); 10029 // Get map clause information. 10030 MappableExprsHandler MEHandler(D, CGF); 10031 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder); 10032 10033 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 10034 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 10035 }; 10036 if (CGM.getCodeGenOpts().getDebugInfo() != 10037 llvm::codegenoptions::NoDebugInfo) { 10038 CombinedInfo.Names.resize(CombinedInfo.Exprs.size()); 10039 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(), 10040 FillInfoMap); 10041 } 10042 10043 return CombinedInfo; 10044 }; 10045 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy; 10046 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) { 10047 CGF.Builder.restoreIP(CodeGenIP); 10048 switch (BodyGenType) { 10049 case BodyGenTy::Priv: 10050 if (!Info.CaptureDeviceAddrMap.empty()) 10051 CodeGen(CGF); 10052 break; 10053 case BodyGenTy::DupNoPriv: 10054 if (!Info.CaptureDeviceAddrMap.empty()) { 10055 CodeGen.setAction(NoPrivAction); 10056 CodeGen(CGF); 10057 } 10058 break; 10059 case BodyGenTy::NoPriv: 10060 if (Info.CaptureDeviceAddrMap.empty()) { 10061 CodeGen.setAction(NoPrivAction); 10062 CodeGen(CGF); 10063 } 10064 break; 10065 } 10066 return InsertPointTy(CGF.Builder.GetInsertBlock(), 10067 CGF.Builder.GetInsertPoint()); 10068 }; 10069 10070 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { 10071 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { 10072 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); 10073 } 10074 }; 10075 10076 auto CustomMapperCB = [&](unsigned int I) { 10077 llvm::Value *MFunc = nullptr; 10078 if (CombinedInfo.Mappers[I]) { 10079 Info.HasMapper = true; 10080 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 10081 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 10082 } 10083 return MFunc; 10084 }; 10085 10086 // Source location for the ident struct 10087 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10088 10089 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), 10090 CGF.AllocaInsertPt->getIterator()); 10091 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), 10092 CGF.Builder.GetInsertPoint()); 10093 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP); 10094 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = 10095 cantFail(OMPBuilder.createTargetData( 10096 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB, 10097 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc)); 10098 CGF.Builder.restoreIP(AfterIP); 10099 } 10100 10101 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10102 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10103 const Expr *Device) { 10104 if 
(!CGF.HaveInsertPoint()) 10105 return; 10106 10107 assert((isa<OMPTargetEnterDataDirective>(D) || 10108 isa<OMPTargetExitDataDirective>(D) || 10109 isa<OMPTargetUpdateDirective>(D)) && 10110 "Expecting either target enter, exit data, or update directives."); 10111 10112 CodeGenFunction::OMPTargetDataInfo InputInfo; 10113 llvm::Value *MapTypesArray = nullptr; 10114 llvm::Value *MapNamesArray = nullptr; 10115 // Generate the code for the opening of the data environment. 10116 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 10117 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10118 // Emit device ID if any. 10119 llvm::Value *DeviceID = nullptr; 10120 if (Device) { 10121 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10122 CGF.Int64Ty, /*isSigned=*/true); 10123 } else { 10124 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10125 } 10126 10127 // Emit the number of elements in the offloading arrays. 10128 llvm::Constant *PointerNum = 10129 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10130 10131 // Source location for the ident struct 10132 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10133 10134 SmallVector<llvm::Value *, 13> OffloadingArgs( 10135 {RTLoc, DeviceID, PointerNum, 10136 InputInfo.BasePointersArray.emitRawPointer(CGF), 10137 InputInfo.PointersArray.emitRawPointer(CGF), 10138 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray, 10139 InputInfo.MappersArray.emitRawPointer(CGF)}); 10140 10141 // Select the right runtime function call for each standalone 10142 // directive. 10143 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10144 RuntimeFunction RTLFn; 10145 switch (D.getDirectiveKind()) { 10146 case OMPD_target_enter_data: 10147 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 10148 : OMPRTL___tgt_target_data_begin_mapper; 10149 break; 10150 case OMPD_target_exit_data: 10151 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 10152 : OMPRTL___tgt_target_data_end_mapper; 10153 break; 10154 case OMPD_target_update: 10155 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 10156 : OMPRTL___tgt_target_data_update_mapper; 10157 break; 10158 case OMPD_parallel: 10159 case OMPD_for: 10160 case OMPD_parallel_for: 10161 case OMPD_parallel_master: 10162 case OMPD_parallel_sections: 10163 case OMPD_for_simd: 10164 case OMPD_parallel_for_simd: 10165 case OMPD_cancel: 10166 case OMPD_cancellation_point: 10167 case OMPD_ordered: 10168 case OMPD_threadprivate: 10169 case OMPD_allocate: 10170 case OMPD_task: 10171 case OMPD_simd: 10172 case OMPD_tile: 10173 case OMPD_unroll: 10174 case OMPD_sections: 10175 case OMPD_section: 10176 case OMPD_single: 10177 case OMPD_master: 10178 case OMPD_critical: 10179 case OMPD_taskyield: 10180 case OMPD_barrier: 10181 case OMPD_taskwait: 10182 case OMPD_taskgroup: 10183 case OMPD_atomic: 10184 case OMPD_flush: 10185 case OMPD_depobj: 10186 case OMPD_scan: 10187 case OMPD_teams: 10188 case OMPD_target_data: 10189 case OMPD_distribute: 10190 case OMPD_distribute_simd: 10191 case OMPD_distribute_parallel_for: 10192 case OMPD_distribute_parallel_for_simd: 10193 case OMPD_teams_distribute: 10194 case OMPD_teams_distribute_simd: 10195 case OMPD_teams_distribute_parallel_for: 10196 case OMPD_teams_distribute_parallel_for_simd: 10197 case OMPD_declare_simd: 10198 case OMPD_declare_variant: 10199 case OMPD_begin_declare_variant: 10200 case OMPD_end_declare_variant: 10201 case OMPD_declare_target: 10202 case OMPD_end_declare_target: 10203 case OMPD_declare_reduction: 10204 case OMPD_declare_mapper: 10205 case OMPD_taskloop: 10206 case OMPD_taskloop_simd: 10207 case OMPD_master_taskloop: 10208 case OMPD_master_taskloop_simd: 10209 case OMPD_parallel_master_taskloop: 10210 case OMPD_parallel_master_taskloop_simd: 10211 case OMPD_target: 10212 case OMPD_target_simd: 10213 case OMPD_target_teams_distribute: 10214 case OMPD_target_teams_distribute_simd: 10215 case OMPD_target_teams_distribute_parallel_for: 10216 case OMPD_target_teams_distribute_parallel_for_simd: 10217 case OMPD_target_teams: 10218 case OMPD_target_parallel: 10219 case OMPD_target_parallel_for: 10220 case OMPD_target_parallel_for_simd: 10221 case OMPD_requires: 10222 case OMPD_metadirective: 10223 case OMPD_unknown: 10224 default: 10225 llvm_unreachable("Unexpected standalone target data directive."); 10226 break; 10227 } 10228 if (HasNowait) { 10229 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty)); 10230 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy)); 10231 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty)); 10232 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy)); 10233 } 10234 CGF.EmitRuntimeCall( 10235 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 10236 OffloadingArgs); 10237 }; 10238 10239 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10240 &MapNamesArray](CodeGenFunction &CGF, 10241 PrePostActionTy &) { 10242 // Fill up the arrays with all the mapped variables. 
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
    CGOpenMPRuntime::TargetDataInfo Info;
    MappableExprsHandler MEHandler(D, CGF);
    genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
    emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
                                /*IsNonContiguous=*/true, /*ForEndCall=*/false);

    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause; the VLEN value must be a power of 2.
  // Otherwise, the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // The CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, the CDT
  //      is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //      type that is passed by value (except for types that map to the
  //      built-in complex data type), the CDT is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
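  //
  // Worked example (illustrative only, using the ISA table from
  // emitX86DeclareSimdFunction below): for a function whose CDT is double
  // (64 bits), the AVX variant uses 256-bit vector registers, so
  //   VLEN = 256 / 64 = 4,
  // while the SSE variant (128-bit registers) gets VLEN = 128 / 64 = 2.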
10319 QualType RetType = FD->getReturnType(); 10320 if (RetType.isNull()) 10321 return 0; 10322 ASTContext &C = FD->getASTContext(); 10323 QualType CDT; 10324 if (!RetType.isNull() && !RetType->isVoidType()) { 10325 CDT = RetType; 10326 } else { 10327 unsigned Offset = 0; 10328 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10329 if (ParamAttrs[Offset].Kind == Vector) 10330 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10331 ++Offset; 10332 } 10333 if (CDT.isNull()) { 10334 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10335 if (ParamAttrs[I + Offset].Kind == Vector) { 10336 CDT = FD->getParamDecl(I)->getType(); 10337 break; 10338 } 10339 } 10340 } 10341 } 10342 if (CDT.isNull()) 10343 CDT = C.IntTy; 10344 CDT = CDT->getCanonicalTypeUnqualified(); 10345 if (CDT->isRecordType() || CDT->isUnionType()) 10346 CDT = C.IntTy; 10347 return C.getTypeSize(CDT); 10348 } 10349 10350 /// Mangle the parameter part of the vector function name according to 10351 /// their OpenMP classification. The mangling function is defined in 10352 /// section 4.5 of the AAVFABI(2021Q1). 10353 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10354 SmallString<256> Buffer; 10355 llvm::raw_svector_ostream Out(Buffer); 10356 for (const auto &ParamAttr : ParamAttrs) { 10357 switch (ParamAttr.Kind) { 10358 case Linear: 10359 Out << 'l'; 10360 break; 10361 case LinearRef: 10362 Out << 'R'; 10363 break; 10364 case LinearUVal: 10365 Out << 'U'; 10366 break; 10367 case LinearVal: 10368 Out << 'L'; 10369 break; 10370 case Uniform: 10371 Out << 'u'; 10372 break; 10373 case Vector: 10374 Out << 'v'; 10375 break; 10376 } 10377 if (ParamAttr.HasVarStride) 10378 Out << "s" << ParamAttr.StrideOrArg; 10379 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef || 10380 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) { 10381 // Don't print the step value if it is not present or if it is 10382 // equal to 1. 
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions needed to mangle the names of the vector functions
// generated by the compiler, according to the rules defined in the "Vector
// Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types at most 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10502 /// TODO: Add support for references, section 3.2.1, item 1. 10503 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10504 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10505 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10506 if (getAArch64PBV(PTy, C)) 10507 return C.getTypeSize(PTy); 10508 } 10509 if (getAArch64PBV(QT, C)) 10510 return C.getTypeSize(QT); 10511 10512 return C.getTypeSize(C.getUIntPtrType()); 10513 } 10514 10515 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10516 // signature of the scalar function, as defined in 3.2.2 of the 10517 // AAVFABI. 10518 static std::tuple<unsigned, unsigned, bool> 10519 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10520 QualType RetType = FD->getReturnType().getCanonicalType(); 10521 10522 ASTContext &C = FD->getASTContext(); 10523 10524 bool OutputBecomesInput = false; 10525 10526 llvm::SmallVector<unsigned, 8> Sizes; 10527 if (!RetType->isVoidType()) { 10528 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10529 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10530 OutputBecomesInput = true; 10531 } 10532 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10533 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10534 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10535 } 10536 10537 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10538 // The LS of a function parameter / return value can only be a power 10539 // of 2, starting from 8 bits, up to 128. 10540 assert(llvm::all_of(Sizes, 10541 [](unsigned Size) { 10542 return Size == 8 || Size == 16 || Size == 32 || 10543 Size == 64 || Size == 128; 10544 }) && 10545 "Invalid size"); 10546 10547 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10548 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10549 OutputBecomesInput); 10550 } 10551 10552 // Function used to add the attribute. The parameter `VLEN` is 10553 // templated to allow the use of "x" when targeting scalable functions 10554 // for SVE. 10555 template <typename T> 10556 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10557 char ISA, StringRef ParSeq, 10558 StringRef MangledName, bool OutputBecomesInput, 10559 llvm::Function *Fn) { 10560 SmallString<256> Buffer; 10561 llvm::raw_svector_ostream Out(Buffer); 10562 Out << Prefix << ISA << LMask << VLEN; 10563 if (OutputBecomesInput) 10564 Out << "v"; 10565 Out << ParSeq << "_" << MangledName; 10566 Fn->addFnAttr(Out.str()); 10567 } 10568 10569 // Helper function to generate the Advanced SIMD names depending on 10570 // the value of the NDS when simdlen is not present. 
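// For example (illustrative): NDS == 16 produces both the 4-lane and the
// 8-lane variants (64-bit and 128-bit vectors), while NDS == 64 or 128
// produces only the 2-lane variant, matching the switch below.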
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  //    Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1: an SVE fixed length must obey the architectural
  //    limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out the parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
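      // (Illustrative mangling, assuming a single vector parameter and no
      // OutputBecomesInput lanes: simdlen(4) here yields the lone masked
      // variant "_ZGVsM4v_<mangled-name>".)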
10659 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10660 OutputBecomesInput, Fn); 10661 } else { 10662 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10663 // Advanced SIMD generates one or two functions, depending on 10664 // the `[not]inbranch` clause. 10665 switch (State) { 10666 case OMPDeclareSimdDeclAttr::BS_Undefined: 10667 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10668 OutputBecomesInput, Fn); 10669 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10670 OutputBecomesInput, Fn); 10671 break; 10672 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10673 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10674 OutputBecomesInput, Fn); 10675 break; 10676 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10677 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10678 OutputBecomesInput, Fn); 10679 break; 10680 } 10681 } 10682 } else { 10683 // If no user simdlen is provided, follow the AAVFABI rules for 10684 // generating the vector length. 10685 if (ISA == 's') { 10686 // SVE, section 3.4.1, item 1. 10687 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10688 OutputBecomesInput, Fn); 10689 } else { 10690 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10691 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10692 // two vector names depending on the use of the clause 10693 // `[not]inbranch`. 10694 switch (State) { 10695 case OMPDeclareSimdDeclAttr::BS_Undefined: 10696 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10697 OutputBecomesInput, Fn); 10698 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10699 OutputBecomesInput, Fn); 10700 break; 10701 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10702 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10703 OutputBecomesInput, Fn); 10704 break; 10705 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10706 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10707 OutputBecomesInput, Fn); 10708 break; 10709 } 10710 } 10711 } 10712 } 10713 10714 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10715 llvm::Function *Fn) { 10716 ASTContext &C = CGM.getContext(); 10717 FD = FD->getMostRecentDecl(); 10718 while (FD) { 10719 // Map params to their positions in function decl. 10720 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10721 if (isa<CXXMethodDecl>(FD)) 10722 ParamPositions.try_emplace(FD, 0); 10723 unsigned ParamPos = ParamPositions.size(); 10724 for (const ParmVarDecl *P : FD->parameters()) { 10725 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10726 ++ParamPos; 10727 } 10728 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10729 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10730 // Mark uniform parameters. 10731 for (const Expr *E : Attr->uniforms()) { 10732 E = E->IgnoreParenImpCasts(); 10733 unsigned Pos; 10734 if (isa<CXXThisExpr>(E)) { 10735 Pos = ParamPositions[FD]; 10736 } else { 10737 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10738 ->getCanonicalDecl(); 10739 auto It = ParamPositions.find(PVD); 10740 assert(It != ParamPositions.end() && "Function parameter not found"); 10741 Pos = It->second; 10742 } 10743 ParamAttrs[Pos].Kind = Uniform; 10744 } 10745 // Get alignment info. 
10746 auto *NI = Attr->alignments_begin(); 10747 for (const Expr *E : Attr->aligneds()) { 10748 E = E->IgnoreParenImpCasts(); 10749 unsigned Pos; 10750 QualType ParmTy; 10751 if (isa<CXXThisExpr>(E)) { 10752 Pos = ParamPositions[FD]; 10753 ParmTy = E->getType(); 10754 } else { 10755 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10756 ->getCanonicalDecl(); 10757 auto It = ParamPositions.find(PVD); 10758 assert(It != ParamPositions.end() && "Function parameter not found"); 10759 Pos = It->second; 10760 ParmTy = PVD->getType(); 10761 } 10762 ParamAttrs[Pos].Alignment = 10763 (*NI) 10764 ? (*NI)->EvaluateKnownConstInt(C) 10765 : llvm::APSInt::getUnsigned( 10766 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10767 .getQuantity()); 10768 ++NI; 10769 } 10770 // Mark linear parameters. 10771 auto *SI = Attr->steps_begin(); 10772 auto *MI = Attr->modifiers_begin(); 10773 for (const Expr *E : Attr->linears()) { 10774 E = E->IgnoreParenImpCasts(); 10775 unsigned Pos; 10776 bool IsReferenceType = false; 10777 // Rescaling factor needed to compute the linear parameter 10778 // value in the mangled name. 10779 unsigned PtrRescalingFactor = 1; 10780 if (isa<CXXThisExpr>(E)) { 10781 Pos = ParamPositions[FD]; 10782 auto *P = cast<PointerType>(E->getType()); 10783 PtrRescalingFactor = CGM.getContext() 10784 .getTypeSizeInChars(P->getPointeeType()) 10785 .getQuantity(); 10786 } else { 10787 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10788 ->getCanonicalDecl(); 10789 auto It = ParamPositions.find(PVD); 10790 assert(It != ParamPositions.end() && "Function parameter not found"); 10791 Pos = It->second; 10792 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 10793 PtrRescalingFactor = CGM.getContext() 10794 .getTypeSizeInChars(P->getPointeeType()) 10795 .getQuantity(); 10796 else if (PVD->getType()->isReferenceType()) { 10797 IsReferenceType = true; 10798 PtrRescalingFactor = 10799 CGM.getContext() 10800 .getTypeSizeInChars(PVD->getType().getNonReferenceType()) 10801 .getQuantity(); 10802 } 10803 } 10804 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10805 if (*MI == OMPC_LINEAR_ref) 10806 ParamAttr.Kind = LinearRef; 10807 else if (*MI == OMPC_LINEAR_uval) 10808 ParamAttr.Kind = LinearUVal; 10809 else if (IsReferenceType) 10810 ParamAttr.Kind = LinearVal; 10811 else 10812 ParamAttr.Kind = Linear; 10813 // Assuming a stride of 1, for `linear` without modifiers. 10814 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 10815 if (*SI) { 10816 Expr::EvalResult Result; 10817 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10818 if (const auto *DRE = 10819 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10820 if (const auto *StridePVD = 10821 dyn_cast<ParmVarDecl>(DRE->getDecl())) { 10822 ParamAttr.HasVarStride = true; 10823 auto It = ParamPositions.find(StridePVD->getCanonicalDecl()); 10824 assert(It != ParamPositions.end() && 10825 "Function parameter not found"); 10826 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second); 10827 } 10828 } 10829 } else { 10830 ParamAttr.StrideOrArg = Result.Val.getInt(); 10831 } 10832 } 10833 // If we are using a linear clause on a pointer, we need to 10834 // rescale the value of linear_step with the byte size of the 10835 // pointee type. 
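        // For example (illustrative): for a parameter 'double *p' marked
        // linear(p:2), the step 2 is rescaled by sizeof(double) == 8, so
        // mangleVectorParameters() emits "l16" rather than "l2".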
10836 if (!ParamAttr.HasVarStride && 10837 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef)) 10838 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 10839 ++SI; 10840 ++MI; 10841 } 10842 llvm::APSInt VLENVal; 10843 SourceLocation ExprLoc; 10844 const Expr *VLENExpr = Attr->getSimdlen(); 10845 if (VLENExpr) { 10846 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10847 ExprLoc = VLENExpr->getExprLoc(); 10848 } 10849 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10850 if (CGM.getTriple().isX86()) { 10851 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10852 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10853 unsigned VLEN = VLENVal.getExtValue(); 10854 StringRef MangledName = Fn->getName(); 10855 if (CGM.getTarget().hasFeature("sve")) 10856 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10857 MangledName, 's', 128, Fn, ExprLoc); 10858 else if (CGM.getTarget().hasFeature("neon")) 10859 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10860 MangledName, 'n', 128, Fn, ExprLoc); 10861 } 10862 } 10863 FD = FD->getPreviousDecl(); 10864 } 10865 } 10866 10867 namespace { 10868 /// Cleanup action for doacross support. 10869 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10870 public: 10871 static const int DoacrossFinArgs = 2; 10872 10873 private: 10874 llvm::FunctionCallee RTLFn; 10875 llvm::Value *Args[DoacrossFinArgs]; 10876 10877 public: 10878 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10879 ArrayRef<llvm::Value *> CallArgs) 10880 : RTLFn(RTLFn) { 10881 assert(CallArgs.size() == DoacrossFinArgs); 10882 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10883 } 10884 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10885 if (!CGF.HaveInsertPoint()) 10886 return; 10887 CGF.EmitRuntimeCall(RTLFn, Args); 10888 } 10889 }; 10890 } // namespace 10891 10892 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10893 const OMPLoopDirective &D, 10894 ArrayRef<Expr *> NumIterations) { 10895 if (!CGF.HaveInsertPoint()) 10896 return; 10897 10898 ASTContext &C = CGM.getContext(); 10899 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10900 RecordDecl *RD; 10901 if (KmpDimTy.isNull()) { 10902 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 10903 // kmp_int64 lo; // lower 10904 // kmp_int64 up; // upper 10905 // kmp_int64 st; // stride 10906 // }; 10907 RD = C.buildImplicitRecord("kmp_dim"); 10908 RD->startDefinition(); 10909 addFieldToRecordDecl(C, RD, Int64Ty); 10910 addFieldToRecordDecl(C, RD, Int64Ty); 10911 addFieldToRecordDecl(C, RD, Int64Ty); 10912 RD->completeDefinition(); 10913 KmpDimTy = C.getRecordType(RD); 10914 } else { 10915 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10916 } 10917 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10918 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr, 10919 ArraySizeModifier::Normal, 0); 10920 10921 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10922 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10923 enum { LowerFD = 0, UpperFD, StrideFD }; 10924 // Fill dims with data. 
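  // (Only the 'up' and 'st' fields are written in this loop; 'lo' keeps the
  // zero set by EmitNullInitialization above.)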
10925 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10926 LValue DimsLVal = CGF.MakeAddrLValue( 10927 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10928 // dims.upper = num_iterations; 10929 LValue UpperLVal = CGF.EmitLValueForField( 10930 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10931 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 10932 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 10933 Int64Ty, NumIterations[I]->getExprLoc()); 10934 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10935 // dims.stride = 1; 10936 LValue StrideLVal = CGF.EmitLValueForField( 10937 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10938 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10939 StrideLVal); 10940 } 10941 10942 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10943 // kmp_int32 num_dims, struct kmp_dim * dims); 10944 llvm::Value *Args[] = { 10945 emitUpdateLocation(CGF, D.getBeginLoc()), 10946 getThreadID(CGF, D.getBeginLoc()), 10947 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10948 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10949 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF), 10950 CGM.VoidPtrTy)}; 10951 10952 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10953 CGM.getModule(), OMPRTL___kmpc_doacross_init); 10954 CGF.EmitRuntimeCall(RTLFn, Args); 10955 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10956 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10957 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10958 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 10959 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10960 llvm::ArrayRef(FiniArgs)); 10961 } 10962 10963 template <typename T> 10964 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, 10965 const T *C, llvm::Value *ULoc, 10966 llvm::Value *ThreadID) { 10967 QualType Int64Ty = 10968 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10969 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10970 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10971 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0); 10972 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10973 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10974 const Expr *CounterVal = C->getLoopData(I); 10975 assert(CounterVal); 10976 llvm::Value *CntVal = CGF.EmitScalarConversion( 10977 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10978 CounterVal->getExprLoc()); 10979 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 10980 /*Volatile=*/false, Int64Ty); 10981 } 10982 llvm::Value *Args[] = { 10983 ULoc, ThreadID, 10984 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)}; 10985 llvm::FunctionCallee RTLFn; 10986 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 10987 OMPDoacrossKind<T> ODK; 10988 if (ODK.isSource(C)) { 10989 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10990 OMPRTL___kmpc_doacross_post); 10991 } else { 10992 assert(ODK.isSink(C) && "Expect sink modifier."); 10993 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 10994 OMPRTL___kmpc_doacross_wait); 10995 } 10996 CGF.EmitRuntimeCall(RTLFn, Args); 10997 } 10998 10999 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11000 const OMPDependClause *C) { 11001 return 
EmitDoacrossOrdered<OMPDependClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDoacrossClause *C) {
  return EmitDoacrossOrdered<OMPDoacrossClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
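/// For example (illustrative): an 'align(64)' clause on '#pragma omp allocate'
/// would yield a constant 64 here; when no alignment is available this returns
/// nullptr and the caller below falls back to __kmpc_alloc instead of
/// __kmpc_aligned_alloc.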
11067 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) { 11068 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD); 11069 11070 if (!AllocateAlignment) 11071 return nullptr; 11072 11073 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity()); 11074 } 11075 11076 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11077 const VarDecl *VD) { 11078 if (!VD) 11079 return Address::invalid(); 11080 Address UntiedAddr = Address::invalid(); 11081 Address UntiedRealAddr = Address::invalid(); 11082 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11083 if (It != FunctionToUntiedTaskStackMap.end()) { 11084 const UntiedLocalVarsAddressesMap &UntiedData = 11085 UntiedLocalVarsStack[It->second]; 11086 auto I = UntiedData.find(VD); 11087 if (I != UntiedData.end()) { 11088 UntiedAddr = I->second.first; 11089 UntiedRealAddr = I->second.second; 11090 } 11091 } 11092 const VarDecl *CVD = VD->getCanonicalDecl(); 11093 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 11094 // Use the default allocation. 11095 if (!isAllocatableDecl(VD)) 11096 return UntiedAddr; 11097 llvm::Value *Size; 11098 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11099 if (CVD->getType()->isVariablyModifiedType()) { 11100 Size = CGF.getTypeSize(CVD->getType()); 11101 // Align the size: ((size + align - 1) / align) * align 11102 Size = CGF.Builder.CreateNUWAdd( 11103 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11104 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11105 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11106 } else { 11107 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11108 Size = CGM.getSize(Sz.alignTo(Align)); 11109 } 11110 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11111 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11112 const Expr *Allocator = AA->getAllocator(); 11113 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator); 11114 llvm::Value *Alignment = getAlignmentValue(CGM, CVD); 11115 SmallVector<llvm::Value *, 4> Args; 11116 Args.push_back(ThreadID); 11117 if (Alignment) 11118 Args.push_back(Alignment); 11119 Args.push_back(Size); 11120 Args.push_back(AllocVal); 11121 llvm::omp::RuntimeFunction FnID = 11122 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc; 11123 llvm::Value *Addr = CGF.EmitRuntimeCall( 11124 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args, 11125 getName({CVD->getName(), ".void.addr"})); 11126 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11127 CGM.getModule(), OMPRTL___kmpc_free); 11128 QualType Ty = CGM.getContext().getPointerType(CVD->getType()); 11129 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11130 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); 11131 if (UntiedAddr.isValid()) 11132 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); 11133 11134 // Cleanup action for allocate support. 
11135 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11136 llvm::FunctionCallee RTLFn; 11137 SourceLocation::UIntTy LocEncoding; 11138 Address Addr; 11139 const Expr *AllocExpr; 11140 11141 public: 11142 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 11143 SourceLocation::UIntTy LocEncoding, Address Addr, 11144 const Expr *AllocExpr) 11145 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), 11146 AllocExpr(AllocExpr) {} 11147 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11148 if (!CGF.HaveInsertPoint()) 11149 return; 11150 llvm::Value *Args[3]; 11151 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( 11152 CGF, SourceLocation::getFromRawEncoding(LocEncoding)); 11153 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11154 Addr.emitRawPointer(CGF), CGF.VoidPtrTy); 11155 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr); 11156 Args[2] = AllocVal; 11157 CGF.EmitRuntimeCall(RTLFn, Args); 11158 } 11159 }; 11160 Address VDAddr = 11161 UntiedRealAddr.isValid() 11162 ? UntiedRealAddr 11163 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align); 11164 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 11165 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 11166 VDAddr, Allocator); 11167 if (UntiedRealAddr.isValid()) 11168 if (auto *Region = 11169 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 11170 Region->emitUntiedSwitch(CGF); 11171 return VDAddr; 11172 } 11173 return UntiedAddr; 11174 } 11175 11176 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 11177 const VarDecl *VD) const { 11178 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 11179 if (It == FunctionToUntiedTaskStackMap.end()) 11180 return false; 11181 return UntiedLocalVarsStack[It->second].count(VD) > 0; 11182 } 11183 11184 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11185 CodeGenModule &CGM, const OMPLoopDirective &S) 11186 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11187 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11188 if (!NeedToPush) 11189 return; 11190 NontemporalDeclsSet &DS = 11191 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11192 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11193 for (const Stmt *Ref : C->private_refs()) { 11194 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11195 const ValueDecl *VD; 11196 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11197 VD = DRE->getDecl(); 11198 } else { 11199 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11200 assert((ME->isImplicitCXXThis() || 11201 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11202 "Expected member of current class."); 11203 VD = ME->getMemberDecl(); 11204 } 11205 DS.insert(VD); 11206 } 11207 } 11208 } 11209 11210 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11211 if (!NeedToPush) 11212 return; 11213 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11214 } 11215 11216 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 11217 CodeGenFunction &CGF, 11218 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 11219 std::pair<Address, Address>> &LocalVars) 11220 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 11221 if (!NeedToPush) 11222 return; 11223 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 11224 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 11225 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 11226 } 11227 
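// (Illustrative summary: each map entry pairs a variable with the address of
// its pointer slot and the address of its real storage; for untied tasks,
// getAddressOfLocalVariable() above stores the dynamically allocated pointer
// into the slot so the value survives task switching points.)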
11228 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 11229 if (!NeedToPush) 11230 return; 11231 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 11232 } 11233 11234 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11235 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11236 11237 return llvm::any_of( 11238 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11239 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); 11240 } 11241 11242 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11243 const OMPExecutableDirective &S, 11244 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11245 const { 11246 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11247 // Vars in target/task regions must be excluded completely. 11248 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11249 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11250 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11251 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11252 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11253 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11254 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11255 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11256 } 11257 } 11258 // Exclude vars in private clauses. 11259 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11260 for (const Expr *Ref : C->varlist()) { 11261 if (!Ref->getType()->isScalarType()) 11262 continue; 11263 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11264 if (!DRE) 11265 continue; 11266 NeedToCheckForLPCs.insert(DRE->getDecl()); 11267 } 11268 } 11269 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11270 for (const Expr *Ref : C->varlist()) { 11271 if (!Ref->getType()->isScalarType()) 11272 continue; 11273 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11274 if (!DRE) 11275 continue; 11276 NeedToCheckForLPCs.insert(DRE->getDecl()); 11277 } 11278 } 11279 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11280 for (const Expr *Ref : C->varlist()) { 11281 if (!Ref->getType()->isScalarType()) 11282 continue; 11283 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11284 if (!DRE) 11285 continue; 11286 NeedToCheckForLPCs.insert(DRE->getDecl()); 11287 } 11288 } 11289 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11290 for (const Expr *Ref : C->varlist()) { 11291 if (!Ref->getType()->isScalarType()) 11292 continue; 11293 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11294 if (!DRE) 11295 continue; 11296 NeedToCheckForLPCs.insert(DRE->getDecl()); 11297 } 11298 } 11299 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 11300 for (const Expr *Ref : C->varlist()) { 11301 if (!Ref->getType()->isScalarType()) 11302 continue; 11303 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11304 if (!DRE) 11305 continue; 11306 NeedToCheckForLPCs.insert(DRE->getDecl()); 11307 } 11308 } 11309 for (const Decl *VD : NeedToCheckForLPCs) { 11310 for (const LastprivateConditionalData &Data : 11311 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 11312 if (Data.DeclToUniqueName.count(VD) > 0) { 11313 if (!Data.Disabled) 11314 NeedToAddForLPCsAsDisabled.insert(VD); 11315 break; 11316 } 11317 } 11318 } 11319 } 11320 11321 
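// For illustration (example source, not taken from this file): the
// lastprivate conditional machinery below supports OpenMP 5.0 code such as
//
//   #pragma omp parallel for lastprivate(conditional: a)
//   for (int i = 0; i < n; ++i)
//     if (c[i])
//       a = b[i];
//
// where 'a' must get the value from the last iteration that actually
// assigned it, not simply the value from the last iteration.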
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11322 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 11323 : CGM(CGF.CGM), 11324 Action((CGM.getLangOpts().OpenMP >= 50 && 11325 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11326 [](const OMPLastprivateClause *C) { 11327 return C->getKind() == 11328 OMPC_LASTPRIVATE_conditional; 11329 })) 11330 ? ActionToDo::PushAsLastprivateConditional 11331 : ActionToDo::DoNotPush) { 11332 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11333 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 11334 return; 11335 assert(Action == ActionToDo::PushAsLastprivateConditional && 11336 "Expected a push action."); 11337 LastprivateConditionalData &Data = 11338 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11339 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11340 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 11341 continue; 11342 11343 for (const Expr *Ref : C->varlist()) { 11344 Data.DeclToUniqueName.insert(std::make_pair( 11345 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 11346 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 11347 } 11348 } 11349 Data.IVLVal = IVLVal; 11350 Data.Fn = CGF.CurFn; 11351 } 11352 11353 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11354 CodeGenFunction &CGF, const OMPExecutableDirective &S) 11355 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 11356 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11357 if (CGM.getLangOpts().OpenMP < 50) 11358 return; 11359 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 11360 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 11361 if (!NeedToAddForLPCsAsDisabled.empty()) { 11362 Action = ActionToDo::DisableLastprivateConditional; 11363 LastprivateConditionalData &Data = 11364 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11365 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 11366 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 11367 Data.Fn = CGF.CurFn; 11368 Data.Disabled = true; 11369 } 11370 } 11371 11372 CGOpenMPRuntime::LastprivateConditionalRAII 11373 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 11374 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 11375 return LastprivateConditionalRAII(CGF, S); 11376 } 11377 11378 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 11379 if (CGM.getLangOpts().OpenMP < 50) 11380 return; 11381 if (Action == ActionToDo::DisableLastprivateConditional) { 11382 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11383 "Expected list of disabled private vars."); 11384 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11385 } 11386 if (Action == ActionToDo::PushAsLastprivateConditional) { 11387 assert( 11388 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11389 "Expected list of lastprivate conditional vars."); 11390 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11391 } 11392 } 11393 11394 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 11395 const VarDecl *VD) { 11396 ASTContext &C = CGM.getContext(); 11397 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 11398 QualType NewType; 11399 const FieldDecl *VDField; 11400 const FieldDecl *FiredField; 11401 LValue BaseLVal; 11402 auto VI = I->getSecond().find(VD); 11403 if (VI == 
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the helper record and allocate it.
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Reuse the record created by an earlier call in the same function.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Reset the Fired flag before entering the region.
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
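// The update routine below keeps two internal globals per unique name,
// conceptually (the spellings here are illustrative; the real names come from
// generateUniqueName and getName):
//   int<xx> pl_cond.<id>.iv; // iteration of the last accepted update
//   <type>  pl_cond.<id>;    // value stored by that update
// A new value is committed only when the current iteration is >= the recorded
// one, under a critical section whenever real parallelism is possible.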
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
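// checkAndEmitLastprivateConditional is invoked for every LHS that may name a
// tracked variable, e.g. for the store in (illustrative user code):
//   #pragma omp simd lastprivate(conditional: a)
//   for (...)
//     a = ...; // <- triggers the check/update below
// When the store happens in a nested region outlined into a different
// function, only the Fired flag of the outer helper struct is set; the
// enclosing construct performs the real update later.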
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional not found in the outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
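// After an inner region finishes, the variables it may have touched are merged
// back by checking the Fired flag, conceptually:
//   if (priv_a.Fired != 0)
//     <emit the last_iv/last_a update for priv_a>;
// and only for variables actually captured by the directive and not present in
// the ignored set.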
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeRawAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
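//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: in -fopenmp-simd mode only 'simd' constructs are
// honored, so every entry point that would require the OpenMP runtime library
// is unreachable by construction and simply aborts codegen.
//===----------------------------------------------------------------------===//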
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { return false; }

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}