//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
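        // A rough sketch of the dispatch emitted here (illustrative only;
        // actual block names get uniqued, and cases are appended by later
        // emitUntiedSwitch calls):
        //   switch (*part_id) {
        //   default: goto .untied.done.;  // branch back to the runtime
        //   case 0:  goto .untied.jmp.;   // entry part of the task body
        //   case 1:  ...                  // resumption points added below
        //   }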
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
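    // E.g. (illustrative), given
    //   #pragma omp parallel
    //   {
    //   #pragma omp critical
    //     { /* body */ }
    //   }
    // the 'critical' region is emitted inline into the outlined 'parallel'
    // helper; this RAII temporarily chains a CGOpenMPInlinedRegionInfo in
    // front of the outer CapturedStmtInfo while that emission runs.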
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined; the runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
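// A hypothetical illustration of how these flags compose: the implicit
// barrier at the end of a '#pragma omp for' would be described to the runtime
// with an ident_t whose flags combine OMP_IDENT_KMPC |
// OMP_IDENT_BARRIER_IMPL_FOR (0x02 | 0x40 == 0x42).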
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**<  source[4] in Fortran, do not use for
///                                  C++  */
///    char const *psource;    /**<  String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
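// For illustration, the usual clause-to-enum mapping looks like:
//   schedule(static)             -> OMP_sch_static          (34)
//   schedule(static, 4)          -> OMP_sch_static_chunked  (33)
//   schedule(dynamic, 4)         -> OMP_sch_dynamic_chunked (35)
//   ordered + schedule(static)   -> OMP_ord_static          (66)
//   schedule(monotonic: dynamic) -> OMP_sch_dynamic_chunked |
//                                   OMP_sch_modifier_monotonic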
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
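  // Overall, the emitted control flow is roughly (illustrative):
  //   entry:              br %isempty, omp.arrayinit.done, omp.arrayinit.body
  //   omp.arrayinit.body: phi -> init element -> advance -> back-edge or exit
  //   omp.arrayinit.done: ...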
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size =
        CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                  OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr,
        CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
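  // E.g. (illustrative only), for
  //   #pragma omp declare reduction(mymin : int : omp_out = omp_in < omp_out
  //                                                             ? omp_in
  //                                                             : omp_out)
  // the combiner emitted here behaves roughly like
  //   void .omp_combiner.(int *restrict omp_out_parm,
  //                       int *restrict omp_in_parm) {
  //     *omp_out_parm = *omp_in_parm < *omp_out_parm ? *omp_in_parm
  //                                                  : *omp_out_parm;
  //   }
  // with 'omp_out'/'omp_in' privatized to the pointees of the two parameters.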
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)), which is then fed to
    // EmitBranchThroughCleanup. Furthermore, there will no longer be a need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but only at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
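  // The helper emitted below has the standard microtask shape expected by the
  // runtime's fork call, roughly (illustrative):
  //   void <OutlinedHelperName>(kmp_int32 *global_tid, kmp_int32 *bound_tid,
  //                             <captured variables>...);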
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ?
          OMPD_taskloop : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string FunctionName = "";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr =
        OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used, we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
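      // In that case the id is reloaded through the kmp_int32 * parameter,
      // roughly (illustrative IR):
      //   %tid.addr = load i32*, i32** %.global_tid..addr
      //   %tid      = load i32, i32* %tid.addr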
1480       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1481       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1482       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1483           !CGF.getLangOpts().CXXExceptions ||
1484           CGF.Builder.GetInsertBlock() == TopBlock ||
1485           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1486           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1487               TopBlock ||
1488           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1489               CGF.Builder.GetInsertBlock()) {
1490         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1491         // If the value was loaded in the entry block, cache it and use it
1492         // everywhere in the function.
1493         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1494           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1495           Elem.second.ThreadID = ThreadID;
1496         }
1497         return ThreadID;
1498       }
1499     }
1500   }
1501
1502   // This is not an outlined function region - need to call kmp_int32
1503   // __kmpc_global_thread_num(ident_t *loc).
1504   // Generate the thread id value and cache this value for use across the
1505   // function.
1506   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1507   if (!Elem.second.ServiceInsertPt)
1508     setLocThreadIdInsertPt(CGF);
1509   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1510   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1511   llvm::CallInst *Call = CGF.Builder.CreateCall(
1512       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1513                                             OMPRTL___kmpc_global_thread_num),
1514       emitUpdateLocation(CGF, Loc));
1515   Call->setCallingConv(CGF.getRuntimeCC());
1516   Elem.second.ThreadID = Call;
1517   return Call;
1518 }
1519
1520 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1521   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1522   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1523     clearLocThreadIdInsertPt(CGF);
1524     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1525   }
1526   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1527     for (const auto *D : FunctionUDRMap[CGF.CurFn])
1528       UDRMap.erase(D);
1529     FunctionUDRMap.erase(CGF.CurFn);
1530   }
1531   auto I = FunctionUDMMap.find(CGF.CurFn);
1532   if (I != FunctionUDMMap.end()) {
1533     for (const auto *D : I->second)
1534       UDMMap.erase(D);
1535     FunctionUDMMap.erase(I);
1536   }
1537   LastprivateConditionalToTypes.erase(CGF.CurFn);
1538   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1539 }
1540
1541 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1542   return OMPBuilder.IdentPtr;
1543 }
1544
1545 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1546   if (!Kmpc_MicroTy) {
1547     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1548     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1549                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1550     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1551   }
1552   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1553 }
1554
1555 llvm::FunctionCallee
1556 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1557                                              bool IsGPUDistribute) {
1558   assert((IVSize == 32 || IVSize == 64) &&
1559          "IV size is not compatible with the omp runtime");
1560   StringRef Name;
1561   if (IsGPUDistribute)
1562     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1563                                     : "__kmpc_distribute_static_init_4u")
1564                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1565                                     : "__kmpc_distribute_static_init_8u");
1566   else
1567     Name = IVSize == 32 ? (IVSigned ?
"__kmpc_for_static_init_4" 1568 : "__kmpc_for_static_init_4u") 1569 : (IVSigned ? "__kmpc_for_static_init_8" 1570 : "__kmpc_for_static_init_8u"); 1571 1572 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1573 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1574 llvm::Type *TypeParams[] = { 1575 getIdentTyPointerTy(), // loc 1576 CGM.Int32Ty, // tid 1577 CGM.Int32Ty, // schedtype 1578 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1579 PtrTy, // p_lower 1580 PtrTy, // p_upper 1581 PtrTy, // p_stride 1582 ITy, // incr 1583 ITy // chunk 1584 }; 1585 auto *FnTy = 1586 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1587 return CGM.CreateRuntimeFunction(FnTy, Name); 1588 } 1589 1590 llvm::FunctionCallee 1591 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1592 assert((IVSize == 32 || IVSize == 64) && 1593 "IV size is not compatible with the omp runtime"); 1594 StringRef Name = 1595 IVSize == 32 1596 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1597 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1598 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1599 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1600 CGM.Int32Ty, // tid 1601 CGM.Int32Ty, // schedtype 1602 ITy, // lower 1603 ITy, // upper 1604 ITy, // stride 1605 ITy // chunk 1606 }; 1607 auto *FnTy = 1608 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1609 return CGM.CreateRuntimeFunction(FnTy, Name); 1610 } 1611 1612 llvm::FunctionCallee 1613 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1614 assert((IVSize == 32 || IVSize == 64) && 1615 "IV size is not compatible with the omp runtime"); 1616 StringRef Name = 1617 IVSize == 32 1618 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1619 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1620 llvm::Type *TypeParams[] = { 1621 getIdentTyPointerTy(), // loc 1622 CGM.Int32Ty, // tid 1623 }; 1624 auto *FnTy = 1625 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1626 return CGM.CreateRuntimeFunction(FnTy, Name); 1627 } 1628 1629 llvm::FunctionCallee 1630 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1631 assert((IVSize == 32 || IVSize == 64) && 1632 "IV size is not compatible with the omp runtime"); 1633 StringRef Name = 1634 IVSize == 32 1635 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1636 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1637 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1638 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1639 llvm::Type *TypeParams[] = { 1640 getIdentTyPointerTy(), // loc 1641 CGM.Int32Ty, // tid 1642 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1643 PtrTy, // p_lower 1644 PtrTy, // p_upper 1645 PtrTy // p_stride 1646 }; 1647 auto *FnTy = 1648 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1649 return CGM.CreateRuntimeFunction(FnTy, Name); 1650 } 1651 1652 /// Obtain information that uniquely identifies a target entry. This 1653 /// consists of the file and device IDs as well as line number associated with 1654 /// the relevant entry source location. 
1655 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1656                                      unsigned &DeviceID, unsigned &FileID,
1657                                      unsigned &LineNum) {
1658   SourceManager &SM = C.getSourceManager();
1659
1660   // The loc should always be valid and have a file ID (the user cannot use
1661   // #pragma directives in macros).
1662
1663   assert(Loc.isValid() && "Source location is expected to be always valid.");
1664
1665   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1666   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1667
1668   llvm::sys::fs::UniqueID ID;
1669   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1670     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1671     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1672     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1673       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1674           << PLoc.getFilename() << EC.message();
1675   }
1676
1677   DeviceID = ID.getDevice();
1678   FileID = ID.getFile();
1679   LineNum = PLoc.getLine();
1680 }
1681
1682 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1683   if (CGM.getLangOpts().OpenMPSimd)
1684     return Address::invalid();
1685   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1686       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1687   if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1688               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1689                HasRequiresUnifiedSharedMemory))) {
1690     SmallString<64> PtrName;
1691     {
1692       llvm::raw_svector_ostream OS(PtrName);
1693       OS << CGM.getMangledName(GlobalDecl(VD));
1694       if (!VD->isExternallyVisible()) {
1695         unsigned DeviceID, FileID, Line;
1696         getTargetEntryUniqueInfo(CGM.getContext(),
1697                                  VD->getCanonicalDecl()->getBeginLoc(),
1698                                  DeviceID, FileID, Line);
1699         OS << llvm::format("_%x", FileID);
1700       }
1701       OS << "_decl_tgt_ref_ptr";
1702     }
1703     llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1704     if (!Ptr) {
1705       QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1706       Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1707                                         PtrName);
1708
1709       auto *GV = cast<llvm::GlobalVariable>(Ptr);
1710       GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1711
1712       if (!CGM.getLangOpts().OpenMPIsDevice)
1713         GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1714       registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1715     }
1716     return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1717   }
1718   return Address::invalid();
1719 }
1720
1721 llvm::Constant *
1722 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1723   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1724          !CGM.getContext().getTargetInfo().isTLSSupported());
1725   // Lookup the entry, lazily creating it if necessary.
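  // Naming sketch: for a variable whose mangled name is "x", the cache global
  // created below is named along the lines of "x.cache." (the exact separators
  // come from getName and are target dependent, so the concrete spelling here
  // is an assumption).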
1726 std::string Suffix = getName({"cache", ""}); 1727 return getOrCreateInternalVariable( 1728 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1729 } 1730 1731 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1732 const VarDecl *VD, 1733 Address VDAddr, 1734 SourceLocation Loc) { 1735 if (CGM.getLangOpts().OpenMPUseTLS && 1736 CGM.getContext().getTargetInfo().isTLSSupported()) 1737 return VDAddr; 1738 1739 llvm::Type *VarTy = VDAddr.getElementType(); 1740 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1741 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1742 CGM.Int8PtrTy), 1743 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1744 getOrCreateThreadPrivateCache(VD)}; 1745 return Address(CGF.EmitRuntimeCall( 1746 OMPBuilder.getOrCreateRuntimeFunction( 1747 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1748 Args), 1749 VDAddr.getAlignment()); 1750 } 1751 1752 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1753 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1754 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1755 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1756 // library. 1757 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1758 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1759 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1760 OMPLoc); 1761 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1762 // to register constructor/destructor for variable. 1763 llvm::Value *Args[] = { 1764 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1765 Ctor, CopyCtor, Dtor}; 1766 CGF.EmitRuntimeCall( 1767 OMPBuilder.getOrCreateRuntimeFunction( 1768 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1769 Args); 1770 } 1771 1772 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1773 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1774 bool PerformInit, CodeGenFunction *CGF) { 1775 if (CGM.getLangOpts().OpenMPUseTLS && 1776 CGM.getContext().getTargetInfo().isTLSSupported()) 1777 return nullptr; 1778 1779 VD = VD->getDefinition(CGM.getContext()); 1780 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1781 QualType ASTTy = VD->getType(); 1782 1783 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1784 const Expr *Init = VD->getAnyInitializer(); 1785 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1786 // Generate function that re-emits the declaration's initializer into the 1787 // threadprivate copy of the variable VD 1788 CodeGenFunction CtorCGF(CGM); 1789 FunctionArgList Args; 1790 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1791 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1792 ImplicitParamDecl::Other); 1793 Args.push_back(&Dst); 1794 1795 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1796 CGM.getContext().VoidPtrTy, Args); 1797 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1798 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1799 llvm::Function *Fn = 1800 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1801 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1802 Args, Loc, Loc); 1803 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1804 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1805 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1806 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1807 Arg = 
CtorCGF.Builder.CreateElementBitCast(
1808           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1809       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1810                                /*IsInitializer=*/true);
1811       ArgVal = CtorCGF.EmitLoadOfScalar(
1812           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1813           CGM.getContext().VoidPtrTy, Dst.getLocation());
1814       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1815       CtorCGF.FinishFunction();
1816       Ctor = Fn;
1817     }
1818     if (VD->getType().isDestructedType() != QualType::DK_none) {
1819       // Generate a function that emits the destructor call for the
1820       // threadprivate copy of the variable VD.
1821       CodeGenFunction DtorCGF(CGM);
1822       FunctionArgList Args;
1823       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1824                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1825                             ImplicitParamDecl::Other);
1826       Args.push_back(&Dst);
1827
1828       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1829           CGM.getContext().VoidTy, Args);
1830       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1831       std::string Name = getName({"__kmpc_global_dtor_", ""});
1832       llvm::Function *Fn =
1833           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1834       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1835       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1836                             Loc, Loc);
1837       // Create a scope with an artificial location for the body of this function.
1838       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1839       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1840           DtorCGF.GetAddrOfLocalVar(&Dst),
1841           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1842       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1843                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1844                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1845       DtorCGF.FinishFunction();
1846       Dtor = Fn;
1847     }
1848     // Do not emit init function if it is not required.
1849     if (!Ctor && !Dtor)
1850       return nullptr;
1851
1852     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1853     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1854                                                /*isVarArg=*/false)
1855                            ->getPointerTo();
1856     // Copying constructor for the threadprivate variable.
1857     // Must be NULL: the slot is reserved by the runtime, which currently
1858     // requires this parameter to be NULL and fires an assertion otherwise.
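    // For reference, for something like
    //   int X;
    //   #pragma omp threadprivate(X)
    // with a C++ initializer and destructor, the net registration emitted by
    // emitThreadPrivateVarInit below is roughly:
    //   __kmpc_threadprivate_register(&loc, &X, <ctor>, /*cctor=*/NULL, <dtor>);
    // (illustrative; NULL fills the reserved copy-constructor slot described
    // above).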
1859 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1860 if (Ctor == nullptr) { 1861 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1862 /*isVarArg=*/false) 1863 ->getPointerTo(); 1864 Ctor = llvm::Constant::getNullValue(CtorTy); 1865 } 1866 if (Dtor == nullptr) { 1867 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1868 /*isVarArg=*/false) 1869 ->getPointerTo(); 1870 Dtor = llvm::Constant::getNullValue(DtorTy); 1871 } 1872 if (!CGF) { 1873 auto *InitFunctionTy = 1874 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1875 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1876 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1877 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1878 CodeGenFunction InitCGF(CGM); 1879 FunctionArgList ArgList; 1880 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1881 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1882 Loc, Loc); 1883 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1884 InitCGF.FinishFunction(); 1885 return InitFunction; 1886 } 1887 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1888 } 1889 return nullptr; 1890 } 1891 1892 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1893 llvm::GlobalVariable *Addr, 1894 bool PerformInit) { 1895 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1896 !CGM.getLangOpts().OpenMPIsDevice) 1897 return false; 1898 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1899 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1900 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1901 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1902 HasRequiresUnifiedSharedMemory)) 1903 return CGM.getLangOpts().OpenMPIsDevice; 1904 VD = VD->getDefinition(CGM.getContext()); 1905 assert(VD && "Unknown VarDecl"); 1906 1907 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1908 return CGM.getLangOpts().OpenMPIsDevice; 1909 1910 QualType ASTTy = VD->getType(); 1911 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1912 1913 // Produce the unique prefix to identify the new target regions. We use 1914 // the source location of the variable declaration which we know to not 1915 // conflict with any target region. 
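  // E.g. with (hypothetical) DeviceID 0x801, FileID 0x2033, and a variable
  // named "X" declared at line 42, the prefix built below is
  // "__omp_offloading__801_2033_X_l42".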
1916 unsigned DeviceID; 1917 unsigned FileID; 1918 unsigned Line; 1919 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1920 SmallString<128> Buffer, Out; 1921 { 1922 llvm::raw_svector_ostream OS(Buffer); 1923 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1924 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1925 } 1926 1927 const Expr *Init = VD->getAnyInitializer(); 1928 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1929 llvm::Constant *Ctor; 1930 llvm::Constant *ID; 1931 if (CGM.getLangOpts().OpenMPIsDevice) { 1932 // Generate function that re-emits the declaration's initializer into 1933 // the threadprivate copy of the variable VD 1934 CodeGenFunction CtorCGF(CGM); 1935 1936 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1937 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1938 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1939 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1940 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1941 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1942 FunctionArgList(), Loc, Loc); 1943 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 1944 CtorCGF.EmitAnyExprToMem(Init, 1945 Address(Addr, CGM.getContext().getDeclAlign(VD)), 1946 Init->getType().getQualifiers(), 1947 /*IsInitializer=*/true); 1948 CtorCGF.FinishFunction(); 1949 Ctor = Fn; 1950 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1951 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1952 } else { 1953 Ctor = new llvm::GlobalVariable( 1954 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1955 llvm::GlobalValue::PrivateLinkage, 1956 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1957 ID = Ctor; 1958 } 1959 1960 // Register the information for the entry associated with the constructor. 1961 Out.clear(); 1962 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1963 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1964 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1965 } 1966 if (VD->getType().isDestructedType() != QualType::DK_none) { 1967 llvm::Constant *Dtor; 1968 llvm::Constant *ID; 1969 if (CGM.getLangOpts().OpenMPIsDevice) { 1970 // Generate function that emits destructor call for the threadprivate 1971 // copy of the variable VD 1972 CodeGenFunction DtorCGF(CGM); 1973 1974 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1975 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1976 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1977 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1978 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1979 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1980 FunctionArgList(), Loc, Loc); 1981 // Create a scope with an artificial location for the body of this 1982 // function. 
1983 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1984 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 1985 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1986 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1987 DtorCGF.FinishFunction(); 1988 Dtor = Fn; 1989 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1990 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1991 } else { 1992 Dtor = new llvm::GlobalVariable( 1993 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1994 llvm::GlobalValue::PrivateLinkage, 1995 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1996 ID = Dtor; 1997 } 1998 // Register the information for the entry associated with the destructor. 1999 Out.clear(); 2000 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2001 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2002 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2003 } 2004 return CGM.getLangOpts().OpenMPIsDevice; 2005 } 2006 2007 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2008 QualType VarType, 2009 StringRef Name) { 2010 std::string Suffix = getName({"artificial", ""}); 2011 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2012 llvm::GlobalVariable *GAddr = 2013 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2014 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 2015 CGM.getTarget().isTLSSupported()) { 2016 GAddr->setThreadLocal(/*Val=*/true); 2017 return Address(GAddr, GAddr->getValueType(), 2018 CGM.getContext().getTypeAlignInChars(VarType)); 2019 } 2020 std::string CacheSuffix = getName({"cache", ""}); 2021 llvm::Value *Args[] = { 2022 emitUpdateLocation(CGF, SourceLocation()), 2023 getThreadID(CGF, SourceLocation()), 2024 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2025 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2026 /*isSigned=*/false), 2027 getOrCreateInternalVariable( 2028 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2029 return Address( 2030 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2031 CGF.EmitRuntimeCall( 2032 OMPBuilder.getOrCreateRuntimeFunction( 2033 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2034 Args), 2035 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2036 CGM.getContext().getTypeAlignInChars(VarType)); 2037 } 2038 2039 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2040 const RegionCodeGenTy &ThenGen, 2041 const RegionCodeGenTy &ElseGen) { 2042 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2043 2044 // If the condition constant folds and can be elided, try to avoid emitting 2045 // the condition and the dead arm of the if/else. 2046 bool CondConstant; 2047 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2048 if (CondConstant) 2049 ThenGen(CGF); 2050 else 2051 ElseGen(CGF); 2052 return; 2053 } 2054 2055 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2056 // emit the conditional branch. 2057 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2058 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2059 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2060 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2061 2062 // Emit the 'then' code. 2063 CGF.EmitBlock(ThenBlock); 2064 ThenGen(CGF); 2065 CGF.EmitBranch(ContBlock); 2066 // Emit the 'else' code if present. 
2067   // There is no need to emit line number for unconditional branch.
2068   (void)ApplyDebugLocation::CreateEmpty(CGF);
2069   CGF.EmitBlock(ElseBlock);
2070   ElseGen(CGF);
2071   // There is no need to emit line number for unconditional branch.
2072   (void)ApplyDebugLocation::CreateEmpty(CGF);
2073   CGF.EmitBranch(ContBlock);
2074   // Emit the continuation block for code after the if.
2075   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2076 }
2077
2078 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2079                                        llvm::Function *OutlinedFn,
2080                                        ArrayRef<llvm::Value *> CapturedVars,
2081                                        const Expr *IfCond,
2082                                        llvm::Value *NumThreads) {
2083   if (!CGF.HaveInsertPoint())
2084     return;
2085   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2086   auto &M = CGM.getModule();
2087   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2088                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2089     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2090     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2091     llvm::Value *Args[] = {
2092         RTLoc,
2093         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2094         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2095     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2096     RealArgs.append(std::begin(Args), std::end(Args));
2097     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2098
2099     llvm::FunctionCallee RTLFn =
2100         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2101     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2102   };
2103   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2104                     this](CodeGenFunction &CGF, PrePostActionTy &) {
2105     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2106     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2107     // Build calls:
2108     // __kmpc_serialized_parallel(&Loc, GTid);
2109     llvm::Value *Args[] = {RTLoc, ThreadID};
2110     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2111                             M, OMPRTL___kmpc_serialized_parallel),
2112                         Args);
2113
2114     // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
2115     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2116     Address ZeroAddrBound =
2117         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2118                                          /*Name=*/".bound.zero.addr");
2119     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2120     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2121     // ThreadId for serialized parallels is 0.
2122     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2123     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2124     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2125
2126     // Ensure we do not inline the function. This is trivially true for the ones
2127     // passed to __kmpc_fork_call but the ones called in serialized regions
2128     // could be inlined. This is not perfect, but it is closer to the invariant
2129     // we want, namely, every data environment starts with a new function.
2130     // TODO: We should pass the if condition to the runtime function and do the
2131     // handling there. Much cleaner code.
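    // Summing up, the serialized ('if' clause false) path of this lambda
    // emits, in order:
    //   __kmpc_serialized_parallel(&loc, gtid);
    //   .omp_outlined.(&gtid, &.bound.zero.addr, <captured vars>...);
    //   __kmpc_end_serialized_parallel(&loc, gtid);
    // (sketch assembled from the calls above and below; the helper name is
    // illustrative).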
2132     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2133     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2134     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2135
2136     // __kmpc_end_serialized_parallel(&Loc, GTid);
2137     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2138     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2139                             M, OMPRTL___kmpc_end_serialized_parallel),
2140                         EndArgs);
2141   };
2142   if (IfCond) {
2143     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2144   } else {
2145     RegionCodeGenTy ThenRCG(ThenGen);
2146     ThenRCG(CGF);
2147   }
2148 }
2149
2150 // If we're inside an (outlined) parallel region, use the region info's
2151 // thread-ID variable (it is passed as the first argument of the outlined
2152 // function, "kmp_int32 *gtid"). Otherwise, if we're in a regular serial code
2153 // region, get the thread ID by calling kmp_int32
2154 // __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary
2155 // and return the address of that temp.
2156 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2157                                              SourceLocation Loc) {
2158   if (auto *OMPRegionInfo =
2159           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2160     if (OMPRegionInfo->getThreadIDVariable())
2161       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2162
2163   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2164   QualType Int32Ty =
2165       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2166   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2167   CGF.EmitStoreOfScalar(ThreadID,
2168                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2169
2170   return ThreadIDTemp;
2171 }
2172
2173 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2174     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2175   SmallString<256> Buffer;
2176   llvm::raw_svector_ostream Out(Buffer);
2177   Out << Name;
2178   StringRef RuntimeName = Out.str();
2179   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2180   if (Elem.second) {
2181     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2182            "OMP internal variable has different type than requested");
2183     return &*Elem.second;
2184   }
2185
2186   return Elem.second = new llvm::GlobalVariable(
2187       CGM.getModule(), Ty, /*IsConstant*/ false,
2188       llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2189       Elem.first(), /*InsertBefore=*/nullptr,
2190       llvm::GlobalValue::NotThreadLocal, AddressSpace);
2191 }
2192
2193 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2194   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2195   std::string Name = getName({Prefix, "var"});
2196   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2197 }
2198
2199 namespace {
2200 /// Common pre(post)-action for different OpenMP constructs.
2201 class CommonActionTy final : public PrePostActionTy { 2202 llvm::FunctionCallee EnterCallee; 2203 ArrayRef<llvm::Value *> EnterArgs; 2204 llvm::FunctionCallee ExitCallee; 2205 ArrayRef<llvm::Value *> ExitArgs; 2206 bool Conditional; 2207 llvm::BasicBlock *ContBlock = nullptr; 2208 2209 public: 2210 CommonActionTy(llvm::FunctionCallee EnterCallee, 2211 ArrayRef<llvm::Value *> EnterArgs, 2212 llvm::FunctionCallee ExitCallee, 2213 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2214 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2215 ExitArgs(ExitArgs), Conditional(Conditional) {} 2216 void Enter(CodeGenFunction &CGF) override { 2217 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2218 if (Conditional) { 2219 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2220 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2221 ContBlock = CGF.createBasicBlock("omp_if.end"); 2222 // Generate the branch (If-stmt) 2223 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2224 CGF.EmitBlock(ThenBlock); 2225 } 2226 } 2227 void Done(CodeGenFunction &CGF) { 2228 // Emit the rest of blocks/branches 2229 CGF.EmitBranch(ContBlock); 2230 CGF.EmitBlock(ContBlock, true); 2231 } 2232 void Exit(CodeGenFunction &CGF) override { 2233 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2234 } 2235 }; 2236 } // anonymous namespace 2237 2238 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2239 StringRef CriticalName, 2240 const RegionCodeGenTy &CriticalOpGen, 2241 SourceLocation Loc, const Expr *Hint) { 2242 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2243 // CriticalOpGen(); 2244 // __kmpc_end_critical(ident_t *, gtid, Lock); 2245 // Prepare arguments and build a call to __kmpc_critical 2246 if (!CGF.HaveInsertPoint()) 2247 return; 2248 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2249 getCriticalRegionLock(CriticalName)}; 2250 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 2251 std::end(Args)); 2252 if (Hint) { 2253 EnterArgs.push_back(CGF.Builder.CreateIntCast( 2254 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); 2255 } 2256 CommonActionTy Action( 2257 OMPBuilder.getOrCreateRuntimeFunction( 2258 CGM.getModule(), 2259 Hint ? 
OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2260       EnterArgs,
2261       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2262                                             OMPRTL___kmpc_end_critical),
2263       Args);
2264   CriticalOpGen.setAction(Action);
2265   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2266 }
2267
2268 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2269                                        const RegionCodeGenTy &MasterOpGen,
2270                                        SourceLocation Loc) {
2271   if (!CGF.HaveInsertPoint())
2272     return;
2273   // if(__kmpc_master(ident_t *, gtid)) {
2274   //   MasterOpGen();
2275   //   __kmpc_end_master(ident_t *, gtid);
2276   // }
2277   // Prepare arguments and build a call to __kmpc_master
2278   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2279   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2280                             CGM.getModule(), OMPRTL___kmpc_master),
2281                         Args,
2282                         OMPBuilder.getOrCreateRuntimeFunction(
2283                             CGM.getModule(), OMPRTL___kmpc_end_master),
2284                         Args,
2285                         /*Conditional=*/true);
2286   MasterOpGen.setAction(Action);
2287   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2288   Action.Done(CGF);
2289 }
2290
2291 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2292                                        const RegionCodeGenTy &MaskedOpGen,
2293                                        SourceLocation Loc, const Expr *Filter) {
2294   if (!CGF.HaveInsertPoint())
2295     return;
2296   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2297   //   MaskedOpGen();
2298   //   __kmpc_end_masked(ident_t *, gtid);
2299   // }
2300   // Prepare arguments and build a call to __kmpc_masked
2301   llvm::Value *FilterVal = Filter
2302                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2303                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2304   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2305                          FilterVal};
2306   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2307                             getThreadID(CGF, Loc)};
2308   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2309                             CGM.getModule(), OMPRTL___kmpc_masked),
2310                         Args,
2311                         OMPBuilder.getOrCreateRuntimeFunction(
2312                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2313                         ArgsEnd,
2314                         /*Conditional=*/true);
2315   MaskedOpGen.setAction(Action);
2316   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2317   Action.Done(CGF);
2318 }
2319
2320 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2321                                         SourceLocation Loc) {
2322   if (!CGF.HaveInsertPoint())
2323     return;
2324   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2325     OMPBuilder.createTaskyield(CGF.Builder);
2326   } else {
2327     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2328     llvm::Value *Args[] = {
2329         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2330         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2331     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2332                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2333                         Args);
2334   }
2335
2336   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2337     Region->emitUntiedSwitch(CGF);
2338 }
2339
2340 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2341                                           const RegionCodeGenTy &TaskgroupOpGen,
2342                                           SourceLocation Loc) {
2343   if (!CGF.HaveInsertPoint())
2344     return;
2345   // __kmpc_taskgroup(ident_t *, gtid);
2346   // TaskgroupOpGen();
2347   // __kmpc_end_taskgroup(ident_t *, gtid);
2348   // Prepare arguments and build a call to __kmpc_taskgroup
2349   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2350   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2351                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2352                         Args,
2353
OMPBuilder.getOrCreateRuntimeFunction( 2354 CGM.getModule(), OMPRTL___kmpc_end_taskgroup), 2355 Args); 2356 TaskgroupOpGen.setAction(Action); 2357 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 2358 } 2359 2360 /// Given an array of pointers to variables, project the address of a 2361 /// given variable. 2362 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 2363 unsigned Index, const VarDecl *Var) { 2364 // Pull out the pointer to the variable. 2365 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 2366 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 2367 2368 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 2369 Addr = CGF.Builder.CreateElementBitCast( 2370 Addr, CGF.ConvertTypeForMem(Var->getType())); 2371 return Addr; 2372 } 2373 2374 static llvm::Value *emitCopyprivateCopyFunction( 2375 CodeGenModule &CGM, llvm::Type *ArgsType, 2376 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 2377 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 2378 SourceLocation Loc) { 2379 ASTContext &C = CGM.getContext(); 2380 // void copy_func(void *LHSArg, void *RHSArg); 2381 FunctionArgList Args; 2382 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2383 ImplicitParamDecl::Other); 2384 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 2385 ImplicitParamDecl::Other); 2386 Args.push_back(&LHSArg); 2387 Args.push_back(&RHSArg); 2388 const auto &CGFI = 2389 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 2390 std::string Name = 2391 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 2392 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 2393 llvm::GlobalValue::InternalLinkage, Name, 2394 &CGM.getModule()); 2395 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 2396 Fn->setDoesNotRecurse(); 2397 CodeGenFunction CGF(CGM); 2398 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 2399 // Dest = (void*[n])(LHSArg); 2400 // Src = (void*[n])(RHSArg); 2401 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2402 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 2403 ArgsType), CGF.getPointerAlign()); 2404 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2405 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 2406 ArgsType), CGF.getPointerAlign()); 2407 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 2408 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 2409 // ... 
2410 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 2411 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 2412 const auto *DestVar = 2413 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 2414 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 2415 2416 const auto *SrcVar = 2417 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 2418 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 2419 2420 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 2421 QualType Type = VD->getType(); 2422 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 2423 } 2424 CGF.FinishFunction(); 2425 return Fn; 2426 } 2427 2428 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 2429 const RegionCodeGenTy &SingleOpGen, 2430 SourceLocation Loc, 2431 ArrayRef<const Expr *> CopyprivateVars, 2432 ArrayRef<const Expr *> SrcExprs, 2433 ArrayRef<const Expr *> DstExprs, 2434 ArrayRef<const Expr *> AssignmentOps) { 2435 if (!CGF.HaveInsertPoint()) 2436 return; 2437 assert(CopyprivateVars.size() == SrcExprs.size() && 2438 CopyprivateVars.size() == DstExprs.size() && 2439 CopyprivateVars.size() == AssignmentOps.size()); 2440 ASTContext &C = CGM.getContext(); 2441 // int32 did_it = 0; 2442 // if(__kmpc_single(ident_t *, gtid)) { 2443 // SingleOpGen(); 2444 // __kmpc_end_single(ident_t *, gtid); 2445 // did_it = 1; 2446 // } 2447 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2448 // <copy_func>, did_it); 2449 2450 Address DidIt = Address::invalid(); 2451 if (!CopyprivateVars.empty()) { 2452 // int32 did_it = 0; 2453 QualType KmpInt32Ty = 2454 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 2455 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 2456 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 2457 } 2458 // Prepare arguments and build a call to __kmpc_single 2459 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2460 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2461 CGM.getModule(), OMPRTL___kmpc_single), 2462 Args, 2463 OMPBuilder.getOrCreateRuntimeFunction( 2464 CGM.getModule(), OMPRTL___kmpc_end_single), 2465 Args, 2466 /*Conditional=*/true); 2467 SingleOpGen.setAction(Action); 2468 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 2469 if (DidIt.isValid()) { 2470 // did_it = 1; 2471 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 2472 } 2473 Action.Done(CGF); 2474 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 2475 // <copy_func>, did_it); 2476 if (DidIt.isValid()) { 2477 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 2478 QualType CopyprivateArrayTy = C.getConstantArrayType( 2479 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 2480 /*IndexTypeQuals=*/0); 2481 // Create a list of all private variables for copyprivate. 2482 Address CopyprivateList = 2483 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 2484 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 2485 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 2486 CGF.Builder.CreateStore( 2487 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2488 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 2489 CGF.VoidPtrTy), 2490 Elem); 2491 } 2492 // Build function that copies private values from single region to all other 2493 // threads in the corresponding parallel region. 
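    // E.g. for '#pragma omp single copyprivate(a, b)', the list built above is
    // a void*[2] holding &a and &b, and the helper emitted below performs,
    // roughly, 'a = *src_a; b = *src_b;' through the compiler-generated
    // assignment ops (an illustrative sketch, not the exact emitted names).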
2494 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 2495 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 2496 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 2497 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 2498 Address CL = 2499 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 2500 CGF.VoidPtrTy); 2501 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 2502 llvm::Value *Args[] = { 2503 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 2504 getThreadID(CGF, Loc), // i32 <gtid> 2505 BufSize, // size_t <buf_size> 2506 CL.getPointer(), // void *<copyprivate list> 2507 CpyFn, // void (*) (void *, void *) <copy_func> 2508 DidItVal // i32 did_it 2509 }; 2510 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2511 CGM.getModule(), OMPRTL___kmpc_copyprivate), 2512 Args); 2513 } 2514 } 2515 2516 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 2517 const RegionCodeGenTy &OrderedOpGen, 2518 SourceLocation Loc, bool IsThreads) { 2519 if (!CGF.HaveInsertPoint()) 2520 return; 2521 // __kmpc_ordered(ident_t *, gtid); 2522 // OrderedOpGen(); 2523 // __kmpc_end_ordered(ident_t *, gtid); 2524 // Prepare arguments and build a call to __kmpc_ordered 2525 if (IsThreads) { 2526 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2527 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 2528 CGM.getModule(), OMPRTL___kmpc_ordered), 2529 Args, 2530 OMPBuilder.getOrCreateRuntimeFunction( 2531 CGM.getModule(), OMPRTL___kmpc_end_ordered), 2532 Args); 2533 OrderedOpGen.setAction(Action); 2534 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2535 return; 2536 } 2537 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 2538 } 2539 2540 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 2541 unsigned Flags; 2542 if (Kind == OMPD_for) 2543 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 2544 else if (Kind == OMPD_sections) 2545 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 2546 else if (Kind == OMPD_single) 2547 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 2548 else if (Kind == OMPD_barrier) 2549 Flags = OMP_IDENT_BARRIER_EXPL; 2550 else 2551 Flags = OMP_IDENT_BARRIER_IMPL; 2552 return Flags; 2553 } 2554 2555 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 2556 CodeGenFunction &CGF, const OMPLoopDirective &S, 2557 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 2558 // Check if the loop directive is actually a doacross loop directive. In this 2559 // case choose static, 1 schedule. 2560 if (llvm::any_of( 2561 S.getClausesOfKind<OMPOrderedClause>(), 2562 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 2563 ScheduleKind = OMPC_SCHEDULE_static; 2564 // Chunk size is 1 in this case. 
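    // In other words, a doacross loop such as '#pragma omp for ordered(2)' is
    // lowered as if 'schedule(static, 1)' had been written, which keeps the
    // iteration-to-thread mapping predictable for the cross-iteration
    // dependences (explanatory note).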
2565     llvm::APInt ChunkSize(32, 1);
2566     ChunkExpr = IntegerLiteral::Create(
2567         CGF.getContext(), ChunkSize,
2568         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2569         SourceLocation());
2570   }
2571 }
2572
2573 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2574                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2575                                       bool ForceSimpleCall) {
2576   // Check if we should use the OMPBuilder.
2577   auto *OMPRegionInfo =
2578       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2579   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2580     CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2581         CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2582     return;
2583   }
2584
2585   if (!CGF.HaveInsertPoint())
2586     return;
2587   // Compute the barrier flags; they encode, for the runtime, the kind of
2588   // directive this barrier belongs to.
2589   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2590   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2591   // thread_id);
2592   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2593                          getThreadID(CGF, Loc)};
2594   if (OMPRegionInfo) {
2595     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2596       llvm::Value *Result = CGF.EmitRuntimeCall(
2597           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2598                                                 OMPRTL___kmpc_cancel_barrier),
2599           Args);
2600       if (EmitChecks) {
2601         // if (__kmpc_cancel_barrier()) {
2602         //   exit from construct;
2603         // }
2604         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2605         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2606         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2607         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2608         CGF.EmitBlock(ExitBB);
2609         // exit from construct;
2610         CodeGenFunction::JumpDest CancelDestination =
2611             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2612         CGF.EmitBranchThroughCleanup(CancelDestination);
2613         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2614       }
2615       return;
2616     }
2617   }
2618   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2619                           CGM.getModule(), OMPRTL___kmpc_barrier),
2620                       Args);
2621 }
2622
2623 /// Map the OpenMP loop schedule to the runtime enumeration.
2624 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2625                                           bool Chunked, bool Ordered) {
2626   switch (ScheduleKind) {
2627   case OMPC_SCHEDULE_static:
2628     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2629                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2630   case OMPC_SCHEDULE_dynamic:
2631     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2632   case OMPC_SCHEDULE_guided:
2633     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2634   case OMPC_SCHEDULE_runtime:
2635     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2636   case OMPC_SCHEDULE_auto:
2637     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2638   case OMPC_SCHEDULE_unknown:
2639     assert(!Chunked && "chunk was specified but schedule kind not known");
2640     return Ordered ? OMP_ord_static : OMP_sch_static;
2641   }
2642   llvm_unreachable("Unexpected runtime schedule");
2643 }
2644
2645 /// Map the OpenMP distribute schedule to the runtime enumeration.
2646 static OpenMPSchedType
2647 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2648   // Only static is allowed for dist_schedule.
2649   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2650 }
2651
2652 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2653                                          bool Chunked) const {
2654   OpenMPSchedType Schedule =
2655       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2656   return Schedule == OMP_sch_static;
2657 }
2658
2659 bool CGOpenMPRuntime::isStaticNonchunked(
2660     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2661   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2662   return Schedule == OMP_dist_sch_static;
2663 }
2664
2665 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2666                                       bool Chunked) const {
2667   OpenMPSchedType Schedule =
2668       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2669   return Schedule == OMP_sch_static_chunked;
2670 }
2671
2672 bool CGOpenMPRuntime::isStaticChunked(
2673     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2674   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2675   return Schedule == OMP_dist_sch_static_chunked;
2676 }
2677
2678 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2679   OpenMPSchedType Schedule =
2680       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2681   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2682   return Schedule != OMP_sch_static;
2683 }
2684
2685 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2686                                   OpenMPScheduleClauseModifier M1,
2687                                   OpenMPScheduleClauseModifier M2) {
2688   int Modifier = 0;
2689   switch (M1) {
2690   case OMPC_SCHEDULE_MODIFIER_monotonic:
2691     Modifier = OMP_sch_modifier_monotonic;
2692     break;
2693   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2694     Modifier = OMP_sch_modifier_nonmonotonic;
2695     break;
2696   case OMPC_SCHEDULE_MODIFIER_simd:
2697     if (Schedule == OMP_sch_static_chunked)
2698       Schedule = OMP_sch_static_balanced_chunked;
2699     break;
2700   case OMPC_SCHEDULE_MODIFIER_last:
2701   case OMPC_SCHEDULE_MODIFIER_unknown:
2702     break;
2703   }
2704   switch (M2) {
2705   case OMPC_SCHEDULE_MODIFIER_monotonic:
2706     Modifier = OMP_sch_modifier_monotonic;
2707     break;
2708   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2709     Modifier = OMP_sch_modifier_nonmonotonic;
2710     break;
2711   case OMPC_SCHEDULE_MODIFIER_simd:
2712     if (Schedule == OMP_sch_static_chunked)
2713       Schedule = OMP_sch_static_balanced_chunked;
2714     break;
2715   case OMPC_SCHEDULE_MODIFIER_last:
2716   case OMPC_SCHEDULE_MODIFIER_unknown:
2717     break;
2718   }
2719   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2720   // If the static schedule kind is specified or if the ordered clause is
2721   // specified, and if the nonmonotonic modifier is not specified, the effect is
2722   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2723   // modifier is specified, the effect is as if the nonmonotonic modifier is
2724   // specified.
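  // Example: under -fopenmp-version=50, a plain 'schedule(dynamic, 4)' carries
  // no modifier, so the check below ORs in OMP_sch_modifier_nonmonotonic,
  // while static schedules are left unmodified and thus behave as monotonic
  // (an illustrative reading of the rule quoted above).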
2725 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2726 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2727 Schedule == OMP_sch_static_balanced_chunked || 2728 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2729 Schedule == OMP_dist_sch_static_chunked || 2730 Schedule == OMP_dist_sch_static)) 2731 Modifier = OMP_sch_modifier_nonmonotonic; 2732 } 2733 return Schedule | Modifier; 2734 } 2735 2736 void CGOpenMPRuntime::emitForDispatchInit( 2737 CodeGenFunction &CGF, SourceLocation Loc, 2738 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2739 bool Ordered, const DispatchRTInput &DispatchValues) { 2740 if (!CGF.HaveInsertPoint()) 2741 return; 2742 OpenMPSchedType Schedule = getRuntimeSchedule( 2743 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2744 assert(Ordered || 2745 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2746 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2747 Schedule != OMP_sch_static_balanced_chunked)); 2748 // Call __kmpc_dispatch_init( 2749 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2750 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2751 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2752 2753 // If the Chunk was not specified in the clause - use default value 1. 2754 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2755 : CGF.Builder.getIntN(IVSize, 1); 2756 llvm::Value *Args[] = { 2757 emitUpdateLocation(CGF, Loc), 2758 getThreadID(CGF, Loc), 2759 CGF.Builder.getInt32(addMonoNonMonoModifier( 2760 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2761 DispatchValues.LB, // Lower 2762 DispatchValues.UB, // Upper 2763 CGF.Builder.getIntN(IVSize, 1), // Stride 2764 Chunk // Chunk 2765 }; 2766 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2767 } 2768 2769 static void emitForStaticInitCall( 2770 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2771 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2772 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2773 const CGOpenMPRuntime::StaticRTInput &Values) { 2774 if (!CGF.HaveInsertPoint()) 2775 return; 2776 2777 assert(!Values.Ordered); 2778 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2779 Schedule == OMP_sch_static_balanced_chunked || 2780 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2781 Schedule == OMP_dist_sch_static || 2782 Schedule == OMP_dist_sch_static_chunked); 2783 2784 // Call __kmpc_for_static_init( 2785 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2786 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2787 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2788 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2789 llvm::Value *Chunk = Values.Chunk; 2790 if (Chunk == nullptr) { 2791 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2792 Schedule == OMP_dist_sch_static) && 2793 "expected static non-chunked schedule"); 2794 // If the Chunk was not specified in the clause - use default value 1. 
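    // As a concrete example, for 'schedule(static)' with a signed 32-bit IV
    // the call built below is roughly:
    //   __kmpc_for_static_init_4(&loc, tid, <encoded OMP_sch_static>,
    //                            &lastiter, &lb, &ub, &stride,
    //                            /*incr=*/1, /*chunk=*/1);
    // (a sketch; the schedule operand is the value produced by
    // addMonoNonMonoModifier above).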
2795 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2796 } else { 2797 assert((Schedule == OMP_sch_static_chunked || 2798 Schedule == OMP_sch_static_balanced_chunked || 2799 Schedule == OMP_ord_static_chunked || 2800 Schedule == OMP_dist_sch_static_chunked) && 2801 "expected static chunked schedule"); 2802 } 2803 llvm::Value *Args[] = { 2804 UpdateLocation, 2805 ThreadId, 2806 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2807 M2)), // Schedule type 2808 Values.IL.getPointer(), // &isLastIter 2809 Values.LB.getPointer(), // &LB 2810 Values.UB.getPointer(), // &UB 2811 Values.ST.getPointer(), // &Stride 2812 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2813 Chunk // Chunk 2814 }; 2815 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2816 } 2817 2818 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2819 SourceLocation Loc, 2820 OpenMPDirectiveKind DKind, 2821 const OpenMPScheduleTy &ScheduleKind, 2822 const StaticRTInput &Values) { 2823 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2824 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2825 assert(isOpenMPWorksharingDirective(DKind) && 2826 "Expected loop-based or sections-based directive."); 2827 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2828 isOpenMPLoopDirective(DKind) 2829 ? OMP_IDENT_WORK_LOOP 2830 : OMP_IDENT_WORK_SECTIONS); 2831 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2832 llvm::FunctionCallee StaticInitFunction = 2833 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); 2834 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2835 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2836 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2837 } 2838 2839 void CGOpenMPRuntime::emitDistributeStaticInit( 2840 CodeGenFunction &CGF, SourceLocation Loc, 2841 OpenMPDistScheduleClauseKind SchedKind, 2842 const CGOpenMPRuntime::StaticRTInput &Values) { 2843 OpenMPSchedType ScheduleNum = 2844 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2845 llvm::Value *UpdatedLocation = 2846 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2847 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2848 llvm::FunctionCallee StaticInitFunction; 2849 bool isGPUDistribute = 2850 CGM.getLangOpts().OpenMPIsDevice && 2851 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); 2852 StaticInitFunction = createForStaticInitFunction( 2853 Values.IVSize, Values.IVSigned, isGPUDistribute); 2854 2855 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2856 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2857 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2858 } 2859 2860 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2861 SourceLocation Loc, 2862 OpenMPDirectiveKind DKind) { 2863 if (!CGF.HaveInsertPoint()) 2864 return; 2865 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2866 llvm::Value *Args[] = { 2867 emitUpdateLocation(CGF, Loc, 2868 isOpenMPDistributeDirective(DKind) 2869 ? OMP_IDENT_WORK_DISTRIBUTE 2870 : isOpenMPLoopDirective(DKind) 2871 ? 
OMP_IDENT_WORK_LOOP 2872 : OMP_IDENT_WORK_SECTIONS), 2873 getThreadID(CGF, Loc)}; 2874 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2875 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice && 2876 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) 2877 CGF.EmitRuntimeCall( 2878 OMPBuilder.getOrCreateRuntimeFunction( 2879 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), 2880 Args); 2881 else 2882 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2883 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2884 Args); 2885 } 2886 2887 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2888 SourceLocation Loc, 2889 unsigned IVSize, 2890 bool IVSigned) { 2891 if (!CGF.HaveInsertPoint()) 2892 return; 2893 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2894 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2895 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2896 } 2897 2898 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2899 SourceLocation Loc, unsigned IVSize, 2900 bool IVSigned, Address IL, 2901 Address LB, Address UB, 2902 Address ST) { 2903 // Call __kmpc_dispatch_next( 2904 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2905 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2906 // kmp_int[32|64] *p_stride); 2907 llvm::Value *Args[] = { 2908 emitUpdateLocation(CGF, Loc), 2909 getThreadID(CGF, Loc), 2910 IL.getPointer(), // &isLastIter 2911 LB.getPointer(), // &Lower 2912 UB.getPointer(), // &Upper 2913 ST.getPointer() // &Stride 2914 }; 2915 llvm::Value *Call = 2916 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2917 return CGF.EmitScalarConversion( 2918 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2919 CGF.getContext().BoolTy, Loc); 2920 } 2921 2922 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2923 llvm::Value *NumThreads, 2924 SourceLocation Loc) { 2925 if (!CGF.HaveInsertPoint()) 2926 return; 2927 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2928 llvm::Value *Args[] = { 2929 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2930 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 2931 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2932 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2933 Args); 2934 } 2935 2936 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2937 ProcBindKind ProcBind, 2938 SourceLocation Loc) { 2939 if (!CGF.HaveInsertPoint()) 2940 return; 2941 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2942 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2943 llvm::Value *Args[] = { 2944 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2945 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2946 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2947 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2948 Args); 2949 } 2950 2951 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2952 SourceLocation Loc, llvm::AtomicOrdering AO) { 2953 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2954 OMPBuilder.createFlush(CGF.Builder); 2955 } else { 2956 if (!CGF.HaveInsertPoint()) 2957 return; 2958 // Build call void __kmpc_flush(ident_t *loc) 2959 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2960 CGM.getModule(), OMPRTL___kmpc_flush), 2961
emitUpdateLocation(CGF, Loc)); 2962 } 2963 } 2964 2965 namespace { 2966 /// Indexes of fields for type kmp_task_t. 2967 enum KmpTaskTFields { 2968 /// List of shared variables. 2969 KmpTaskTShareds, 2970 /// Task routine. 2971 KmpTaskTRoutine, 2972 /// Partition id for the untied tasks. 2973 KmpTaskTPartId, 2974 /// Function with call of destructors for private variables. 2975 Data1, 2976 /// Task priority. 2977 Data2, 2978 /// (Taskloops only) Lower bound. 2979 KmpTaskTLowerBound, 2980 /// (Taskloops only) Upper bound. 2981 KmpTaskTUpperBound, 2982 /// (Taskloops only) Stride. 2983 KmpTaskTStride, 2984 /// (Taskloops only) Is last iteration flag. 2985 KmpTaskTLastIter, 2986 /// (Taskloops only) Reduction data. 2987 KmpTaskTReductions, 2988 }; 2989 } // anonymous namespace 2990 2991 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2992 return OffloadEntriesTargetRegion.empty() && 2993 OffloadEntriesDeviceGlobalVar.empty(); 2994 } 2995 2996 /// Initialize target region entry. 2997 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2998 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2999 StringRef ParentName, unsigned LineNum, 3000 unsigned Order) { 3001 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3002 "only required for the device " 3003 "code generation."); 3004 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3005 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3006 OMPTargetRegionEntryTargetRegion); 3007 ++OffloadingEntriesNum; 3008 } 3009 3010 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3011 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3012 StringRef ParentName, unsigned LineNum, 3013 llvm::Constant *Addr, llvm::Constant *ID, 3014 OMPTargetRegionEntryKind Flags) { 3015 // If we are emitting code for a target, the entry is already initialized, 3016 // only has to be registered. 3017 if (CGM.getLangOpts().OpenMPIsDevice) { 3018 // This could happen if the device compilation is invoked standalone. 
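    // In that case no entries were initialized from the host IR metadata,
    // so there is nothing to update; the registration is silently ignored
    // rather than asserted on.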
3019 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) 3020 return; 3021 auto &Entry = 3022 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3023 Entry.setAddress(Addr); 3024 Entry.setID(ID); 3025 Entry.setFlags(Flags); 3026 } else { 3027 if (Flags == 3028 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 3029 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 3030 /*IgnoreAddressId*/ true)) 3031 return; 3032 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 3033 "Target region entry already registered!"); 3034 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3035 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3036 ++OffloadingEntriesNum; 3037 } 3038 } 3039 3040 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3041 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 3042 bool IgnoreAddressId) const { 3043 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3044 if (PerDevice == OffloadEntriesTargetRegion.end()) 3045 return false; 3046 auto PerFile = PerDevice->second.find(FileID); 3047 if (PerFile == PerDevice->second.end()) 3048 return false; 3049 auto PerParentName = PerFile->second.find(ParentName); 3050 if (PerParentName == PerFile->second.end()) 3051 return false; 3052 auto PerLine = PerParentName->second.find(LineNum); 3053 if (PerLine == PerParentName->second.end()) 3054 return false; 3055 // Fail if this entry is already registered. 3056 if (!IgnoreAddressId && 3057 (PerLine->second.getAddress() || PerLine->second.getID())) 3058 return false; 3059 return true; 3060 } 3061 3062 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3063 const OffloadTargetRegionEntryInfoActTy &Action) { 3064 // Scan all target region entries and perform the provided action. 3065 for (const auto &D : OffloadEntriesTargetRegion) 3066 for (const auto &F : D.second) 3067 for (const auto &P : F.second) 3068 for (const auto &L : P.second) 3069 Action(D.first, F.first, P.first(), L.first, L.second); 3070 } 3071 3072 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3073 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3074 OMPTargetGlobalVarEntryKind Flags, 3075 unsigned Order) { 3076 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3077 "only required for the device " 3078 "code generation."); 3079 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3080 ++OffloadingEntriesNum; 3081 } 3082 3083 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3084 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3085 CharUnits VarSize, 3086 OMPTargetGlobalVarEntryKind Flags, 3087 llvm::GlobalValue::LinkageTypes Linkage) { 3088 if (CGM.getLangOpts().OpenMPIsDevice) { 3089 // This could happen if the device compilation is invoked standalone. 
3090 if (!hasDeviceGlobalVarEntryInfo(VarName)) 3091 return; 3092 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3093 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3094 if (Entry.getVarSize().isZero()) { 3095 Entry.setVarSize(VarSize); 3096 Entry.setLinkage(Linkage); 3097 } 3098 return; 3099 } 3100 Entry.setVarSize(VarSize); 3101 Entry.setLinkage(Linkage); 3102 Entry.setAddress(Addr); 3103 } else { 3104 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3105 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3106 assert(Entry.isValid() && Entry.getFlags() == Flags && 3107 "Entry not initialized!"); 3108 if (Entry.getVarSize().isZero()) { 3109 Entry.setVarSize(VarSize); 3110 Entry.setLinkage(Linkage); 3111 } 3112 return; 3113 } 3114 OffloadEntriesDeviceGlobalVar.try_emplace( 3115 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3116 ++OffloadingEntriesNum; 3117 } 3118 } 3119 3120 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3121 actOnDeviceGlobalVarEntriesInfo( 3122 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3123 // Scan all device global variable entries and perform the provided action. 3124 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3125 Action(E.getKey(), E.getValue()); 3126 } 3127 3128 void CGOpenMPRuntime::createOffloadEntry( 3129 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3130 llvm::GlobalValue::LinkageTypes Linkage) { 3131 StringRef Name = Addr->getName(); 3132 llvm::Module &M = CGM.getModule(); 3133 llvm::LLVMContext &C = M.getContext(); 3134 3135 // Create constant string with the name. 3136 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3137 3138 std::string StringName = getName({"omp_offloading", "entry_name"}); 3139 auto *Str = new llvm::GlobalVariable( 3140 M, StrPtrInit->getType(), /*isConstant=*/true, 3141 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3142 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3143 3144 llvm::Constant *Data[] = { 3145 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), 3146 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), 3147 llvm::ConstantInt::get(CGM.SizeTy, Size), 3148 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3149 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3150 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3151 llvm::GlobalVariable *Entry = createGlobalStruct( 3152 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3153 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3154 3155 // The entry has to be created in the section the linker expects it to be. 3156 Entry->setSection("omp_offloading_entries"); 3157 } 3158 3159 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3160 // Emit the offloading entries and metadata so that the device codegen side 3161 // can easily figure out what to emit. The produced metadata looks like 3162 // this: 3163 // 3164 // !omp_offload.info = !{!1, ...} 3165 // 3166 // Right now we only generate metadata for functions that contain target 3167 // regions. 3168 3169 // If we are in simd mode or there are no entries, we don't need to do 3170 // anything.
3171 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3172 return; 3173 3174 llvm::Module &M = CGM.getModule(); 3175 llvm::LLVMContext &C = M.getContext(); 3176 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3177 SourceLocation, StringRef>, 3178 16> 3179 OrderedEntries(OffloadEntriesInfoManager.size()); 3180 llvm::SmallVector<StringRef, 16> ParentFunctions( 3181 OffloadEntriesInfoManager.size()); 3182 3183 // Auxiliary helpers to create metadata values and strings. 3184 auto &&GetMDInt = [this](unsigned V) { 3185 return llvm::ConstantAsMetadata::get( 3186 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3187 }; 3188 3189 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3190 3191 // Create the offloading info metadata node. 3192 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3193 3194 // Create a function that emits metadata for each target region entry: 3195 auto &&TargetRegionMetadataEmitter = 3196 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3197 &GetMDString]( 3198 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3199 unsigned Line, 3200 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3201 // Generate metadata for target regions. Each entry of this metadata 3202 // contains: 3203 // - Entry 0 -> Kind of this type of metadata (0). 3204 // - Entry 1 -> Device ID of the file where the entry was identified. 3205 // - Entry 2 -> File ID of the file where the entry was identified. 3206 // - Entry 3 -> Mangled name of the function where the entry was 3207 // identified. 3208 // - Entry 4 -> Line in the file where the entry was identified. 3209 // - Entry 5 -> Order the entry was created. 3210 // The first element of the metadata node is the kind. 3211 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3212 GetMDInt(FileID), GetMDString(ParentName), 3213 GetMDInt(Line), GetMDInt(E.getOrder())}; 3214 3215 SourceLocation Loc; 3216 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3217 E = CGM.getContext().getSourceManager().fileinfo_end(); 3218 I != E; ++I) { 3219 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3220 I->getFirst()->getUniqueID().getFile() == FileID) { 3221 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3222 I->getFirst(), Line, 1); 3223 break; 3224 } 3225 } 3226 // Save this entry in the right position of the ordered entries array. 3227 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3228 ParentFunctions[E.getOrder()] = ParentName; 3229 3230 // Add metadata to the named metadata node. 3231 MD->addOperand(llvm::MDNode::get(C, Ops)); 3232 }; 3233 3234 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3235 TargetRegionMetadataEmitter); 3236 3237 // Create a function that emits metadata for each device global variable entry: 3238 auto &&DeviceGlobalVarMetadataEmitter = 3239 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3240 MD](StringRef MangledName, 3241 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3242 &E) { 3243 // Generate metadata for global variables. Each entry of this metadata 3244 // contains: 3245 // - Entry 0 -> Kind of this type of metadata (1). 3246 // - Entry 1 -> Mangled name of the variable. 3247 // - Entry 2 -> Declare target kind. 3248 // - Entry 3 -> Order the entry was created. 3249 // The first element of the metadata node is the kind.
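          // For illustration only, a resulting operand could look like
          //   !{i32 1, !"mangled_var_name", i32 <Flags>, i32 <Order>}
          // (the variable name here is hypothetical).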
3250 llvm::Metadata *Ops[] = { 3251 GetMDInt(E.getKind()), GetMDString(MangledName), 3252 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3253 3254 // Save this entry in the right position of the ordered entries array. 3255 OrderedEntries[E.getOrder()] = 3256 std::make_tuple(&E, SourceLocation(), MangledName); 3257 3258 // Add metadata to the named metadata node. 3259 MD->addOperand(llvm::MDNode::get(C, Ops)); 3260 }; 3261 3262 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3263 DeviceGlobalVarMetadataEmitter); 3264 3265 for (const auto &E : OrderedEntries) { 3266 assert(std::get<0>(E) && "All ordered entries must exist!"); 3267 if (const auto *CE = 3268 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3269 std::get<0>(E))) { 3270 if (!CE->getID() || !CE->getAddress()) { 3271 // Do not blame the entry if the parent function is not emitted. 3272 StringRef FnName = ParentFunctions[CE->getOrder()]; 3273 if (!CGM.GetGlobalValue(FnName)) 3274 continue; 3275 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3276 DiagnosticsEngine::Error, 3277 "Offloading entry for target region in %0 is incorrect: either the " 3278 "address or the ID is invalid."); 3279 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3280 continue; 3281 } 3282 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3283 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3284 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3285 OffloadEntryInfoDeviceGlobalVar>( 3286 std::get<0>(E))) { 3287 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3288 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3289 CE->getFlags()); 3290 switch (Flags) { 3291 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3292 if (CGM.getLangOpts().OpenMPIsDevice && 3293 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3294 continue; 3295 if (!CE->getAddress()) { 3296 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3297 DiagnosticsEngine::Error, "Offloading entry for declare target " 3298 "variable %0 is incorrect: the " 3299 "address is invalid."); 3300 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3301 continue; 3302 } 3303 // The variable has no definition - no need to add the entry. 3304 if (CE->getVarSize().isZero()) 3305 continue; 3306 break; 3307 } 3308 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3309 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3310 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3311 "Declare target link address is set."); 3312 if (CGM.getLangOpts().OpenMPIsDevice) 3313 continue; 3314 if (!CE->getAddress()) { 3315 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3316 DiagnosticsEngine::Error, 3317 "Offloading entry for declare target variable is incorrect: the " 3318 "address is invalid."); 3319 CGM.getDiags().Report(DiagID); 3320 continue; 3321 } 3322 break; 3323 } 3324 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3325 CE->getVarSize().getQuantity(), Flags, 3326 CE->getLinkage()); 3327 } else { 3328 llvm_unreachable("Unsupported entry kind."); 3329 } 3330 } 3331 } 3332 3333 /// Loads all the offload entries information from the host IR 3334 /// metadata. 3335 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3336 // If we are in target mode, load the metadata from the host IR. This code has 3337 // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
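  // As a rough sketch (names and values are illustrative only), the host IR
  // is expected to carry metadata of the shape:
  //
  //   !omp_offload.info = !{!0, !1}
  //   !0 = !{i32 0, i32 <DeviceID>, i32 <FileID>, !"parent_fn", i32 <Line>, i32 <Order>}
  //   !1 = !{i32 1, !"mangled_var_name", i32 <Flags>, i32 <Order>}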
3338 3339 if (!CGM.getLangOpts().OpenMPIsDevice) 3340 return; 3341 3342 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3343 return; 3344 3345 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3346 if (auto EC = Buf.getError()) { 3347 CGM.getDiags().Report(diag::err_cannot_open_file) 3348 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3349 return; 3350 } 3351 3352 llvm::LLVMContext C; 3353 auto ME = expectedToErrorOrAndEmitErrors( 3354 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3355 3356 if (auto EC = ME.getError()) { 3357 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3358 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3359 CGM.getDiags().Report(DiagID) 3360 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3361 return; 3362 } 3363 3364 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3365 if (!MD) 3366 return; 3367 3368 for (llvm::MDNode *MN : MD->operands()) { 3369 auto &&GetMDInt = [MN](unsigned Idx) { 3370 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3371 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3372 }; 3373 3374 auto &&GetMDString = [MN](unsigned Idx) { 3375 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3376 return V->getString(); 3377 }; 3378 3379 switch (GetMDInt(0)) { 3380 default: 3381 llvm_unreachable("Unexpected metadata!"); 3382 break; 3383 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3384 OffloadingEntryInfoTargetRegion: 3385 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3386 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3387 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3388 /*Order=*/GetMDInt(5)); 3389 break; 3390 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3391 OffloadingEntryInfoDeviceGlobalVar: 3392 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3393 /*MangledName=*/GetMDString(1), 3394 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3395 /*Flags=*/GetMDInt(2)), 3396 /*Order=*/GetMDInt(3)); 3397 break; 3398 } 3399 } 3400 } 3401 3402 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3403 if (!KmpRoutineEntryPtrTy) { 3404 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3405 ASTContext &C = CGM.getContext(); 3406 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3407 FunctionProtoType::ExtProtoInfo EPI; 3408 KmpRoutineEntryPtrQTy = C.getPointerType( 3409 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3410 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3411 } 3412 } 3413 3414 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3415 // Make sure the type of the entry is already created. This is the type we 3416 // have to create: 3417 // struct __tgt_offload_entry{ 3418 // void *addr; // Pointer to the offload entry info. 3419 // // (function or global) 3420 // char *name; // Name of the function or global. 3421 // size_t size; // Size of the entry info (0 if it is a function). 3422 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3423 // int32_t reserved; // Reserved, to be used by the runtime library.
3424 // }; 3425 if (TgtOffloadEntryQTy.isNull()) { 3426 ASTContext &C = CGM.getContext(); 3427 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3428 RD->startDefinition(); 3429 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3430 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3431 addFieldToRecordDecl(C, RD, C.getSizeType()); 3432 addFieldToRecordDecl( 3433 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3434 addFieldToRecordDecl( 3435 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3436 RD->completeDefinition(); 3437 RD->addAttr(PackedAttr::CreateImplicit(C)); 3438 TgtOffloadEntryQTy = C.getRecordType(RD); 3439 } 3440 return TgtOffloadEntryQTy; 3441 } 3442 3443 namespace { 3444 struct PrivateHelpersTy { 3445 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3446 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3447 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3448 PrivateElemInit(PrivateElemInit) {} 3449 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3450 const Expr *OriginalRef = nullptr; 3451 const VarDecl *Original = nullptr; 3452 const VarDecl *PrivateCopy = nullptr; 3453 const VarDecl *PrivateElemInit = nullptr; 3454 bool isLocalPrivate() const { 3455 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3456 } 3457 }; 3458 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3459 } // anonymous namespace 3460 3461 static bool isAllocatableDecl(const VarDecl *VD) { 3462 const VarDecl *CVD = VD->getCanonicalDecl(); 3463 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3464 return false; 3465 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3466 // Use the default allocation. 3467 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3468 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3469 !AA->getAllocator()); 3470 } 3471 3472 static RecordDecl * 3473 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3474 if (!Privates.empty()) { 3475 ASTContext &C = CGM.getContext(); 3476 // Build struct .kmp_privates.t { 3477 // /* private vars */ 3478 // }; 3479 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3480 RD->startDefinition(); 3481 for (const auto &Pair : Privates) { 3482 const VarDecl *VD = Pair.second.Original; 3483 QualType Type = VD->getType().getNonReferenceType(); 3484 // If the private variable is a local variable with lvalue ref type, 3485 // allocate the pointer instead of the pointee type.
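      // E.g. (illustrative) a local private declared as "int &Ref;" gets an
      // "int *" field; allocatable decls get one more level of indirection
      // below.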
3486 if (Pair.second.isLocalPrivate()) { 3487 if (VD->getType()->isLValueReferenceType()) 3488 Type = C.getPointerType(Type); 3489 if (isAllocatableDecl(VD)) 3490 Type = C.getPointerType(Type); 3491 } 3492 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3493 if (VD->hasAttrs()) { 3494 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3495 E(VD->getAttrs().end()); 3496 I != E; ++I) 3497 FD->addAttr(*I); 3498 } 3499 } 3500 RD->completeDefinition(); 3501 return RD; 3502 } 3503 return nullptr; 3504 } 3505 3506 static RecordDecl * 3507 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3508 QualType KmpInt32Ty, 3509 QualType KmpRoutineEntryPointerQTy) { 3510 ASTContext &C = CGM.getContext(); 3511 // Build struct kmp_task_t { 3512 // void * shareds; 3513 // kmp_routine_entry_t routine; 3514 // kmp_int32 part_id; 3515 // kmp_cmplrdata_t data1; 3516 // kmp_cmplrdata_t data2; 3517 // For taskloops additional fields: 3518 // kmp_uint64 lb; 3519 // kmp_uint64 ub; 3520 // kmp_int64 st; 3521 // kmp_int32 liter; 3522 // void * reductions; 3523 // }; 3524 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3525 UD->startDefinition(); 3526 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3527 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3528 UD->completeDefinition(); 3529 QualType KmpCmplrdataTy = C.getRecordType(UD); 3530 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3531 RD->startDefinition(); 3532 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3533 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3534 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3535 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3536 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3537 if (isOpenMPTaskLoopDirective(Kind)) { 3538 QualType KmpUInt64Ty = 3539 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3540 QualType KmpInt64Ty = 3541 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3542 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3543 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3544 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3545 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3546 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3547 } 3548 RD->completeDefinition(); 3549 return RD; 3550 } 3551 3552 static RecordDecl * 3553 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3554 ArrayRef<PrivateDataTy> Privates) { 3555 ASTContext &C = CGM.getContext(); 3556 // Build struct kmp_task_t_with_privates { 3557 // kmp_task_t task_data; 3558 // .kmp_privates_t. privates; 3559 // }; 3560 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3561 RD->startDefinition(); 3562 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3563 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3564 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3565 RD->completeDefinition(); 3566 return RD; 3567 } 3568 3569 /// Emit a proxy function which accepts kmp_task_t as the second 3570 /// argument. 
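/// The proxy merely unpacks the relevant kmp_task_t fields and forwards them
/// to the real outlined task function, roughly as follows: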
3571 /// \code 3572 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3573 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3574 /// For taskloops: 3575 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3576 /// tt->reductions, tt->shareds); 3577 /// return 0; 3578 /// } 3579 /// \endcode 3580 static llvm::Function * 3581 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3582 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3583 QualType KmpTaskTWithPrivatesPtrQTy, 3584 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3585 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3586 llvm::Value *TaskPrivatesMap) { 3587 ASTContext &C = CGM.getContext(); 3588 FunctionArgList Args; 3589 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3590 ImplicitParamDecl::Other); 3591 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3592 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3593 ImplicitParamDecl::Other); 3594 Args.push_back(&GtidArg); 3595 Args.push_back(&TaskTypeArg); 3596 const auto &TaskEntryFnInfo = 3597 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3598 llvm::FunctionType *TaskEntryTy = 3599 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3600 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3601 auto *TaskEntry = llvm::Function::Create( 3602 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3603 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3604 TaskEntry->setDoesNotRecurse(); 3605 CodeGenFunction CGF(CGM); 3606 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3607 Loc, Loc); 3608 3609 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3610 // tt, 3611 // For taskloops: 3612 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3613 // tt->task_data.shareds); 3614 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3615 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3616 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3617 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3618 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3619 const auto *KmpTaskTWithPrivatesQTyRD = 3620 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3621 LValue Base = 3622 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3623 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3624 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3625 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3626 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3627 3628 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3629 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3630 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3631 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3632 CGF.ConvertTypeForMem(SharedsPtrTy)); 3633 3634 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3635 llvm::Value *PrivatesParam; 3636 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3637 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3638 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3639 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3640 } else { 3641 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 
3642 } 3643 3644 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3645 TaskPrivatesMap, 3646 CGF.Builder 3647 .CreatePointerBitCastOrAddrSpaceCast( 3648 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3649 .getPointer()}; 3650 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3651 std::end(CommonArgs)); 3652 if (isOpenMPTaskLoopDirective(Kind)) { 3653 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3654 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3655 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3656 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3657 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3658 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3659 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3660 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3661 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3662 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3663 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3664 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3665 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3666 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3667 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3668 CallArgs.push_back(LBParam); 3669 CallArgs.push_back(UBParam); 3670 CallArgs.push_back(StParam); 3671 CallArgs.push_back(LIParam); 3672 CallArgs.push_back(RParam); 3673 } 3674 CallArgs.push_back(SharedsParam); 3675 3676 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3677 CallArgs); 3678 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3679 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3680 CGF.FinishFunction(); 3681 return TaskEntry; 3682 } 3683 3684 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3685 SourceLocation Loc, 3686 QualType KmpInt32Ty, 3687 QualType KmpTaskTWithPrivatesPtrQTy, 3688 QualType KmpTaskTWithPrivatesQTy) { 3689 ASTContext &C = CGM.getContext(); 3690 FunctionArgList Args; 3691 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3692 ImplicitParamDecl::Other); 3693 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3694 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3695 ImplicitParamDecl::Other); 3696 Args.push_back(&GtidArg); 3697 Args.push_back(&TaskTypeArg); 3698 const auto &DestructorFnInfo = 3699 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3700 llvm::FunctionType *DestructorFnTy = 3701 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3702 std::string Name = 3703 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3704 auto *DestructorFn = 3705 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3706 Name, &CGM.getModule()); 3707 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3708 DestructorFnInfo); 3709 DestructorFn->setDoesNotRecurse(); 3710 CodeGenFunction CGF(CGM); 3711 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3712 Args, Loc, Loc); 3713 3714 LValue Base = CGF.EmitLoadOfPointerLValue( 3715 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3716 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3717 const auto *KmpTaskTWithPrivatesQTyRD = 3718 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3719 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3720 Base = CGF.EmitLValueForField(Base, *FI); 3721 for 
(const auto *Field : 3722 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3723 if (QualType::DestructionKind DtorKind = 3724 Field->getType().isDestructedType()) { 3725 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3726 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3727 } 3728 } 3729 CGF.FinishFunction(); 3730 return DestructorFn; 3731 } 3732 3733 /// Emit a privates mapping function for correct handling of private and 3734 /// firstprivate variables. 3735 /// \code 3736 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3737 /// **noalias priv1,..., <tyn> **noalias privn) { 3738 /// *priv1 = &.privates.priv1; 3739 /// ...; 3740 /// *privn = &.privates.privn; 3741 /// } 3742 /// \endcode 3743 static llvm::Value * 3744 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3745 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3746 ArrayRef<PrivateDataTy> Privates) { 3747 ASTContext &C = CGM.getContext(); 3748 FunctionArgList Args; 3749 ImplicitParamDecl TaskPrivatesArg( 3750 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3751 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3752 ImplicitParamDecl::Other); 3753 Args.push_back(&TaskPrivatesArg); 3754 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3755 unsigned Counter = 1; 3756 for (const Expr *E : Data.PrivateVars) { 3757 Args.push_back(ImplicitParamDecl::Create( 3758 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3759 C.getPointerType(C.getPointerType(E->getType())) 3760 .withConst() 3761 .withRestrict(), 3762 ImplicitParamDecl::Other)); 3763 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3764 PrivateVarsPos[VD] = Counter; 3765 ++Counter; 3766 } 3767 for (const Expr *E : Data.FirstprivateVars) { 3768 Args.push_back(ImplicitParamDecl::Create( 3769 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3770 C.getPointerType(C.getPointerType(E->getType())) 3771 .withConst() 3772 .withRestrict(), 3773 ImplicitParamDecl::Other)); 3774 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3775 PrivateVarsPos[VD] = Counter; 3776 ++Counter; 3777 } 3778 for (const Expr *E : Data.LastprivateVars) { 3779 Args.push_back(ImplicitParamDecl::Create( 3780 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3781 C.getPointerType(C.getPointerType(E->getType())) 3782 .withConst() 3783 .withRestrict(), 3784 ImplicitParamDecl::Other)); 3785 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3786 PrivateVarsPos[VD] = Counter; 3787 ++Counter; 3788 } 3789 for (const VarDecl *VD : Data.PrivateLocals) { 3790 QualType Ty = VD->getType().getNonReferenceType(); 3791 if (VD->getType()->isLValueReferenceType()) 3792 Ty = C.getPointerType(Ty); 3793 if (isAllocatableDecl(VD)) 3794 Ty = C.getPointerType(Ty); 3795 Args.push_back(ImplicitParamDecl::Create( 3796 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3797 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3798 ImplicitParamDecl::Other)); 3799 PrivateVarsPos[VD] = Counter; 3800 ++Counter; 3801 } 3802 const auto &TaskPrivatesMapFnInfo = 3803 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3804 llvm::FunctionType *TaskPrivatesMapTy = 3805 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3806 std::string Name = 3807 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3808 auto *TaskPrivatesMap = llvm::Function::Create( 3809 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3810 &CGM.getModule()); 3811 
CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3812 TaskPrivatesMapFnInfo); 3813 if (CGM.getLangOpts().Optimize) { 3814 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3815 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3816 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3817 } 3818 CodeGenFunction CGF(CGM); 3819 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3820 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3821 3822 // *privi = &.privates.privi; 3823 LValue Base = CGF.EmitLoadOfPointerLValue( 3824 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3825 TaskPrivatesArg.getType()->castAs<PointerType>()); 3826 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3827 Counter = 0; 3828 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3829 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3830 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3831 LValue RefLVal = 3832 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3833 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3834 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3835 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3836 ++Counter; 3837 } 3838 CGF.FinishFunction(); 3839 return TaskPrivatesMap; 3840 } 3841 3842 /// Emit initialization for private variables in task-based directives. 3843 static void emitPrivatesInit(CodeGenFunction &CGF, 3844 const OMPExecutableDirective &D, 3845 Address KmpTaskSharedsPtr, LValue TDBase, 3846 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3847 QualType SharedsTy, QualType SharedsPtrTy, 3848 const OMPTaskDataTy &Data, 3849 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3850 ASTContext &C = CGF.getContext(); 3851 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3852 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3853 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3854 ? OMPD_taskloop 3855 : OMPD_task; 3856 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3857 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3858 LValue SrcBase; 3859 bool IsTargetTask = 3860 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3861 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3862 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3863 // PointersArray, SizesArray, and MappersArray. The original variables for 3864 // these arrays are not captured and we get their addresses explicitly. 3865 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3866 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3867 SrcBase = CGF.MakeAddrLValue( 3868 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3869 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3870 SharedsTy); 3871 } 3872 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3873 for (const PrivateDataTy &Pair : Privates) { 3874 // Do not initialize private locals. 
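    // A local private has no original reference, private copy, or element
    // initializer recorded in PrivateHelpersTy (see isLocalPrivate()), so
    // there is nothing to emit for it here.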
3875 if (Pair.second.isLocalPrivate()) { 3876 ++FI; 3877 continue; 3878 } 3879 const VarDecl *VD = Pair.second.PrivateCopy; 3880 const Expr *Init = VD->getAnyInitializer(); 3881 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3882 !CGF.isTrivialInitializer(Init)))) { 3883 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3884 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3885 const VarDecl *OriginalVD = Pair.second.Original; 3886 // Check if the variable is the target-based BasePointersArray, 3887 // PointersArray, SizesArray, or MappersArray. 3888 LValue SharedRefLValue; 3889 QualType Type = PrivateLValue.getType(); 3890 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3891 if (IsTargetTask && !SharedField) { 3892 assert(isa<ImplicitParamDecl>(OriginalVD) && 3893 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3894 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3895 ->getNumParams() == 0 && 3896 isa<TranslationUnitDecl>( 3897 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3898 ->getDeclContext()) && 3899 "Expected artificial target data variable."); 3900 SharedRefLValue = 3901 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3902 } else if (ForDup) { 3903 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3904 SharedRefLValue = CGF.MakeAddrLValue( 3905 Address(SharedRefLValue.getPointer(CGF), 3906 C.getDeclAlign(OriginalVD)), 3907 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3908 SharedRefLValue.getTBAAInfo()); 3909 } else if (CGF.LambdaCaptureFields.count( 3910 Pair.second.Original->getCanonicalDecl()) > 0 || 3911 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { 3912 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3913 } else { 3914 // Processing for implicitly captured variables. 3915 InlinedOpenMPRegionRAII Region( 3916 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3917 /*HasCancel=*/false, /*NoInheritance=*/true); 3918 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3919 } 3920 if (Type->isArrayType()) { 3921 // Initialize firstprivate array. 3922 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3923 // Perform simple memcpy. 3924 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3925 } else { 3926 // Initialize firstprivate array using element-by-element 3927 // initialization. 3928 CGF.EmitOMPAggregateAssign( 3929 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3930 Type, 3931 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3932 Address SrcElement) { 3933 // Clean up any temporaries needed by the initialization. 3934 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3935 InitScope.addPrivate( 3936 Elem, [SrcElement]() -> Address { return SrcElement; }); 3937 (void)InitScope.Privatize(); 3938 // Emit initialization for single element. 
3939 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3940 CGF, &CapturesInfo); 3941 CGF.EmitAnyExprToMem(Init, DestElement, 3942 Init->getType().getQualifiers(), 3943 /*IsInitializer=*/false); 3944 }); 3945 } 3946 } else { 3947 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3948 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3949 return SharedRefLValue.getAddress(CGF); 3950 }); 3951 (void)InitScope.Privatize(); 3952 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3953 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3954 /*capturedByInit=*/false); 3955 } 3956 } else { 3957 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3958 } 3959 } 3960 ++FI; 3961 } 3962 } 3963 3964 /// Check if duplication function is required for taskloops. 3965 static bool checkInitIsRequired(CodeGenFunction &CGF, 3966 ArrayRef<PrivateDataTy> Privates) { 3967 bool InitRequired = false; 3968 for (const PrivateDataTy &Pair : Privates) { 3969 if (Pair.second.isLocalPrivate()) 3970 continue; 3971 const VarDecl *VD = Pair.second.PrivateCopy; 3972 const Expr *Init = VD->getAnyInitializer(); 3973 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3974 !CGF.isTrivialInitializer(Init)); 3975 if (InitRequired) 3976 break; 3977 } 3978 return InitRequired; 3979 } 3980 3981 3982 /// Emit task_dup function (for initialization of 3983 /// private/firstprivate/lastprivate vars and last_iter flag) 3984 /// \code 3985 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3986 /// lastpriv) { 3987 /// // setup lastprivate flag 3988 /// task_dst->last = lastpriv; 3989 /// // could be constructor calls here... 3990 /// } 3991 /// \endcode 3992 static llvm::Value * 3993 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3994 const OMPExecutableDirective &D, 3995 QualType KmpTaskTWithPrivatesPtrQTy, 3996 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3997 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3998 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3999 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4000 ASTContext &C = CGM.getContext(); 4001 FunctionArgList Args; 4002 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4003 KmpTaskTWithPrivatesPtrQTy, 4004 ImplicitParamDecl::Other); 4005 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4006 KmpTaskTWithPrivatesPtrQTy, 4007 ImplicitParamDecl::Other); 4008 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4009 ImplicitParamDecl::Other); 4010 Args.push_back(&DstArg); 4011 Args.push_back(&SrcArg); 4012 Args.push_back(&LastprivArg); 4013 const auto &TaskDupFnInfo = 4014 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4015 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4016 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4017 auto *TaskDup = llvm::Function::Create( 4018 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4019 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4020 TaskDup->setDoesNotRecurse(); 4021 CodeGenFunction CGF(CGM); 4022 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4023 Loc); 4024 4025 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4026 CGF.GetAddrOfLocalVar(&DstArg), 4027 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4028 // task_dst->liter = lastpriv; 4029 if (WithLastIter) { 4030 auto LIFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4031 LValue Base = CGF.EmitLValueForField( 4032 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4033 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4034 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4035 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4036 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4037 } 4038 4039 // Emit initial values for private copies (if any). 4040 assert(!Privates.empty()); 4041 Address KmpTaskSharedsPtr = Address::invalid(); 4042 if (!Data.FirstprivateVars.empty()) { 4043 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4044 CGF.GetAddrOfLocalVar(&SrcArg), 4045 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4046 LValue Base = CGF.EmitLValueForField( 4047 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4048 KmpTaskSharedsPtr = Address( 4049 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4050 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4051 KmpTaskTShareds)), 4052 Loc), 4053 CGM.getNaturalTypeAlignment(SharedsTy)); 4054 } 4055 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4056 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4057 CGF.FinishFunction(); 4058 return TaskDup; 4059 } 4060 4061 /// Checks if destructor function is required to be generated. 4062 /// \return true if cleanups are required, false otherwise. 4063 static bool 4064 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4065 ArrayRef<PrivateDataTy> Privates) { 4066 for (const PrivateDataTy &P : Privates) { 4067 if (P.second.isLocalPrivate()) 4068 continue; 4069 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4070 if (Ty.isDestructedType()) 4071 return true; 4072 } 4073 return false; 4074 } 4075 4076 namespace { 4077 /// Loop generator for OpenMP iterator expression. 
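/// As a rough sketch, for an iterator modifier such as
/// 'iterator(i = begin:end:step)' the scope emits control flow of the
/// following shape (block names match the ones created below):
/// \code
/// counter = 0;
/// iter.cont:
///   if (counter < <number-of-iterations>) goto iter.body; else goto iter.exit;
/// iter.body:
///   i = begin + counter * step;
///   ... code using i ...
///   counter = counter + 1;
///   goto iter.cont;
/// iter.exit:
/// \endcode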
4078 class OMPIteratorGeneratorScope final 4079 : public CodeGenFunction::OMPPrivateScope { 4080 CodeGenFunction &CGF; 4081 const OMPIteratorExpr *E = nullptr; 4082 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4083 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4084 OMPIteratorGeneratorScope() = delete; 4085 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4086 4087 public: 4088 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4089 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4090 if (!E) 4091 return; 4092 SmallVector<llvm::Value *, 4> Uppers; 4093 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4094 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4095 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4096 addPrivate(VD, [&CGF, VD]() { 4097 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4098 }); 4099 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4100 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4101 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4102 "counter.addr"); 4103 }); 4104 } 4105 Privatize(); 4106 4107 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4108 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4109 LValue CLVal = 4110 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4111 HelperData.CounterVD->getType()); 4112 // Counter = 0; 4113 CGF.EmitStoreOfScalar( 4114 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4115 CLVal); 4116 CodeGenFunction::JumpDest &ContDest = 4117 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4118 CodeGenFunction::JumpDest &ExitDest = 4119 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4120 // N = <number-of_iterations>; 4121 llvm::Value *N = Uppers[I]; 4122 // cont: 4123 // if (Counter < N) goto body; else goto exit; 4124 CGF.EmitBlock(ContDest.getBlock()); 4125 auto *CVal = 4126 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4127 llvm::Value *Cmp = 4128 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4129 ? 
CGF.Builder.CreateICmpSLT(CVal, N) 4130 : CGF.Builder.CreateICmpULT(CVal, N); 4131 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4132 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4133 // body: 4134 CGF.EmitBlock(BodyBB); 4135 // Iteri = Begini + Counter * Stepi; 4136 CGF.EmitIgnoredExpr(HelperData.Update); 4137 } 4138 } 4139 ~OMPIteratorGeneratorScope() { 4140 if (!E) 4141 return; 4142 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4143 // Counter = Counter + 1; 4144 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4145 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4146 // goto cont; 4147 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4148 // exit: 4149 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4150 } 4151 } 4152 }; 4153 } // namespace 4154 4155 static std::pair<llvm::Value *, llvm::Value *> 4156 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4157 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4158 llvm::Value *Addr; 4159 if (OASE) { 4160 const Expr *Base = OASE->getBase(); 4161 Addr = CGF.EmitScalarExpr(Base); 4162 } else { 4163 Addr = CGF.EmitLValue(E).getPointer(CGF); 4164 } 4165 llvm::Value *SizeVal; 4166 QualType Ty = E->getType(); 4167 if (OASE) { 4168 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4169 for (const Expr *SE : OASE->getDimensions()) { 4170 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4171 Sz = CGF.EmitScalarConversion( 4172 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4173 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4174 } 4175 } else if (const auto *ASE = 4176 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4177 LValue UpAddrLVal = 4178 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4179 Address UpAddrAddress = UpAddrLVal.getAddress(CGF); 4180 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( 4181 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1); 4182 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4183 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4184 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4185 } else { 4186 SizeVal = CGF.getTypeSize(Ty); 4187 } 4188 return std::make_pair(Addr, SizeVal); 4189 } 4190 4191 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4192 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4193 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4194 if (KmpTaskAffinityInfoTy.isNull()) { 4195 RecordDecl *KmpAffinityInfoRD = 4196 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4197 KmpAffinityInfoRD->startDefinition(); 4198 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4199 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4200 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4201 KmpAffinityInfoRD->completeDefinition(); 4202 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4203 } 4204 } 4205 4206 CGOpenMPRuntime::TaskResultTy 4207 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4208 const OMPExecutableDirective &D, 4209 llvm::Function *TaskFunction, QualType SharedsTy, 4210 Address Shareds, const OMPTaskDataTy &Data) { 4211 ASTContext &C = CGM.getContext(); 4212 llvm::SmallVector<PrivateDataTy, 4> Privates; 4213 // Aggregate privates and sort them by the alignment. 
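  // Sorting in order of decreasing alignment places the most strictly
  // aligned fields first in the generated .kmp_privates.t record, keeping
  // its layout densely packed.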
4214 const auto *I = Data.PrivateCopies.begin(); 4215 for (const Expr *E : Data.PrivateVars) { 4216 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4217 Privates.emplace_back( 4218 C.getDeclAlign(VD), 4219 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4220 /*PrivateElemInit=*/nullptr)); 4221 ++I; 4222 } 4223 I = Data.FirstprivateCopies.begin(); 4224 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4225 for (const Expr *E : Data.FirstprivateVars) { 4226 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4227 Privates.emplace_back( 4228 C.getDeclAlign(VD), 4229 PrivateHelpersTy( 4230 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4231 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4232 ++I; 4233 ++IElemInitRef; 4234 } 4235 I = Data.LastprivateCopies.begin(); 4236 for (const Expr *E : Data.LastprivateVars) { 4237 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4238 Privates.emplace_back( 4239 C.getDeclAlign(VD), 4240 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4241 /*PrivateElemInit=*/nullptr)); 4242 ++I; 4243 } 4244 for (const VarDecl *VD : Data.PrivateLocals) { 4245 if (isAllocatableDecl(VD)) 4246 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); 4247 else 4248 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); 4249 } 4250 llvm::stable_sort(Privates, 4251 [](const PrivateDataTy &L, const PrivateDataTy &R) { 4252 return L.first > R.first; 4253 }); 4254 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4255 // Build type kmp_routine_entry_t (if not built yet). 4256 emitKmpRoutineEntryT(KmpInt32Ty); 4257 // Build type kmp_task_t (if not built yet). 4258 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4259 if (SavedKmpTaskloopTQTy.isNull()) { 4260 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4261 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4262 } 4263 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4264 } else { 4265 assert((D.getDirectiveKind() == OMPD_task || 4266 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4267 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4268 "Expected taskloop, task or target directive"); 4269 if (SavedKmpTaskTQTy.isNull()) { 4270 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4271 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4272 } 4273 KmpTaskTQTy = SavedKmpTaskTQTy; 4274 } 4275 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4276 // Build particular struct kmp_task_t for the given task. 4277 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4278 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4279 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4280 QualType KmpTaskTWithPrivatesPtrQTy = 4281 C.getPointerType(KmpTaskTWithPrivatesQTy); 4282 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4283 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4284 KmpTaskTWithPrivatesTy->getPointerTo(); 4285 llvm::Value *KmpTaskTWithPrivatesTySize = 4286 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4287 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4288 4289 // Emit initial values for private copies (if any). 
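  // The mapping function becomes the 4th argument of the outlined task
  // function; if the task has no privates, a null pointer is passed instead
  // (see the else branch below).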
  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any, otherwise use the default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
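  // For example (hypothetical clauses; a sketch, not emitted verbatim):
  //   #pragma omp task untied priority(p) detach(evt)
  // reaches the task_alloc call above with roughly
  //   flags = 0x0 /*untied*/ | PriorityFlag | DetachableFlag,
  // FinalFlag being OR'ed in dynamically by the select when a final(<expr>)
  // clause is present.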
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate the number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Field ids in the kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill the array with elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
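    // Taken together, the two filling loops (constant entries above,
    // iterator-expanded entries below) produce, schematically:
    //   kmp_task_affinity_info_t affs[<n>];
    //   affs[i].base_addr = (intptr_t)&<item_i>;
    //   affs[i].len       = sizeof(<item_i>);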
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now until the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
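  // At this point the emitted code corresponds to, roughly:
  //   kmp_task_t *new_task = __kmpc_omp_task_alloc(
  //       loc, gtid, flags, sizeof_kmp_task_t, sizeof_shareds,
  //       &.omp_task_entry.);
  //   memcpy(new_task->shareds, &<captured vars>, sizeof_shareds);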
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Field ids in the kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates the internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}
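
// For reference, translateDependencyKind produces, e.g.:
//   depend(in: x)              -> 0x1 (DepIn)
//   depend(out: x)/(inout: x)  -> 0x3 (DepInOut)
//   depend(mutexinoutset: x)   -> 0x4 (DepMutexInOutSet)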
/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
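// A depobj handle points just past a leading bookkeeping element, so the
// entry count above is read back as, schematically:
//   kmp_depend_info *deps = *(kmp_depend_info **)depobj;
//   size_t n = deps[-1].base_addr;
// (see the matching layout emitted in emitDepobjDependClause below).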

static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcpy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
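    // Sizing example (hypothetical clauses, sketch only): for
    //   depend(iterator(i = 0:n), in : a[i]) depend(in : x, y)
    // the branch below contributes n * 1 iterator dependencies, while the
    // constant part NumDependencies above already counts x and y.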
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}

Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
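  // The resulting allocation is laid out as, schematically:
  //   kmp_depend_info deps[<n> + 1];
  //   deps[0].base_addr = <n>;   // element count, written just below
  //   // deps[1..n] hold the actual dependencies
  // and the address returned to the caller points at deps[1].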
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}

void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
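  // Schematically:
  //   kmp_depend_info *el = begin;
  //   do { el->flags = <new dep kind>; ++el; } while (el != end);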
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);
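
  // Overall lowering from here on (a sketch):
  //   if (<IfCond>) {  // or unconditionally when there is no if clause
  //     __kmpc_omp_task(_with_deps)(loc, gtid, new_task, ...);
  //   } else {
  //     __kmpc_omp_wait_deps(...);                    // if dependences exist
  //     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
  //     .omp_task_entry.(gtid, new_task);             // undeferred execution
  //     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
  //   }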
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
  // dependence list is not empty.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if the parent region is untied and build the return for an untied
    // task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit the reduction operation for each element of an array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
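  // Conceptually (a sketch), for an array section of NumElements elements
  // the loop entered below performs:
  //   for (i = 0; i != NumElements; ++i)
  //     lhs[i] = RedOp(lhs[i], rhs[i]);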
  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit the reduction combiner. If the combiner is a simple expression, emit
/// it as is; otherwise treat it as the combiner of a user-defined reduction
/// (UDR) decl and emit it as a call of the UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}
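// For a UDR the net effect is, schematically (the combiner function shown
// here is illustrative; its actual name is whatever getUserDefinedReduction
// returns):
//   <udr_combiner>(&lhs_priv, &rhs_priv);
// while for built-in operations the combiner expression itself (e.g.
// "lhs = lhs + rhs") is emitted directly.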

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // The following code should be emitted for the reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //   *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //   ...
  //   *(Type<n>-1*)lhs[<n>-1] =
  //       ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //                               *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //                                RedList, reduce_func, &<lock>)) {
  // case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  // case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //   break;
  // default:;
  // }
  //
  // If SimpleReduction is true, only the following code is generated:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //    RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);
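  // The switch below dispatches on the runtime's answer; in summary:
  //   1 - this thread performs the non-atomic reduction and finishes with
  //       __kmpc_end_reduce{_nowait};
  //   2 - this thread performs the reduction atomically;
  //   anything else - nothing left to do on this thread.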
Build switch(res) 5674 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5675 llvm::SwitchInst *SwInst = 5676 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5677 5678 // 6. Build case 1: 5679 // ... 5680 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5681 // ... 5682 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5683 // break; 5684 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5685 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5686 CGF.EmitBlock(Case1BB); 5687 5688 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5689 llvm::Value *EndArgs[] = { 5690 IdentTLoc, // ident_t *<loc> 5691 ThreadId, // i32 <gtid> 5692 Lock // kmp_critical_name *&<lock> 5693 }; 5694 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5695 CodeGenFunction &CGF, PrePostActionTy &Action) { 5696 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5697 auto IPriv = Privates.begin(); 5698 auto ILHS = LHSExprs.begin(); 5699 auto IRHS = RHSExprs.begin(); 5700 for (const Expr *E : ReductionOps) { 5701 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5702 cast<DeclRefExpr>(*IRHS)); 5703 ++IPriv; 5704 ++ILHS; 5705 ++IRHS; 5706 } 5707 }; 5708 RegionCodeGenTy RCG(CodeGen); 5709 CommonActionTy Action( 5710 nullptr, llvm::None, 5711 OMPBuilder.getOrCreateRuntimeFunction( 5712 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5713 : OMPRTL___kmpc_end_reduce), 5714 EndArgs); 5715 RCG.setAction(Action); 5716 RCG(CGF); 5717 5718 CGF.EmitBranch(DefaultBB); 5719 5720 // 7. Build case 2: 5721 // ... 5722 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5723 // ... 5724 // break; 5725 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5726 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5727 CGF.EmitBlock(Case2BB); 5728 5729 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5730 CodeGenFunction &CGF, PrePostActionTy &Action) { 5731 auto ILHS = LHSExprs.begin(); 5732 auto IRHS = RHSExprs.begin(); 5733 auto IPriv = Privates.begin(); 5734 for (const Expr *E : ReductionOps) { 5735 const Expr *XExpr = nullptr; 5736 const Expr *EExpr = nullptr; 5737 const Expr *UpExpr = nullptr; 5738 BinaryOperatorKind BO = BO_Comma; 5739 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5740 if (BO->getOpcode() == BO_Assign) { 5741 XExpr = BO->getLHS(); 5742 UpExpr = BO->getRHS(); 5743 } 5744 } 5745 // Try to emit update expression as a simple atomic. 5746 const Expr *RHSExpr = UpExpr; 5747 if (RHSExpr) { 5748 // Analyze RHS part of the whole expression. 5749 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5750 RHSExpr->IgnoreParenImpCasts())) { 5751 // If this is a conditional operator, analyze its condition for 5752 // min/max reduction operator. 
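// Illustrative sketch: for a max reduction the update expression has the
// shape 'x = x < e ? e : x;', so the RHS of the condition 'x < e' yields
// the operand (EExpr) and the comparison opcode (BO) needed for the atomic
// form emitted below.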
5753 RHSExpr = ACO->getCond(); 5754 } 5755 if (const auto *BORHS = 5756 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5757 EExpr = BORHS->getRHS(); 5758 BO = BORHS->getOpcode(); 5759 } 5760 } 5761 if (XExpr) { 5762 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5763 auto &&AtomicRedGen = [BO, VD, 5764 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5765 const Expr *EExpr, const Expr *UpExpr) { 5766 LValue X = CGF.EmitLValue(XExpr); 5767 RValue E; 5768 if (EExpr) 5769 E = CGF.EmitAnyExpr(EExpr); 5770 CGF.EmitOMPAtomicSimpleUpdateExpr( 5771 X, E, BO, /*IsXLHSInRHSPart=*/true, 5772 llvm::AtomicOrdering::Monotonic, Loc, 5773 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5774 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5775 PrivateScope.addPrivate( 5776 VD, [&CGF, VD, XRValue, Loc]() { 5777 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5778 CGF.emitOMPSimpleStore( 5779 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5780 VD->getType().getNonReferenceType(), Loc); 5781 return LHSTemp; 5782 }); 5783 (void)PrivateScope.Privatize(); 5784 return CGF.EmitAnyExpr(UpExpr); 5785 }); 5786 }; 5787 if ((*IPriv)->getType()->isArrayType()) { 5788 // Emit atomic reduction for array section. 5789 const auto *RHSVar = 5790 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5791 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5792 AtomicRedGen, XExpr, EExpr, UpExpr); 5793 } else { 5794 // Emit atomic reduction for array subscript or single variable. 5795 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5796 } 5797 } else { 5798 // Emit as a critical region. 5799 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5800 const Expr *, const Expr *) { 5801 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5802 std::string Name = RT.getName({"atomic_reduction"}); 5803 RT.emitCriticalRegion( 5804 CGF, Name, 5805 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5806 Action.Enter(CGF); 5807 emitReductionCombiner(CGF, E); 5808 }, 5809 Loc); 5810 }; 5811 if ((*IPriv)->getType()->isArrayType()) { 5812 const auto *LHSVar = 5813 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5814 const auto *RHSVar = 5815 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5816 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5817 CritRedGen); 5818 } else { 5819 CritRedGen(CGF, nullptr, nullptr, nullptr); 5820 } 5821 } 5822 ++ILHS; 5823 ++IRHS; 5824 ++IPriv; 5825 } 5826 }; 5827 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5828 if (!WithNowait) { 5829 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5830 llvm::Value *EndArgs[] = { 5831 IdentTLoc, // ident_t *<loc> 5832 ThreadId, // i32 <gtid> 5833 Lock // kmp_critical_name *&<lock> 5834 }; 5835 CommonActionTy Action(nullptr, llvm::None, 5836 OMPBuilder.getOrCreateRuntimeFunction( 5837 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5838 EndArgs); 5839 AtomicRCG.setAction(Action); 5840 AtomicRCG(CGF); 5841 } else { 5842 AtomicRCG(CGF); 5843 } 5844 5845 CGF.EmitBranch(DefaultBB); 5846 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5847 } 5848 5849 /// Generates unique name for artificial threadprivate variables. 5850 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5851 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5852 const Expr *Ref) { 5853 SmallString<256> Buffer; 5854 llvm::raw_svector_ostream Out(Buffer); 5855 const clang::DeclRefExpr *DE; 5856 const VarDecl *D = ::getBaseDecl(Ref, DE); 5857 if (!D) 5858 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5859 D = D->getCanonicalDecl(); 5860 std::string Name = CGM.getOpenMPRuntime().getName( 5861 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5862 Out << Prefix << Name << "_" 5863 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5864 return std::string(Out.str()); 5865 } 5866 5867 /// Emits reduction initializer function: 5868 /// \code 5869 /// void @.red_init(void* %arg, void* %orig) { 5870 /// %0 = bitcast void* %arg to <type>* 5871 /// store <type> <init>, <type>* %0 5872 /// ret void 5873 /// } 5874 /// \endcode 5875 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5876 SourceLocation Loc, 5877 ReductionCodeGen &RCG, unsigned N) { 5878 ASTContext &C = CGM.getContext(); 5879 QualType VoidPtrTy = C.VoidPtrTy; 5880 VoidPtrTy.addRestrict(); 5881 FunctionArgList Args; 5882 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5883 ImplicitParamDecl::Other); 5884 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5885 ImplicitParamDecl::Other); 5886 Args.emplace_back(&Param); 5887 Args.emplace_back(&ParamOrig); 5888 const auto &FnInfo = 5889 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5890 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5891 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5892 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5893 Name, &CGM.getModule()); 5894 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5895 Fn->setDoesNotRecurse(); 5896 CodeGenFunction CGF(CGM); 5897 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5898 Address PrivateAddr = CGF.EmitLoadOfPointer( 5899 CGF.GetAddrOfLocalVar(&Param), 5900 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5901 llvm::Value *Size = nullptr; 5902 // If the size of the reduction item is non-constant, load it from global 5903 // threadprivate variable. 
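// The size was stored there earlier by emitTaskReductionFixups() under a
// name produced by generateUniqueName(), e.g. something like
// "reduction_size.<mangled var>_<loc>" (modulo getName() separators).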
5904 if (RCG.getSizes(N).second) {
5905 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5906 CGF, CGM.getContext().getSizeType(),
5907 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5908 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5909 CGM.getContext().getSizeType(), Loc);
5910 }
5911 RCG.emitAggregateType(CGF, N, Size);
5912 Address OrigAddr = Address::invalid();
5913 // If the initializer uses the initializer from the declare reduction
5914 // construct, emit a pointer to the address of the original reduction item
5915 // (required by the reduction initializer).
5916 if (RCG.usesReductionInitializer(N)) {
5917 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5918 OrigAddr = CGF.EmitLoadOfPointer(
5919 SharedAddr,
5920 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5921 }
5922 // Emit the initializer:
5923 // %0 = bitcast void* %arg to <type>*
5924 // store <type> <init>, <type>* %0
5925 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5926 [](CodeGenFunction &) { return false; });
5927 CGF.FinishFunction();
5928 return Fn;
5929 }
5930 
5931 /// Emits reduction combiner function:
5932 /// \code
5933 /// void @.red_comb(void* %arg0, void* %arg1) {
5934 /// %lhs = bitcast void* %arg0 to <type>*
5935 /// %rhs = bitcast void* %arg1 to <type>*
5936 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5937 /// store <type> %2, <type>* %lhs
5938 /// ret void
5939 /// }
5940 /// \endcode
5941 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5942 SourceLocation Loc,
5943 ReductionCodeGen &RCG, unsigned N,
5944 const Expr *ReductionOp,
5945 const Expr *LHS, const Expr *RHS,
5946 const Expr *PrivateRef) {
5947 ASTContext &C = CGM.getContext();
5948 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5949 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5950 FunctionArgList Args;
5951 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5952 C.VoidPtrTy, ImplicitParamDecl::Other);
5953 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5954 ImplicitParamDecl::Other);
5955 Args.emplace_back(&ParamInOut);
5956 Args.emplace_back(&ParamIn);
5957 const auto &FnInfo =
5958 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5959 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5960 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5961 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5962 Name, &CGM.getModule());
5963 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5964 Fn->setDoesNotRecurse();
5965 CodeGenFunction CGF(CGM);
5966 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5967 llvm::Value *Size = nullptr;
5968 // If the size of the reduction item is non-constant, load it from the global
5969 // threadprivate variable.
5970 if (RCG.getSizes(N).second) {
5971 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5972 CGF, CGM.getContext().getSizeType(),
5973 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5974 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5975 CGM.getContext().getSizeType(), Loc);
5976 }
5977 RCG.emitAggregateType(CGF, N, Size);
5978 // Remap lhs and rhs variables to the addresses of the function arguments.
5979 // %lhs = bitcast void* %arg0 to <type>* 5980 // %rhs = bitcast void* %arg1 to <type>* 5981 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5982 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 5983 // Pull out the pointer to the variable. 5984 Address PtrAddr = CGF.EmitLoadOfPointer( 5985 CGF.GetAddrOfLocalVar(&ParamInOut), 5986 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5987 return CGF.Builder.CreateElementBitCast( 5988 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5989 }); 5990 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 5991 // Pull out the pointer to the variable. 5992 Address PtrAddr = CGF.EmitLoadOfPointer( 5993 CGF.GetAddrOfLocalVar(&ParamIn), 5994 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5995 return CGF.Builder.CreateElementBitCast( 5996 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5997 }); 5998 PrivateScope.Privatize(); 5999 // Emit the combiner body: 6000 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6001 // store <type> %2, <type>* %lhs 6002 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6003 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6004 cast<DeclRefExpr>(RHS)); 6005 CGF.FinishFunction(); 6006 return Fn; 6007 } 6008 6009 /// Emits reduction finalizer function: 6010 /// \code 6011 /// void @.red_fini(void* %arg) { 6012 /// %0 = bitcast void* %arg to <type>* 6013 /// <destroy>(<type>* %0) 6014 /// ret void 6015 /// } 6016 /// \endcode 6017 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6018 SourceLocation Loc, 6019 ReductionCodeGen &RCG, unsigned N) { 6020 if (!RCG.needCleanups(N)) 6021 return nullptr; 6022 ASTContext &C = CGM.getContext(); 6023 FunctionArgList Args; 6024 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6025 ImplicitParamDecl::Other); 6026 Args.emplace_back(&Param); 6027 const auto &FnInfo = 6028 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6029 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6030 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6031 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6032 Name, &CGM.getModule()); 6033 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6034 Fn->setDoesNotRecurse(); 6035 CodeGenFunction CGF(CGM); 6036 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6037 Address PrivateAddr = CGF.EmitLoadOfPointer( 6038 CGF.GetAddrOfLocalVar(&Param), 6039 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6040 llvm::Value *Size = nullptr; 6041 // If the size of the reduction item is non-constant, load it from global 6042 // threadprivate variable. 
6043 if (RCG.getSizes(N).second) { 6044 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6045 CGF, CGM.getContext().getSizeType(), 6046 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6047 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6048 CGM.getContext().getSizeType(), Loc); 6049 } 6050 RCG.emitAggregateType(CGF, N, Size); 6051 // Emit the finalizer body: 6052 // <destroy>(<type>* %0) 6053 RCG.emitCleanups(CGF, N, PrivateAddr); 6054 CGF.FinishFunction(Loc); 6055 return Fn; 6056 } 6057 6058 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6059 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6060 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6061 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6062 return nullptr; 6063 6064 // Build typedef struct: 6065 // kmp_taskred_input { 6066 // void *reduce_shar; // shared reduction item 6067 // void *reduce_orig; // original reduction item used for initialization 6068 // size_t reduce_size; // size of data item 6069 // void *reduce_init; // data initialization routine 6070 // void *reduce_fini; // data finalization routine 6071 // void *reduce_comb; // data combiner routine 6072 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6073 // } kmp_taskred_input_t; 6074 ASTContext &C = CGM.getContext(); 6075 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6076 RD->startDefinition(); 6077 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6078 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6079 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6080 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6081 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6082 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6083 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6084 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6085 RD->completeDefinition(); 6086 QualType RDType = C.getRecordType(RD); 6087 unsigned Size = Data.ReductionVars.size(); 6088 llvm::APInt ArraySize(/*numBits=*/64, Size); 6089 QualType ArrayRDType = C.getConstantArrayType( 6090 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6091 // kmp_task_red_input_t .rd_input.[Size]; 6092 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6093 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6094 Data.ReductionCopies, Data.ReductionOps); 6095 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6096 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6097 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6098 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6099 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6100 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, 6101 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6102 ".rd_input.gep."); 6103 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6104 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6105 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6106 RCG.emitSharedOrigLValue(CGF, Cnt); 6107 llvm::Value *CastedShared = 6108 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6109 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6110 // ElemLVal.reduce_orig = &Origs[Cnt]; 6111 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6112 llvm::Value *CastedOrig = 6113 
CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6114 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6115 RCG.emitAggregateType(CGF, Cnt);
6116 llvm::Value *SizeValInChars;
6117 llvm::Value *SizeVal;
6118 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6119 // We use delayed creation/initialization for VLAs and array sections. It is
6120 // required because the runtime does not provide a way to pass the sizes of
6121 // VLAs/array sections to the initializer/combiner/finalizer functions.
6122 // Instead, threadprivate global variables are used to store these values,
6123 // which those functions then load.
6124 bool DelayedCreation = !!SizeVal;
6125 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6126 /*isSigned=*/false);
6127 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6128 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6129 // ElemLVal.reduce_init = init;
6130 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6131 llvm::Value *InitAddr =
6132 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6133 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6134 // ElemLVal.reduce_fini = fini;
6135 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6136 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6137 llvm::Value *FiniAddr = Fini
6138 ? CGF.EmitCastToVoidPtr(Fini)
6139 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6140 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6141 // ElemLVal.reduce_comb = comb;
6142 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6143 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6144 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6145 RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6146 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6147 // ElemLVal.flags = 0; (set to 1 when creation/initialization is delayed)
6148 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6149 if (DelayedCreation) {
6150 CGF.EmitStoreOfScalar(
6151 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6152 FlagsLVal);
6153 } else
6154 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6155 FlagsLVal.getType());
6156 }
6157 if (Data.IsReductionWithTaskMod) {
6158 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6159 // is_ws, int num, void *data);
6160 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6161 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6162 CGM.IntTy, /*isSigned=*/true);
6163 llvm::Value *Args[] = {
6164 IdentTLoc, GTid,
6165 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ?
1 : 0, 6166 /*isSigned=*/true), 6167 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6168 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6169 TaskRedInput.getPointer(), CGM.VoidPtrTy)}; 6170 return CGF.EmitRuntimeCall( 6171 OMPBuilder.getOrCreateRuntimeFunction( 6172 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), 6173 Args); 6174 } 6175 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); 6176 llvm::Value *Args[] = { 6177 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6178 /*isSigned=*/true), 6179 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6180 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6181 CGM.VoidPtrTy)}; 6182 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6183 CGM.getModule(), OMPRTL___kmpc_taskred_init), 6184 Args); 6185 } 6186 6187 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 6188 SourceLocation Loc, 6189 bool IsWorksharingReduction) { 6190 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6191 // is_ws, int num, void *data); 6192 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6193 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6194 CGM.IntTy, /*isSigned=*/true); 6195 llvm::Value *Args[] = {IdentTLoc, GTid, 6196 llvm::ConstantInt::get(CGM.IntTy, 6197 IsWorksharingReduction ? 1 : 0, 6198 /*isSigned=*/true)}; 6199 (void)CGF.EmitRuntimeCall( 6200 OMPBuilder.getOrCreateRuntimeFunction( 6201 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), 6202 Args); 6203 } 6204 6205 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6206 SourceLocation Loc, 6207 ReductionCodeGen &RCG, 6208 unsigned N) { 6209 auto Sizes = RCG.getSizes(N); 6210 // Emit threadprivate global variable if the type is non-constant 6211 // (Sizes.second = nullptr). 6212 if (Sizes.second) { 6213 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6214 /*isSigned=*/false); 6215 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6216 CGF, CGM.getContext().getSizeType(), 6217 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6218 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6219 } 6220 } 6221 6222 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6223 SourceLocation Loc, 6224 llvm::Value *ReductionsPtr, 6225 LValue SharedLVal) { 6226 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6227 // *d); 6228 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6229 CGM.IntTy, 6230 /*isSigned=*/true), 6231 ReductionsPtr, 6232 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6233 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6234 return Address( 6235 CGF.EmitRuntimeCall( 6236 OMPBuilder.getOrCreateRuntimeFunction( 6237 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6238 Args), 6239 SharedLVal.getAlignment()); 6240 } 6241 6242 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, 6243 const OMPTaskDataTy &Data) { 6244 if (!CGF.HaveInsertPoint()) 6245 return; 6246 6247 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) { 6248 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder. 
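// A rough sketch of the IR createTaskwait() is expected to produce here:
//   %gtid = call i32 @__kmpc_global_thread_num(%struct.ident_t* @<loc>)
//   call i32 @__kmpc_omp_taskwait(%struct.ident_t* @<loc>, i32 %gtid)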
6249 OMPBuilder.createTaskwait(CGF.Builder); 6250 } else { 6251 llvm::Value *ThreadID = getThreadID(CGF, Loc); 6252 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 6253 auto &M = CGM.getModule(); 6254 Address DependenciesArray = Address::invalid(); 6255 llvm::Value *NumOfElements; 6256 std::tie(NumOfElements, DependenciesArray) = 6257 emitDependClause(CGF, Data.Dependences, Loc); 6258 llvm::Value *DepWaitTaskArgs[6]; 6259 if (!Data.Dependences.empty()) { 6260 DepWaitTaskArgs[0] = UpLoc; 6261 DepWaitTaskArgs[1] = ThreadID; 6262 DepWaitTaskArgs[2] = NumOfElements; 6263 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 6264 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 6265 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6266 6267 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 6268 6269 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 6270 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 6271 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 6272 // is specified. 6273 CGF.EmitRuntimeCall( 6274 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 6275 DepWaitTaskArgs); 6276 6277 } else { 6278 6279 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6280 // global_tid); 6281 llvm::Value *Args[] = {UpLoc, ThreadID}; 6282 // Ignore return result until untied tasks are supported. 6283 CGF.EmitRuntimeCall( 6284 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), 6285 Args); 6286 } 6287 } 6288 6289 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6290 Region->emitUntiedSwitch(CGF); 6291 } 6292 6293 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6294 OpenMPDirectiveKind InnerKind, 6295 const RegionCodeGenTy &CodeGen, 6296 bool HasCancel) { 6297 if (!CGF.HaveInsertPoint()) 6298 return; 6299 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, 6300 InnerKind != OMPD_critical && 6301 InnerKind != OMPD_master && 6302 InnerKind != OMPD_masked); 6303 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6304 } 6305 6306 namespace { 6307 enum RTCancelKind { 6308 CancelNoreq = 0, 6309 CancelParallel = 1, 6310 CancelLoop = 2, 6311 CancelSections = 3, 6312 CancelTaskgroup = 4 6313 }; 6314 } // anonymous namespace 6315 6316 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6317 RTCancelKind CancelKind = CancelNoreq; 6318 if (CancelRegion == OMPD_parallel) 6319 CancelKind = CancelParallel; 6320 else if (CancelRegion == OMPD_for) 6321 CancelKind = CancelLoop; 6322 else if (CancelRegion == OMPD_sections) 6323 CancelKind = CancelSections; 6324 else { 6325 assert(CancelRegion == OMPD_taskgroup); 6326 CancelKind = CancelTaskgroup; 6327 } 6328 return CancelKind; 6329 } 6330 6331 void CGOpenMPRuntime::emitCancellationPointCall( 6332 CodeGenFunction &CGF, SourceLocation Loc, 6333 OpenMPDirectiveKind CancelRegion) { 6334 if (!CGF.HaveInsertPoint()) 6335 return; 6336 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6337 // global_tid, kmp_int32 cncl_kind); 6338 if (auto *OMPRegionInfo = 6339 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6340 // For 'cancellation point taskgroup', the task region info may not have a 6341 // cancel. This may instead happen in another adjacent task. 
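// E.g. (illustrative): a task containing only
//   #pragma omp cancellation point taskgroup
// still needs the runtime check below even though the matching
//   #pragma omp cancel taskgroup
// appears in a sibling task of the same taskgroup.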
6342 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6343 llvm::Value *Args[] = { 6344 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6345 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6346 // Ignore return result until untied tasks are supported. 6347 llvm::Value *Result = CGF.EmitRuntimeCall( 6348 OMPBuilder.getOrCreateRuntimeFunction( 6349 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6350 Args); 6351 // if (__kmpc_cancellationpoint()) { 6352 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6353 // exit from construct; 6354 // } 6355 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6356 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6357 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6358 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6359 CGF.EmitBlock(ExitBB); 6360 if (CancelRegion == OMPD_parallel) 6361 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6362 // exit from construct; 6363 CodeGenFunction::JumpDest CancelDest = 6364 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6365 CGF.EmitBranchThroughCleanup(CancelDest); 6366 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6367 } 6368 } 6369 } 6370 6371 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6372 const Expr *IfCond, 6373 OpenMPDirectiveKind CancelRegion) { 6374 if (!CGF.HaveInsertPoint()) 6375 return; 6376 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6377 // kmp_int32 cncl_kind); 6378 auto &M = CGM.getModule(); 6379 if (auto *OMPRegionInfo = 6380 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6381 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6382 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6383 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6384 llvm::Value *Args[] = { 6385 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6386 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6387 // Ignore return result until untied tasks are supported. 6388 llvm::Value *Result = CGF.EmitRuntimeCall( 6389 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6390 // if (__kmpc_cancel()) { 6391 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only 6392 // exit from construct; 6393 // } 6394 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6395 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6396 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6397 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6398 CGF.EmitBlock(ExitBB); 6399 if (CancelRegion == OMPD_parallel) 6400 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); 6401 // exit from construct; 6402 CodeGenFunction::JumpDest CancelDest = 6403 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6404 CGF.EmitBranchThroughCleanup(CancelDest); 6405 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6406 }; 6407 if (IfCond) { 6408 emitIfClause(CGF, IfCond, ThenGen, 6409 [](CodeGenFunction &, PrePostActionTy &) {}); 6410 } else { 6411 RegionCodeGenTy ThenRCG(ThenGen); 6412 ThenRCG(CGF); 6413 } 6414 } 6415 } 6416 6417 namespace { 6418 /// Cleanup action for uses_allocators support. 
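/// For instance (an illustrative sketch), given
///   #pragma omp target uses_allocators(my_alloc(my_traits))
/// Enter() emits the __kmpc_init_allocator call before the target region
/// body and Exit() emits the matching __kmpc_destroy_allocator call after
/// it ('my_alloc'/'my_traits' are hypothetical names).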
6419 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6420 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6421 6422 public: 6423 OMPUsesAllocatorsActionTy( 6424 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6425 : Allocators(Allocators) {} 6426 void Enter(CodeGenFunction &CGF) override { 6427 if (!CGF.HaveInsertPoint()) 6428 return; 6429 for (const auto &AllocatorData : Allocators) { 6430 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6431 CGF, AllocatorData.first, AllocatorData.second); 6432 } 6433 } 6434 void Exit(CodeGenFunction &CGF) override { 6435 if (!CGF.HaveInsertPoint()) 6436 return; 6437 for (const auto &AllocatorData : Allocators) { 6438 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6439 AllocatorData.first); 6440 } 6441 } 6442 }; 6443 } // namespace 6444 6445 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6446 const OMPExecutableDirective &D, StringRef ParentName, 6447 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6448 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6449 assert(!ParentName.empty() && "Invalid target region parent name!"); 6450 HasEmittedTargetRegion = true; 6451 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6452 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6453 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6454 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6455 if (!D.AllocatorTraits) 6456 continue; 6457 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6458 } 6459 } 6460 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6461 CodeGen.setAction(UsesAllocatorAction); 6462 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6463 IsOffloadEntry, CodeGen); 6464 } 6465 6466 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6467 const Expr *Allocator, 6468 const Expr *AllocatorTraits) { 6469 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6470 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6471 // Use default memspace handle. 6472 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6473 llvm::Value *NumTraits = llvm::ConstantInt::get( 6474 CGF.IntTy, cast<ConstantArrayType>( 6475 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6476 ->getSize() 6477 .getLimitedValue()); 6478 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6479 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6480 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6481 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6482 AllocatorTraitsLVal.getBaseInfo(), 6483 AllocatorTraitsLVal.getTBAAInfo()); 6484 llvm::Value *Traits = 6485 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6486 6487 llvm::Value *AllocatorVal = 6488 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6489 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6490 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6491 // Store to allocator. 
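// That is, materialize the allocator variable itself and store the handle
// returned by __kmpc_init_allocator into it, converted to the declared
// allocator type.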
6492 CGF.EmitVarDecl(*cast<VarDecl>(
6493 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6494 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6495 AllocatorVal =
6496 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6497 Allocator->getType(), Allocator->getExprLoc());
6498 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6499 }
6500 
6501 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6502 const Expr *Allocator) {
6503 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6504 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6505 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6506 llvm::Value *AllocatorVal =
6507 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6508 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6509 CGF.getContext().VoidPtrTy,
6510 Allocator->getExprLoc());
6511 (void)CGF.EmitRuntimeCall(
6512 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6513 OMPRTL___kmpc_destroy_allocator),
6514 {ThreadId, AllocatorVal});
6515 }
6516 
6517 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6518 const OMPExecutableDirective &D, StringRef ParentName,
6519 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6520 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6521 // Create a unique name for the entry function using the source location
6522 // information of the current target region. The name will be something like:
6523 //
6524 // __omp_offloading_DD_FFFF_PP_lBB
6525 //
6526 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6527 // mangled name of the function that encloses the target region and BB is the
6528 // line number of the target region.
6529 
6530 unsigned DeviceID;
6531 unsigned FileID;
6532 unsigned Line;
6533 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6534 Line);
6535 SmallString<64> EntryFnName;
6536 {
6537 llvm::raw_svector_ostream OS(EntryFnName);
6538 OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6539 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6540 }
6541 
6542 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6543 
6544 CodeGenFunction CGF(CGM, true);
6545 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6546 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6547 
6548 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6549 
6550 // If this target outlined function is not an offload entry, we don't need to
6551 // register it.
6552 if (!IsOffloadEntry)
6553 return;
6554 
6555 // The target region ID is used by the runtime library to identify the current
6556 // target region, so it only has to be unique and not necessarily point to
6557 // anything. It could be the pointer to the outlined function that implements
6558 // the target region, but we don't use that, so the compiler doesn't need to
6559 // keep it around and can therefore inline the host function if that proves
6560 // worthwhile during optimization. On the other hand, if emitting code for the
6561 // device, the ID has to be the function address so that it can be retrieved
6562 // from the offloading entry and launched by the runtime library. We also give
6563 // the outlined function external linkage when emitting code for the device,
6564 // because these functions will be entry points into the device.
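// In short (an illustrative sketch of the two cases):
//   device: OutlinedFnID = bitcast of the outlined kernel function itself;
//   host:   OutlinedFnID = @<entry name>.region_id (modulo separators), a
//           weak constant i8 0 that merely provides a unique key.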
6565 6566 if (CGM.getLangOpts().OpenMPIsDevice) { 6567 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6568 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6569 OutlinedFn->setDSOLocal(false); 6570 if (CGM.getTriple().isAMDGCN()) 6571 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); 6572 } else { 6573 std::string Name = getName({EntryFnName, "region_id"}); 6574 OutlinedFnID = new llvm::GlobalVariable( 6575 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6576 llvm::GlobalValue::WeakAnyLinkage, 6577 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6578 } 6579 6580 // Register the information for the entry associated with this target region. 6581 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6582 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6583 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6584 6585 // Add NumTeams and ThreadLimit attributes to the outlined GPU function 6586 int32_t DefaultValTeams = -1; 6587 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); 6588 if (DefaultValTeams > 0) { 6589 OutlinedFn->addFnAttr("omp_target_num_teams", 6590 std::to_string(DefaultValTeams)); 6591 } 6592 int32_t DefaultValThreads = -1; 6593 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); 6594 if (DefaultValThreads > 0) { 6595 OutlinedFn->addFnAttr("omp_target_thread_limit", 6596 std::to_string(DefaultValThreads)); 6597 } 6598 6599 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); 6600 } 6601 6602 /// Checks if the expression is constant or does not have non-trivial function 6603 /// calls. 6604 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6605 // We can skip constant expressions. 6606 // We can skip expressions with trivial calls or simple expressions. 6607 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6608 !E->hasNonTrivialCall(Ctx)) && 6609 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6610 } 6611 6612 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6613 const Stmt *Body) { 6614 const Stmt *Child = Body->IgnoreContainers(); 6615 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6616 Child = nullptr; 6617 for (const Stmt *S : C->body()) { 6618 if (const auto *E = dyn_cast<Expr>(S)) { 6619 if (isTrivial(Ctx, E)) 6620 continue; 6621 } 6622 // Some of the statements can be ignored. 6623 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6624 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6625 continue; 6626 // Analyze declarations. 6627 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6628 if (llvm::all_of(DS->decls(), [](const Decl *D) { 6629 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6630 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6631 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6632 isa<UsingDirectiveDecl>(D) || 6633 isa<OMPDeclareReductionDecl>(D) || 6634 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6635 return true; 6636 const auto *VD = dyn_cast<VarDecl>(D); 6637 if (!VD) 6638 return false; 6639 return VD->hasGlobalStorage() || !VD->isUsed(); 6640 })) 6641 continue; 6642 } 6643 // Found multiple children - cannot get the one child only. 
6644 if (Child) 6645 return nullptr; 6646 Child = S; 6647 } 6648 if (Child) 6649 Child = Child->IgnoreContainers(); 6650 } 6651 return Child; 6652 } 6653 6654 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( 6655 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6656 int32_t &DefaultVal) { 6657 6658 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6659 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6660 "Expected target-based executable directive."); 6661 switch (DirectiveKind) { 6662 case OMPD_target: { 6663 const auto *CS = D.getInnermostCapturedStmt(); 6664 const auto *Body = 6665 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6666 const Stmt *ChildStmt = 6667 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6668 if (const auto *NestedDir = 6669 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6670 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6671 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6672 const Expr *NumTeams = 6673 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6674 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6675 if (auto Constant = 6676 NumTeams->getIntegerConstantExpr(CGF.getContext())) 6677 DefaultVal = Constant->getExtValue(); 6678 return NumTeams; 6679 } 6680 DefaultVal = 0; 6681 return nullptr; 6682 } 6683 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6684 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { 6685 DefaultVal = 1; 6686 return nullptr; 6687 } 6688 DefaultVal = 1; 6689 return nullptr; 6690 } 6691 // A value of -1 is used to check if we need to emit no teams region 6692 DefaultVal = -1; 6693 return nullptr; 6694 } 6695 case OMPD_target_teams: 6696 case OMPD_target_teams_distribute: 6697 case OMPD_target_teams_distribute_simd: 6698 case OMPD_target_teams_distribute_parallel_for: 6699 case OMPD_target_teams_distribute_parallel_for_simd: { 6700 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6701 const Expr *NumTeams = 6702 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6703 if (NumTeams->isIntegerConstantExpr(CGF.getContext())) 6704 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) 6705 DefaultVal = Constant->getExtValue(); 6706 return NumTeams; 6707 } 6708 DefaultVal = 0; 6709 return nullptr; 6710 } 6711 case OMPD_target_parallel: 6712 case OMPD_target_parallel_for: 6713 case OMPD_target_parallel_for_simd: 6714 case OMPD_target_simd: 6715 DefaultVal = 1; 6716 return nullptr; 6717 case OMPD_parallel: 6718 case OMPD_for: 6719 case OMPD_parallel_for: 6720 case OMPD_parallel_master: 6721 case OMPD_parallel_sections: 6722 case OMPD_for_simd: 6723 case OMPD_parallel_for_simd: 6724 case OMPD_cancel: 6725 case OMPD_cancellation_point: 6726 case OMPD_ordered: 6727 case OMPD_threadprivate: 6728 case OMPD_allocate: 6729 case OMPD_task: 6730 case OMPD_simd: 6731 case OMPD_tile: 6732 case OMPD_unroll: 6733 case OMPD_sections: 6734 case OMPD_section: 6735 case OMPD_single: 6736 case OMPD_master: 6737 case OMPD_critical: 6738 case OMPD_taskyield: 6739 case OMPD_barrier: 6740 case OMPD_taskwait: 6741 case OMPD_taskgroup: 6742 case OMPD_atomic: 6743 case OMPD_flush: 6744 case OMPD_depobj: 6745 case OMPD_scan: 6746 case OMPD_teams: 6747 case OMPD_target_data: 6748 case OMPD_target_exit_data: 6749 case OMPD_target_enter_data: 6750 case OMPD_distribute: 6751 case OMPD_distribute_simd: 6752 case OMPD_distribute_parallel_for: 6753 case OMPD_distribute_parallel_for_simd: 6754 case 
OMPD_teams_distribute: 6755 case OMPD_teams_distribute_simd: 6756 case OMPD_teams_distribute_parallel_for: 6757 case OMPD_teams_distribute_parallel_for_simd: 6758 case OMPD_target_update: 6759 case OMPD_declare_simd: 6760 case OMPD_declare_variant: 6761 case OMPD_begin_declare_variant: 6762 case OMPD_end_declare_variant: 6763 case OMPD_declare_target: 6764 case OMPD_end_declare_target: 6765 case OMPD_declare_reduction: 6766 case OMPD_declare_mapper: 6767 case OMPD_taskloop: 6768 case OMPD_taskloop_simd: 6769 case OMPD_master_taskloop: 6770 case OMPD_master_taskloop_simd: 6771 case OMPD_parallel_master_taskloop: 6772 case OMPD_parallel_master_taskloop_simd: 6773 case OMPD_requires: 6774 case OMPD_metadirective: 6775 case OMPD_unknown: 6776 break; 6777 default: 6778 break; 6779 } 6780 llvm_unreachable("Unexpected directive kind."); 6781 } 6782 6783 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( 6784 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 6785 assert(!CGF.getLangOpts().OpenMPIsDevice && 6786 "Clauses associated with the teams directive expected to be emitted " 6787 "only for the host!"); 6788 CGBuilderTy &Bld = CGF.Builder; 6789 int32_t DefaultNT = -1; 6790 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); 6791 if (NumTeams != nullptr) { 6792 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6793 6794 switch (DirectiveKind) { 6795 case OMPD_target: { 6796 const auto *CS = D.getInnermostCapturedStmt(); 6797 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6798 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6799 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6800 /*IgnoreResultAssign*/ true); 6801 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6802 /*isSigned=*/true); 6803 } 6804 case OMPD_target_teams: 6805 case OMPD_target_teams_distribute: 6806 case OMPD_target_teams_distribute_simd: 6807 case OMPD_target_teams_distribute_parallel_for: 6808 case OMPD_target_teams_distribute_parallel_for_simd: { 6809 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6810 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, 6811 /*IgnoreResultAssign*/ true); 6812 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6813 /*isSigned=*/true); 6814 } 6815 default: 6816 break; 6817 } 6818 } else if (DefaultNT == -1) { 6819 return nullptr; 6820 } 6821 6822 return Bld.getInt32(DefaultNT); 6823 } 6824 6825 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6826 llvm::Value *DefaultThreadLimitVal) { 6827 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6828 CGF.getContext(), CS->getCapturedStmt()); 6829 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6830 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6831 llvm::Value *NumThreads = nullptr; 6832 llvm::Value *CondVal = nullptr; 6833 // Handle if clause. If if clause present, the number of threads is 6834 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
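// E.g. (illustrative) for an inner
//   #pragma omp parallel if(c) num_threads(n)
// this computes roughly c ? min(n, <default thread limit>) : 1, with the
// min() applied only when a default thread limit is available.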
6835 if (Dir->hasClausesOfKind<OMPIfClause>()) {
6836 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6837 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6838 const OMPIfClause *IfClause = nullptr;
6839 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6840 if (C->getNameModifier() == OMPD_unknown ||
6841 C->getNameModifier() == OMPD_parallel) {
6842 IfClause = C;
6843 break;
6844 }
6845 }
6846 if (IfClause) {
6847 const Expr *Cond = IfClause->getCondition();
6848 bool Result;
6849 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6850 if (!Result)
6851 return CGF.Builder.getInt32(1);
6852 } else {
6853 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6854 if (const auto *PreInit =
6855 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6856 for (const auto *I : PreInit->decls()) {
6857 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6858 CGF.EmitVarDecl(cast<VarDecl>(*I));
6859 } else {
6860 CodeGenFunction::AutoVarEmission Emission =
6861 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6862 CGF.EmitAutoVarCleanups(Emission);
6863 }
6864 }
6865 }
6866 CondVal = CGF.EvaluateExprAsBool(Cond);
6867 }
6868 }
6869 }
6870 // Check the value of the num_threads clause only if the if clause was not
6871 // specified or did not evaluate to false.
6872 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6873 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6874 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6875 const auto *NumThreadsClause =
6876 Dir->getSingleClause<OMPNumThreadsClause>();
6877 CodeGenFunction::LexicalScope Scope(
6878 CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6879 if (const auto *PreInit =
6880 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6881 for (const auto *I : PreInit->decls()) {
6882 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6883 CGF.EmitVarDecl(cast<VarDecl>(*I));
6884 } else {
6885 CodeGenFunction::AutoVarEmission Emission =
6886 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6887 CGF.EmitAutoVarCleanups(Emission);
6888 }
6889 }
6890 }
6891 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6892 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6893 /*isSigned=*/false);
6894 if (DefaultThreadLimitVal)
6895 NumThreads = CGF.Builder.CreateSelect(
6896 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6897 DefaultThreadLimitVal, NumThreads);
6898 } else {
6899 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6900 : CGF.Builder.getInt32(0);
6901 }
6902 // Process the condition of the if clause.
6903 if (CondVal) {
6904 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6905 CGF.Builder.getInt32(1));
6906 }
6907 return NumThreads;
6908 }
6909 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6910 return CGF.Builder.getInt32(1);
6911 return DefaultThreadLimitVal;
6912 }
6913 return DefaultThreadLimitVal ?
DefaultThreadLimitVal 6914 : CGF.Builder.getInt32(0); 6915 } 6916 6917 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( 6918 CodeGenFunction &CGF, const OMPExecutableDirective &D, 6919 int32_t &DefaultVal) { 6920 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6921 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6922 "Expected target-based executable directive."); 6923 6924 switch (DirectiveKind) { 6925 case OMPD_target: 6926 // Teams have no clause thread_limit 6927 return nullptr; 6928 case OMPD_target_teams: 6929 case OMPD_target_teams_distribute: 6930 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6931 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6932 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit(); 6933 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6934 if (auto Constant = 6935 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6936 DefaultVal = Constant->getExtValue(); 6937 return ThreadLimit; 6938 } 6939 return nullptr; 6940 case OMPD_target_parallel: 6941 case OMPD_target_parallel_for: 6942 case OMPD_target_parallel_for_simd: 6943 case OMPD_target_teams_distribute_parallel_for: 6944 case OMPD_target_teams_distribute_parallel_for_simd: { 6945 Expr *ThreadLimit = nullptr; 6946 Expr *NumThreads = nullptr; 6947 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6948 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6949 ThreadLimit = ThreadLimitClause->getThreadLimit(); 6950 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext())) 6951 if (auto Constant = 6952 ThreadLimit->getIntegerConstantExpr(CGF.getContext())) 6953 DefaultVal = Constant->getExtValue(); 6954 } 6955 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6956 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6957 NumThreads = NumThreadsClause->getNumThreads(); 6958 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) { 6959 if (auto Constant = 6960 NumThreads->getIntegerConstantExpr(CGF.getContext())) { 6961 if (Constant->getExtValue() < DefaultVal) { 6962 DefaultVal = Constant->getExtValue(); 6963 ThreadLimit = NumThreads; 6964 } 6965 } 6966 } 6967 } 6968 return ThreadLimit; 6969 } 6970 case OMPD_target_teams_distribute_simd: 6971 case OMPD_target_simd: 6972 DefaultVal = 1; 6973 return nullptr; 6974 case OMPD_parallel: 6975 case OMPD_for: 6976 case OMPD_parallel_for: 6977 case OMPD_parallel_master: 6978 case OMPD_parallel_sections: 6979 case OMPD_for_simd: 6980 case OMPD_parallel_for_simd: 6981 case OMPD_cancel: 6982 case OMPD_cancellation_point: 6983 case OMPD_ordered: 6984 case OMPD_threadprivate: 6985 case OMPD_allocate: 6986 case OMPD_task: 6987 case OMPD_simd: 6988 case OMPD_tile: 6989 case OMPD_unroll: 6990 case OMPD_sections: 6991 case OMPD_section: 6992 case OMPD_single: 6993 case OMPD_master: 6994 case OMPD_critical: 6995 case OMPD_taskyield: 6996 case OMPD_barrier: 6997 case OMPD_taskwait: 6998 case OMPD_taskgroup: 6999 case OMPD_atomic: 7000 case OMPD_flush: 7001 case OMPD_depobj: 7002 case OMPD_scan: 7003 case OMPD_teams: 7004 case OMPD_target_data: 7005 case OMPD_target_exit_data: 7006 case OMPD_target_enter_data: 7007 case OMPD_distribute: 7008 case OMPD_distribute_simd: 7009 case OMPD_distribute_parallel_for: 7010 case OMPD_distribute_parallel_for_simd: 7011 case OMPD_teams_distribute: 7012 case OMPD_teams_distribute_simd: 7013 case OMPD_teams_distribute_parallel_for: 7014 case OMPD_teams_distribute_parallel_for_simd: 7015 case OMPD_target_update: 7016 case 
OMPD_declare_simd: 7017 case OMPD_declare_variant: 7018 case OMPD_begin_declare_variant: 7019 case OMPD_end_declare_variant: 7020 case OMPD_declare_target: 7021 case OMPD_end_declare_target: 7022 case OMPD_declare_reduction: 7023 case OMPD_declare_mapper: 7024 case OMPD_taskloop: 7025 case OMPD_taskloop_simd: 7026 case OMPD_master_taskloop: 7027 case OMPD_master_taskloop_simd: 7028 case OMPD_parallel_master_taskloop: 7029 case OMPD_parallel_master_taskloop_simd: 7030 case OMPD_requires: 7031 case OMPD_unknown: 7032 break; 7033 default: 7034 break; 7035 } 7036 llvm_unreachable("Unsupported directive kind."); 7037 } 7038 7039 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( 7040 CodeGenFunction &CGF, const OMPExecutableDirective &D) { 7041 assert(!CGF.getLangOpts().OpenMPIsDevice && 7042 "Clauses associated with the teams directive expected to be emitted " 7043 "only for the host!"); 7044 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7045 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7046 "Expected target-based executable directive."); 7047 CGBuilderTy &Bld = CGF.Builder; 7048 llvm::Value *ThreadLimitVal = nullptr; 7049 llvm::Value *NumThreadsVal = nullptr; 7050 switch (DirectiveKind) { 7051 case OMPD_target: { 7052 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7053 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7054 return NumThreads; 7055 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7056 CGF.getContext(), CS->getCapturedStmt()); 7057 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7058 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7059 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7060 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7061 const auto *ThreadLimitClause = 7062 Dir->getSingleClause<OMPThreadLimitClause>(); 7063 CodeGenFunction::LexicalScope Scope( 7064 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7065 if (const auto *PreInit = 7066 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7067 for (const auto *I : PreInit->decls()) { 7068 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7069 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7070 } else { 7071 CodeGenFunction::AutoVarEmission Emission = 7072 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7073 CGF.EmitAutoVarCleanups(Emission); 7074 } 7075 } 7076 } 7077 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7078 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7079 ThreadLimitVal = 7080 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7081 } 7082 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7083 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7084 CS = Dir->getInnermostCapturedStmt(); 7085 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7086 CGF.getContext(), CS->getCapturedStmt()); 7087 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7088 } 7089 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7090 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7091 CS = Dir->getInnermostCapturedStmt(); 7092 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7093 return NumThreads; 7094 } 7095 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7096 return Bld.getInt32(1); 7097 } 7098 return ThreadLimitVal ? 
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the 'if' clause. If it is present, the number of threads is
    // computed as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
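    // Illustrative sketch (assumption, matching the formula above): for
    //   #pragma omp target parallel if(c) num_threads(n) thread_limit(l)
    // this emits roughly c ? min(n, l) : 1, with 0 standing in when neither
    // clause provides an explicit value.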
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      ThreadLimitVal =
          ThreadLimitVal
              ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                   ThreadLimitVal),
                                 NumThreadsVal, ThreadLimitVal)
              : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    // Increment and decrement a separate reference counter so that the data
    // cannot be unmapped within the associated region. Thus, this flag is
    // intended to be used on 'target' and 'target data' directives because
    // they are inherently structured. It is not intended to be used on
    // 'target enter data' and 'target exit data' directives because they are
    // inherently dynamic.
    // This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in the target update directive.
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is a member of
    /// some struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
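  /// For the OMP_MAP_MEMBER_OF mask above (0xffff000000000000) this is the
  /// number of trailing zero bits, i.e. 48.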
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that holds debugging information for a data mapping to be passed
  /// to the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to
  /// the runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types,
  /// user-defined mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
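    /// All parallel arrays (Exprs, BasePointers, Pointers, Sizes, Types,
    /// Mappers) and the NonContigInfo arrays are concatenated element-wise,
    /// so both infos stay index-aligned.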
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from which the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of
    // relying on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, then we are using the whole length of
      // the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
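      // (For example, the trailing '[2]' in 'arr[0:2][2]', which Clang
      // parses as an array section; see the similar note in the
      // non-contiguous handling below.)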
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base) - lb * sizeof(elem), clamped at zero.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
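  /// For example (illustrative): with 'int a[10]' and a runtime value 'n',
  /// 'a[0:n]' is final, while 'a[i]' (no colon) and 'a[3:1]' (constant
  /// length one) are not.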
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a
    // capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list
    // of components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer; it
        // can be associated with the combined storage if shared memory mode
        // is active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF
    // some combined entry (for partial structs). Only the first PTR_AND_OBJ
    // entry in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    //   struct S2 s;
    //   s.ps->ps->ps->f[:]
    //   (1)  (2)  (3)  (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct.
    // ps(3) is the pointee of ps(2), which is not a member of struct s, so it
    // should not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a
    // component in the component list which is a member expression. Useful
    // when we have a pointer or a final array section, in which case it is
    // the previous component in the list which tells us whether we have a
    // member expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells
    // us whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be
        // marked as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer, or if it is an array
      // section whose length can't be proved to be one. If this is a pointer,
      // it becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                                    CGF.getContext().getTypeAlignInChars(
                                        OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we
        // don't need to create any entry for it - it will be combined with
        // the object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
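          // (Sketch of the scheme: the bitcopy loop below emits each
          // non-overlapped gap between consecutive overlapped components as
          // its own MEMBER_OF entry, then the tail up to the highest
          // element.)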
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers,
                             IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress(CGF);
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress(CGF);
                }
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData || IsMemberReference,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the
              // flag should be updated later with the correct value of
              // MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of
        // the mapped member. If the parent is "*this", then the value
        // declaration is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this
          // struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LowestElem};
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran over the whole component list - allocate space for the whole
    // record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // For supporting strides in array sections, we need to initialize the
    // first dimension size as 1, the first offset as 0, and the first count
    // as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect size information for each dimension and get the element size as
    // the first stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for the next iteration.
        if (ElementType) {
          // For the case of having a pointer as the base, we need to remove
          // one level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value except for the last dimension since we don't
      // need it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto DI = DimSizes.begin() + 1;
    // Product of dimension sizes.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for the non-contiguous map. Notice that offset, count, and
    // stride are only meaningful for array sections, so we insert a null for
    // anything other than an array section.
    // Also, the sizes of the offset, count, and stride arrays are not the
    // same as those of pointers, base_pointers, sizes, or dims. Instead, they
    // match the number of non-contiguous declarations in the target update
    // to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If the offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, all the
        // lower dimensions are constructed as array sections too. However,
        // for a case like arr[0:2][2], Clang constructs the inner dimension
        // as an array section even though it is not in array-section form
        // according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);
      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //        Offset  Count  Stride
      //    D0    0       1       4   (int)  <- dummy dimension
      //    D1    0       2       8   (2 * (1) * 4)
      //    D2    1       2      20   (1 * (1 * 5) * 4)
      //    D3    0       2     200   (2 * (1 * 5 * 4) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A firstprivate variable captured by reference will use only the
    // 'private ptr' and 'map to' flags. Return the right flags if the
    // captured declaration is known as firstprivate in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // For map(to: lambda): use the user-specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Shift the one-origin position left by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
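  // Illustration (hypothetical types, not emitted code): for
  //   struct B { int X; };
  //   struct D : B { int Y; };
  // getPlainLayout(D, Layout, /*AsBase=*/false) produces {B::X, D::Y}:
  // base-class fields are flattened first, then the record's own fields, in
  // layout order, skipping empty bases and bitfields.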
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                llvm::None, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(),
                std::get<2>(L), *EI);
        ++EI;
      }
    }
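    // Illustration (hypothetical clauses, not emitted code): given
    //   #pragma omp target update to(present: a) to(b)
    // the component list for 'a' is filed under MapKind::Present and the one
    // for 'b' under MapKind::Other, so present-modified entries are walked
    // before the rest when the buckets are traversed below.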
    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration. Exclude cases
            // where the base pointer is mapped as array subscript, array
            // section or array shaping. The base address is passed as a
            // pointer to base in this case and cannot be used as a base for
            // a use_device_ptr list item.
            if (CI != Data.end()) {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) ||
                  !VarD || VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer
        // this action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been
          // processed. Nonetheless, generateInfoForComponentList must be
          // called to take the pointer into account for the calculation of
          // the range of the partial struct.
          InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.Exprs.push_back(VD);
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }
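    // Illustration (hypothetical clause, not emitted code): for
    //   #pragma omp target data map(to: p[0:N]) use_device_ptr(p)
    // the existing map entry for 'p' is found above and simply marked with
    // ReturnDevicePointer; only if 'p' had no map entry at all would the
    // zero-size RETURN_PARAM entry be synthesized here.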
    // Look at the use_device_addr clause information and mark the existing
    // map entries as such. If there is no map information for an entry in
    // the use_device_addr list, we create one with map type 'alloc' and zero
    // size section. It is the user's fault if that was not mapped before. If
    // there is no map information and the pointer is a struct member, then
    // we defer the emission of that entry until the whole struct has been
    // processed.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look
        // into 'this', which maps to null in the map of map information.
        // Otherwise look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          bool Found = false;
          for (auto &Data : It->second) {
            auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
            // If we found a map entry, signal that the pointer has to be
            // returned and move on to the next declaration.
            if (CI != Data.end()) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            }
          }
          if (Found)
            continue;
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer
        // this action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been
          // processed. Nonetheless, generateInfoForComponentList must be
          // called to take the pointer into account for the calculation of
          // the range of the partial struct.
          InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.Exprs.push_back(VD);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ |
                                    OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
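  // Illustration (hypothetical user code, not emitted code): this second
  // constructor is the entry point used when emitting a user-defined mapper
  // such as
  //   #pragma omp declare mapper(struct S s) map(s, s.data[0:s.len])
  // where the handler walks the mapper's map clauses instead of a
  // directive's.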
  /// Generate code for the combined entry if we have a partially mapped
  /// struct and take care of the mapping flags of the arguments corresponding
  /// to individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct.
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element.
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element).
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // The map type is TARGET_PARAM only when we are generating info for the
    // captures, i.e. when NotTargetParams is false.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove the TARGET_PARAM flag from the first element.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it
    // won't be unmapped by an extra dynamic reference count decrement. Add it
    // to all elements as well so the runtime knows which reference count to
    // check when determining whether it's time for device-to-host transfers
    // of individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
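  // Illustration (hypothetical layout, not emitted code): for
  //   struct S { int A; int B; int C; } s;
  // with map(to: s.A) map(to: s.C), the combined entry spans from &s.A to
  // &s.C + 1, i.e. Size == 12 bytes on a typical 4-byte-int target, and the
  // member entries are tagged MEMBER_OF the combined entry's position.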
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended
  /// to the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of a user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
  }

  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set the correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set the correct member_of index for all implicit lambda captures.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder
      // value 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // For map(to: lambda): skip it here, it is processed in
    // generateDefaultMapInfo.
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The expression is not valid if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // We have found overlapping elements if, for at least one component,
        // we reached the head of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section,
          // dereference, etc.), it is not an overlap.
          // Likewise, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements orders before a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }
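    // Illustration (hypothetical clauses, not emitted code): for
    //   struct S { int *P; int X; } s;
    // mapped as map(tofrom: s) map(to: s.X), the component list for 's'
    // becomes the base and the list for 's.X' is recorded in OverlappedData
    // as an overlapped element, so the base mapping can be split around the
    // member when the component lists are emitted below.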
    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through the other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime that captures passed by value
        // are not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than the first map flag that is added for all implicit
        // maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add a flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //   uint64_t offset;
  //   uint64_t count;
  //   uint64_t stride;
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
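  // Illustration (not emitted code): conceptually, for each non-contiguous
  // base the loop below materializes a stack array like
  //   struct descriptor_dim dims[NumDims]; // one {offset, count, stride}
  //                                        // triple per dimension
  // and stores its address into the corresponding offload_ptrs slot, which
  // is how the runtime receives the shape of a strided section.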
  // We need two index variables here since the size of "Dims" is the same as
  // the size of Components; however, the sizes of offset, count, and stride
  // equal the number of base declarations that are non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting IR if the dimension size is 1, since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}

// Try to extract the base declaration from a `this->x` expression if
// possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr();

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
    OS.flush();
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(),
                                         PLoc.getLine(), PLoc.getColumn());
}
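// Illustration (assumed string format, not emitted code): for a clause such
// as map(from: arr[0:2]) at file.c:10:3, the constant produced above is a
// source-location record embedding the pretty-printed expression, roughly
// ";file.c;arr[0:2];10;3;;", which the runtime can surface in diagnostics
// and profiling output.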
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise
    // we need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] &
             MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The map names array is only built if debug information is requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the
    // end of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], CGM.Int64Ty,
                                      /*isSigned=*/true),
            SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}

namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace
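// Illustration (assumed runtime interface, not part of this function): the
// array arguments produced below are ultimately forwarded to libomptarget
// entry points such as
//   __tgt_target_data_begin_mapper(loc, device_id, arg_num, args_base, args,
//                                  arg_sizes, arg_types, arg_names,
//                                  arg_mappers);
// with one element per map entry, in matching order across all arrays.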
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead
/// of the beginning.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the map names array argument if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization.
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}

/// Check for an inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
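/// As an illustrative (not normative) example, a declaration such as
/// \code
/// struct S { int len; double *data; };
/// #pragma omp declare mapper(id : struct S s) map(s, s.data[0 : s.len])
/// \endcode
/// is lowered to one such '.omp_mapper.' function, which pushes the listed
/// components for every element of the mapped array section.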
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
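    // For example (sketch): a member the mapper declares 'tofrom' decays to
    // 'to' when only the OMP_MAP_TO bit is set in \a MapType, i.e. the code
    // below takes the ToBB path and clears OMP_MAP_FROM, matching the 'to'
    // column of the table above.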
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
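/// Roughly (illustrative sketch of the guard built below):
/// \code
/// Emit = IsInit ? (IsArray || (Base != Begin && IsPtrAndObj)) && !IsDelete
///               : IsArray && IsDelete;
/// \endcode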
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}
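/// Emit a call to __kmpc_push_target_tripcount_mapper informing the runtime
/// of the trip count of the associated (possibly nested) distribute loop, if
/// one can be determined.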
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit the trip count for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region;
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //   'target teams'
    //   'target' / 'teams'
    //   'target teams distribute parallel for'
    //   'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply calls the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
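    // For example (illustrative only): '#pragma omp target teams' produces a
    // __tgt_target_teams_mapper call with the emitted NumTeams/NumThreads
    // values below, while a bare '#pragma omp target' takes the
    // __tgt_target_mapper path with no team or thread arguments.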
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using
      // teams but no clauses, these two values will be the default that should
      // be passed to the runtime library - a 32-bit integer with the value
      // zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer(),
          NumTeams,
          NumThreads};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      SmallVector<llvm::Value *> OffloadingArgs = {
          RTLoc,
          DeviceID,
          OutlinedFnID,
          PointerNum,
          InputInfo.BasePointersArray.getPointer(),
          InputInfo.PointersArray.getPointer(),
          InputInfo.SizesArray.getPointer(),
          MapTypesArray,
          MapNamesArray,
          InputInfo.MappersArray.getPointer()};
      if (HasNowait) {
        // Add int32_t depNum = 0, void *depList = nullptr, int32_t
        // noAliasDepNum = 0, void *noAliasDepList = nullptr.
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
        OffloadingArgs.push_back(CGF.Builder.getInt32(0));
        OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
      }
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have
      // map information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise
        // we just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captured because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on
  // the host regardless of the conditional in the if clause if, e.g., the
  // user does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
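/// Recursively scan \p S for OpenMP target execution directives and emit a
/// device function for each one, using \p ParentName as part of the unique
/// target region entry lookup.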
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point?
    // If so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
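/// Returns true if \p VD is restricted by 'device_type' to the other
/// compilation side, e.g. (illustrative)
/// \code
/// #pragma omp declare target device_type(nohost)
/// void only_on_device();
/// #pragma omp end declare target
/// \endcode
/// which must not be emitted when compiling for the host.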
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must be 'link' or 'to' with unified "
           "memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}
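/// Process the clauses of a 'requires' directive, e.g. (illustrative)
/// \code
/// #pragma omp requires unified_shared_memory
/// #pragma omp requires atomic_default_mem_order(seq_cst)
/// \endcode
/// which record unified shared memory and a sequentially consistent default
/// atomic ordering, respectively.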
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
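/// Check whether \p VD carries a predefined OpenMP allocator, e.g.
/// (illustrative)
/// \code
/// int x;
/// #pragma omp allocate(x) allocator(omp_const_mem_alloc)
/// \endcode
/// and report the language address space to use for it in \p AS.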
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with "
                     "the static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an
    // error for mismatching requires clauses across compilation units that
    // don't contain at least one target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}
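/// For a construct such as (illustrative)
/// \code
/// #pragma omp teams num_teams(4) thread_limit(64)
/// \endcode
/// this emits __kmpc_push_num_teams(&loc, global_tid, 4, 64); a missing
/// clause is passed as zero.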
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit).
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
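/// For a 'target data' region, the emission below brackets the region body
/// with paired runtime calls, roughly (sketch):
/// \code
/// __tgt_target_data_begin_mapper(...);
/// <region body> // possibly emitted twice when device pointers are privatized
/// __tgt_target_data_end_mapper(...);
/// \endcode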
11230 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 11231 PrePostActionTy &) { 11232 assert(Info.isValid() && "Invalid data environment closing arguments."); 11233 11234 llvm::Value *BasePointersArrayArg = nullptr; 11235 llvm::Value *PointersArrayArg = nullptr; 11236 llvm::Value *SizesArrayArg = nullptr; 11237 llvm::Value *MapTypesArrayArg = nullptr; 11238 llvm::Value *MapNamesArrayArg = nullptr; 11239 llvm::Value *MappersArrayArg = nullptr; 11240 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 11241 SizesArrayArg, MapTypesArrayArg, 11242 MapNamesArrayArg, MappersArrayArg, Info, 11243 {/*ForEndCall=*/true}); 11244 11245 // Emit device ID if any. 11246 llvm::Value *DeviceID = nullptr; 11247 if (Device) { 11248 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11249 CGF.Int64Ty, /*isSigned=*/true); 11250 } else { 11251 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11252 } 11253 11254 // Emit the number of elements in the offloading arrays. 11255 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 11256 11257 // Source location for the ident struct 11258 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11259 11260 llvm::Value *OffloadingArgs[] = {RTLoc, 11261 DeviceID, 11262 PointerNum, 11263 BasePointersArrayArg, 11264 PointersArrayArg, 11265 SizesArrayArg, 11266 MapTypesArrayArg, 11267 MapNamesArrayArg, 11268 MappersArrayArg}; 11269 CGF.EmitRuntimeCall( 11270 OMPBuilder.getOrCreateRuntimeFunction( 11271 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 11272 OffloadingArgs); 11273 }; 11274 11275 // If we need device pointer privatization, we need to emit the body of the 11276 // region with no privatization in the 'else' branch of the conditional. 11277 // Otherwise, we don't have to do anything. 11278 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 11279 PrePostActionTy &) { 11280 if (!Info.CaptureDeviceAddrMap.empty()) { 11281 CodeGen.setAction(NoPrivAction); 11282 CodeGen(CGF); 11283 } 11284 }; 11285 11286 // We don't have to do anything to close the region if the if clause evaluates 11287 // to false. 11288 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 11289 11290 if (IfCond) { 11291 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 11292 } else { 11293 RegionCodeGenTy RCG(BeginThenGen); 11294 RCG(CGF); 11295 } 11296 11297 // If we don't require privatization of device pointers, we emit the body in 11298 // between the runtime calls. This avoids duplicating the body code. 11299 if (Info.CaptureDeviceAddrMap.empty()) { 11300 CodeGen.setAction(NoPrivAction); 11301 CodeGen(CGF); 11302 } 11303 11304 if (IfCond) { 11305 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 11306 } else { 11307 RegionCodeGenTy RCG(EndThenGen); 11308 RCG(CGF); 11309 } 11310 } 11311 11312 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 11313 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11314 const Expr *Device) { 11315 if (!CGF.HaveInsertPoint()) 11316 return; 11317 11318 assert((isa<OMPTargetEnterDataDirective>(D) || 11319 isa<OMPTargetExitDataDirective>(D) || 11320 isa<OMPTargetUpdateDirective>(D)) && 11321 "Expecting either target enter, exit data, or update directives."); 11322 11323 CodeGenFunction::OMPTargetDataInfo InputInfo; 11324 llvm::Value *MapTypesArray = nullptr; 11325 llvm::Value *MapNamesArray = nullptr; 11326 // Generate the code for the opening of the data environment. 
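// For these standalone directives the lambda emits the single runtime call
// itself; the switch below selects the matching entry point. E.g.
// (illustrative) '#pragma omp target update to(x) nowait' lowers to
// __tgt_target_data_update_nowait_mapper.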
11327 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 11328 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 11329 // Emit device ID if any. 11330 llvm::Value *DeviceID = nullptr; 11331 if (Device) { 11332 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 11333 CGF.Int64Ty, /*isSigned=*/true); 11334 } else { 11335 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 11336 } 11337 11338 // Emit the number of elements in the offloading arrays. 11339 llvm::Constant *PointerNum = 11340 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 11341 11342 // Source location for the ident struct 11343 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 11344 11345 llvm::Value *OffloadingArgs[] = {RTLoc, 11346 DeviceID, 11347 PointerNum, 11348 InputInfo.BasePointersArray.getPointer(), 11349 InputInfo.PointersArray.getPointer(), 11350 InputInfo.SizesArray.getPointer(), 11351 MapTypesArray, 11352 MapNamesArray, 11353 InputInfo.MappersArray.getPointer()}; 11354 11355 // Select the right runtime function call for each standalone 11356 // directive. 11357 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 11358 RuntimeFunction RTLFn; 11359 switch (D.getDirectiveKind()) { 11360 case OMPD_target_enter_data: 11361 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 11362 : OMPRTL___tgt_target_data_begin_mapper; 11363 break; 11364 case OMPD_target_exit_data: 11365 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 11366 : OMPRTL___tgt_target_data_end_mapper; 11367 break; 11368 case OMPD_target_update: 11369 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper 11370 : OMPRTL___tgt_target_data_update_mapper; 11371 break; 11372 case OMPD_parallel: 11373 case OMPD_for: 11374 case OMPD_parallel_for: 11375 case OMPD_parallel_master: 11376 case OMPD_parallel_sections: 11377 case OMPD_for_simd: 11378 case OMPD_parallel_for_simd: 11379 case OMPD_cancel: 11380 case OMPD_cancellation_point: 11381 case OMPD_ordered: 11382 case OMPD_threadprivate: 11383 case OMPD_allocate: 11384 case OMPD_task: 11385 case OMPD_simd: 11386 case OMPD_tile: 11387 case OMPD_unroll: 11388 case OMPD_sections: 11389 case OMPD_section: 11390 case OMPD_single: 11391 case OMPD_master: 11392 case OMPD_critical: 11393 case OMPD_taskyield: 11394 case OMPD_barrier: 11395 case OMPD_taskwait: 11396 case OMPD_taskgroup: 11397 case OMPD_atomic: 11398 case OMPD_flush: 11399 case OMPD_depobj: 11400 case OMPD_scan: 11401 case OMPD_teams: 11402 case OMPD_target_data: 11403 case OMPD_distribute: 11404 case OMPD_distribute_simd: 11405 case OMPD_distribute_parallel_for: 11406 case OMPD_distribute_parallel_for_simd: 11407 case OMPD_teams_distribute: 11408 case OMPD_teams_distribute_simd: 11409 case OMPD_teams_distribute_parallel_for: 11410 case OMPD_teams_distribute_parallel_for_simd: 11411 case OMPD_declare_simd: 11412 case OMPD_declare_variant: 11413 case OMPD_begin_declare_variant: 11414 case OMPD_end_declare_variant: 11415 case OMPD_declare_target: 11416 case OMPD_end_declare_target: 11417 case OMPD_declare_reduction: 11418 case OMPD_declare_mapper: 11419 case OMPD_taskloop: 11420 case OMPD_taskloop_simd: 11421 case OMPD_master_taskloop: 11422 case OMPD_master_taskloop_simd: 11423 case OMPD_parallel_master_taskloop: 11424 case OMPD_parallel_master_taskloop_simd: 11425 case OMPD_target: 11426 case OMPD_target_simd: 11427 case OMPD_target_teams_distribute: 11428 case OMPD_target_teams_distribute_simd: 11429 case OMPD_target_teams_distribute_parallel_for: 11430 
case OMPD_target_teams_distribute_parallel_for_simd:
11431 case OMPD_target_teams:
11432 case OMPD_target_parallel:
11433 case OMPD_target_parallel_for:
11434 case OMPD_target_parallel_for_simd:
11435 case OMPD_requires:
11436 case OMPD_metadirective:
11437 case OMPD_unknown:
11438 default:
11439 llvm_unreachable("Unexpected standalone target data directive.");
11440 break;
11441 }
11442 CGF.EmitRuntimeCall(
11443 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11444 OffloadingArgs);
11445 };
11446
11447 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11448 &MapNamesArray](CodeGenFunction &CGF,
11449 PrePostActionTy &) {
11450 // Fill up the arrays with all the mapped variables.
11451 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11452
11453 // Get map clause information.
11454 MappableExprsHandler MEHandler(D, CGF);
11455 MEHandler.generateAllInfo(CombinedInfo);
11456
11457 TargetDataInfo Info;
11458 // Fill up the arrays and create the arguments.
11459 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11460 /*IsNonContiguous=*/true);
11461 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11462 D.hasClausesOfKind<OMPNowaitClause>();
11463 emitOffloadingArraysArgument(
11464 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11465 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11466 {/*ForEndTask=*/false});
11467 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11468 InputInfo.BasePointersArray =
11469 Address(Info.BasePointersArray, CGM.getPointerAlign());
11470 InputInfo.PointersArray =
11471 Address(Info.PointersArray, CGM.getPointerAlign());
11472 InputInfo.SizesArray =
11473 Address(Info.SizesArray, CGM.getPointerAlign());
11474 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
11475 MapTypesArray = Info.MapTypesArray;
11476 MapNamesArray = Info.MapNamesArray;
11477 if (RequiresOuterTask)
11478 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11479 else
11480 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11481 };
11482
11483 if (IfCond) {
11484 emitIfClause(CGF, IfCond, TargetThenGen,
11485 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11486 } else {
11487 RegionCodeGenTy ThenRCG(TargetThenGen);
11488 ThenRCG(CGF);
11489 }
11490 }
11491
11492 namespace {
11493 /// Kind of parameter in a function with 'declare simd' directive.
11494 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
11495 /// Attribute set of the parameter.
11496 struct ParamAttrTy {
11497 ParamKindTy Kind = Vector;
11498 llvm::APSInt StrideOrArg;
11499 llvm::APSInt Alignment;
11500 };
11501 } // namespace
11502
11503 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11504 ArrayRef<ParamAttrTy> ParamAttrs) {
11505 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11506 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11507 // of that clause. The VLEN value must be a power of 2.
11508 // Otherwise, the notion of the function's "characteristic data type" (CDT)
11509 // is used to compute the vector length.
11510 // CDT is defined in the following order:
11511 // a) For a non-void function, the CDT is the return type.
11512 // b) If the function has any non-uniform, non-linear parameters, then the
11513 // CDT is the type of the first such parameter.
11514 // c) If the CDT determined by a) or b) above is a struct, union, or class
11515 // type which is pass-by-value (except for the type that maps to the
11516 // built-in complex data type), the characteristic data type is int.
11517 // d) If none of the above three cases is applicable, the CDT is int.
11518 // The VLEN is then determined based on the CDT and the size of the vector
11519 // register of the ISA for which the current vector version is generated. The
11520 // VLEN is computed using the formula below:
11521 // VLEN = sizeof(vector_register) / sizeof(CDT),
11522 // where the vector register size is specified in section 3.2.1 Registers and
11523 // the Stack Frame of the original AMD64 ABI document.
11524 QualType RetType = FD->getReturnType();
11525 if (RetType.isNull())
11526 return 0;
11527 ASTContext &C = FD->getASTContext();
11528 QualType CDT;
11529 if (!RetType.isNull() && !RetType->isVoidType()) {
11530 CDT = RetType;
11531 } else {
11532 unsigned Offset = 0;
11533 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11534 if (ParamAttrs[Offset].Kind == Vector)
11535 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11536 ++Offset;
11537 }
11538 if (CDT.isNull()) {
11539 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11540 if (ParamAttrs[I + Offset].Kind == Vector) {
11541 CDT = FD->getParamDecl(I)->getType();
11542 break;
11543 }
11544 }
11545 }
11546 }
11547 if (CDT.isNull())
11548 CDT = C.IntTy;
11549 CDT = CDT->getCanonicalTypeUnqualified();
11550 if (CDT->isRecordType() || CDT->isUnionType())
11551 CDT = C.IntTy;
11552 return C.getTypeSize(CDT);
11553 }
11554
11555 static void
11556 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11557 const llvm::APSInt &VLENVal,
11558 ArrayRef<ParamAttrTy> ParamAttrs,
11559 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11560 struct ISADataTy {
11561 char ISA;
11562 unsigned VecRegSize;
11563 };
11564 ISADataTy ISAData[] = {
11565 {
11566 'b', 128
11567 }, // SSE
11568 {
11569 'c', 256
11570 }, // AVX
11571 {
11572 'd', 256
11573 }, // AVX2
11574 {
11575 'e', 512
11576 }, // AVX512
11577 };
11578 llvm::SmallVector<char, 2> Masked;
11579 switch (State) {
11580 case OMPDeclareSimdDeclAttr::BS_Undefined:
11581 Masked.push_back('N');
11582 Masked.push_back('M');
11583 break;
11584 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11585 Masked.push_back('N');
11586 break;
11587 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11588 Masked.push_back('M');
11589 break;
11590 }
11591 for (char Mask : Masked) {
11592 for (const ISADataTy &Data : ISAData) {
11593 SmallString<256> Buffer;
11594 llvm::raw_svector_ostream Out(Buffer);
11595 Out << "_ZGV" << Data.ISA << Mask;
11596 if (!VLENVal) {
11597 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11598 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11599 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11600 } else {
11601 Out << VLENVal;
11602 }
11603 for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11604 switch (ParamAttr.Kind) {
11605 case LinearWithVarStride:
11606 Out << 's' << ParamAttr.StrideOrArg;
11607 break;
11608 case Linear:
11609 Out << 'l';
11610 if (ParamAttr.StrideOrArg != 1)
11611 Out << ParamAttr.StrideOrArg;
11612 break;
11613 case Uniform:
11614 Out << 'u';
11615 break;
11616 case Vector:
11617 Out << 'v';
11618 break;
11619 }
11620 if (!!ParamAttr.Alignment)
11621 Out << 'a' << ParamAttr.Alignment;
11622 }
11623 Out << '_' << Fn->getName();
11624 Fn->addFnAttr(Out.str());
11625 }
11626 }
11627 }
11628
11629 // These are the functions
that are needed to mangle the name of the
11630 // vector functions generated by the compiler, according to the rules
11631 // defined in the "Vector Function ABI specifications for AArch64",
11632 // available at
11633 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11634
11635 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11636 ///
11637 /// TODO: Need to implement the behavior for references marked with a
11638 /// var or no linear modifiers (1.b in the section). For this, we
11639 /// need to extend ParamKindTy to support the linear modifiers.
11640 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11641 QT = QT.getCanonicalType();
11642
11643 if (QT->isVoidType())
11644 return false;
11645
11646 if (Kind == ParamKindTy::Uniform)
11647 return false;
11648
11649 if (Kind == ParamKindTy::Linear)
11650 return false;
11651
11652 // TODO: Handle linear references with modifiers
11653
11654 if (Kind == ParamKindTy::LinearWithVarStride)
11655 return false;
11656
11657 return true;
11658 }
11659
11660 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11661 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11662 QT = QT.getCanonicalType();
11663 unsigned Size = C.getTypeSize(QT);
11664
11665 // Only scalars and complex types at most 16 bytes wide set PBV to true.
11666 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11667 return false;
11668
11669 if (QT->isFloatingType())
11670 return true;
11671
11672 if (QT->isIntegerType())
11673 return true;
11674
11675 if (QT->isPointerType())
11676 return true;
11677
11678 // TODO: Add support for complex types (section 3.1.2, item 2).
11679
11680 return false;
11681 }
11682
11683 /// Computes the lane size (LS) of a return type or of an input parameter,
11684 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11685 /// TODO: Add support for references, section 3.2.1, item 1.
11686 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11687 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11688 QualType PTy = QT.getCanonicalType()->getPointeeType();
11689 if (getAArch64PBV(PTy, C))
11690 return C.getTypeSize(PTy);
11691 }
11692 if (getAArch64PBV(QT, C))
11693 return C.getTypeSize(QT);
11694
11695 return C.getTypeSize(C.getUIntPtrType());
11696 }
11697
11698 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11699 // signature of the scalar function, as defined in 3.2.2 of the
11700 // AAVFABI.
11701 static std::tuple<unsigned, unsigned, bool>
11702 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11703 QualType RetType = FD->getReturnType().getCanonicalType();
11704
11705 ASTContext &C = FD->getASTContext();
11706
11707 bool OutputBecomesInput = false;
11708
11709 llvm::SmallVector<unsigned, 8> Sizes;
11710 if (!RetType->isVoidType()) {
11711 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11712 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11713 OutputBecomesInput = true;
11714 }
11715 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11716 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11717 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11718 }
11719
11720 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11721 // The LS of a function parameter / return value can only be a power
11722 // of 2, starting from 8 bits, up to 128.
11723 assert(llvm::all_of(Sizes, 11724 [](unsigned Size) { 11725 return Size == 8 || Size == 16 || Size == 32 || 11726 Size == 64 || Size == 128; 11727 }) && 11728 "Invalid size"); 11729 11730 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11731 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11732 OutputBecomesInput); 11733 } 11734 11735 /// Mangle the parameter part of the vector function name according to 11736 /// their OpenMP classification. The mangling function is defined in 11737 /// section 3.5 of the AAVFABI. 11738 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11739 SmallString<256> Buffer; 11740 llvm::raw_svector_ostream Out(Buffer); 11741 for (const auto &ParamAttr : ParamAttrs) { 11742 switch (ParamAttr.Kind) { 11743 case LinearWithVarStride: 11744 Out << "ls" << ParamAttr.StrideOrArg; 11745 break; 11746 case Linear: 11747 Out << 'l'; 11748 // Don't print the step value if it is not present or if it is 11749 // equal to 1. 11750 if (ParamAttr.StrideOrArg != 1) 11751 Out << ParamAttr.StrideOrArg; 11752 break; 11753 case Uniform: 11754 Out << 'u'; 11755 break; 11756 case Vector: 11757 Out << 'v'; 11758 break; 11759 } 11760 11761 if (!!ParamAttr.Alignment) 11762 Out << 'a' << ParamAttr.Alignment; 11763 } 11764 11765 return std::string(Out.str()); 11766 } 11767 11768 // Function used to add the attribute. The parameter `VLEN` is 11769 // templated to allow the use of "x" when targeting scalable functions 11770 // for SVE. 11771 template <typename T> 11772 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11773 char ISA, StringRef ParSeq, 11774 StringRef MangledName, bool OutputBecomesInput, 11775 llvm::Function *Fn) { 11776 SmallString<256> Buffer; 11777 llvm::raw_svector_ostream Out(Buffer); 11778 Out << Prefix << ISA << LMask << VLEN; 11779 if (OutputBecomesInput) 11780 Out << "v"; 11781 Out << ParSeq << "_" << MangledName; 11782 Fn->addFnAttr(Out.str()); 11783 } 11784 11785 // Helper function to generate the Advanced SIMD names depending on 11786 // the value of the NDS when simdlen is not present. 11787 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11788 StringRef Prefix, char ISA, 11789 StringRef ParSeq, StringRef MangledName, 11790 bool OutputBecomesInput, 11791 llvm::Function *Fn) { 11792 switch (NDS) { 11793 case 8: 11794 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11795 OutputBecomesInput, Fn); 11796 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11797 OutputBecomesInput, Fn); 11798 break; 11799 case 16: 11800 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11801 OutputBecomesInput, Fn); 11802 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11803 OutputBecomesInput, Fn); 11804 break; 11805 case 32: 11806 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11807 OutputBecomesInput, Fn); 11808 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11809 OutputBecomesInput, Fn); 11810 break; 11811 case 64: 11812 case 128: 11813 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11814 OutputBecomesInput, Fn); 11815 break; 11816 default: 11817 llvm_unreachable("Scalar type is too wide."); 11818 } 11819 } 11820 11821 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
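/// For example (illustrative; the names follow from the mangling code below):
/// given
///   #pragma omp declare simd simdlen(2) notinbranch
///   double foo(double x);
/// the Advanced SIMD path adds the attribute "_ZGVnN2v_foo", while an SVE
/// build with no simdlen would instead add the scalable "_ZGVsMxv_foo".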
11822 static void emitAArch64DeclareSimdFunction(
11823 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11824 ArrayRef<ParamAttrTy> ParamAttrs,
11825 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11826 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11827
11828 // Get basic data for building the vector signature.
11829 const auto Data = getNDSWDS(FD, ParamAttrs);
11830 const unsigned NDS = std::get<0>(Data);
11831 const unsigned WDS = std::get<1>(Data);
11832 const bool OutputBecomesInput = std::get<2>(Data);
11833
11834 // Check the values provided via `simdlen` by the user.
11835 // 1. A `simdlen(1)` doesn't produce vector signatures.
11836 if (UserVLEN == 1) {
11837 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11838 DiagnosticsEngine::Warning,
11839 "The clause simdlen(1) has no effect when targeting aarch64.");
11840 CGM.getDiags().Report(SLoc, DiagID);
11841 return;
11842 }
11843
11844 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11845 // Advanced SIMD output.
11846 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11847 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11848 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11849 "power of 2 when targeting Advanced SIMD.");
11850 CGM.getDiags().Report(SLoc, DiagID);
11851 return;
11852 }
11853
11854 // 3. Section 3.4.1. SVE fixed length must obey the architectural
11855 // limits.
11856 if (ISA == 's' && UserVLEN != 0) {
11857 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11858 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11859 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11860 "lanes in the architectural constraints "
11861 "for SVE (min is 128-bit, max is "
11862 "2048-bit, by steps of 128-bit)");
11863 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11864 return;
11865 }
11866 }
11867
11868 // Sort out parameter sequence.
11869 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11870 StringRef Prefix = "_ZGV";
11871 // Generate simdlen from user input (if any).
11872 if (UserVLEN) {
11873 if (ISA == 's') {
11874 // SVE generates only a masked function.
11875 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11876 OutputBecomesInput, Fn);
11877 } else {
11878 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11879 // Advanced SIMD generates one or two functions, depending on
11880 // the `[not]inbranch` clause.
11881 switch (State) {
11882 case OMPDeclareSimdDeclAttr::BS_Undefined:
11883 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11884 OutputBecomesInput, Fn);
11885 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11886 OutputBecomesInput, Fn);
11887 break;
11888 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11889 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11890 OutputBecomesInput, Fn);
11891 break;
11892 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11893 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11894 OutputBecomesInput, Fn);
11895 break;
11896 }
11897 }
11898 } else {
11899 // If no user simdlen is provided, follow the AAVFABI rules for
11900 // generating the vector length.
11901 if (ISA == 's') {
11902 // SVE, section 3.4.1, item 1.
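// The 'x' token stands for an unspecified (scalable) vector length, and SVE
// variants are always masked, hence the fixed "M" mask here.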
11903 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11904 OutputBecomesInput, Fn); 11905 } else { 11906 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11907 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11908 // two vector names depending on the use of the clause 11909 // `[not]inbranch`. 11910 switch (State) { 11911 case OMPDeclareSimdDeclAttr::BS_Undefined: 11912 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11913 OutputBecomesInput, Fn); 11914 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11915 OutputBecomesInput, Fn); 11916 break; 11917 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11918 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11919 OutputBecomesInput, Fn); 11920 break; 11921 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11922 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11923 OutputBecomesInput, Fn); 11924 break; 11925 } 11926 } 11927 } 11928 } 11929 11930 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11931 llvm::Function *Fn) { 11932 ASTContext &C = CGM.getContext(); 11933 FD = FD->getMostRecentDecl(); 11934 // Map params to their positions in function decl. 11935 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11936 if (isa<CXXMethodDecl>(FD)) 11937 ParamPositions.try_emplace(FD, 0); 11938 unsigned ParamPos = ParamPositions.size(); 11939 for (const ParmVarDecl *P : FD->parameters()) { 11940 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11941 ++ParamPos; 11942 } 11943 while (FD) { 11944 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11945 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11946 // Mark uniform parameters. 11947 for (const Expr *E : Attr->uniforms()) { 11948 E = E->IgnoreParenImpCasts(); 11949 unsigned Pos; 11950 if (isa<CXXThisExpr>(E)) { 11951 Pos = ParamPositions[FD]; 11952 } else { 11953 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11954 ->getCanonicalDecl(); 11955 Pos = ParamPositions[PVD]; 11956 } 11957 ParamAttrs[Pos].Kind = Uniform; 11958 } 11959 // Get alignment info. 11960 auto NI = Attr->alignments_begin(); 11961 for (const Expr *E : Attr->aligneds()) { 11962 E = E->IgnoreParenImpCasts(); 11963 unsigned Pos; 11964 QualType ParmTy; 11965 if (isa<CXXThisExpr>(E)) { 11966 Pos = ParamPositions[FD]; 11967 ParmTy = E->getType(); 11968 } else { 11969 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11970 ->getCanonicalDecl(); 11971 Pos = ParamPositions[PVD]; 11972 ParmTy = PVD->getType(); 11973 } 11974 ParamAttrs[Pos].Alignment = 11975 (*NI) 11976 ? (*NI)->EvaluateKnownConstInt(C) 11977 : llvm::APSInt::getUnsigned( 11978 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11979 .getQuantity()); 11980 ++NI; 11981 } 11982 // Mark linear parameters. 11983 auto SI = Attr->steps_begin(); 11984 auto MI = Attr->modifiers_begin(); 11985 for (const Expr *E : Attr->linears()) { 11986 E = E->IgnoreParenImpCasts(); 11987 unsigned Pos; 11988 // Rescaling factor needed to compute the linear parameter 11989 // value in the mangled name. 
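// E.g. (illustrative) 'linear(p:2)' on a 'double *p' parameter: the step 2
// is rescaled by sizeof(double) == 8 below, so the parameter mangles as
// 'l16'.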
11990 unsigned PtrRescalingFactor = 1; 11991 if (isa<CXXThisExpr>(E)) { 11992 Pos = ParamPositions[FD]; 11993 } else { 11994 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11995 ->getCanonicalDecl(); 11996 Pos = ParamPositions[PVD]; 11997 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11998 PtrRescalingFactor = CGM.getContext() 11999 .getTypeSizeInChars(P->getPointeeType()) 12000 .getQuantity(); 12001 } 12002 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 12003 ParamAttr.Kind = Linear; 12004 // Assuming a stride of 1, for `linear` without modifiers. 12005 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 12006 if (*SI) { 12007 Expr::EvalResult Result; 12008 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 12009 if (const auto *DRE = 12010 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 12011 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 12012 ParamAttr.Kind = LinearWithVarStride; 12013 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 12014 ParamPositions[StridePVD->getCanonicalDecl()]); 12015 } 12016 } 12017 } else { 12018 ParamAttr.StrideOrArg = Result.Val.getInt(); 12019 } 12020 } 12021 // If we are using a linear clause on a pointer, we need to 12022 // rescale the value of linear_step with the byte size of the 12023 // pointee type. 12024 if (Linear == ParamAttr.Kind) 12025 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 12026 ++SI; 12027 ++MI; 12028 } 12029 llvm::APSInt VLENVal; 12030 SourceLocation ExprLoc; 12031 const Expr *VLENExpr = Attr->getSimdlen(); 12032 if (VLENExpr) { 12033 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 12034 ExprLoc = VLENExpr->getExprLoc(); 12035 } 12036 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 12037 if (CGM.getTriple().isX86()) { 12038 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 12039 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 12040 unsigned VLEN = VLENVal.getExtValue(); 12041 StringRef MangledName = Fn->getName(); 12042 if (CGM.getTarget().hasFeature("sve")) 12043 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12044 MangledName, 's', 128, Fn, ExprLoc); 12045 if (CGM.getTarget().hasFeature("neon")) 12046 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 12047 MangledName, 'n', 128, Fn, ExprLoc); 12048 } 12049 } 12050 FD = FD->getPreviousDecl(); 12051 } 12052 } 12053 12054 namespace { 12055 /// Cleanup action for doacross support. 
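/// It is pushed with NormalAndEHCleanup below, so __kmpc_doacross_fini is
/// called on both normal and exceptional exits from the loop region.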
12056 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 12057 public: 12058 static const int DoacrossFinArgs = 2; 12059 12060 private: 12061 llvm::FunctionCallee RTLFn; 12062 llvm::Value *Args[DoacrossFinArgs]; 12063 12064 public: 12065 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 12066 ArrayRef<llvm::Value *> CallArgs) 12067 : RTLFn(RTLFn) { 12068 assert(CallArgs.size() == DoacrossFinArgs); 12069 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 12070 } 12071 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 12072 if (!CGF.HaveInsertPoint()) 12073 return; 12074 CGF.EmitRuntimeCall(RTLFn, Args); 12075 } 12076 }; 12077 } // namespace 12078 12079 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12080 const OMPLoopDirective &D, 12081 ArrayRef<Expr *> NumIterations) { 12082 if (!CGF.HaveInsertPoint()) 12083 return; 12084 12085 ASTContext &C = CGM.getContext(); 12086 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 12087 RecordDecl *RD; 12088 if (KmpDimTy.isNull()) { 12089 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 12090 // kmp_int64 lo; // lower 12091 // kmp_int64 up; // upper 12092 // kmp_int64 st; // stride 12093 // }; 12094 RD = C.buildImplicitRecord("kmp_dim"); 12095 RD->startDefinition(); 12096 addFieldToRecordDecl(C, RD, Int64Ty); 12097 addFieldToRecordDecl(C, RD, Int64Ty); 12098 addFieldToRecordDecl(C, RD, Int64Ty); 12099 RD->completeDefinition(); 12100 KmpDimTy = C.getRecordType(RD); 12101 } else { 12102 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 12103 } 12104 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 12105 QualType ArrayTy = 12106 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 12107 12108 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 12109 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 12110 enum { LowerFD = 0, UpperFD, StrideFD }; 12111 // Fill dims with data. 
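// Only the 'up' and 'st' fields are written here; 'lo' keeps the zero stored
// by EmitNullInitialization above.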
12112 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 12113 LValue DimsLVal = CGF.MakeAddrLValue( 12114 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 12115 // dims.upper = num_iterations; 12116 LValue UpperLVal = CGF.EmitLValueForField( 12117 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 12118 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 12119 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 12120 Int64Ty, NumIterations[I]->getExprLoc()); 12121 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 12122 // dims.stride = 1; 12123 LValue StrideLVal = CGF.EmitLValueForField( 12124 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 12125 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 12126 StrideLVal); 12127 } 12128 12129 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 12130 // kmp_int32 num_dims, struct kmp_dim * dims); 12131 llvm::Value *Args[] = { 12132 emitUpdateLocation(CGF, D.getBeginLoc()), 12133 getThreadID(CGF, D.getBeginLoc()), 12134 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 12135 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12136 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 12137 CGM.VoidPtrTy)}; 12138 12139 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12140 CGM.getModule(), OMPRTL___kmpc_doacross_init); 12141 CGF.EmitRuntimeCall(RTLFn, Args); 12142 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 12143 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 12144 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 12145 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 12146 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 12147 llvm::makeArrayRef(FiniArgs)); 12148 } 12149 12150 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12151 const OMPDependClause *C) { 12152 QualType Int64Ty = 12153 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 12154 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 12155 QualType ArrayTy = CGM.getContext().getConstantArrayType( 12156 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 12157 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 12158 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 12159 const Expr *CounterVal = C->getLoopData(I); 12160 assert(CounterVal); 12161 llvm::Value *CntVal = CGF.EmitScalarConversion( 12162 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 12163 CounterVal->getExprLoc()); 12164 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 12165 /*Volatile=*/false, Int64Ty); 12166 } 12167 llvm::Value *Args[] = { 12168 emitUpdateLocation(CGF, C->getBeginLoc()), 12169 getThreadID(CGF, C->getBeginLoc()), 12170 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 12171 llvm::FunctionCallee RTLFn; 12172 if (C->getDependencyKind() == OMPC_DEPEND_source) { 12173 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12174 OMPRTL___kmpc_doacross_post); 12175 } else { 12176 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 12177 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 12178 OMPRTL___kmpc_doacross_wait); 12179 } 12180 CGF.EmitRuntimeCall(RTLFn, Args); 12181 } 12182 12183 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 12184 llvm::FunctionCallee Callee, 12185 ArrayRef<llvm::Value *> Args) const { 12186 assert(Loc.isValid() && "Outlined function call location 
must be valid."); 12187 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 12188 12189 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 12190 if (Fn->doesNotThrow()) { 12191 CGF.EmitNounwindRuntimeCall(Fn, Args); 12192 return; 12193 } 12194 } 12195 CGF.EmitRuntimeCall(Callee, Args); 12196 } 12197 12198 void CGOpenMPRuntime::emitOutlinedFunctionCall( 12199 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 12200 ArrayRef<llvm::Value *> Args) const { 12201 emitCall(CGF, Loc, OutlinedFn, Args); 12202 } 12203 12204 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 12205 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 12206 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 12207 HasEmittedDeclareTargetRegion = true; 12208 } 12209 12210 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 12211 const VarDecl *NativeParam, 12212 const VarDecl *TargetParam) const { 12213 return CGF.GetAddrOfLocalVar(NativeParam); 12214 } 12215 12216 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 12217 const VarDecl *VD) { 12218 if (!VD) 12219 return Address::invalid(); 12220 Address UntiedAddr = Address::invalid(); 12221 Address UntiedRealAddr = Address::invalid(); 12222 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12223 if (It != FunctionToUntiedTaskStackMap.end()) { 12224 const UntiedLocalVarsAddressesMap &UntiedData = 12225 UntiedLocalVarsStack[It->second]; 12226 auto I = UntiedData.find(VD); 12227 if (I != UntiedData.end()) { 12228 UntiedAddr = I->second.first; 12229 UntiedRealAddr = I->second.second; 12230 } 12231 } 12232 const VarDecl *CVD = VD->getCanonicalDecl(); 12233 if (CVD->hasAttr<OMPAllocateDeclAttr>()) { 12234 // Use the default allocation. 12235 if (!isAllocatableDecl(VD)) 12236 return UntiedAddr; 12237 llvm::Value *Size; 12238 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 12239 if (CVD->getType()->isVariablyModifiedType()) { 12240 Size = CGF.getTypeSize(CVD->getType()); 12241 // Align the size: ((size + align - 1) / align) * align 12242 Size = CGF.Builder.CreateNUWAdd( 12243 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 12244 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 12245 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 12246 } else { 12247 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 12248 Size = CGM.getSize(Sz.alignTo(Align)); 12249 } 12250 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 12251 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 12252 assert(AA->getAllocator() && 12253 "Expected allocator expression for non-default allocator."); 12254 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 12255 // According to the standard, the original allocator type is a enum 12256 // (integer). Convert to pointer type, if required. 
12257 Allocator = CGF.EmitScalarConversion(
12258 Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
12259 AA->getAllocator()->getExprLoc());
12260 llvm::Value *Args[] = {ThreadID, Size, Allocator};
12261
12262 llvm::Value *Addr =
12263 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
12264 CGM.getModule(), OMPRTL___kmpc_alloc),
12265 Args, getName({CVD->getName(), ".void.addr"}));
12266 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12267 CGM.getModule(), OMPRTL___kmpc_free);
12268 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12269 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12270 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12271 if (UntiedAddr.isValid())
12272 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12273
12274 // Cleanup action for allocate support.
12275 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12276 llvm::FunctionCallee RTLFn;
12277 SourceLocation::UIntTy LocEncoding;
12278 Address Addr;
12279 const Expr *Allocator;
12280
12281 public:
12282 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12283 SourceLocation::UIntTy LocEncoding, Address Addr,
12284 const Expr *Allocator)
12285 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12286 Allocator(Allocator) {}
12287 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12288 if (!CGF.HaveInsertPoint())
12289 return;
12290 llvm::Value *Args[3];
12291 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12292 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12293 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12294 Addr.getPointer(), CGF.VoidPtrTy);
12295 llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
12296 // According to the standard, the original allocator type is an enum
12297 // (integer). Convert to pointer type, if required.
12298 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12299 CGF.getContext().VoidPtrTy,
12300 Allocator->getExprLoc());
12301 Args[2] = AllocVal;
12302
12303 CGF.EmitRuntimeCall(RTLFn, Args);
12304 }
12305 };
12306 Address VDAddr =
12307 UntiedRealAddr.isValid() ?
UntiedRealAddr : Address(Addr, Align); 12308 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( 12309 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), 12310 VDAddr, AA->getAllocator()); 12311 if (UntiedRealAddr.isValid()) 12312 if (auto *Region = 12313 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 12314 Region->emitUntiedSwitch(CGF); 12315 return VDAddr; 12316 } 12317 return UntiedAddr; 12318 } 12319 12320 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, 12321 const VarDecl *VD) const { 12322 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); 12323 if (It == FunctionToUntiedTaskStackMap.end()) 12324 return false; 12325 return UntiedLocalVarsStack[It->second].count(VD) > 0; 12326 } 12327 12328 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 12329 CodeGenModule &CGM, const OMPLoopDirective &S) 12330 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 12331 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12332 if (!NeedToPush) 12333 return; 12334 NontemporalDeclsSet &DS = 12335 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 12336 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 12337 for (const Stmt *Ref : C->private_refs()) { 12338 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 12339 const ValueDecl *VD; 12340 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 12341 VD = DRE->getDecl(); 12342 } else { 12343 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 12344 assert((ME->isImplicitCXXThis() || 12345 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 12346 "Expected member of current class."); 12347 VD = ME->getMemberDecl(); 12348 } 12349 DS.insert(VD); 12350 } 12351 } 12352 } 12353 12354 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 12355 if (!NeedToPush) 12356 return; 12357 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 12358 } 12359 12360 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( 12361 CodeGenFunction &CGF, 12362 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, 12363 std::pair<Address, Address>> &LocalVars) 12364 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { 12365 if (!NeedToPush) 12366 return; 12367 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( 12368 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); 12369 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); 12370 } 12371 12372 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { 12373 if (!NeedToPush) 12374 return; 12375 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); 12376 } 12377 12378 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 12379 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12380 12381 return llvm::any_of( 12382 CGM.getOpenMPRuntime().NontemporalDeclsStack, 12383 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); 12384 } 12385 12386 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 12387 const OMPExecutableDirective &S, 12388 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 12389 const { 12390 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 12391 // Vars in target/task regions must be excluded completely. 
12392 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 12393 isOpenMPTaskingDirective(S.getDirectiveKind())) { 12394 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12395 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 12396 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 12397 for (const CapturedStmt::Capture &Cap : CS->captures()) { 12398 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 12399 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 12400 } 12401 } 12402 // Exclude vars in private clauses. 12403 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 12404 for (const Expr *Ref : C->varlists()) { 12405 if (!Ref->getType()->isScalarType()) 12406 continue; 12407 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12408 if (!DRE) 12409 continue; 12410 NeedToCheckForLPCs.insert(DRE->getDecl()); 12411 } 12412 } 12413 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 12414 for (const Expr *Ref : C->varlists()) { 12415 if (!Ref->getType()->isScalarType()) 12416 continue; 12417 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12418 if (!DRE) 12419 continue; 12420 NeedToCheckForLPCs.insert(DRE->getDecl()); 12421 } 12422 } 12423 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12424 for (const Expr *Ref : C->varlists()) { 12425 if (!Ref->getType()->isScalarType()) 12426 continue; 12427 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12428 if (!DRE) 12429 continue; 12430 NeedToCheckForLPCs.insert(DRE->getDecl()); 12431 } 12432 } 12433 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 12434 for (const Expr *Ref : C->varlists()) { 12435 if (!Ref->getType()->isScalarType()) 12436 continue; 12437 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12438 if (!DRE) 12439 continue; 12440 NeedToCheckForLPCs.insert(DRE->getDecl()); 12441 } 12442 } 12443 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 12444 for (const Expr *Ref : C->varlists()) { 12445 if (!Ref->getType()->isScalarType()) 12446 continue; 12447 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 12448 if (!DRE) 12449 continue; 12450 NeedToCheckForLPCs.insert(DRE->getDecl()); 12451 } 12452 } 12453 for (const Decl *VD : NeedToCheckForLPCs) { 12454 for (const LastprivateConditionalData &Data : 12455 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12456 if (Data.DeclToUniqueName.count(VD) > 0) { 12457 if (!Data.Disabled) 12458 NeedToAddForLPCsAsDisabled.insert(VD); 12459 break; 12460 } 12461 } 12462 } 12463 } 12464 12465 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12466 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12467 : CGM(CGF.CGM), 12468 Action((CGM.getLangOpts().OpenMP >= 50 && 12469 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12470 [](const OMPLastprivateClause *C) { 12471 return C->getKind() == 12472 OMPC_LASTPRIVATE_conditional; 12473 })) 12474 ? 
ActionToDo::PushAsLastprivateConditional 12475 : ActionToDo::DoNotPush) { 12476 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12477 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12478 return; 12479 assert(Action == ActionToDo::PushAsLastprivateConditional && 12480 "Expected a push action."); 12481 LastprivateConditionalData &Data = 12482 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12483 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12484 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12485 continue; 12486 12487 for (const Expr *Ref : C->varlists()) { 12488 Data.DeclToUniqueName.insert(std::make_pair( 12489 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12490 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12491 } 12492 } 12493 Data.IVLVal = IVLVal; 12494 Data.Fn = CGF.CurFn; 12495 } 12496 12497 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12498 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12499 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12500 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12501 if (CGM.getLangOpts().OpenMP < 50) 12502 return; 12503 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12504 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12505 if (!NeedToAddForLPCsAsDisabled.empty()) { 12506 Action = ActionToDo::DisableLastprivateConditional; 12507 LastprivateConditionalData &Data = 12508 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12509 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12510 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12511 Data.Fn = CGF.CurFn; 12512 Data.Disabled = true; 12513 } 12514 } 12515 12516 CGOpenMPRuntime::LastprivateConditionalRAII 12517 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12518 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12519 return LastprivateConditionalRAII(CGF, S); 12520 } 12521 12522 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12523 if (CGM.getLangOpts().OpenMP < 50) 12524 return; 12525 if (Action == ActionToDo::DisableLastprivateConditional) { 12526 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12527 "Expected list of disabled private vars."); 12528 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12529 } 12530 if (Action == ActionToDo::PushAsLastprivateConditional) { 12531 assert( 12532 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12533 "Expected list of lastprivate conditional vars."); 12534 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12535 } 12536 } 12537 12538 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12539 const VarDecl *VD) { 12540 ASTContext &C = CGM.getContext(); 12541 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12542 if (I == LastprivateConditionalToTypes.end()) 12543 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12544 QualType NewType; 12545 const FieldDecl *VDField; 12546 const FieldDecl *FiredField; 12547 LValue BaseLVal; 12548 auto VI = I->getSecond().find(VD); 12549 if (VI == I->getSecond().end()) { 12550 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12551 RD->startDefinition(); 12552 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12553 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 12554 RD->completeDefinition(); 12555 
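// The record built above amounts to (illustrative; the fields are unnamed):
//   struct { <type of VD> value; char Fired; };
// where 'Fired' records whether the privatized copy was ever assigned.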
NewType = C.getRecordType(RD); 12556 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12557 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12558 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12559 } else { 12560 NewType = std::get<0>(VI->getSecond()); 12561 VDField = std::get<1>(VI->getSecond()); 12562 FiredField = std::get<2>(VI->getSecond()); 12563 BaseLVal = std::get<3>(VI->getSecond()); 12564 } 12565 LValue FiredLVal = 12566 CGF.EmitLValueForField(BaseLVal, FiredField); 12567 CGF.EmitStoreOfScalar( 12568 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12569 FiredLVal); 12570 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12571 } 12572 12573 namespace { 12574 /// Checks if the lastprivate conditional variable is referenced in LHS. 12575 class LastprivateConditionalRefChecker final 12576 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12577 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12578 const Expr *FoundE = nullptr; 12579 const Decl *FoundD = nullptr; 12580 StringRef UniqueDeclName; 12581 LValue IVLVal; 12582 llvm::Function *FoundFn = nullptr; 12583 SourceLocation Loc; 12584 12585 public: 12586 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12587 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12588 llvm::reverse(LPM)) { 12589 auto It = D.DeclToUniqueName.find(E->getDecl()); 12590 if (It == D.DeclToUniqueName.end()) 12591 continue; 12592 if (D.Disabled) 12593 return false; 12594 FoundE = E; 12595 FoundD = E->getDecl()->getCanonicalDecl(); 12596 UniqueDeclName = It->second; 12597 IVLVal = D.IVLVal; 12598 FoundFn = D.Fn; 12599 break; 12600 } 12601 return FoundE == E; 12602 } 12603 bool VisitMemberExpr(const MemberExpr *E) { 12604 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12605 return false; 12606 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12607 llvm::reverse(LPM)) { 12608 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12609 if (It == D.DeclToUniqueName.end()) 12610 continue; 12611 if (D.Disabled) 12612 return false; 12613 FoundE = E; 12614 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12615 UniqueDeclName = It->second; 12616 IVLVal = D.IVLVal; 12617 FoundFn = D.Fn; 12618 break; 12619 } 12620 return FoundE == E; 12621 } 12622 bool VisitStmt(const Stmt *S) { 12623 for (const Stmt *Child : S->children()) { 12624 if (!Child) 12625 continue; 12626 if (const auto *E = dyn_cast<Expr>(Child)) 12627 if (!E->isGLValue()) 12628 continue; 12629 if (Visit(Child)) 12630 return true; 12631 } 12632 return false; 12633 } 12634 explicit LastprivateConditionalRefChecker( 12635 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12636 : LPM(LPM) {} 12637 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12638 getFoundData() const { 12639 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12640 } 12641 }; 12642 } // namespace 12643 12644 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12645 LValue IVLVal, 12646 StringRef UniqueDeclName, 12647 LValue LVal, 12648 SourceLocation Loc) { 12649 // Last updated loop counter for the lastprivate conditional var. 
12650 // int<xx> last_iv = 0; 12651 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12652 llvm::Constant *LastIV = 12653 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12654 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12655 IVLVal.getAlignment().getAsAlign()); 12656 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12657 12658 // Last value of the lastprivate conditional. 12659 // decltype(priv_a) last_a; 12660 llvm::GlobalVariable *Last = getOrCreateInternalVariable( 12661 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12662 Last->setAlignment(LVal.getAlignment().getAsAlign()); 12663 LValue LastLVal = CGF.MakeAddrLValue( 12664 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType()); 12665 12666 // Global loop counter. Required to handle inner parallel-for regions. 12667 // iv 12668 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12669 12670 // #pragma omp critical(a) 12671 // if (last_iv <= iv) { 12672 // last_iv = iv; 12673 // last_a = priv_a; 12674 // } 12675 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12676 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12677 Action.Enter(CGF); 12678 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12679 // (last_iv <= iv) ? Check if the variable is updated and store new 12680 // value in global var. 12681 llvm::Value *CmpRes; 12682 if (IVLVal.getType()->isSignedIntegerType()) { 12683 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12684 } else { 12685 assert(IVLVal.getType()->isUnsignedIntegerType() && 12686 "Loop iteration variable must be integer."); 12687 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12688 } 12689 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12690 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12691 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12692 // { 12693 CGF.EmitBlock(ThenBB); 12694 12695 // last_iv = iv; 12696 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12697 12698 // last_a = priv_a; 12699 switch (CGF.getEvaluationKind(LVal.getType())) { 12700 case TEK_Scalar: { 12701 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12702 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12703 break; 12704 } 12705 case TEK_Complex: { 12706 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12707 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12708 break; 12709 } 12710 case TEK_Aggregate: 12711 llvm_unreachable( 12712 "Aggregates are not supported in lastprivate conditional."); 12713 } 12714 // } 12715 CGF.EmitBranch(ExitBB); 12716 // There is no need to emit line number for unconditional branch. 12717 (void)ApplyDebugLocation::CreateEmpty(CGF); 12718 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12719 }; 12720 12721 if (CGM.getLangOpts().OpenMPSimd) { 12722 // Do not emit as a critical region as no parallel region could be emitted. 
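// (With -fopenmp-simd only simd constructs are lowered, so there are no
// runtime threads to race with and the plain update is safe.)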
12723 RegionCodeGenTy ThenRCG(CodeGen); 12724 ThenRCG(CGF); 12725 } else { 12726 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12727 } 12728 } 12729 12730 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12731 const Expr *LHS) { 12732 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12733 return; 12734 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12735 if (!Checker.Visit(LHS)) 12736 return; 12737 const Expr *FoundE; 12738 const Decl *FoundD; 12739 StringRef UniqueDeclName; 12740 LValue IVLVal; 12741 llvm::Function *FoundFn; 12742 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12743 Checker.getFoundData(); 12744 if (FoundFn != CGF.CurFn) { 12745 // Special codegen for inner parallel regions. 12746 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12747 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12748 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12749 "Lastprivate conditional is not found in outer region."); 12750 QualType StructTy = std::get<0>(It->getSecond()); 12751 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12752 LValue PrivLVal = CGF.EmitLValue(FoundE); 12753 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12754 PrivLVal.getAddress(CGF), 12755 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12756 LValue BaseLVal = 12757 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12758 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12759 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12760 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12761 FiredLVal, llvm::AtomicOrdering::Unordered, 12762 /*IsVolatile=*/true, /*isInit=*/false); 12763 return; 12764 } 12765 12766 // Private address of the lastprivate conditional in the current context. 

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
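
// For each registered variable the loop above emits, roughly (sketch only,
// reusing the 'priv_a' naming from the comments in this file):
//
//   if (priv_a.Fired != 0) {
//     // compare iv against last_iv and update last_a under a critical
//     // region; see emitLastprivateConditionalUpdate.
//   }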

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
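
// Illustrative sketch of the copy-back ('a' is hypothetical): after the
// construct finishes, the cached value is restored into the original
// variable, roughly
//
//   a = last_a;
//
// If no iteration fired the conditional store, the 'last_a' global was
// never created and the original value of 'a' is left untouched.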

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
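
// Illustrative sketch: only "simple" reductions are expected to reach this
// override in SIMD-only mode, e.g. (names hypothetical)
//
//   #pragma omp simd reduction(+ : sum)
//   for (int i = 0; i < N; ++i)
//     sum += arr[i];
//
// No runtime library calls are needed for this case; the base
// implementation emits the combiner for each reduction variable in order.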

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}