//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// such as the 'for', 'sections', and 'atomic' directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
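        // A rough sketch of the dispatch structure emitted here and extended
        // by emitUntiedSwitch below (illustrative only; block names are
        // approximate):
        //
        //   switch (*partid) {             // task switching point
        //   default: goto .untied.done.;   // branch to the function exit
        //   case 0:  goto .untied.jmp.0;   // initial entry
        //   case 1:  goto .untied.jmp.1;   // added per scheduling point
        //   ...
        //   }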
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
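  // Note: most of the overrides below simply delegate to the enclosing
  // outlined region, if any; an inlined region has no captured-statement
  // context of its own.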
  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; the original one can be
    // used.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerator values are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

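// For illustration (a sketch based on the enumerators above, not a normative
// example): the location emitted for an implicit barrier at the end of a
// worksharing 'for' would carry
//   OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR
// i.e. 0x02 | 0x40 == 0x42 in the 'flags' field of the ident_t object.
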
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

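// For example (an illustrative mapping implied by the enumerators above):
//   #pragma omp for schedule(static, 4)     -> OMP_sch_static_chunked (33)
//   #pragma omp for schedule(dynamic)       -> OMP_sch_dynamic_chunked (35)
//   #pragma omp for ordered schedule(auto)  -> OMP_ord_auto (70)
//   schedule(monotonic: dynamic)            -> OMP_sch_dynamic_chunked |
//                                              OMP_sch_modifier_monotonic
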
/// A basic class for a pre- or post-action for an advanced codegen sequence
/// for an OpenMP region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner; if so, return the
/// UDR decl used for the reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

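// For context, an illustrative user-level example (not from this file) of the
// two initialization paths handled above:
//   #pragma omp declare reduction(mymin : int : omp_out = omp_out < omp_in \
//       ? omp_out : omp_in) initializer(omp_priv = 2147483647)
// With the 'initializer' clause, the first branch runs, binding the LHS of the
// initializer op to Private and the RHS to Original; without the clause, the
// private copy is initialized from an emitted null constant.
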
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
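    // Sketch of the computation (illustrative): for a section over elements of
    // size S whose first and last element addresses are F and L, the code
    // produces Size = (L - F) + 1 elements, and SizeInChars = Size * S below.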
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
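    // Register a destructor cleanup for the privatized copy; it is emitted
    // when the enclosing cleanup scope is popped.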
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
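    // Sketch of the adjustment (illustrative): Adjustment = base - section
    // start, which is zero or negative, so the GEP below rebases the private
    // copy to the address the base variable would have if the section began
    // at the private allocation.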
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

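// The implicit record built above corresponds to the field indices in
// IdentFieldIndex and lowers to roughly (illustrative IR):
//   %struct.ident_t = type { i32, i32, i32, i32, i8* }
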
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *omp_out, Ty *omp_in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
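  // For illustration (a sketch, not the exact output): for
  //   #pragma omp declare reduction(+ : T : omp_out += omp_in)
  // this emits an internal helper roughly equivalent to
  //   void .omp_combiner.(T *restrict omp_out, T *restrict omp_in) {
  //     *omp_out += *omp_in;
  //   }
  // with omp_in/omp_out privatized to the dereferenced parameters below.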
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of Clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)), which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be a need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

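  // The helper generated below has the usual microtask shape (a sketch; the
  // exact parameter list depends on the captured variables):
  //   void <OutlinedHelperName>(kmp_int32 *global_tid, kmp_int32 *bound_tid,
  //                             /* captures... */);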
  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
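  // Sketch (illustrative): for untied tasks, UntiedCodeGen above re-enqueues
  // the task at each scheduling point via
  //   __kmpc_omp_task(ident_t *loc, kmp_int32 gtid, kmp_task_t *task)
  // so the outlined body can resume at the next part id on the following run.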
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill in the padding elements inserted for alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

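// For illustration: the default location built below via createGlobalStruct
// looks roughly like (a sketch of the IR; names and flag values vary):
//   @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
//   @0 = private unnamed_addr constant %struct.ident_t
//            { i32 0, i32 <flags>, i32 <reserved_2>, i32 0,
//              i8* getelementptr (...@.str...) }
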
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated, return the global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location.
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return IdentTy->getPointerTo();
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
      CGM.Int32Ty,           // schedtype
      ITy,                   // lower
      ITy,                   // upper
      ITy,                   // stride
      ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should always be valid and have a file ID (the user cannot use
  // #pragma directives in macros).

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Presumed location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
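  // The cache is a global of type void ** named "<mangled-name>.cache."
  // (assuming the default '.' separator produced by getName); e.g. for a
  // threadprivate variable "gv" this is, roughly, a global "gv.cache." of
  // LLVM type i8**.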
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     OMPBuilder.getOrCreateRuntimeFunction(
                         CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register the constructor/destructor for the variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits a destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL: the parameter is reserved by the runtime, which currently
    // requires it to always be NULL; otherwise it fires an assertion.
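    // The (unused) signature reserved for it is void *(*)(void *, void *),
    // matching CopyCtorTy above.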
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration, which we know cannot
  // conflict with any target region.
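  // The resulting prefix has the shape (values are examples):
  //   __omp_offloading_<device-id-hex>_<file-id-hex>_<var-name>_l<line>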
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits a destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code.
  // There is no need to emit a line number for an unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit a line number for an unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in a regular serial code region, get the thread ID by calling
// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash that thread ID in a
// temporary, and return the address of that temporary.
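// In rough pseudo-code, the fallback path is:
//   kmp_int32 .threadid_temp. = __kmpc_global_thread_num(&loc);
//   return &.threadid_temp.;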
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
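/// Emits EnterCallee(EnterArgs) on entry to a region and ExitCallee(ExitArgs)
/// on exit. With Conditional set, the region body is guarded by the enter
/// call's result, roughly:
///   if (EnterCallee(EnterArgs)) { <region>; ExitCallee(ExitArgs); }
/// with Done() emitting the continuation block that closes the branch.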
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt).
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of the blocks/branches.
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
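  // In C terms this is roughly: return (VarTy *)Array[Index];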
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies private values from the single region to
    // all other threads in the corresponding parallel region.
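    // The generated helper has signature void(void *, void *); the runtime is
    // expected to invoke it on each receiving thread, roughly as
    // copy_func(<that thread's list>, <the single thread's list>).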
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered.
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In
  // this case choose schedule(static, 1).
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder.
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or
  // __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only static is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
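  // For example, under OpenMP >= 5.0 a plain schedule(dynamic) maps to
  // OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic below, while
  // schedule(static) keeps Modifier == 0 (monotonic by default).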
2725 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2726 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2727 Schedule == OMP_sch_static_balanced_chunked || 2728 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2729 Schedule == OMP_dist_sch_static_chunked || 2730 Schedule == OMP_dist_sch_static)) 2731 Modifier = OMP_sch_modifier_nonmonotonic; 2732 } 2733 return Schedule | Modifier; 2734 } 2735 2736 void CGOpenMPRuntime::emitForDispatchInit( 2737 CodeGenFunction &CGF, SourceLocation Loc, 2738 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2739 bool Ordered, const DispatchRTInput &DispatchValues) { 2740 if (!CGF.HaveInsertPoint()) 2741 return; 2742 OpenMPSchedType Schedule = getRuntimeSchedule( 2743 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2744 assert(Ordered || 2745 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2746 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2747 Schedule != OMP_sch_static_balanced_chunked)); 2748 // Call __kmpc_dispatch_init( 2749 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2750 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2751 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2752 2753 // If the Chunk was not specified in the clause - use default value 1. 2754 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 2755 : CGF.Builder.getIntN(IVSize, 1); 2756 llvm::Value *Args[] = { 2757 emitUpdateLocation(CGF, Loc), 2758 getThreadID(CGF, Loc), 2759 CGF.Builder.getInt32(addMonoNonMonoModifier( 2760 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2761 DispatchValues.LB, // Lower 2762 DispatchValues.UB, // Upper 2763 CGF.Builder.getIntN(IVSize, 1), // Stride 2764 Chunk // Chunk 2765 }; 2766 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2767 } 2768 2769 static void emitForStaticInitCall( 2770 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2771 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2772 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2773 const CGOpenMPRuntime::StaticRTInput &Values) { 2774 if (!CGF.HaveInsertPoint()) 2775 return; 2776 2777 assert(!Values.Ordered); 2778 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2779 Schedule == OMP_sch_static_balanced_chunked || 2780 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2781 Schedule == OMP_dist_sch_static || 2782 Schedule == OMP_dist_sch_static_chunked); 2783 2784 // Call __kmpc_for_static_init( 2785 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2786 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2787 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2788 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2789 llvm::Value *Chunk = Values.Chunk; 2790 if (Chunk == nullptr) { 2791 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2792 Schedule == OMP_dist_sch_static) && 2793 "expected static non-chunked schedule"); 2794 // If the Chunk was not specified in the clause - use default value 1. 
2795 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2796 } else { 2797 assert((Schedule == OMP_sch_static_chunked || 2798 Schedule == OMP_sch_static_balanced_chunked || 2799 Schedule == OMP_ord_static_chunked || 2800 Schedule == OMP_dist_sch_static_chunked) && 2801 "expected static chunked schedule"); 2802 } 2803 llvm::Value *Args[] = { 2804 UpdateLocation, 2805 ThreadId, 2806 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2807 M2)), // Schedule type 2808 Values.IL.getPointer(), // &isLastIter 2809 Values.LB.getPointer(), // &LB 2810 Values.UB.getPointer(), // &UB 2811 Values.ST.getPointer(), // &Stride 2812 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2813 Chunk // Chunk 2814 }; 2815 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2816 } 2817 2818 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2819 SourceLocation Loc, 2820 OpenMPDirectiveKind DKind, 2821 const OpenMPScheduleTy &ScheduleKind, 2822 const StaticRTInput &Values) { 2823 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2824 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2825 assert(isOpenMPWorksharingDirective(DKind) && 2826 "Expected loop-based or sections-based directive."); 2827 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2828 isOpenMPLoopDirective(DKind) 2829 ? OMP_IDENT_WORK_LOOP 2830 : OMP_IDENT_WORK_SECTIONS); 2831 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2832 llvm::FunctionCallee StaticInitFunction = 2833 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2834 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2835 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2836 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2837 } 2838 2839 void CGOpenMPRuntime::emitDistributeStaticInit( 2840 CodeGenFunction &CGF, SourceLocation Loc, 2841 OpenMPDistScheduleClauseKind SchedKind, 2842 const CGOpenMPRuntime::StaticRTInput &Values) { 2843 OpenMPSchedType ScheduleNum = 2844 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2845 llvm::Value *UpdatedLocation = 2846 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2847 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2848 llvm::FunctionCallee StaticInitFunction = 2849 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2850 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2851 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2852 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2853 } 2854 2855 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2856 SourceLocation Loc, 2857 OpenMPDirectiveKind DKind) { 2858 if (!CGF.HaveInsertPoint()) 2859 return; 2860 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2861 llvm::Value *Args[] = { 2862 emitUpdateLocation(CGF, Loc, 2863 isOpenMPDistributeDirective(DKind) 2864 ? OMP_IDENT_WORK_DISTRIBUTE 2865 : isOpenMPLoopDirective(DKind) 2866 ? 
OMP_IDENT_WORK_LOOP 2867 : OMP_IDENT_WORK_SECTIONS),
2868 getThreadID(CGF, Loc)};
2869 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2870 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2871 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2872 Args);
2873 }
2874
2875 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2876 SourceLocation Loc,
2877 unsigned IVSize,
2878 bool IVSigned) {
2879 if (!CGF.HaveInsertPoint())
2880 return;
2881 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2882 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2883 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2884 }
2885
2886 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2887 SourceLocation Loc, unsigned IVSize,
2888 bool IVSigned, Address IL,
2889 Address LB, Address UB,
2890 Address ST) {
2891 // Call __kmpc_dispatch_next(
2892 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2893 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2894 // kmp_int[32|64] *p_stride);
2895 llvm::Value *Args[] = {
2896 emitUpdateLocation(CGF, Loc),
2897 getThreadID(CGF, Loc),
2898 IL.getPointer(), // &isLastIter
2899 LB.getPointer(), // &Lower
2900 UB.getPointer(), // &Upper
2901 ST.getPointer() // &Stride
2902 };
2903 llvm::Value *Call =
2904 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2905 return CGF.EmitScalarConversion(
2906 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2907 CGF.getContext().BoolTy, Loc);
2908 }
2909
2910 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2911 llvm::Value *NumThreads,
2912 SourceLocation Loc) {
2913 if (!CGF.HaveInsertPoint())
2914 return;
2915 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2916 llvm::Value *Args[] = {
2917 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2918 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2919 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2920 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2921 Args);
2922 }
2923
2924 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2925 ProcBindKind ProcBind,
2926 SourceLocation Loc) {
2927 if (!CGF.HaveInsertPoint())
2928 return;
2929 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2930 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2931 llvm::Value *Args[] = {
2932 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2933 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2934 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2935 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2936 Args);
2937 }
2938
2939 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2940 SourceLocation Loc, llvm::AtomicOrdering AO) {
2941 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2942 OMPBuilder.CreateFlush(CGF.Builder);
2943 } else {
2944 if (!CGF.HaveInsertPoint())
2945 return;
2946 // Build call void __kmpc_flush(ident_t *loc)
2947 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2948 CGM.getModule(), OMPRTL___kmpc_flush),
2949 emitUpdateLocation(CGF, Loc));
2950 }
2951 }
2952
2953 namespace {
2954 /// Indexes of fields for type kmp_task_t.
2955 enum KmpTaskTFields {
2956 /// List of shared variables.
2957 KmpTaskTShareds,
2958 /// Task routine.
2959 KmpTaskTRoutine,
2960 /// Partition id for the untied tasks.
2961 KmpTaskTPartId,
2962 /// Function with call of destructors for private variables.
2963 Data1,
2964 /// Task priority.
2965 Data2,
2966 /// (Taskloops only) Lower bound.
2967 KmpTaskTLowerBound,
2968 /// (Taskloops only) Upper bound.
2969 KmpTaskTUpperBound,
2970 /// (Taskloops only) Stride.
2971 KmpTaskTStride,
2972 /// (Taskloops only) Is last iteration flag.
2973 KmpTaskTLastIter,
2974 /// (Taskloops only) Reduction data.
2975 KmpTaskTReductions,
2976 };
2977 } // anonymous namespace
2978
2979 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2980 return OffloadEntriesTargetRegion.empty() &&
2981 OffloadEntriesDeviceGlobalVar.empty();
2982 }
2983
2984 /// Initialize target region entry.
2985 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2986 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2987 StringRef ParentName, unsigned LineNum,
2988 unsigned Order) {
2989 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2990 "only required for the device "
2991 "code generation.");
2992 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2993 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2994 OMPTargetRegionEntryTargetRegion);
2995 ++OffloadingEntriesNum;
2996 }
2997
2998 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2999 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3000 StringRef ParentName, unsigned LineNum,
3001 llvm::Constant *Addr, llvm::Constant *ID,
3002 OMPTargetRegionEntryKind Flags) {
3003 // If we are emitting code for a target, the entry is already initialized;
3004 // it only has to be registered.
3005 if (CGM.getLangOpts().OpenMPIsDevice) {
3006 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3007 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3008 DiagnosticsEngine::Error,
3009 "Unable to find target region on line '%0' in the device code.");
3010 CGM.getDiags().Report(DiagID) << LineNum;
3011 return;
3012 }
3013 auto &Entry =
3014 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3015 assert(Entry.isValid() && "Entry not initialized!");
3016 Entry.setAddress(Addr);
3017 Entry.setID(ID);
3018 Entry.setFlags(Flags);
3019 } else {
3020 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3021 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3022 ++OffloadingEntriesNum;
3023 }
3024 }
3025
3026 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3027 unsigned DeviceID, unsigned FileID, StringRef ParentName,
3028 unsigned LineNum) const {
3029 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3030 if (PerDevice == OffloadEntriesTargetRegion.end())
3031 return false;
3032 auto PerFile = PerDevice->second.find(FileID);
3033 if (PerFile == PerDevice->second.end())
3034 return false;
3035 auto PerParentName = PerFile->second.find(ParentName);
3036 if (PerParentName == PerFile->second.end())
3037 return false;
3038 auto PerLine = PerParentName->second.find(LineNum);
3039 if (PerLine == PerParentName->second.end())
3040 return false;
3041 // Fail if this entry is already registered.
3042 if (PerLine->second.getAddress() || PerLine->second.getID())
3043 return false;
3044 return true;
3045 }
3046
3047 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3048 const OffloadTargetRegionEntryInfoActTy &Action) {
3049 // Scan all target region entries and perform the provided action.
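  // The entries are stored as a four-level map, DeviceID -> FileID ->
  // ParentName -> LineNum -> Entry, so the action receives all four keys
  // plus the entry itself. A minimal sketch of a conforming caller
  // (illustrative only; 'Mgr' is a hypothetical manager instance):
  //   Mgr.actOnTargetRegionEntriesInfo(
  //       [](unsigned DeviceID, unsigned FileID, StringRef ParentName,
  //          unsigned Line, const OffloadEntryInfoTargetRegion &E) {
  //         /* inspect E.getAddress(), E.getID(), E.getOrder(), ... */
  //       });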
3050 for (const auto &D : OffloadEntriesTargetRegion)
3051 for (const auto &F : D.second)
3052 for (const auto &P : F.second)
3053 for (const auto &L : P.second)
3054 Action(D.first, F.first, P.first(), L.first, L.second);
3055 }
3056
3057 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3058 initializeDeviceGlobalVarEntryInfo(StringRef Name,
3059 OMPTargetGlobalVarEntryKind Flags,
3060 unsigned Order) {
3061 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3062 "only required for the device "
3063 "code generation.");
3064 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3065 ++OffloadingEntriesNum;
3066 }
3067
3068 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3069 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3070 CharUnits VarSize,
3071 OMPTargetGlobalVarEntryKind Flags,
3072 llvm::GlobalValue::LinkageTypes Linkage) {
3073 if (CGM.getLangOpts().OpenMPIsDevice) {
3074 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3075 assert(Entry.isValid() && Entry.getFlags() == Flags &&
3076 "Entry not initialized!");
3077 assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3078 "Resetting with the new address.");
3079 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3080 if (Entry.getVarSize().isZero()) {
3081 Entry.setVarSize(VarSize);
3082 Entry.setLinkage(Linkage);
3083 }
3084 return;
3085 }
3086 Entry.setVarSize(VarSize);
3087 Entry.setLinkage(Linkage);
3088 Entry.setAddress(Addr);
3089 } else {
3090 if (hasDeviceGlobalVarEntryInfo(VarName)) {
3091 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3092 assert(Entry.isValid() && Entry.getFlags() == Flags &&
3093 "Entry not initialized!");
3094 assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3095 "Resetting with the new address.");
3096 if (Entry.getVarSize().isZero()) {
3097 Entry.setVarSize(VarSize);
3098 Entry.setLinkage(Linkage);
3099 }
3100 return;
3101 }
3102 OffloadEntriesDeviceGlobalVar.try_emplace(
3103 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3104 ++OffloadingEntriesNum;
3105 }
3106 }
3107
3108 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3109 actOnDeviceGlobalVarEntriesInfo(
3110 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3111 // Scan all device global variable entries and perform the provided action.
3112 for (const auto &E : OffloadEntriesDeviceGlobalVar)
3113 Action(E.getKey(), E.getValue());
3114 }
3115
3116 void CGOpenMPRuntime::createOffloadEntry(
3117 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3118 llvm::GlobalValue::LinkageTypes Linkage) {
3119 StringRef Name = Addr->getName();
3120 llvm::Module &M = CGM.getModule();
3121 llvm::LLVMContext &C = M.getContext();
3122
3123 // Create constant string with the name.
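  // A sketch of what this function produces for an entry symbol "foo"
  // (global value names are illustrative; LLVM uniques the actual names):
  //   @.omp_offloading.entry_name = internal unnamed_addr constant [4 x i8] c"foo\00"
  //   @.omp_offloading.entry.foo = weak constant %struct.__tgt_offload_entry {
  //       i8* <ID>, i8* <name string>, i64 <size>, i32 <flags>, i32 0 },
  //       section "omp_offloading_entries"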
3124 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3125
3126 std::string StringName = getName({"omp_offloading", "entry_name"});
3127 auto *Str = new llvm::GlobalVariable(
3128 M, StrPtrInit->getType(), /*isConstant=*/true,
3129 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3130 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3131
3132 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3133 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3134 llvm::ConstantInt::get(CGM.SizeTy, Size),
3135 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3136 llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3137 std::string EntryName = getName({"omp_offloading", "entry", ""});
3138 llvm::GlobalVariable *Entry = createGlobalStruct(
3139 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3140 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3141
3142 // The entry has to be created in the section the linker expects it to be.
3143 Entry->setSection("omp_offloading_entries");
3144 }
3145
3146 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3147 // Emit the offloading entries and metadata so that the device codegen side
3148 // can easily figure out what to emit. The produced metadata looks like
3149 // this:
3150 //
3151 // !omp_offload.info = !{!1, ...}
3152 //
3153 // Right now we only generate metadata for functions that contain target
3154 // regions.
3155
3156 // If we are in simd mode or there are no entries, we don't need to do
3157 // anything.
3158 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3159 return;
3160
3161 llvm::Module &M = CGM.getModule();
3162 llvm::LLVMContext &C = M.getContext();
3163 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3164 SourceLocation, StringRef>,
3165 16>
3166 OrderedEntries(OffloadEntriesInfoManager.size());
3167 llvm::SmallVector<StringRef, 16> ParentFunctions(
3168 OffloadEntriesInfoManager.size());
3169
3170 // Auxiliary methods to create metadata values and strings.
3171 auto &&GetMDInt = [this](unsigned V) {
3172 return llvm::ConstantAsMetadata::get(
3173 llvm::ConstantInt::get(CGM.Int32Ty, V));
3174 };
3175
3176 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3177
3178 // Create the offloading info metadata node.
3179 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3180
3181 // Create a function that emits metadata for each target region entry.
3182 auto &&TargetRegionMetadataEmitter =
3183 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3184 &GetMDString](
3185 unsigned DeviceID, unsigned FileID, StringRef ParentName,
3186 unsigned Line,
3187 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3188 // Generate metadata for target regions. Each entry of this metadata
3189 // contains:
3190 // - Entry 0 -> Kind of this type of metadata (0).
3191 // - Entry 1 -> Device ID of the file where the entry was identified.
3192 // - Entry 2 -> File ID of the file where the entry was identified.
3193 // - Entry 3 -> Mangled name of the function where the entry was
3194 // identified.
3195 // - Entry 4 -> Line in the file where the entry was identified.
3196 // - Entry 5 -> Order the entry was created.
3197 // The first element of the metadata node is the kind.
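        // For example, a target region inside function _Z3foov at line 42
        // would be recorded roughly as (operand values are illustrative):
        //   !{i32 0, i32 <DeviceID>, i32 <FileID>, !"_Z3foov", i32 42, i32 <Order>}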
3198 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3199 GetMDInt(FileID), GetMDString(ParentName),
3200 GetMDInt(Line), GetMDInt(E.getOrder())};
3201
3202 SourceLocation Loc;
3203 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3204 E = CGM.getContext().getSourceManager().fileinfo_end();
3205 I != E; ++I) {
3206 if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3207 I->getFirst()->getUniqueID().getFile() == FileID) {
3208 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3209 I->getFirst(), Line, 1);
3210 break;
3211 }
3212 }
3213 // Save this entry in the right position of the ordered entries array.
3214 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3215 ParentFunctions[E.getOrder()] = ParentName;
3216
3217 // Add metadata to the named metadata node.
3218 MD->addOperand(llvm::MDNode::get(C, Ops));
3219 };
3220
3221 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3222 TargetRegionMetadataEmitter);
3223
3224 // Create a function that emits metadata for each device global variable entry.
3225 auto &&DeviceGlobalVarMetadataEmitter =
3226 [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3227 MD](StringRef MangledName,
3228 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3229 &E) {
3230 // Generate metadata for global variables. Each entry of this metadata
3231 // contains:
3232 // - Entry 0 -> Kind of this type of metadata (1).
3233 // - Entry 1 -> Mangled name of the variable.
3234 // - Entry 2 -> Declare target kind.
3235 // - Entry 3 -> Order the entry was created.
3236 // The first element of the metadata node is the kind.
3237 llvm::Metadata *Ops[] = {
3238 GetMDInt(E.getKind()), GetMDString(MangledName),
3239 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3240
3241 // Save this entry in the right position of the ordered entries array.
3242 OrderedEntries[E.getOrder()] =
3243 std::make_tuple(&E, SourceLocation(), MangledName);
3244
3245 // Add metadata to the named metadata node.
3246 MD->addOperand(llvm::MDNode::get(C, Ops));
3247 };
3248
3249 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3250 DeviceGlobalVarMetadataEmitter);
3251
3252 for (const auto &E : OrderedEntries) {
3253 assert(std::get<0>(E) && "All ordered entries must exist!");
3254 if (const auto *CE =
3255 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3256 std::get<0>(E))) {
3257 if (!CE->getID() || !CE->getAddress()) {
3258 // Do not blame the entry if the parent function is not emitted.
3259 StringRef FnName = ParentFunctions[CE->getOrder()];
3260 if (!CGM.GetGlobalValue(FnName))
3261 continue;
3262 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3263 DiagnosticsEngine::Error,
3264 "Offloading entry for target region in %0 is incorrect: either the "
3265 "address or the ID is invalid.");
3266 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3267 continue;
3268 }
3269 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3270 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3271 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3272 OffloadEntryInfoDeviceGlobalVar>(
3273 std::get<0>(E))) {
3274 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3275 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3276 CE->getFlags());
3277 switch (Flags) {
3278 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3279 if (CGM.getLangOpts().OpenMPIsDevice &&
3280 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3281 continue;
3282 if (!CE->getAddress()) {
3283 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3284 DiagnosticsEngine::Error, "Offloading entry for declare target "
3285 "variable %0 is incorrect: the "
3286 "address is invalid.");
3287 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3288 continue;
3289 }
3290 // The variable has no definition - no need to add the entry.
3291 if (CE->getVarSize().isZero())
3292 continue;
3293 break;
3294 }
3295 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3296 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3297 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3298 "Declare target link address is set.");
3299 if (CGM.getLangOpts().OpenMPIsDevice)
3300 continue;
3301 if (!CE->getAddress()) {
3302 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3303 DiagnosticsEngine::Error,
3304 "Offloading entry for declare target variable is incorrect: the "
3305 "address is invalid.");
3306 CGM.getDiags().Report(DiagID);
3307 continue;
3308 }
3309 break;
3310 }
3311 createOffloadEntry(CE->getAddress(), CE->getAddress(),
3312 CE->getVarSize().getQuantity(), Flags,
3313 CE->getLinkage());
3314 } else {
3315 llvm_unreachable("Unsupported entry kind.");
3316 }
3317 }
3318 }
3319
3320 /// Loads all the offload entries information from the host IR
3321 /// metadata.
3322 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3323 // If we are in target mode, load the metadata from the host IR. This code has
3324 // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
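// A sketch of the host-IR metadata this loader consumes, mirroring the
// emitter above (operand values are illustrative):
//   !omp_offload.info = !{!0, !1}
//   !0 = !{i32 0, i32 <DeviceID>, i32 <FileID>, !"<ParentName>", i32 <Line>, i32 <Order>}
//   !1 = !{i32 1, !"<GlobalVarMangledName>", i32 <DeclareTargetKind>, i32 <Order>}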
3325
3326 if (!CGM.getLangOpts().OpenMPIsDevice)
3327 return;
3328
3329 if (CGM.getLangOpts().OMPHostIRFile.empty())
3330 return;
3331
3332 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3333 if (auto EC = Buf.getError()) {
3334 CGM.getDiags().Report(diag::err_cannot_open_file)
3335 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3336 return;
3337 }
3338
3339 llvm::LLVMContext C;
3340 auto ME = expectedToErrorOrAndEmitErrors(
3341 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3342
3343 if (auto EC = ME.getError()) {
3344 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3345 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3346 CGM.getDiags().Report(DiagID)
3347 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3348 return;
3349 }
3350
3351 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3352 if (!MD)
3353 return;
3354
3355 for (llvm::MDNode *MN : MD->operands()) {
3356 auto &&GetMDInt = [MN](unsigned Idx) {
3357 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3358 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3359 };
3360
3361 auto &&GetMDString = [MN](unsigned Idx) {
3362 auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3363 return V->getString();
3364 };
3365
3366 switch (GetMDInt(0)) {
3367 default:
3368 llvm_unreachable("Unexpected metadata!");
3369 break;
3370 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3371 OffloadingEntryInfoTargetRegion:
3372 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3373 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3374 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3375 /*Order=*/GetMDInt(5));
3376 break;
3377 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3378 OffloadingEntryInfoDeviceGlobalVar:
3379 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3380 /*MangledName=*/GetMDString(1),
3381 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3382 /*Flags=*/GetMDInt(2)),
3383 /*Order=*/GetMDInt(3));
3384 break;
3385 }
3386 }
3387 }
3388
3389 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3390 if (!KmpRoutineEntryPtrTy) {
3391 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3392 ASTContext &C = CGM.getContext();
3393 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3394 FunctionProtoType::ExtProtoInfo EPI;
3395 KmpRoutineEntryPtrQTy = C.getPointerType(
3396 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3397 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3398 }
3399 }
3400
3401 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3402 // Make sure the type of the entry is already created. This is the type we
3403 // have to create:
3404 // struct __tgt_offload_entry{
3405 // void *addr; // Pointer to the offload entry info.
3406 // // (function or global)
3407 // char *name; // Name of the function or global.
3408 // size_t size; // Size of the entry info (0 if it is a function).
3409 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
3410 // int32_t reserved; // Reserved, for use by the runtime library.
3411 // }; 3412 if (TgtOffloadEntryQTy.isNull()) { 3413 ASTContext &C = CGM.getContext(); 3414 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3415 RD->startDefinition(); 3416 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3417 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3418 addFieldToRecordDecl(C, RD, C.getSizeType()); 3419 addFieldToRecordDecl( 3420 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3421 addFieldToRecordDecl( 3422 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3423 RD->completeDefinition(); 3424 RD->addAttr(PackedAttr::CreateImplicit(C)); 3425 TgtOffloadEntryQTy = C.getRecordType(RD); 3426 } 3427 return TgtOffloadEntryQTy; 3428 } 3429 3430 namespace { 3431 struct PrivateHelpersTy { 3432 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3433 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3434 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3435 PrivateElemInit(PrivateElemInit) {} 3436 const Expr *OriginalRef = nullptr; 3437 const VarDecl *Original = nullptr; 3438 const VarDecl *PrivateCopy = nullptr; 3439 const VarDecl *PrivateElemInit = nullptr; 3440 }; 3441 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3442 } // anonymous namespace 3443 3444 static RecordDecl * 3445 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3446 if (!Privates.empty()) { 3447 ASTContext &C = CGM.getContext(); 3448 // Build struct .kmp_privates_t. { 3449 // /* private vars */ 3450 // }; 3451 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3452 RD->startDefinition(); 3453 for (const auto &Pair : Privates) { 3454 const VarDecl *VD = Pair.second.Original; 3455 QualType Type = VD->getType().getNonReferenceType(); 3456 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3457 if (VD->hasAttrs()) { 3458 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3459 E(VD->getAttrs().end()); 3460 I != E; ++I) 3461 FD->addAttr(*I); 3462 } 3463 } 3464 RD->completeDefinition(); 3465 return RD; 3466 } 3467 return nullptr; 3468 } 3469 3470 static RecordDecl * 3471 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3472 QualType KmpInt32Ty, 3473 QualType KmpRoutineEntryPointerQTy) { 3474 ASTContext &C = CGM.getContext(); 3475 // Build struct kmp_task_t { 3476 // void * shareds; 3477 // kmp_routine_entry_t routine; 3478 // kmp_int32 part_id; 3479 // kmp_cmplrdata_t data1; 3480 // kmp_cmplrdata_t data2; 3481 // For taskloops additional fields: 3482 // kmp_uint64 lb; 3483 // kmp_uint64 ub; 3484 // kmp_int64 st; 3485 // kmp_int32 liter; 3486 // void * reductions; 3487 // }; 3488 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3489 UD->startDefinition(); 3490 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3491 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3492 UD->completeDefinition(); 3493 QualType KmpCmplrdataTy = C.getRecordType(UD); 3494 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3495 RD->startDefinition(); 3496 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3497 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3498 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3499 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3500 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3501 if (isOpenMPTaskLoopDirective(Kind)) { 3502 QualType KmpUInt64Ty = 3503 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3504 QualType KmpInt64Ty = 3505 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3506 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3507 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3508 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3509 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3510 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3511 } 3512 RD->completeDefinition(); 3513 return RD; 3514 } 3515 3516 static RecordDecl * 3517 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3518 ArrayRef<PrivateDataTy> Privates) { 3519 ASTContext &C = CGM.getContext(); 3520 // Build struct kmp_task_t_with_privates { 3521 // kmp_task_t task_data; 3522 // .kmp_privates_t. privates; 3523 // }; 3524 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3525 RD->startDefinition(); 3526 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3527 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3528 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3529 RD->completeDefinition(); 3530 return RD; 3531 } 3532 3533 /// Emit a proxy function which accepts kmp_task_t as the second 3534 /// argument. 3535 /// \code 3536 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3537 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3538 /// For taskloops: 3539 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3540 /// tt->reductions, tt->shareds); 3541 /// return 0; 3542 /// } 3543 /// \endcode 3544 static llvm::Function * 3545 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3546 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3547 QualType KmpTaskTWithPrivatesPtrQTy, 3548 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3549 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3550 llvm::Value *TaskPrivatesMap) { 3551 ASTContext &C = CGM.getContext(); 3552 FunctionArgList Args; 3553 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3554 ImplicitParamDecl::Other); 3555 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3556 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3557 ImplicitParamDecl::Other); 3558 Args.push_back(&GtidArg); 3559 Args.push_back(&TaskTypeArg); 3560 const auto &TaskEntryFnInfo = 3561 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3562 llvm::FunctionType *TaskEntryTy = 3563 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3564 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3565 auto *TaskEntry = llvm::Function::Create( 3566 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3567 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3568 TaskEntry->setDoesNotRecurse(); 3569 CodeGenFunction CGF(CGM); 3570 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3571 Loc, Loc); 3572 3573 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3574 // tt, 3575 // For taskloops: 3576 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3577 // tt->task_data.shareds); 3578 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3579 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3580 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3581 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3582 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3583 const auto *KmpTaskTWithPrivatesQTyRD = 3584 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3585 LValue Base = 3586 
CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3587 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3588 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3589 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3590 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3591 3592 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3593 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3594 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3595 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3596 CGF.ConvertTypeForMem(SharedsPtrTy)); 3597 3598 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3599 llvm::Value *PrivatesParam; 3600 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3601 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3602 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3603 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3604 } else { 3605 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3606 } 3607 3608 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3609 TaskPrivatesMap, 3610 CGF.Builder 3611 .CreatePointerBitCastOrAddrSpaceCast( 3612 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3613 .getPointer()}; 3614 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3615 std::end(CommonArgs)); 3616 if (isOpenMPTaskLoopDirective(Kind)) { 3617 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3618 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3619 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3620 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3621 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3622 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3623 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3624 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3625 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3626 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3627 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3628 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3629 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3630 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3631 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3632 CallArgs.push_back(LBParam); 3633 CallArgs.push_back(UBParam); 3634 CallArgs.push_back(StParam); 3635 CallArgs.push_back(LIParam); 3636 CallArgs.push_back(RParam); 3637 } 3638 CallArgs.push_back(SharedsParam); 3639 3640 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3641 CallArgs); 3642 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3643 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3644 CGF.FinishFunction(); 3645 return TaskEntry; 3646 } 3647 3648 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3649 SourceLocation Loc, 3650 QualType KmpInt32Ty, 3651 QualType KmpTaskTWithPrivatesPtrQTy, 3652 QualType KmpTaskTWithPrivatesQTy) { 3653 ASTContext &C = CGM.getContext(); 3654 FunctionArgList Args; 3655 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3656 ImplicitParamDecl::Other); 3657 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3658 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3659 ImplicitParamDecl::Other); 3660 
Args.push_back(&GtidArg); 3661 Args.push_back(&TaskTypeArg); 3662 const auto &DestructorFnInfo = 3663 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3664 llvm::FunctionType *DestructorFnTy = 3665 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3666 std::string Name = 3667 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3668 auto *DestructorFn = 3669 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3670 Name, &CGM.getModule()); 3671 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3672 DestructorFnInfo); 3673 DestructorFn->setDoesNotRecurse(); 3674 CodeGenFunction CGF(CGM); 3675 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3676 Args, Loc, Loc); 3677 3678 LValue Base = CGF.EmitLoadOfPointerLValue( 3679 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3680 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3681 const auto *KmpTaskTWithPrivatesQTyRD = 3682 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3683 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3684 Base = CGF.EmitLValueForField(Base, *FI); 3685 for (const auto *Field : 3686 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3687 if (QualType::DestructionKind DtorKind = 3688 Field->getType().isDestructedType()) { 3689 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3690 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3691 } 3692 } 3693 CGF.FinishFunction(); 3694 return DestructorFn; 3695 } 3696 3697 /// Emit a privates mapping function for correct handling of private and 3698 /// firstprivate variables. 3699 /// \code 3700 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 3701 /// **noalias priv1,..., <tyn> **noalias privn) { 3702 /// *priv1 = &.privates.priv1; 3703 /// ...; 3704 /// *privn = &.privates.privn; 3705 /// } 3706 /// \endcode 3707 static llvm::Value * 3708 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3709 ArrayRef<const Expr *> PrivateVars, 3710 ArrayRef<const Expr *> FirstprivateVars, 3711 ArrayRef<const Expr *> LastprivateVars, 3712 QualType PrivatesQTy, 3713 ArrayRef<PrivateDataTy> Privates) { 3714 ASTContext &C = CGM.getContext(); 3715 FunctionArgList Args; 3716 ImplicitParamDecl TaskPrivatesArg( 3717 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3718 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3719 ImplicitParamDecl::Other); 3720 Args.push_back(&TaskPrivatesArg); 3721 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 3722 unsigned Counter = 1; 3723 for (const Expr *E : PrivateVars) { 3724 Args.push_back(ImplicitParamDecl::Create( 3725 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3726 C.getPointerType(C.getPointerType(E->getType())) 3727 .withConst() 3728 .withRestrict(), 3729 ImplicitParamDecl::Other)); 3730 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3731 PrivateVarsPos[VD] = Counter; 3732 ++Counter; 3733 } 3734 for (const Expr *E : FirstprivateVars) { 3735 Args.push_back(ImplicitParamDecl::Create( 3736 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3737 C.getPointerType(C.getPointerType(E->getType())) 3738 .withConst() 3739 .withRestrict(), 3740 ImplicitParamDecl::Other)); 3741 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3742 PrivateVarsPos[VD] = Counter; 3743 ++Counter; 3744 } 3745 for (const Expr *E : LastprivateVars) { 3746 Args.push_back(ImplicitParamDecl::Create( 3747 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3748 
C.getPointerType(C.getPointerType(E->getType())) 3749 .withConst() 3750 .withRestrict(), 3751 ImplicitParamDecl::Other)); 3752 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3753 PrivateVarsPos[VD] = Counter; 3754 ++Counter; 3755 } 3756 const auto &TaskPrivatesMapFnInfo = 3757 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3758 llvm::FunctionType *TaskPrivatesMapTy = 3759 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3760 std::string Name = 3761 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3762 auto *TaskPrivatesMap = llvm::Function::Create( 3763 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3764 &CGM.getModule()); 3765 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3766 TaskPrivatesMapFnInfo); 3767 if (CGM.getLangOpts().Optimize) { 3768 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3769 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3770 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3771 } 3772 CodeGenFunction CGF(CGM); 3773 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3774 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3775 3776 // *privi = &.privates.privi; 3777 LValue Base = CGF.EmitLoadOfPointerLValue( 3778 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3779 TaskPrivatesArg.getType()->castAs<PointerType>()); 3780 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3781 Counter = 0; 3782 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3783 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3784 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3785 LValue RefLVal = 3786 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3787 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3788 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3789 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3790 ++Counter; 3791 } 3792 CGF.FinishFunction(); 3793 return TaskPrivatesMap; 3794 } 3795 3796 /// Emit initialization for private variables in task-based directives. 3797 static void emitPrivatesInit(CodeGenFunction &CGF, 3798 const OMPExecutableDirective &D, 3799 Address KmpTaskSharedsPtr, LValue TDBase, 3800 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3801 QualType SharedsTy, QualType SharedsPtrTy, 3802 const OMPTaskDataTy &Data, 3803 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3804 ASTContext &C = CGF.getContext(); 3805 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3806 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3807 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3808 ? OMPD_taskloop 3809 : OMPD_task; 3810 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3811 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3812 LValue SrcBase; 3813 bool IsTargetTask = 3814 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3815 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3816 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 3817 // PointersArray and SizesArray. The original variables for these arrays are 3818 // not captured and we get their addresses explicitly. 
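  // SrcBase is therefore only set up when initial values will actually be
  // read out of the shareds block: when duplicating firstprivates for a
  // task_dup function, or for a target task with a valid shareds pointer.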
3819 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3820 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3821 SrcBase = CGF.MakeAddrLValue( 3822 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3823 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3824 SharedsTy); 3825 } 3826 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3827 for (const PrivateDataTy &Pair : Privates) { 3828 const VarDecl *VD = Pair.second.PrivateCopy; 3829 const Expr *Init = VD->getAnyInitializer(); 3830 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3831 !CGF.isTrivialInitializer(Init)))) { 3832 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3833 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3834 const VarDecl *OriginalVD = Pair.second.Original; 3835 // Check if the variable is the target-based BasePointersArray, 3836 // PointersArray or SizesArray. 3837 LValue SharedRefLValue; 3838 QualType Type = PrivateLValue.getType(); 3839 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3840 if (IsTargetTask && !SharedField) { 3841 assert(isa<ImplicitParamDecl>(OriginalVD) && 3842 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3843 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3844 ->getNumParams() == 0 && 3845 isa<TranslationUnitDecl>( 3846 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3847 ->getDeclContext()) && 3848 "Expected artificial target data variable."); 3849 SharedRefLValue = 3850 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3851 } else if (ForDup) { 3852 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3853 SharedRefLValue = CGF.MakeAddrLValue( 3854 Address(SharedRefLValue.getPointer(CGF), 3855 C.getDeclAlign(OriginalVD)), 3856 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3857 SharedRefLValue.getTBAAInfo()); 3858 } else if (CGF.LambdaCaptureFields.count( 3859 Pair.second.Original->getCanonicalDecl()) > 0 || 3860 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 3861 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3862 } else { 3863 // Processing for implicitly captured variables. 3864 InlinedOpenMPRegionRAII Region( 3865 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3866 /*HasCancel=*/false); 3867 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3868 } 3869 if (Type->isArrayType()) { 3870 // Initialize firstprivate array. 3871 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3872 // Perform simple memcpy. 3873 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3874 } else { 3875 // Initialize firstprivate array using element-by-element 3876 // initialization. 3877 CGF.EmitOMPAggregateAssign( 3878 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3879 Type, 3880 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3881 Address SrcElement) { 3882 // Clean up any temporaries needed by the initialization. 3883 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3884 InitScope.addPrivate( 3885 Elem, [SrcElement]() -> Address { return SrcElement; }); 3886 (void)InitScope.Privatize(); 3887 // Emit initialization for single element. 
3888 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3889 CGF, &CapturesInfo); 3890 CGF.EmitAnyExprToMem(Init, DestElement, 3891 Init->getType().getQualifiers(), 3892 /*IsInitializer=*/false); 3893 }); 3894 } 3895 } else { 3896 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3897 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3898 return SharedRefLValue.getAddress(CGF); 3899 }); 3900 (void)InitScope.Privatize(); 3901 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3902 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3903 /*capturedByInit=*/false); 3904 } 3905 } else { 3906 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3907 } 3908 } 3909 ++FI; 3910 } 3911 } 3912 3913 /// Check if duplication function is required for taskloops. 3914 static bool checkInitIsRequired(CodeGenFunction &CGF, 3915 ArrayRef<PrivateDataTy> Privates) { 3916 bool InitRequired = false; 3917 for (const PrivateDataTy &Pair : Privates) { 3918 const VarDecl *VD = Pair.second.PrivateCopy; 3919 const Expr *Init = VD->getAnyInitializer(); 3920 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3921 !CGF.isTrivialInitializer(Init)); 3922 if (InitRequired) 3923 break; 3924 } 3925 return InitRequired; 3926 } 3927 3928 3929 /// Emit task_dup function (for initialization of 3930 /// private/firstprivate/lastprivate vars and last_iter flag) 3931 /// \code 3932 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3933 /// lastpriv) { 3934 /// // setup lastprivate flag 3935 /// task_dst->last = lastpriv; 3936 /// // could be constructor calls here... 3937 /// } 3938 /// \endcode 3939 static llvm::Value * 3940 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3941 const OMPExecutableDirective &D, 3942 QualType KmpTaskTWithPrivatesPtrQTy, 3943 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3944 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3945 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3946 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3947 ASTContext &C = CGM.getContext(); 3948 FunctionArgList Args; 3949 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3950 KmpTaskTWithPrivatesPtrQTy, 3951 ImplicitParamDecl::Other); 3952 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3953 KmpTaskTWithPrivatesPtrQTy, 3954 ImplicitParamDecl::Other); 3955 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3956 ImplicitParamDecl::Other); 3957 Args.push_back(&DstArg); 3958 Args.push_back(&SrcArg); 3959 Args.push_back(&LastprivArg); 3960 const auto &TaskDupFnInfo = 3961 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3962 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3963 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3964 auto *TaskDup = llvm::Function::Create( 3965 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3966 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3967 TaskDup->setDoesNotRecurse(); 3968 CodeGenFunction CGF(CGM); 3969 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3970 Loc); 3971 3972 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3973 CGF.GetAddrOfLocalVar(&DstArg), 3974 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3975 // task_dst->liter = lastpriv; 3976 if (WithLastIter) { 3977 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3978 LValue Base = 
CGF.EmitLValueForField( 3979 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3980 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3981 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3982 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3983 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3984 } 3985 3986 // Emit initial values for private copies (if any). 3987 assert(!Privates.empty()); 3988 Address KmpTaskSharedsPtr = Address::invalid(); 3989 if (!Data.FirstprivateVars.empty()) { 3990 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3991 CGF.GetAddrOfLocalVar(&SrcArg), 3992 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3993 LValue Base = CGF.EmitLValueForField( 3994 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3995 KmpTaskSharedsPtr = Address( 3996 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3997 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3998 KmpTaskTShareds)), 3999 Loc), 4000 CGM.getNaturalTypeAlignment(SharedsTy)); 4001 } 4002 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4003 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4004 CGF.FinishFunction(); 4005 return TaskDup; 4006 } 4007 4008 /// Checks if destructor function is required to be generated. 4009 /// \return true if cleanups are required, false otherwise. 4010 static bool 4011 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4012 bool NeedsCleanup = false; 4013 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4014 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4015 for (const FieldDecl *FD : PrivateRD->fields()) { 4016 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4017 if (NeedsCleanup) 4018 break; 4019 } 4020 return NeedsCleanup; 4021 } 4022 4023 namespace { 4024 /// Loop generator for OpenMP iterator expression. 
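/// For an iterator such as 'iterator(i = begin:end:step)', the constructor
/// and destructor together emit control flow roughly equivalent to the
/// following sketch (not literal code):
/// \code
/// counter = 0;
/// cont:
/// if (counter < number-of-iterations) goto body; else goto exit;
/// body:
/// i = begin + counter * step;
/// ... // code using the iterator value
/// counter = counter + 1;
/// goto cont;
/// exit:
/// \endcode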
4025 class OMPIteratorGeneratorScope final 4026 : public CodeGenFunction::OMPPrivateScope { 4027 CodeGenFunction &CGF; 4028 const OMPIteratorExpr *E = nullptr; 4029 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4030 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4031 OMPIteratorGeneratorScope() = delete; 4032 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4033 4034 public: 4035 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4036 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4037 if (!E) 4038 return; 4039 SmallVector<llvm::Value *, 4> Uppers; 4040 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4041 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4042 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4043 addPrivate(VD, [&CGF, VD]() { 4044 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4045 }); 4046 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4047 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4048 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4049 "counter.addr"); 4050 }); 4051 } 4052 Privatize(); 4053 4054 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4055 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4056 LValue CLVal = 4057 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4058 HelperData.CounterVD->getType()); 4059 // Counter = 0; 4060 CGF.EmitStoreOfScalar( 4061 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4062 CLVal); 4063 CodeGenFunction::JumpDest &ContDest = 4064 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4065 CodeGenFunction::JumpDest &ExitDest = 4066 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4067 // N = <number-of_iterations>; 4068 llvm::Value *N = Uppers[I]; 4069 // cont: 4070 // if (Counter < N) goto body; else goto exit; 4071 CGF.EmitBlock(ContDest.getBlock()); 4072 auto *CVal = 4073 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4074 llvm::Value *Cmp = 4075 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4076 ? 
CGF.Builder.CreateICmpSLT(CVal, N)
4077 : CGF.Builder.CreateICmpULT(CVal, N);
4078 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4079 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4080 // body:
4081 CGF.EmitBlock(BodyBB);
4082 // Iteri = Begini + Counter * Stepi;
4083 CGF.EmitIgnoredExpr(HelperData.Update);
4084 }
4085 }
4086 ~OMPIteratorGeneratorScope() {
4087 if (!E)
4088 return;
4089 for (unsigned I = E->numOfIterators(); I > 0; --I) {
4090 // Counter = Counter + 1;
4091 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4092 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4093 // goto cont;
4094 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4095 // exit:
4096 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4097 }
4098 }
4099 };
4100 } // namespace
4101
4102 static std::pair<llvm::Value *, llvm::Value *>
4103 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4104 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4105 llvm::Value *Addr;
4106 if (OASE) {
4107 const Expr *Base = OASE->getBase();
4108 Addr = CGF.EmitScalarExpr(Base);
4109 } else {
4110 Addr = CGF.EmitLValue(E).getPointer(CGF);
4111 }
4112 llvm::Value *SizeVal;
4113 QualType Ty = E->getType();
4114 if (OASE) {
4115 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4116 for (const Expr *SE : OASE->getDimensions()) {
4117 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4118 Sz = CGF.EmitScalarConversion(
4119 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4120 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4121 }
4122 } else if (const auto *ASE =
4123 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4124 LValue UpAddrLVal =
4125 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4126 llvm::Value *UpAddr =
4127 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4128 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4129 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4130 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4131 } else {
4132 SizeVal = CGF.getTypeSize(Ty);
4133 }
4134 return std::make_pair(Addr, SizeVal);
4135 }
4136
4137 /// Builds kmp_task_affinity_info_t, if it is not built yet, and builds the flags type.
4138 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4139 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4140 if (KmpTaskAffinityInfoTy.isNull()) {
4141 RecordDecl *KmpAffinityInfoRD =
4142 C.buildImplicitRecord("kmp_task_affinity_info_t");
4143 KmpAffinityInfoRD->startDefinition();
4144 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4145 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4146 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4147 KmpAffinityInfoRD->completeDefinition();
4148 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4149 }
4150 }
4151
4152 CGOpenMPRuntime::TaskResultTy
4153 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4154 const OMPExecutableDirective &D,
4155 llvm::Function *TaskFunction, QualType SharedsTy,
4156 Address Shareds, const OMPTaskDataTy &Data) {
4157 ASTContext &C = CGM.getContext();
4158 llvm::SmallVector<PrivateDataTy, 4> Privates;
4159 // Aggregate privates and sort them by alignment.
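  // Sorting by descending alignment lets the .kmp_privates.t record be laid
  // out with minimal internal padding; the stable sort keeps declaration
  // order among members of equal alignment.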
4160 const auto *I = Data.PrivateCopies.begin(); 4161 for (const Expr *E : Data.PrivateVars) { 4162 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4163 Privates.emplace_back( 4164 C.getDeclAlign(VD), 4165 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4166 /*PrivateElemInit=*/nullptr)); 4167 ++I; 4168 } 4169 I = Data.FirstprivateCopies.begin(); 4170 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4171 for (const Expr *E : Data.FirstprivateVars) { 4172 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4173 Privates.emplace_back( 4174 C.getDeclAlign(VD), 4175 PrivateHelpersTy( 4176 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4177 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4178 ++I; 4179 ++IElemInitRef; 4180 } 4181 I = Data.LastprivateCopies.begin(); 4182 for (const Expr *E : Data.LastprivateVars) { 4183 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4184 Privates.emplace_back( 4185 C.getDeclAlign(VD), 4186 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4187 /*PrivateElemInit=*/nullptr)); 4188 ++I; 4189 } 4190 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 4191 return L.first > R.first; 4192 }); 4193 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4194 // Build type kmp_routine_entry_t (if not built yet). 4195 emitKmpRoutineEntryT(KmpInt32Ty); 4196 // Build type kmp_task_t (if not built yet). 4197 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4198 if (SavedKmpTaskloopTQTy.isNull()) { 4199 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4200 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4201 } 4202 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4203 } else { 4204 assert((D.getDirectiveKind() == OMPD_task || 4205 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4206 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4207 "Expected taskloop, task or target directive"); 4208 if (SavedKmpTaskTQTy.isNull()) { 4209 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4210 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4211 } 4212 KmpTaskTQTy = SavedKmpTaskTQTy; 4213 } 4214 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4215 // Build particular struct kmp_task_t for the given task. 4216 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4217 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4218 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4219 QualType KmpTaskTWithPrivatesPtrQTy = 4220 C.getPointerType(KmpTaskTWithPrivatesQTy); 4221 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4222 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4223 KmpTaskTWithPrivatesTy->getPointerTo(); 4224 llvm::Value *KmpTaskTWithPrivatesTySize = 4225 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4226 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4227 4228 // Emit initial values for private copies (if any). 
4229 llvm::Value *TaskPrivatesMap = nullptr; 4230 llvm::Type *TaskPrivatesMapTy = 4231 std::next(TaskFunction->arg_begin(), 3)->getType(); 4232 if (!Privates.empty()) { 4233 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4234 TaskPrivatesMap = emitTaskPrivateMappingFunction( 4235 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 4236 FI->getType(), Privates); 4237 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4238 TaskPrivatesMap, TaskPrivatesMapTy); 4239 } else { 4240 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4241 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4242 } 4243 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4244 // kmp_task_t *tt); 4245 llvm::Function *TaskEntry = emitProxyTaskFunction( 4246 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4247 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4248 TaskPrivatesMap); 4249 4250 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4251 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4252 // kmp_routine_entry_t *task_entry); 4253 // Task flags. Format is taken from 4254 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 4255 // description of kmp_tasking_flags struct. 4256 enum { 4257 TiedFlag = 0x1, 4258 FinalFlag = 0x2, 4259 DestructorsFlag = 0x8, 4260 PriorityFlag = 0x20, 4261 DetachableFlag = 0x40, 4262 }; 4263 unsigned Flags = Data.Tied ? TiedFlag : 0; 4264 bool NeedsCleanup = false; 4265 if (!Privates.empty()) { 4266 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 4267 if (NeedsCleanup) 4268 Flags = Flags | DestructorsFlag; 4269 } 4270 if (Data.Priority.getInt()) 4271 Flags = Flags | PriorityFlag; 4272 if (D.hasClausesOfKind<OMPDetachClause>()) 4273 Flags = Flags | DetachableFlag; 4274 llvm::Value *TaskFlags = 4275 Data.Final.getPointer() 4276 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 4277 CGF.Builder.getInt32(FinalFlag), 4278 CGF.Builder.getInt32(/*C=*/0)) 4279 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4280 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4281 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4282 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4283 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4284 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4285 TaskEntry, KmpRoutineEntryPtrTy)}; 4286 llvm::Value *NewTask; 4287 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4288 // Check if we have any device clause associated with the directive. 4289 const Expr *Device = nullptr; 4290 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4291 Device = C->getDevice(); 4292 // Emit device ID if any otherwise use default value. 4293 llvm::Value *DeviceID; 4294 if (Device) 4295 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4296 CGF.Int64Ty, /*isSigned=*/true); 4297 else 4298 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4299 AllocArgs.push_back(DeviceID); 4300 NewTask = CGF.EmitRuntimeCall( 4301 OMPBuilder.getOrCreateRuntimeFunction( 4302 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4303 AllocArgs); 4304 } else { 4305 NewTask = 4306 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4307 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4308 AllocArgs); 4309 } 4310 // Emit detach clause initialization. 
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  //                                                       task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Field ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
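      // The temporary declared below plays the role of a C99 VLA (a sketch
      // of the intent, not the exact emitted IR):
      //   kmp_task_affinity_info_t .affs.arr[<total>];
      // where <total> is only known at run time.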
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill the array with the elements that have no iterator modifier.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
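    // Illustrative example (hedged; 'a' and 'n' are placeholder names):
    //   #pragma omp task affinity(iterator(i = 0:n) : a[i])
    // expands to n affinity entries, one per iteration, so the array below is
    // filled through a runtime counter rather than a constant index.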
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now until the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
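  // Hedged sketch of what the initialization amounts to: for
  //   #pragma omp task firstprivate(x)
  // the value of 'x' is copied from the shareds area into the privates part
  // of the task descriptor, roughly
  //   task->privates.x = shareds->x;
  // while plain private copies are default-initialized.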
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Field ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
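/// The implicit record built here is intended to mirror the runtime's
/// dependence descriptor (a sketch, assuming the layout described for
/// kmp_depend_info in the runtime's kmp.h):
/// \code
/// struct kmp_depend_info {
///   intptr_t base_addr; // address of the dependent object
///   size_t len;         // size of the dependent object in bytes
///   uint8_t flags;      // RTLDependenceKindTy encoding (bool-width int)
/// };
/// \endcode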
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
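      // Depobj layout assumed throughout (a hedged sketch): the handle stored
      // in a depobj variable points one element past an inline header, i.e.
      //   [count][dep 0][dep 1]...[dep N-1]
      //           ^-- handle points here
      // so the element count lives in the base_addr field at index -1.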
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcpy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += numDeps;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate the number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
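    // Illustrative example (hedged; 'a' and 'n' are placeholder names): for
    //   #pragma omp task depend(iterator(i = 0:n), in : a[i])
    // the clause contributes one entry per iteration, so the total number of
    // entries is a product of the iterator trip counts and is only known at
    // run time.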
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    OpaqueValueExpr OVE(Loc,
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
                        VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}

Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // The array has to be allocated in dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
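  // Hedged sketch of the resulting allocation: for
  //   omp_depend_t o;
  //   #pragma omp depobj(o) depend(in : x)
  // one header element plus one dependence entry is allocated via
  // __kmpc_alloc, roughly
  //   [deps[0]: base_addr = 1 (count)][deps[1]: {&x, sizeof(x), DepIn}]
  // and 'o' receives a pointer to deps[1].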
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}

void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
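  // In C-like pseudo-code, the emitted control flow amounts to (a sketch,
  // not emitted verbatim):
  //   kmp_depend_info *El = Begin;
  //   do {
  //     El->flags = <NewDepKind>;
  //     ++El;
  //   } while (El != End);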
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
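  // Overview of the lowering below (a sketch; 'work()' is a placeholder):
  //   #pragma omp task if(cond) depend(in : x)
  //   work();
  // becomes, roughly,
  //   task = __kmpc_omp_task_alloc(...);
  //   if (cond) {
  //     __kmpc_omp_task_with_deps(loc, gtid, task, ndeps, deps, 0, NULL);
  //   } else {
  //     __kmpc_omp_wait_deps(loc, gtid, ndeps, deps, 0, NULL);
  //     __kmpc_omp_task_begin_if0(loc, gtid, task);
  //     .omp_task_entry.(gtid, task); // runs work() immediately, undeferred
  //     __kmpc_omp_task_complete_if0(loc, gtid, task);
  //   }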
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
  // dependence list is not empty.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the
                         // compiler.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
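/// For instance (an illustrative sketch, not tied to a particular test case),
/// given a user-defined reduction such as
/// \code
/// #pragma omp declare reduction(merge : std::vector<int> :
///     omp_out.insert(omp_out.end(), omp_in.begin(), omp_in.end()))
/// \endcode
/// the reduction op is not a plain binary operator, so it is emitted as a
/// call to the combiner function generated for the declare-reduction decl.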
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // The following code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //   *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //   ...
  //   *(Type<n>-1*)lhs[<n>-1] =
  //       ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //                               *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //                                RedList, reduce_func, &<lock>)) {
  // case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  // case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //   break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the following code is generated:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //    RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res).
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //    ...
  //    <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //    ...
  //    __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //    break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //    ...
  //    Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //    ...
  //    break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
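          // E.g. (a sketch): for 'reduction(min : x)' the update has the form
          //   x = x < e ? x : e;
          // so inspecting the condition 'x < e' yields both the comparison
          // kind and the operand needed to form the atomic min/max update.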
5697 RHSExpr = ACO->getCond(); 5698 } 5699 if (const auto *BORHS = 5700 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5701 EExpr = BORHS->getRHS(); 5702 BO = BORHS->getOpcode(); 5703 } 5704 } 5705 if (XExpr) { 5706 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5707 auto &&AtomicRedGen = [BO, VD, 5708 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5709 const Expr *EExpr, const Expr *UpExpr) { 5710 LValue X = CGF.EmitLValue(XExpr); 5711 RValue E; 5712 if (EExpr) 5713 E = CGF.EmitAnyExpr(EExpr); 5714 CGF.EmitOMPAtomicSimpleUpdateExpr( 5715 X, E, BO, /*IsXLHSInRHSPart=*/true, 5716 llvm::AtomicOrdering::Monotonic, Loc, 5717 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5718 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5719 PrivateScope.addPrivate( 5720 VD, [&CGF, VD, XRValue, Loc]() { 5721 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5722 CGF.emitOMPSimpleStore( 5723 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5724 VD->getType().getNonReferenceType(), Loc); 5725 return LHSTemp; 5726 }); 5727 (void)PrivateScope.Privatize(); 5728 return CGF.EmitAnyExpr(UpExpr); 5729 }); 5730 }; 5731 if ((*IPriv)->getType()->isArrayType()) { 5732 // Emit atomic reduction for array section. 5733 const auto *RHSVar = 5734 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5735 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5736 AtomicRedGen, XExpr, EExpr, UpExpr); 5737 } else { 5738 // Emit atomic reduction for array subscript or single variable. 5739 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5740 } 5741 } else { 5742 // Emit as a critical region. 5743 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5744 const Expr *, const Expr *) { 5745 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5746 std::string Name = RT.getName({"atomic_reduction"}); 5747 RT.emitCriticalRegion( 5748 CGF, Name, 5749 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5750 Action.Enter(CGF); 5751 emitReductionCombiner(CGF, E); 5752 }, 5753 Loc); 5754 }; 5755 if ((*IPriv)->getType()->isArrayType()) { 5756 const auto *LHSVar = 5757 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5758 const auto *RHSVar = 5759 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5760 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5761 CritRedGen); 5762 } else { 5763 CritRedGen(CGF, nullptr, nullptr, nullptr); 5764 } 5765 } 5766 ++ILHS; 5767 ++IRHS; 5768 ++IPriv; 5769 } 5770 }; 5771 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5772 if (!WithNowait) { 5773 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5774 llvm::Value *EndArgs[] = { 5775 IdentTLoc, // ident_t *<loc> 5776 ThreadId, // i32 <gtid> 5777 Lock // kmp_critical_name *&<lock> 5778 }; 5779 CommonActionTy Action(nullptr, llvm::None, 5780 OMPBuilder.getOrCreateRuntimeFunction( 5781 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5782 EndArgs); 5783 AtomicRCG.setAction(Action); 5784 AtomicRCG(CGF); 5785 } else { 5786 AtomicRCG(CGF); 5787 } 5788 5789 CGF.EmitBranch(DefaultBB); 5790 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5791 } 5792 5793 /// Generates unique name for artificial threadprivate variables. 5794 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5795 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5796 const Expr *Ref) { 5797 SmallString<256> Buffer; 5798 llvm::raw_svector_ostream Out(Buffer); 5799 const clang::DeclRefExpr *DE; 5800 const VarDecl *D = ::getBaseDecl(Ref, DE); 5801 if (!D) 5802 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5803 D = D->getCanonicalDecl(); 5804 std::string Name = CGM.getOpenMPRuntime().getName( 5805 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5806 Out << Prefix << Name << "_" 5807 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5808 return std::string(Out.str()); 5809 } 5810 5811 /// Emits reduction initializer function: 5812 /// \code 5813 /// void @.red_init(void* %arg, void* %orig) { 5814 /// %0 = bitcast void* %arg to <type>* 5815 /// store <type> <init>, <type>* %0 5816 /// ret void 5817 /// } 5818 /// \endcode 5819 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5820 SourceLocation Loc, 5821 ReductionCodeGen &RCG, unsigned N) { 5822 ASTContext &C = CGM.getContext(); 5823 QualType VoidPtrTy = C.VoidPtrTy; 5824 VoidPtrTy.addRestrict(); 5825 FunctionArgList Args; 5826 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5827 ImplicitParamDecl::Other); 5828 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5829 ImplicitParamDecl::Other); 5830 Args.emplace_back(&Param); 5831 Args.emplace_back(&ParamOrig); 5832 const auto &FnInfo = 5833 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5834 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5835 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5836 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5837 Name, &CGM.getModule()); 5838 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5839 Fn->setDoesNotRecurse(); 5840 CodeGenFunction CGF(CGM); 5841 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5842 Address PrivateAddr = CGF.EmitLoadOfPointer( 5843 CGF.GetAddrOfLocalVar(&Param), 5844 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5845 llvm::Value *Size = nullptr; 5846 // If the size of the reduction item is non-constant, load it from global 5847 // threadprivate variable. 
5848 if (RCG.getSizes(N).second) {
5849 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5850 CGF, CGM.getContext().getSizeType(),
5851 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5852 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5853 CGM.getContext().getSizeType(), Loc);
5854 }
5855 RCG.emitAggregateType(CGF, N, Size);
5856 LValue OrigLVal;
5857 // If the initializer uses the initializer from a declare reduction
5858 // construct, emit a pointer to the address of the original reduction item
5859 // (required by the reduction initializer).
5860 if (RCG.usesReductionInitializer(N)) {
5861 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5862 SharedAddr = CGF.EmitLoadOfPointer(
5863 SharedAddr,
5864 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5865 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5866 } else {
5867 OrigLVal = CGF.MakeNaturalAlignAddrLValue(
5868 llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5869 CGM.getContext().VoidPtrTy);
5870 }
5871 // Emit the initializer:
5872 // %0 = bitcast void* %arg to <type>*
5873 // store <type> <init>, <type>* %0
5874 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
5875 [](CodeGenFunction &) { return false; });
5876 CGF.FinishFunction();
5877 return Fn;
5878 }
5879
5880 /// Emits reduction combiner function:
5881 /// \code
5882 /// void @.red_comb(void* %arg0, void* %arg1) {
5883 /// %lhs = bitcast void* %arg0 to <type>*
5884 /// %rhs = bitcast void* %arg1 to <type>*
5885 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5886 /// store <type> %2, <type>* %lhs
5887 /// ret void
5888 /// }
5889 /// \endcode
5890 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5891 SourceLocation Loc,
5892 ReductionCodeGen &RCG, unsigned N,
5893 const Expr *ReductionOp,
5894 const Expr *LHS, const Expr *RHS,
5895 const Expr *PrivateRef) {
5896 ASTContext &C = CGM.getContext();
5897 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5898 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5899 FunctionArgList Args;
5900 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5901 C.VoidPtrTy, ImplicitParamDecl::Other);
5902 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5903 ImplicitParamDecl::Other);
5904 Args.emplace_back(&ParamInOut);
5905 Args.emplace_back(&ParamIn);
5906 const auto &FnInfo =
5907 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5908 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5909 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5910 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5911 Name, &CGM.getModule());
5912 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5913 Fn->setDoesNotRecurse();
5914 CodeGenFunction CGF(CGM);
5915 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5916 llvm::Value *Size = nullptr;
5917 // If the size of the reduction item is non-constant, load it from global
5918 // threadprivate variable.
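// (Note: this is the value stored by emitTaskReductionFixups, which stashes
// the dynamic size in a threadprivate global under the same "reduction_size"
// unique name.)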
5919 if (RCG.getSizes(N).second) { 5920 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5921 CGF, CGM.getContext().getSizeType(), 5922 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5923 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5924 CGM.getContext().getSizeType(), Loc); 5925 } 5926 RCG.emitAggregateType(CGF, N, Size); 5927 // Remap lhs and rhs variables to the addresses of the function arguments. 5928 // %lhs = bitcast void* %arg0 to <type>* 5929 // %rhs = bitcast void* %arg1 to <type>* 5930 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5931 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 5932 // Pull out the pointer to the variable. 5933 Address PtrAddr = CGF.EmitLoadOfPointer( 5934 CGF.GetAddrOfLocalVar(&ParamInOut), 5935 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5936 return CGF.Builder.CreateElementBitCast( 5937 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5938 }); 5939 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 5940 // Pull out the pointer to the variable. 5941 Address PtrAddr = CGF.EmitLoadOfPointer( 5942 CGF.GetAddrOfLocalVar(&ParamIn), 5943 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5944 return CGF.Builder.CreateElementBitCast( 5945 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5946 }); 5947 PrivateScope.Privatize(); 5948 // Emit the combiner body: 5949 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5950 // store <type> %2, <type>* %lhs 5951 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5952 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5953 cast<DeclRefExpr>(RHS)); 5954 CGF.FinishFunction(); 5955 return Fn; 5956 } 5957 5958 /// Emits reduction finalizer function: 5959 /// \code 5960 /// void @.red_fini(void* %arg) { 5961 /// %0 = bitcast void* %arg to <type>* 5962 /// <destroy>(<type>* %0) 5963 /// ret void 5964 /// } 5965 /// \endcode 5966 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5967 SourceLocation Loc, 5968 ReductionCodeGen &RCG, unsigned N) { 5969 if (!RCG.needCleanups(N)) 5970 return nullptr; 5971 ASTContext &C = CGM.getContext(); 5972 FunctionArgList Args; 5973 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5974 ImplicitParamDecl::Other); 5975 Args.emplace_back(&Param); 5976 const auto &FnInfo = 5977 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5978 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5979 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5980 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5981 Name, &CGM.getModule()); 5982 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5983 Fn->setDoesNotRecurse(); 5984 CodeGenFunction CGF(CGM); 5985 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5986 Address PrivateAddr = CGF.EmitLoadOfPointer( 5987 CGF.GetAddrOfLocalVar(&Param), 5988 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5989 llvm::Value *Size = nullptr; 5990 // If the size of the reduction item is non-constant, load it from global 5991 // threadprivate variable. 
5992 if (RCG.getSizes(N).second) { 5993 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5994 CGF, CGM.getContext().getSizeType(), 5995 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5996 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5997 CGM.getContext().getSizeType(), Loc); 5998 } 5999 RCG.emitAggregateType(CGF, N, Size); 6000 // Emit the finalizer body: 6001 // <destroy>(<type>* %0) 6002 RCG.emitCleanups(CGF, N, PrivateAddr); 6003 CGF.FinishFunction(Loc); 6004 return Fn; 6005 } 6006 6007 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6008 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6009 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6010 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6011 return nullptr; 6012 6013 // Build typedef struct: 6014 // kmp_taskred_input { 6015 // void *reduce_shar; // shared reduction item 6016 // void *reduce_orig; // original reduction item used for initialization 6017 // size_t reduce_size; // size of data item 6018 // void *reduce_init; // data initialization routine 6019 // void *reduce_fini; // data finalization routine 6020 // void *reduce_comb; // data combiner routine 6021 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6022 // } kmp_taskred_input_t; 6023 ASTContext &C = CGM.getContext(); 6024 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6025 RD->startDefinition(); 6026 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6027 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6028 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6029 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6030 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6031 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6032 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6033 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6034 RD->completeDefinition(); 6035 QualType RDType = C.getRecordType(RD); 6036 unsigned Size = Data.ReductionVars.size(); 6037 llvm::APInt ArraySize(/*numBits=*/64, Size); 6038 QualType ArrayRDType = C.getConstantArrayType( 6039 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6040 // kmp_task_red_input_t .rd_input.[Size]; 6041 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6042 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6043 Data.ReductionCopies, Data.ReductionOps); 6044 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6045 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6046 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6047 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6048 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6049 TaskRedInput.getPointer(), Idxs, 6050 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6051 ".rd_input.gep."); 6052 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6053 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6054 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6055 RCG.emitSharedOrigLValue(CGF, Cnt); 6056 llvm::Value *CastedShared = 6057 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6058 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6059 // ElemLVal.reduce_orig = &Origs[Cnt]; 6060 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6061 llvm::Value *CastedOrig = 6062 
CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6063 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6064 RCG.emitAggregateType(CGF, Cnt);
6065 llvm::Value *SizeValInChars;
6066 llvm::Value *SizeVal;
6067 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6068 // We use delayed creation/initialization for VLAs and array sections. It is
6069 // required because the runtime does not provide a way to pass the sizes of
6070 // VLAs/array sections to the initializer/combiner/finalizer functions.
6071 // Instead, threadprivate global variables are used to store these values and
6072 // pass them to those functions.
6073 bool DelayedCreation = !!SizeVal;
6074 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6075 /*isSigned=*/false);
6076 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6077 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6078 // ElemLVal.reduce_init = init;
6079 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6080 llvm::Value *InitAddr =
6081 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6082 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6083 // ElemLVal.reduce_fini = fini;
6084 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6085 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6086 llvm::Value *FiniAddr = Fini
6087 ? CGF.EmitCastToVoidPtr(Fini)
6088 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6089 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6090 // ElemLVal.reduce_comb = comb;
6091 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6092 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6093 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6094 RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6095 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6096 // ElemLVal.flags = DelayedCreation ? 1 : 0;
6097 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6098 if (DelayedCreation) {
6099 CGF.EmitStoreOfScalar(
6100 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6101 FlagsLVal);
6102 } else
6103 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6104 FlagsLVal.getType());
6105 }
6106 if (Data.IsReductionWithTaskMod) {
6107 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6108 // is_ws, int num, void *data);
6109 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6110 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6111 CGM.IntTy, /*isSigned=*/true);
6112 llvm::Value *Args[] = {
6113 IdentTLoc, GTid,
6114 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6115 /*isSigned=*/true),
6116 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6117 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6118 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6119 return CGF.EmitRuntimeCall(
6120 OMPBuilder.getOrCreateRuntimeFunction(
6121 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6122 Args);
6123 }
6124 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6125 llvm::Value *Args[] = {
6126 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6127 /*isSigned=*/true),
6128 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6129 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6130 CGM.VoidPtrTy)};
6131 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6132 CGM.getModule(), OMPRTL___kmpc_taskred_init),
6133 Args);
6134 }
6135
6136 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6137 SourceLocation Loc,
6138 bool IsWorksharingReduction) {
6139 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
6140 // int is_ws);
6141 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6142 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6143 CGM.IntTy, /*isSigned=*/true);
6144 llvm::Value *Args[] = {IdentTLoc, GTid,
6145 llvm::ConstantInt::get(CGM.IntTy,
6146 IsWorksharingReduction ? 1 : 0,
6147 /*isSigned=*/true)};
6148 (void)CGF.EmitRuntimeCall(
6149 OMPBuilder.getOrCreateRuntimeFunction(
6150 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6151 Args);
6152 }
6153
6154 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6155 SourceLocation Loc,
6156 ReductionCodeGen &RCG,
6157 unsigned N) {
6158 auto Sizes = RCG.getSizes(N);
6159 // Emit the threadprivate global variable if the size of the type is
6160 // non-constant (Sizes.second != nullptr).
6161 if (Sizes.second) {
6162 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6163 /*isSigned=*/false);
6164 Address SizeAddr = getAddrOfArtificialThreadPrivate(
6165 CGF, CGM.getContext().getSizeType(),
6166 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6167 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6168 }
6169 }
6170
6171 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6172 SourceLocation Loc,
6173 llvm::Value *ReductionsPtr,
6174 LValue SharedLVal) {
6175 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6176 // *d);
6177 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6178 CGM.IntTy,
6179 /*isSigned=*/true),
6180 ReductionsPtr,
6181 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6182 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6183 return Address(
6184 CGF.EmitRuntimeCall(
6185 OMPBuilder.getOrCreateRuntimeFunction(
6186 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6187 Args),
6188 SharedLVal.getAlignment());
6189 }
6190
6191 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6192 SourceLocation Loc) {
6193 if (!CGF.HaveInsertPoint())
6194 return;
6195
6196 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6197 OMPBuilder.CreateTaskwait(CGF.Builder);
6198 } else {
6199 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6200 // global_tid);
6201 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6202 // Ignore return result until untied tasks are supported.
6203 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6204 CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6205 Args);
6206 }
6207
6208 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6209 Region->emitUntiedSwitch(CGF);
6210 }
6211
6212 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6213 OpenMPDirectiveKind InnerKind,
6214 const RegionCodeGenTy &CodeGen,
6215 bool HasCancel) {
6216 if (!CGF.HaveInsertPoint())
6217 return;
6218 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6219 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6220 }
6221
6222 namespace {
6223 enum RTCancelKind {
6224 CancelNoreq = 0,
6225 CancelParallel = 1,
6226 CancelLoop = 2,
6227 CancelSections = 3,
6228 CancelTaskgroup = 4
6229 };
6230 } // anonymous namespace
6231
6232 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6233 RTCancelKind CancelKind = CancelNoreq;
6234 if (CancelRegion == OMPD_parallel)
6235 CancelKind = CancelParallel;
6236 else if (CancelRegion == OMPD_for)
6237 CancelKind = CancelLoop;
6238 else if (CancelRegion == OMPD_sections)
6239 CancelKind = CancelSections;
6240 else {
6241 assert(CancelRegion == OMPD_taskgroup);
6242 CancelKind = CancelTaskgroup;
6243 }
6244 return CancelKind;
6245 }
6246
6247 void CGOpenMPRuntime::emitCancellationPointCall(
6248 CodeGenFunction &CGF, SourceLocation Loc,
6249 OpenMPDirectiveKind CancelRegion) {
6250 if (!CGF.HaveInsertPoint())
6251 return;
6252 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6253 // global_tid, kmp_int32 cncl_kind);
6254 if (auto *OMPRegionInfo =
6255 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6256 // For 'cancellation point taskgroup', the task region info may not have a
6257 // cancel. This may instead happen in another adjacent task.
6258 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6259 llvm::Value *Args[] = {
6260 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6261 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6262 // Check the result of the call; if cancellation was activated, exit the construct.
6263 llvm::Value *Result = CGF.EmitRuntimeCall(
6264 OMPBuilder.getOrCreateRuntimeFunction(
6265 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6266 Args);
6267 // if (__kmpc_cancellationpoint()) {
6268 // exit from construct;
6269 // }
6270 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6271 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6272 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6273 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6274 CGF.EmitBlock(ExitBB);
6275 // exit from construct;
6276 CodeGenFunction::JumpDest CancelDest =
6277 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6278 CGF.EmitBranchThroughCleanup(CancelDest);
6279 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6280 }
6281 }
6282 }
6283
6284 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6285 const Expr *IfCond,
6286 OpenMPDirectiveKind CancelRegion) {
6287 if (!CGF.HaveInsertPoint())
6288 return;
6289 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6290 // kmp_int32 cncl_kind);
6291 auto &M = CGM.getModule();
6292 if (auto *OMPRegionInfo =
6293 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6294 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6295 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6296 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6297 llvm::Value *Args[] = {
6298 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6299 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6300 // Check the result of the call; if cancellation was activated, exit the construct.
6301 llvm::Value *Result = CGF.EmitRuntimeCall(
6302 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6303 // if (__kmpc_cancel()) {
6304 // exit from construct;
6305 // }
6306 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6307 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6308 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6309 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6310 CGF.EmitBlock(ExitBB);
6311 // exit from construct;
6312 CodeGenFunction::JumpDest CancelDest =
6313 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6314 CGF.EmitBranchThroughCleanup(CancelDest);
6315 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6316 };
6317 if (IfCond) {
6318 emitIfClause(CGF, IfCond, ThenGen,
6319 [](CodeGenFunction &, PrePostActionTy &) {});
6320 } else {
6321 RegionCodeGenTy ThenRCG(ThenGen);
6322 ThenRCG(CGF);
6323 }
6324 }
6325 }
6326
6327 namespace {
6328 /// Cleanup action for uses_allocators support.
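/// On entry to the region, each allocator listed with traits is initialized
/// via __kmpc_init_allocator, and on exit it is destroyed via
/// __kmpc_destroy_allocator. A sketch of the source form this handles
/// (allocator and traits names are made up for illustration):
/// \code
/// #pragma omp target uses_allocators(my_alloc(my_traits))
/// \endcode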
6329 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6330 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6331 6332 public: 6333 OMPUsesAllocatorsActionTy( 6334 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6335 : Allocators(Allocators) {} 6336 void Enter(CodeGenFunction &CGF) override { 6337 if (!CGF.HaveInsertPoint()) 6338 return; 6339 for (const auto &AllocatorData : Allocators) { 6340 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6341 CGF, AllocatorData.first, AllocatorData.second); 6342 } 6343 } 6344 void Exit(CodeGenFunction &CGF) override { 6345 if (!CGF.HaveInsertPoint()) 6346 return; 6347 for (const auto &AllocatorData : Allocators) { 6348 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6349 AllocatorData.first); 6350 } 6351 } 6352 }; 6353 } // namespace 6354 6355 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6356 const OMPExecutableDirective &D, StringRef ParentName, 6357 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6358 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6359 assert(!ParentName.empty() && "Invalid target region parent name!"); 6360 HasEmittedTargetRegion = true; 6361 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6362 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6363 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6364 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6365 if (!D.AllocatorTraits) 6366 continue; 6367 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6368 } 6369 } 6370 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6371 CodeGen.setAction(UsesAllocatorAction); 6372 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6373 IsOffloadEntry, CodeGen); 6374 } 6375 6376 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6377 const Expr *Allocator, 6378 const Expr *AllocatorTraits) { 6379 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6380 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6381 // Use default memspace handle. 6382 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6383 llvm::Value *NumTraits = llvm::ConstantInt::get( 6384 CGF.IntTy, cast<ConstantArrayType>( 6385 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6386 ->getSize() 6387 .getLimitedValue()); 6388 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6389 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6390 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6391 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6392 AllocatorTraitsLVal.getBaseInfo(), 6393 AllocatorTraitsLVal.getTBAAInfo()); 6394 llvm::Value *Traits = 6395 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6396 6397 llvm::Value *AllocatorVal = 6398 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6399 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6400 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6401 // Store to allocator. 
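// That is, emit the allocator variable declared by the clause and store the
// handle returned by __kmpc_init_allocator into it.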
6402 CGF.EmitVarDecl(*cast<VarDecl>(
6403 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6404 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6405 AllocatorVal =
6406 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6407 Allocator->getType(), Allocator->getExprLoc());
6408 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6409 }
6410
6411 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6412 const Expr *Allocator) {
6413 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6414 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6415 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6416 llvm::Value *AllocatorVal =
6417 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6418 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6419 CGF.getContext().VoidPtrTy,
6420 Allocator->getExprLoc());
6421 (void)CGF.EmitRuntimeCall(
6422 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6423 OMPRTL___kmpc_destroy_allocator),
6424 {ThreadId, AllocatorVal});
6425 }
6426
6427 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6428 const OMPExecutableDirective &D, StringRef ParentName,
6429 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6430 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6431 // Create a unique name for the entry function using the source location
6432 // information of the current target region. The name will be something like:
6433 //
6434 // __omp_offloading_DD_FFFF_PP_lBB
6435 //
6436 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6437 // mangled name of the function that encloses the target region and BB is the
6438 // line number of the target region.
6439
6440 unsigned DeviceID;
6441 unsigned FileID;
6442 unsigned Line;
6443 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6444 Line);
6445 SmallString<64> EntryFnName;
6446 {
6447 llvm::raw_svector_ostream OS(EntryFnName);
6448 OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6449 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6450 }
6451
6452 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6453
6454 CodeGenFunction CGF(CGM, true);
6455 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6456 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6457
6458 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6459
6460 // If this target outlined function is not an offload entry, we don't need to
6461 // register it.
6462 if (!IsOffloadEntry)
6463 return;
6464
6465 // The target region ID is used by the runtime library to identify the current
6466 // target region, so it only has to be unique and not necessarily point to
6467 // anything. It could be the pointer to the outlined function that implements
6468 // the target region, but we aren't using that so that the compiler doesn't
6469 // need to keep that, and could therefore inline the host function if proven
6470 // worthwhile during optimization. On the other hand, if emitting code for the
6471 // device, the ID has to be the function address so that it can be retrieved
6472 // from the offloading entry and launched by the runtime library. We also mark
6473 // the outlined function to have external linkage in case we are emitting code
6474 // for the device, because these functions will be entry points to the device.
6475 6476 if (CGM.getLangOpts().OpenMPIsDevice) { 6477 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6478 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6479 OutlinedFn->setDSOLocal(false); 6480 } else { 6481 std::string Name = getName({EntryFnName, "region_id"}); 6482 OutlinedFnID = new llvm::GlobalVariable( 6483 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6484 llvm::GlobalValue::WeakAnyLinkage, 6485 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6486 } 6487 6488 // Register the information for the entry associated with this target region. 6489 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6490 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6491 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6492 } 6493 6494 /// Checks if the expression is constant or does not have non-trivial function 6495 /// calls. 6496 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6497 // We can skip constant expressions. 6498 // We can skip expressions with trivial calls or simple expressions. 6499 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6500 !E->hasNonTrivialCall(Ctx)) && 6501 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6502 } 6503 6504 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6505 const Stmt *Body) { 6506 const Stmt *Child = Body->IgnoreContainers(); 6507 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6508 Child = nullptr; 6509 for (const Stmt *S : C->body()) { 6510 if (const auto *E = dyn_cast<Expr>(S)) { 6511 if (isTrivial(Ctx, E)) 6512 continue; 6513 } 6514 // Some of the statements can be ignored. 6515 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6516 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6517 continue; 6518 // Analyze declarations. 6519 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6520 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6521 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6522 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6523 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6524 isa<UsingDirectiveDecl>(D) || 6525 isa<OMPDeclareReductionDecl>(D) || 6526 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6527 return true; 6528 const auto *VD = dyn_cast<VarDecl>(D); 6529 if (!VD) 6530 return false; 6531 return VD->isConstexpr() || 6532 ((VD->getType().isTrivialType(Ctx) || 6533 VD->getType()->isReferenceType()) && 6534 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6535 })) 6536 continue; 6537 } 6538 // Found multiple children - cannot get the one child only. 6539 if (Child) 6540 return nullptr; 6541 Child = S; 6542 } 6543 if (Child) 6544 Child = Child->IgnoreContainers(); 6545 } 6546 return Child; 6547 } 6548 6549 /// Emit the number of teams for a target directive. Inspect the num_teams 6550 /// clause associated with a teams construct combined or closely nested 6551 /// with the target directive. 6552 /// 6553 /// Emit a team of size one for directives such as 'target parallel' that 6554 /// have no associated teams construct. 6555 /// 6556 /// Otherwise, return nullptr. 
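/// A sketch of the rules implemented below:
/// \code
/// #pragma omp target teams num_teams(8) // emits i32 8
/// #pragma omp target parallel           // emits i32 1 (a team of size one)
/// #pragma omp target                    // inspects the nested construct
/// \endcode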
6557 static llvm::Value * 6558 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6559 const OMPExecutableDirective &D) { 6560 assert(!CGF.getLangOpts().OpenMPIsDevice && 6561 "Clauses associated with the teams directive expected to be emitted " 6562 "only for the host!"); 6563 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6564 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6565 "Expected target-based executable directive."); 6566 CGBuilderTy &Bld = CGF.Builder; 6567 switch (DirectiveKind) { 6568 case OMPD_target: { 6569 const auto *CS = D.getInnermostCapturedStmt(); 6570 const auto *Body = 6571 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6572 const Stmt *ChildStmt = 6573 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6574 if (const auto *NestedDir = 6575 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6576 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6577 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6578 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6579 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6580 const Expr *NumTeams = 6581 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6582 llvm::Value *NumTeamsVal = 6583 CGF.EmitScalarExpr(NumTeams, 6584 /*IgnoreResultAssign*/ true); 6585 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6586 /*isSigned=*/true); 6587 } 6588 return Bld.getInt32(0); 6589 } 6590 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6591 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6592 return Bld.getInt32(1); 6593 return Bld.getInt32(0); 6594 } 6595 return nullptr; 6596 } 6597 case OMPD_target_teams: 6598 case OMPD_target_teams_distribute: 6599 case OMPD_target_teams_distribute_simd: 6600 case OMPD_target_teams_distribute_parallel_for: 6601 case OMPD_target_teams_distribute_parallel_for_simd: { 6602 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6603 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6604 const Expr *NumTeams = 6605 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6606 llvm::Value *NumTeamsVal = 6607 CGF.EmitScalarExpr(NumTeams, 6608 /*IgnoreResultAssign*/ true); 6609 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6610 /*isSigned=*/true); 6611 } 6612 return Bld.getInt32(0); 6613 } 6614 case OMPD_target_parallel: 6615 case OMPD_target_parallel_for: 6616 case OMPD_target_parallel_for_simd: 6617 case OMPD_target_simd: 6618 return Bld.getInt32(1); 6619 case OMPD_parallel: 6620 case OMPD_for: 6621 case OMPD_parallel_for: 6622 case OMPD_parallel_master: 6623 case OMPD_parallel_sections: 6624 case OMPD_for_simd: 6625 case OMPD_parallel_for_simd: 6626 case OMPD_cancel: 6627 case OMPD_cancellation_point: 6628 case OMPD_ordered: 6629 case OMPD_threadprivate: 6630 case OMPD_allocate: 6631 case OMPD_task: 6632 case OMPD_simd: 6633 case OMPD_sections: 6634 case OMPD_section: 6635 case OMPD_single: 6636 case OMPD_master: 6637 case OMPD_critical: 6638 case OMPD_taskyield: 6639 case OMPD_barrier: 6640 case OMPD_taskwait: 6641 case OMPD_taskgroup: 6642 case OMPD_atomic: 6643 case OMPD_flush: 6644 case OMPD_depobj: 6645 case OMPD_scan: 6646 case OMPD_teams: 6647 case OMPD_target_data: 6648 case OMPD_target_exit_data: 6649 case OMPD_target_enter_data: 6650 case OMPD_distribute: 6651 case OMPD_distribute_simd: 6652 case OMPD_distribute_parallel_for: 6653 case OMPD_distribute_parallel_for_simd: 6654 case OMPD_teams_distribute: 6655 case OMPD_teams_distribute_simd: 6656 case OMPD_teams_distribute_parallel_for: 
6657 case OMPD_teams_distribute_parallel_for_simd:
6658 case OMPD_target_update:
6659 case OMPD_declare_simd:
6660 case OMPD_declare_variant:
6661 case OMPD_begin_declare_variant:
6662 case OMPD_end_declare_variant:
6663 case OMPD_declare_target:
6664 case OMPD_end_declare_target:
6665 case OMPD_declare_reduction:
6666 case OMPD_declare_mapper:
6667 case OMPD_taskloop:
6668 case OMPD_taskloop_simd:
6669 case OMPD_master_taskloop:
6670 case OMPD_master_taskloop_simd:
6671 case OMPD_parallel_master_taskloop:
6672 case OMPD_parallel_master_taskloop_simd:
6673 case OMPD_requires:
6674 case OMPD_unknown:
6675 break;
6676 default:
6677 break;
6678 }
6679 llvm_unreachable("Unexpected directive kind.");
6680 }
6681
6682 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6683 llvm::Value *DefaultThreadLimitVal) {
6684 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6685 CGF.getContext(), CS->getCapturedStmt());
6686 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6687 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6688 llvm::Value *NumThreads = nullptr;
6689 llvm::Value *CondVal = nullptr;
6690 // Handle the 'if' clause. If it is present, the number of threads is
6691 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6692 if (Dir->hasClausesOfKind<OMPIfClause>()) {
6693 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6694 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6695 const OMPIfClause *IfClause = nullptr;
6696 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6697 if (C->getNameModifier() == OMPD_unknown ||
6698 C->getNameModifier() == OMPD_parallel) {
6699 IfClause = C;
6700 break;
6701 }
6702 }
6703 if (IfClause) {
6704 const Expr *Cond = IfClause->getCondition();
6705 bool Result;
6706 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6707 if (!Result)
6708 return CGF.Builder.getInt32(1);
6709 } else {
6710 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6711 if (const auto *PreInit =
6712 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6713 for (const auto *I : PreInit->decls()) {
6714 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6715 CGF.EmitVarDecl(cast<VarDecl>(*I));
6716 } else {
6717 CodeGenFunction::AutoVarEmission Emission =
6718 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6719 CGF.EmitAutoVarCleanups(Emission);
6720 }
6721 }
6722 }
6723 CondVal = CGF.EvaluateExprAsBool(Cond);
6724 }
6725 }
6726 }
6727 // Check the value of the num_threads clause if the 'if' clause was not
6728 // specified or did not evaluate to false.
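// (E.g. for a nested '#pragma omp parallel if(c) num_threads(n)' this
// computes select(c, min(n, default thread limit), 1) when a default
// thread limit is given.)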
6729 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6730 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6731 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6732 const auto *NumThreadsClause = 6733 Dir->getSingleClause<OMPNumThreadsClause>(); 6734 CodeGenFunction::LexicalScope Scope( 6735 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6736 if (const auto *PreInit = 6737 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6738 for (const auto *I : PreInit->decls()) { 6739 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6740 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6741 } else { 6742 CodeGenFunction::AutoVarEmission Emission = 6743 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6744 CGF.EmitAutoVarCleanups(Emission); 6745 } 6746 } 6747 } 6748 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6749 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6750 /*isSigned=*/false); 6751 if (DefaultThreadLimitVal) 6752 NumThreads = CGF.Builder.CreateSelect( 6753 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6754 DefaultThreadLimitVal, NumThreads); 6755 } else { 6756 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6757 : CGF.Builder.getInt32(0); 6758 } 6759 // Process condition of the if clause. 6760 if (CondVal) { 6761 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6762 CGF.Builder.getInt32(1)); 6763 } 6764 return NumThreads; 6765 } 6766 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6767 return CGF.Builder.getInt32(1); 6768 return DefaultThreadLimitVal; 6769 } 6770 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6771 : CGF.Builder.getInt32(0); 6772 } 6773 6774 /// Emit the number of threads for a target directive. Inspect the 6775 /// thread_limit clause associated with a teams construct combined or closely 6776 /// nested with the target directive. 6777 /// 6778 /// Emit the num_threads clause for directives such as 'target parallel' that 6779 /// have no associated teams construct. 6780 /// 6781 /// Otherwise, return nullptr. 
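/// A sketch of the rules implemented below:
/// \code
/// #pragma omp target parallel num_threads(4) // emits i32 4
/// #pragma omp target teams thread_limit(16)  // emits i32 16, absent
///                                            // narrower nested clauses
/// #pragma omp target simd                    // emits i32 1
/// \endcode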
6782 static llvm::Value * 6783 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6784 const OMPExecutableDirective &D) { 6785 assert(!CGF.getLangOpts().OpenMPIsDevice && 6786 "Clauses associated with the teams directive expected to be emitted " 6787 "only for the host!"); 6788 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6789 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6790 "Expected target-based executable directive."); 6791 CGBuilderTy &Bld = CGF.Builder; 6792 llvm::Value *ThreadLimitVal = nullptr; 6793 llvm::Value *NumThreadsVal = nullptr; 6794 switch (DirectiveKind) { 6795 case OMPD_target: { 6796 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6797 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6798 return NumThreads; 6799 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6800 CGF.getContext(), CS->getCapturedStmt()); 6801 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6802 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6803 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6804 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6805 const auto *ThreadLimitClause = 6806 Dir->getSingleClause<OMPThreadLimitClause>(); 6807 CodeGenFunction::LexicalScope Scope( 6808 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6809 if (const auto *PreInit = 6810 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6811 for (const auto *I : PreInit->decls()) { 6812 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6813 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6814 } else { 6815 CodeGenFunction::AutoVarEmission Emission = 6816 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6817 CGF.EmitAutoVarCleanups(Emission); 6818 } 6819 } 6820 } 6821 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6822 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6823 ThreadLimitVal = 6824 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6825 } 6826 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6827 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6828 CS = Dir->getInnermostCapturedStmt(); 6829 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6830 CGF.getContext(), CS->getCapturedStmt()); 6831 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6832 } 6833 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6834 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6835 CS = Dir->getInnermostCapturedStmt(); 6836 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6837 return NumThreads; 6838 } 6839 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6840 return Bld.getInt32(1); 6841 } 6842 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0);
6843 }
6844 case OMPD_target_teams: {
6845 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6846 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6847 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6848 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6849 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6850 ThreadLimitVal =
6851 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6852 }
6853 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6854 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6855 return NumThreads;
6856 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6857 CGF.getContext(), CS->getCapturedStmt());
6858 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6859 if (Dir->getDirectiveKind() == OMPD_distribute) {
6860 CS = Dir->getInnermostCapturedStmt();
6861 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6862 return NumThreads;
6863 }
6864 }
6865 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6866 }
6867 case OMPD_target_teams_distribute:
6868 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6869 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6870 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6871 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6872 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6873 ThreadLimitVal =
6874 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6875 }
6876 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6877 case OMPD_target_parallel:
6878 case OMPD_target_parallel_for:
6879 case OMPD_target_parallel_for_simd:
6880 case OMPD_target_teams_distribute_parallel_for:
6881 case OMPD_target_teams_distribute_parallel_for_simd: {
6882 llvm::Value *CondVal = nullptr;
6883 // Handle the 'if' clause. If it is present, the number of threads is
6884 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6885 if (D.hasClausesOfKind<OMPIfClause>()) { 6886 const OMPIfClause *IfClause = nullptr; 6887 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6888 if (C->getNameModifier() == OMPD_unknown || 6889 C->getNameModifier() == OMPD_parallel) { 6890 IfClause = C; 6891 break; 6892 } 6893 } 6894 if (IfClause) { 6895 const Expr *Cond = IfClause->getCondition(); 6896 bool Result; 6897 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6898 if (!Result) 6899 return Bld.getInt32(1); 6900 } else { 6901 CodeGenFunction::RunCleanupsScope Scope(CGF); 6902 CondVal = CGF.EvaluateExprAsBool(Cond); 6903 } 6904 } 6905 } 6906 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6907 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6908 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6909 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6910 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6911 ThreadLimitVal = 6912 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6913 } 6914 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6915 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6916 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6917 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6918 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6919 NumThreadsVal = 6920 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6921 ThreadLimitVal = ThreadLimitVal 6922 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6923 ThreadLimitVal), 6924 NumThreadsVal, ThreadLimitVal) 6925 : NumThreadsVal; 6926 } 6927 if (!ThreadLimitVal) 6928 ThreadLimitVal = Bld.getInt32(0); 6929 if (CondVal) 6930 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6931 return ThreadLimitVal; 6932 } 6933 case OMPD_target_teams_distribute_simd: 6934 case OMPD_target_simd: 6935 return Bld.getInt32(1); 6936 case OMPD_parallel: 6937 case OMPD_for: 6938 case OMPD_parallel_for: 6939 case OMPD_parallel_master: 6940 case OMPD_parallel_sections: 6941 case OMPD_for_simd: 6942 case OMPD_parallel_for_simd: 6943 case OMPD_cancel: 6944 case OMPD_cancellation_point: 6945 case OMPD_ordered: 6946 case OMPD_threadprivate: 6947 case OMPD_allocate: 6948 case OMPD_task: 6949 case OMPD_simd: 6950 case OMPD_sections: 6951 case OMPD_section: 6952 case OMPD_single: 6953 case OMPD_master: 6954 case OMPD_critical: 6955 case OMPD_taskyield: 6956 case OMPD_barrier: 6957 case OMPD_taskwait: 6958 case OMPD_taskgroup: 6959 case OMPD_atomic: 6960 case OMPD_flush: 6961 case OMPD_depobj: 6962 case OMPD_scan: 6963 case OMPD_teams: 6964 case OMPD_target_data: 6965 case OMPD_target_exit_data: 6966 case OMPD_target_enter_data: 6967 case OMPD_distribute: 6968 case OMPD_distribute_simd: 6969 case OMPD_distribute_parallel_for: 6970 case OMPD_distribute_parallel_for_simd: 6971 case OMPD_teams_distribute: 6972 case OMPD_teams_distribute_simd: 6973 case OMPD_teams_distribute_parallel_for: 6974 case OMPD_teams_distribute_parallel_for_simd: 6975 case OMPD_target_update: 6976 case OMPD_declare_simd: 6977 case OMPD_declare_variant: 6978 case OMPD_begin_declare_variant: 6979 case OMPD_end_declare_variant: 6980 case OMPD_declare_target: 6981 case OMPD_end_declare_target: 6982 case OMPD_declare_reduction: 6983 case OMPD_declare_mapper: 6984 case OMPD_taskloop: 6985 case OMPD_taskloop_simd: 6986 case OMPD_master_taskloop: 6987 case OMPD_master_taskloop_simd: 6988 case OMPD_parallel_master_taskloop: 6989 case OMPD_parallel_master_taskloop_simd: 
6990 case OMPD_requires:
6991 case OMPD_unknown:
6992 break;
6993 default:
6994 break;
6995 }
6996 llvm_unreachable("Unsupported directive kind.");
6997 }
6998
6999 namespace {
7000 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7001
7002 // Utility to handle information from clauses associated with a given
7003 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7004 // It provides a convenient interface to obtain the information and to
7005 // generate code for it.
7006 class MappableExprsHandler {
7007 public:
7008 /// Values for bit flags used to specify the mapping type for
7009 /// offloading.
7010 enum OpenMPOffloadMappingFlags : uint64_t {
7011 /// No flags
7012 OMP_MAP_NONE = 0x0,
7013 /// Allocate memory on the device and move data from host to device.
7014 OMP_MAP_TO = 0x01,
7015 /// Allocate memory on the device and move data from device to host.
7016 OMP_MAP_FROM = 0x02,
7017 /// Always perform the requested mapping action on the element, even
7018 /// if it was already mapped before.
7019 OMP_MAP_ALWAYS = 0x04,
7020 /// Delete the element from the device environment, ignoring the
7021 /// current reference count associated with the element.
7022 OMP_MAP_DELETE = 0x08,
7023 /// The element being mapped is a pointer-pointee pair; both the
7024 /// pointer and the pointee should be mapped.
7025 OMP_MAP_PTR_AND_OBJ = 0x10,
7026 /// This flag signals that the base address of an entry should be
7027 /// passed to the target kernel as an argument.
7028 OMP_MAP_TARGET_PARAM = 0x20,
7029 /// Signal that the runtime library has to return the device pointer
7030 /// in the current position for the data being mapped. Used when we have the
7031 /// use_device_ptr or use_device_addr clause.
7032 OMP_MAP_RETURN_PARAM = 0x40,
7033 /// This flag signals that the reference being passed is a pointer to
7034 /// private data.
7035 OMP_MAP_PRIVATE = 0x80,
7036 /// Pass the element to the device by value.
7037 OMP_MAP_LITERAL = 0x100,
7038 /// Implicit map
7039 OMP_MAP_IMPLICIT = 0x200,
7040 /// Close is a hint to the runtime to allocate memory close to
7041 /// the target device.
7042 OMP_MAP_CLOSE = 0x400,
7043 /// The 16 MSBs of the flags indicate whether the entry is a member of some
7044 /// struct/class.
7045 OMP_MAP_MEMBER_OF = 0xffff000000000000,
7046 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7047 };
7048
7049 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7050 static unsigned getFlagMemberOffset() {
7051 unsigned Offset = 0;
7052 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7053 Remain = Remain >> 1)
7054 Offset++;
7055 return Offset;
7056 }
7057
7058 /// Class that associates information with a base pointer to be passed to the
7059 /// runtime library.
7060 class BasePointerInfo {
7061 /// The base pointer.
7062 llvm::Value *Ptr = nullptr;
7063 /// The base declaration that refers to this device pointer, or null if
7064 /// there is none.
7065 const ValueDecl *DevPtrDecl = nullptr;
7066
7067 public:
7068 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7069 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7070 llvm::Value *operator*() const { return Ptr; }
7071 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7072 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7073 };
7074
7075 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7076 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7077 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7078
7079 /// Map between a struct and its lowest & highest elements which have been
7080 /// mapped.
7081 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7082 /// HE(FieldIndex, Pointer)}
7083 struct StructRangeInfoTy {
7084 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7085 0, Address::invalid()};
7086 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7087 0, Address::invalid()};
7088 Address Base = Address::invalid();
7089 };
7090
7091 private:
7092 /// Map-related information for one mappable expression: its components, map type and modifiers, and whether a device pointer has to be returned.
7093 struct MapInfo {
7094 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7095 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7096 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7097 bool ReturnDevicePointer = false;
7098 bool IsImplicit = false;
7099 bool ForDeviceAddr = false;
7100
7101 MapInfo() = default;
7102 MapInfo(
7103 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7104 OpenMPMapClauseKind MapType,
7105 ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
7106 bool IsImplicit, bool ForDeviceAddr = false)
7107 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7108 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7109 ForDeviceAddr(ForDeviceAddr) {}
7110 };
7111
7112 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7113 /// member and there is no map information about it, then emission of that
7114 /// entry is deferred until the whole struct has been processed.
7115 struct DeferredDevicePtrEntryTy {
7116 const Expr *IE = nullptr;
7117 const ValueDecl *VD = nullptr;
7118 bool ForDeviceAddr = false;
7119
7120 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7121 bool ForDeviceAddr)
7122 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7123 };
7124
7125 /// The target directive from which the mappable clauses were extracted. It
7126 /// is either an executable directive or a user-defined mapper directive.
7127 llvm::PointerUnion<const OMPExecutableDirective *,
7128 const OMPDeclareMapperDecl *>
7129 CurDir;
7130
7131 /// Function the directive is being generated for.
7132 CodeGenFunction &CGF;
7133
7134 /// Set of all first private variables in the current directive.
7135 /// bool data is set to true if the variable is implicitly marked as
7136 /// firstprivate, false otherwise.
7137 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7138
7139 /// Map between device pointer declarations and their expression components.
7140 /// The key value for declarations in 'this' is null.
7141 llvm::DenseMap<
7142 const ValueDecl *,
7143 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7144 DevPointersMap;
7145
7146 llvm::Value *getExprTypeSize(const Expr *E) const {
7147 QualType ExprTy = E->getType().getCanonicalType();
7148
7149 // Calculate the size for an array shaping expression.
7150 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7151 llvm::Value *Size =
7152 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7153 for (const Expr *SE : OAE->getDimensions()) {
7154 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7155 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7156 CGF.getContext().getSizeType(),
7157 SE->getExprLoc());
7158 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7159 }
7160 return Size;
7161 }
7162
7163 // Reference types are ignored for mapping purposes.
7164 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7165 ExprTy = RefTy->getPointeeType().getCanonicalType();
7166
7167 // Given that an array section is considered a built-in type, we need to
7168 // do the calculation based on the length of the section instead of relying
7169 // on CGF.getTypeSize(E->getType()).
7170 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7171 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7172 OAE->getBase()->IgnoreParenImpCasts())
7173 .getCanonicalType();
7174
7175 // If there is no length associated with the expression and the lower
7176 // bound is not specified either, that means we are using the whole
7177 // length of the base.
7178 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7179 !OAE->getLowerBound())
7180 return CGF.getTypeSize(BaseTy);
7181
7182 llvm::Value *ElemSize;
7183 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7184 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7185 } else {
7186 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7187 assert(ATy && "Expecting array type if not a pointer type.");
7188 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7189 }
7190
7191 // If we don't have a length at this point, that is because we have an
7192 // array section with a single element.
7193 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7194 return ElemSize;
7195
7196 if (const Expr *LenExpr = OAE->getLength()) {
7197 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7198 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7199 CGF.getContext().getSizeType(),
7200 LenExpr->getExprLoc());
7201 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7202 }
7203 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7204 OAE->getLowerBound() && "expected array_section[lb:].");
7205 // Size = sizeof(base type) - lb * sizeof(element), clamped below at zero.
7206 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7207 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7208 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7209 CGF.getContext().getSizeType(),
7210 OAE->getLowerBound()->getExprLoc());
7211 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7212 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7213 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7214 LengthVal = CGF.Builder.CreateSelect(
7215 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7216 return LengthVal;
7217 }
7218 return CGF.getTypeSize(ExprTy);
7219 }
7220
7221 /// Return the corresponding bits for a given map clause modifier. Add
7222 /// a flag marking the map as a pointer if requested. Add a flag marking the
7223 /// map as the first one of a series of maps that relate to the same map
7224 /// expression.
7225 OpenMPOffloadMappingFlags getMapTypeBits(
7226 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7227 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7228 OpenMPOffloadMappingFlags Bits =
7229 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7230 switch (MapType) {
7231 case OMPC_MAP_alloc:
7232 case OMPC_MAP_release:
7233 // alloc and release are the default behavior in the runtime library, i.e.
7234 // if we don't pass any bits, alloc/release is what the runtime is
7235 // going to do. Therefore, we don't need to signal anything for these two
7236 // type modifiers.
7237 break;
7238 case OMPC_MAP_to:
7239 Bits |= OMP_MAP_TO;
7240 break;
7241 case OMPC_MAP_from:
7242 Bits |= OMP_MAP_FROM;
7243 break;
7244 case OMPC_MAP_tofrom:
7245 Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7246 break;
7247 case OMPC_MAP_delete:
7248 Bits |= OMP_MAP_DELETE;
7249 break;
7250 case OMPC_MAP_unknown:
7251 llvm_unreachable("Unexpected map type!");
7252 }
7253 if (AddPtrFlag)
7254 Bits |= OMP_MAP_PTR_AND_OBJ;
7255 if (AddIsTargetParamFlag)
7256 Bits |= OMP_MAP_TARGET_PARAM;
7257 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7258 != MapModifiers.end())
7259 Bits |= OMP_MAP_ALWAYS;
7260 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7261 != MapModifiers.end())
7262 Bits |= OMP_MAP_CLOSE;
7263 return Bits;
7264 }
7265
7266 /// Return true if the provided expression is a final array section. A
7267 /// final array section is one whose length can't be proven to be one.
7268 bool isFinalArraySectionExpression(const Expr *E) const {
7269 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7270
7271 // It is not an array section and therefore not a unity-size one.
7272 if (!OASE)
7273 return false;
7274
7275 // An array section with no colon always refers to a single element.
7276 if (OASE->getColonLocFirst().isInvalid())
7277 return false;
7278
7279 const Expr *Length = OASE->getLength();
7280
7281 // If we don't have a length we have to check if the array has size 1
7282 // for this dimension. Also, we should always expect a length if the
7283 // base type is a pointer.
7284 if (!Length) {
7285 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7286 OASE->getBase()->IgnoreParenImpCasts())
7287 .getCanonicalType();
7288 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7289 return ATy->getSize().getSExtValue() != 1;
7290 // If we don't have a constant dimension length, we have to consider
7291 // the current section as having any size, so it is not necessarily
7292 // unitary. If it happens to be unity size, that's the user's fault.
7293 return true;
7294 }
7295
7296 // Check if the length evaluates to 1.
7297 Expr::EvalResult Result;
7298 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7299 return true; // Can have more than size 1.
7300
7301 llvm::APSInt ConstLength = Result.Val.getInt();
7302 return ConstLength.getSExtValue() != 1;
7303 }
7304
7305 /// Generate the base pointers, section pointers, sizes and map type
7306 /// bits for the provided map type, map modifier, and expression components.
7307 /// \a IsFirstComponentList should be set to true if the provided set of
7308 /// components is the first associated with a capture.
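/// \a ForDeviceAddr is expected to be true when the components originate
/// from a use_device_addr clause. \a OverlappedElements, when non-empty,
/// lists the component lists that overlap the current one, so that only the
/// non-overlapped parts of the enclosing struct are copied.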
7309 void generateInfoForComponentList( 7310 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7311 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7312 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7313 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7314 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7315 bool IsImplicit, bool ForDeviceAddr = false, 7316 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7317 OverlappedElements = llvm::None) const { 7318 // The following summarizes what has to be generated for each map and the 7319 // types below. The generated information is expressed in this order: 7320 // base pointer, section pointer, size, flags 7321 // (to add to the ones that come from the map type and modifier). 7322 // 7323 // double d; 7324 // int i[100]; 7325 // float *p; 7326 // 7327 // struct S1 { 7328 // int i; 7329 // float f[50]; 7330 // } 7331 // struct S2 { 7332 // int i; 7333 // float f[50]; 7334 // S1 s; 7335 // double *p; 7336 // struct S2 *ps; 7337 // } 7338 // S2 s; 7339 // S2 *ps; 7340 // 7341 // map(d) 7342 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7343 // 7344 // map(i) 7345 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7346 // 7347 // map(i[1:23]) 7348 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7349 // 7350 // map(p) 7351 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7352 // 7353 // map(p[1:24]) 7354 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7355 // 7356 // map(s) 7357 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7358 // 7359 // map(s.i) 7360 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7361 // 7362 // map(s.s.f) 7363 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7364 // 7365 // map(s.p) 7366 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7367 // 7368 // map(to: s.p[:22]) 7369 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7370 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7371 // &(s.p), &(s.p[0]), 22*sizeof(double), 7372 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7373 // (*) alloc space for struct members, only this is a target parameter 7374 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7375 // optimizes this entry out, same in the examples below) 7376 // (***) map the pointee (map: to) 7377 // 7378 // map(s.ps) 7379 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7380 // 7381 // map(from: s.ps->s.i) 7382 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7383 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7384 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7385 // 7386 // map(to: s.ps->ps) 7387 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7388 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7389 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7390 // 7391 // map(s.ps->ps->ps) 7392 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7393 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7394 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7395 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7396 // 7397 // map(to: s.ps->ps->s.f[:22]) 7398 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7399 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7400 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7401 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7402 // 7403 // map(ps) 7404 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7405 // 7406 // map(ps->i) 7407 // 
ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7408 //
7409 // map(ps->s.f)
7410 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7411 //
7412 // map(from: ps->p)
7413 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7414 //
7415 // map(to: ps->p[:22])
7416 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7417 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7418 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7419 //
7420 // map(ps->ps)
7421 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7422 //
7423 // map(from: ps->ps->s.i)
7424 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7425 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7426 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7427 //
7428 // map(from: ps->ps->ps)
7429 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7430 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7431 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7432 //
7433 // map(ps->ps->ps->ps)
7434 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7435 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7436 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7437 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7438 //
7439 // map(to: ps->ps->ps->s.f[:22])
7440 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7441 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7442 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7443 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7444 //
7445 // map(to: s.f[:22]) map(from: s.p[:33])
7446 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7447 // sizeof(double*) (**), TARGET_PARAM
7448 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7449 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7450 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7451 // (**) allocate contiguous space needed to fit all mapped members even if
7452 // we allocate space for members not mapped (in this example,
7453 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7454 // them as well because they fall between &s.f[0] and &s.p)
7455 //
7456 // map(from: s.f[:22]) map(to: ps->p[:33])
7457 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7458 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7459 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7460 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7461 // (*) the struct this entry pertains to is the 2nd element in the list of
7462 // arguments, hence MEMBER_OF(2)
7463 //
7464 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7465 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7466 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7467 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7468 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7469 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7470 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7471 // (*) the struct this entry pertains to is the 4th element in the list
7472 // of arguments, hence MEMBER_OF(4)
7473
7474 // Track if the map information being generated is the first for a capture.
7475 bool IsCaptureFirstInfo = IsFirstComponentList;
7476 // When the variable is on a declare target link or in a 'to' clause with
7477 // unified memory, a reference is needed to hold the host/device address
7478 // of the variable.
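// In that case, BP is replaced below with the address of the
// compiler-generated reference returned by getAddrOfDeclareTargetVar(), and
// the resulting entry is not flagged TARGET_PARAM.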
7479 bool RequiresReference = false;
7480
7481 // Scan the components from the base to the complete expression.
7482 auto CI = Components.rbegin();
7483 auto CE = Components.rend();
7484 auto I = CI;
7485
7486 // Track if the map information being generated is the first for a list of
7487 // components.
7488 bool IsExpressionFirstInfo = true;
7489 Address BP = Address::invalid();
7490 const Expr *AssocExpr = I->getAssociatedExpression();
7491 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7492 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7493 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7494
7495 if (isa<MemberExpr>(AssocExpr)) {
7496 // The base is the 'this' pointer. The content of the pointer is going
7497 // to be the base of the field being mapped.
7498 BP = CGF.LoadCXXThisAddress();
7499 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7500 (OASE &&
7501 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7502 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7503 } else if (OAShE &&
7504 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7505 BP = Address(
7506 CGF.EmitScalarExpr(OAShE->getBase()),
7507 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7508 } else {
7509 // The base is the reference to the variable.
7510 // BP = &Var.
7511 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7512 if (const auto *VD =
7513 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7514 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7515 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7516 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7517 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7518 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7519 RequiresReference = true;
7520 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7521 }
7522 }
7523 }
7524
7525 // If the variable is a pointer and is being dereferenced (i.e. is not
7526 // the last component), the base has to be the pointer itself, not its
7527 // reference. References are ignored for mapping purposes.
7528 QualType Ty =
7529 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7530 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7531 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7532
7533 // We do not need to generate individual map information for the
7534 // pointer; it can be associated with the combined storage.
7535 ++I;
7536 }
7537 }
7538
7539 // Track whether a component of the list should be marked as MEMBER_OF some
7540 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7541 // in a component list should be marked as MEMBER_OF; all subsequent entries
7542 // do not belong to the base struct. E.g.
7543 // struct S2 s;
7544 // s.ps->ps->ps->f[:]
7545 // (1) (2) (3) (4)
7546 // ps(1) is a member pointer, ps(2) is the pointee of ps(1), so it is a
7547 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7548 // is the pointee of ps(2), which is not a member of struct s, so it should
7549 // not be marked as such (it is still PTR_AND_OBJ).
7550 // The variable is initialized to false so that PTR_AND_OBJ entries which
7551 // are not struct members are not considered (e.g. array of pointers to
7552 // data).
7553 bool ShouldBeMemberOf = false;
7554
7555 // Variable keeping track of whether or not we have encountered a component
7556 // in the component list which is a member expression. Useful when we have a
7557 // pointer or a final array section, in which case it is the previous
7558 // component in the list which tells us whether we have a member expression.
7559 // E.g. X.f[:]
7560 // While processing the final array section "[:]" it is "f" which tells us
7561 // whether we are dealing with a member of a declared struct.
7562 const MemberExpr *EncounteredME = nullptr;
7563
7564 for (; I != CE; ++I) {
7565 // If the current component is a member of a struct (parent struct), mark it.
7566 if (!EncounteredME) {
7567 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7568 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7569 // as MEMBER_OF the parent struct.
7570 if (EncounteredME)
7571 ShouldBeMemberOf = true;
7572 }
7573
7574 auto Next = std::next(I);
7575
7576 // We need to generate the addresses and sizes if this is the last
7577 // component, if the component is a pointer, or if it is an array section
7578 // whose length can't be proven to be one. If this is a pointer, it
7579 // becomes the base address for the following components.
7580
7581 // A final array section is one whose length can't be proven to be one.
7582 bool IsFinalArraySection =
7583 isFinalArraySectionExpression(I->getAssociatedExpression());
7584
7585 // Get information on whether the element is a pointer. Array sections
7586 // need special treatment given that they are built-in
7587 // types.
7588 const auto *OASE =
7589 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7590 const auto *OAShE =
7591 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7592 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7593 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7594 bool IsPointer =
7595 OAShE ||
7596 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7597 .getCanonicalType()
7598 ->isAnyPointerType()) ||
7599 I->getAssociatedExpression()->getType()->isAnyPointerType();
7600 bool IsNonDerefPointer = IsPointer && !UO && !BO;
7601
7602 if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7603 // If this is not the last component, we expect the pointer to be
7604 // associated with an array expression or member expression.
7605 assert((Next == CE ||
7606 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7607 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7608 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7609 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7610 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7611 "Unexpected expression");
7612
7613 Address LB = Address::invalid();
7614 if (OAShE) {
7615 LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7616 CGF.getContext().getTypeAlignInChars(
7617 OAShE->getBase()->getType()));
7618 } else {
7619 LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7620 .getAddress(CGF);
7621 }
7622
7623 // If this component is a pointer inside the base struct then we don't
7624 // need to create any entry for it; it will be combined with the object
7625 // it is pointing to into a single PTR_AND_OBJ entry.
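// E.g. in the map(to: s.p[:22]) example above, no separate entry is
// emitted while visiting 's.p' itself; the pointer and its pointee are
// covered by the single PTR_AND_OBJ entry emitted for the section.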
7626 bool IsMemberPointerOrAddr = 7627 (IsPointer || ForDeviceAddr) && EncounteredME && 7628 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7629 EncounteredME); 7630 if (!OverlappedElements.empty()) { 7631 // Handle base element with the info for overlapped elements. 7632 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7633 assert(Next == CE && 7634 "Expected last element for the overlapped elements."); 7635 assert(!IsPointer && 7636 "Unexpected base element with the pointer type."); 7637 // Mark the whole struct as the struct that requires allocation on the 7638 // device. 7639 PartialStruct.LowestElem = {0, LB}; 7640 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7641 I->getAssociatedExpression()->getType()); 7642 Address HB = CGF.Builder.CreateConstGEP( 7643 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7644 CGF.VoidPtrTy), 7645 TypeSize.getQuantity() - 1); 7646 PartialStruct.HighestElem = { 7647 std::numeric_limits<decltype( 7648 PartialStruct.HighestElem.first)>::max(), 7649 HB}; 7650 PartialStruct.Base = BP; 7651 // Emit data for non-overlapped data. 7652 OpenMPOffloadMappingFlags Flags = 7653 OMP_MAP_MEMBER_OF | 7654 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7655 /*AddPtrFlag=*/false, 7656 /*AddIsTargetParamFlag=*/false); 7657 LB = BP; 7658 llvm::Value *Size = nullptr; 7659 // Do bitcopy of all non-overlapped structure elements. 7660 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7661 Component : OverlappedElements) { 7662 Address ComponentLB = Address::invalid(); 7663 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7664 Component) { 7665 if (MC.getAssociatedDeclaration()) { 7666 ComponentLB = 7667 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7668 .getAddress(CGF); 7669 Size = CGF.Builder.CreatePtrDiff( 7670 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7671 CGF.EmitCastToVoidPtr(LB.getPointer())); 7672 break; 7673 } 7674 } 7675 BasePointers.push_back(BP.getPointer()); 7676 Pointers.push_back(LB.getPointer()); 7677 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7678 /*isSigned=*/true)); 7679 Types.push_back(Flags); 7680 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7681 } 7682 BasePointers.push_back(BP.getPointer()); 7683 Pointers.push_back(LB.getPointer()); 7684 Size = CGF.Builder.CreatePtrDiff( 7685 CGF.EmitCastToVoidPtr( 7686 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7687 CGF.EmitCastToVoidPtr(LB.getPointer())); 7688 Sizes.push_back( 7689 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7690 Types.push_back(Flags); 7691 break; 7692 } 7693 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7694 if (!IsMemberPointerOrAddr) { 7695 BasePointers.push_back(BP.getPointer()); 7696 Pointers.push_back(LB.getPointer()); 7697 Sizes.push_back( 7698 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7699 7700 // We need to add a pointer flag for each map that comes from the 7701 // same expression except for the first one. We also need to signal 7702 // this map is the first one that relates with the current capture 7703 // (there is a set of entries for each capture). 
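// E.g. the first entry emitted for a capture carries TARGET_PARAM, while
// later entries for the same expression carry PTR_AND_OBJ instead (and,
// when the base required a reference, PTR_AND_OBJ is used from the start).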
7704 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7705 MapType, MapModifiers, IsImplicit, 7706 !IsExpressionFirstInfo || RequiresReference, 7707 IsCaptureFirstInfo && !RequiresReference); 7708 7709 if (!IsExpressionFirstInfo) { 7710 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7711 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7712 if (IsPointer) 7713 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7714 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7715 7716 if (ShouldBeMemberOf) { 7717 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7718 // should be later updated with the correct value of MEMBER_OF. 7719 Flags |= OMP_MAP_MEMBER_OF; 7720 // From now on, all subsequent PTR_AND_OBJ entries should not be 7721 // marked as MEMBER_OF. 7722 ShouldBeMemberOf = false; 7723 } 7724 } 7725 7726 Types.push_back(Flags); 7727 } 7728 7729 // If we have encountered a member expression so far, keep track of the 7730 // mapped member. If the parent is "*this", then the value declaration 7731 // is nullptr. 7732 if (EncounteredME) { 7733 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7734 unsigned FieldIndex = FD->getFieldIndex(); 7735 7736 // Update info about the lowest and highest elements for this struct 7737 if (!PartialStruct.Base.isValid()) { 7738 PartialStruct.LowestElem = {FieldIndex, LB}; 7739 if (IsFinalArraySection) { 7740 Address HB = 7741 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7742 .getAddress(CGF); 7743 PartialStruct.HighestElem = {FieldIndex, HB}; 7744 } else { 7745 PartialStruct.HighestElem = {FieldIndex, LB}; 7746 } 7747 PartialStruct.Base = BP; 7748 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7749 PartialStruct.LowestElem = {FieldIndex, LB}; 7750 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7751 PartialStruct.HighestElem = {FieldIndex, LB}; 7752 } 7753 } 7754 7755 // If we have a final array section, we are done with this expression. 7756 if (IsFinalArraySection) 7757 break; 7758 7759 // The pointer becomes the base for the next element. 7760 if (Next != CE) 7761 BP = LB; 7762 7763 IsExpressionFirstInfo = false; 7764 IsCaptureFirstInfo = false; 7765 } 7766 } 7767 } 7768 7769 /// Return the adjusted map modifiers if the declaration a capture refers to 7770 /// appears in a first-private clause. This is expected to be used only with 7771 /// directives that start with 'target'. 7772 MappableExprsHandler::OpenMPOffloadMappingFlags 7773 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7774 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7775 7776 // A first private variable captured by reference will use only the 7777 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7778 // declaration is known as first-private in this handler. 
7779 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7780 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7781 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7782 return MappableExprsHandler::OMP_MAP_ALWAYS |
7783 MappableExprsHandler::OMP_MAP_TO;
7784 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7785 return MappableExprsHandler::OMP_MAP_TO |
7786 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7787 return MappableExprsHandler::OMP_MAP_PRIVATE |
7788 MappableExprsHandler::OMP_MAP_TO;
7789 }
7790 return MappableExprsHandler::OMP_MAP_TO |
7791 MappableExprsHandler::OMP_MAP_FROM;
7792 }
7793
7794 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7795 // Shift (Position + 1) into the MEMBER_OF bit field; positions count from 1.
7796 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7797 << getFlagMemberOffset());
7798 }
7799
7800 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7801 OpenMPOffloadMappingFlags MemberOfFlag) {
7802 // If the entry is PTR_AND_OBJ but has not been marked with the special
7803 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7804 // marked as MEMBER_OF.
7805 if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7806 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7807 return;
7808
7809 // Reset the placeholder value to prepare the flag for the assignment of the
7810 // proper MEMBER_OF value.
7811 Flags &= ~OMP_MAP_MEMBER_OF;
7812 Flags |= MemberOfFlag;
7813 }
7814
7815 void getPlainLayout(const CXXRecordDecl *RD,
7816 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7817 bool AsBase) const {
7818 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7819
7820 llvm::StructType *St =
7821 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7822
7823 unsigned NumElements = St->getNumElements();
7824 llvm::SmallVector<
7825 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7826 RecordLayout(NumElements);
7827
7828 // Fill bases.
7829 for (const auto &I : RD->bases()) {
7830 if (I.isVirtual())
7831 continue;
7832 const auto *Base = I.getType()->getAsCXXRecordDecl();
7833 // Ignore empty bases.
7834 if (Base->isEmpty() || CGF.getContext()
7835 .getASTRecordLayout(Base)
7836 .getNonVirtualSize()
7837 .isZero())
7838 continue;
7839
7840 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7841 RecordLayout[FieldIndex] = Base;
7842 }
7843 // Fill in virtual bases.
7844 for (const auto &I : RD->vbases()) {
7845 const auto *Base = I.getType()->getAsCXXRecordDecl();
7846 // Ignore empty bases.
7847 if (Base->isEmpty())
7848 continue;
7849 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7850 if (RecordLayout[FieldIndex])
7851 continue;
7852 RecordLayout[FieldIndex] = Base;
7853 }
7854 // Fill in all the fields.
7855 assert(!RD->isUnion() && "Unexpected union.");
7856 for (const auto *Field : RD->fields()) {
7857 // Fill in non-bitfield fields only; bitfields and zero-size fields do
7858 // not participate in the plain layout.
7859 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7860 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7861 RecordLayout[FieldIndex] = Field; 7862 } 7863 } 7864 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7865 &Data : RecordLayout) { 7866 if (Data.isNull()) 7867 continue; 7868 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7869 getPlainLayout(Base, Layout, /*AsBase=*/true); 7870 else 7871 Layout.push_back(Data.get<const FieldDecl *>()); 7872 } 7873 } 7874 7875 public: 7876 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7877 : CurDir(&Dir), CGF(CGF) { 7878 // Extract firstprivate clause information. 7879 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7880 for (const auto *D : C->varlists()) 7881 FirstPrivateDecls.try_emplace( 7882 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 7883 // Extract implicit firstprivates from uses_allocators clauses. 7884 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 7885 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 7886 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 7887 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 7888 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 7889 /*Implicit=*/true); 7890 else if (const auto *VD = dyn_cast<VarDecl>( 7891 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 7892 ->getDecl())) 7893 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 7894 } 7895 } 7896 // Extract device pointer clause information. 7897 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7898 for (auto L : C->component_lists()) 7899 DevPointersMap[L.first].push_back(L.second); 7900 } 7901 7902 /// Constructor for the declare mapper directive. 7903 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 7904 : CurDir(&Dir), CGF(CGF) {} 7905 7906 /// Generate code for the combined entry if we have a partially mapped struct 7907 /// and take care of the mapping flags of the arguments corresponding to 7908 /// individual struct members. 
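/// The combined entry spans [&LowestElem, &HighestElem + 1) of the struct;
/// it becomes the TARGET_PARAM argument and the per-member entries are
/// rewritten as MEMBER_OF it. E.g. with OMP_MAP_MEMBER_OF ==
/// 0xffff000000000000, getFlagMemberOffset() is 48 and getMemberOfFlag(0)
/// is 1ULL << 48, i.e. MEMBER_OF(1), the first argument position.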
7909 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7910 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7911 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7912 const StructRangeInfoTy &PartialStruct) const { 7913 // Base is the base of the struct 7914 BasePointers.push_back(PartialStruct.Base.getPointer()); 7915 // Pointer is the address of the lowest element 7916 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7917 Pointers.push_back(LB); 7918 // Size is (addr of {highest+1} element) - (addr of lowest element) 7919 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7920 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7921 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7922 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7923 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7924 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 7925 /*isSigned=*/false); 7926 Sizes.push_back(Size); 7927 // Map type is always TARGET_PARAM 7928 Types.push_back(OMP_MAP_TARGET_PARAM); 7929 // Remove TARGET_PARAM flag from the first element 7930 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7931 7932 // All other current entries will be MEMBER_OF the combined entry 7933 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7934 // 0xFFFF in the MEMBER_OF field). 7935 OpenMPOffloadMappingFlags MemberOfFlag = 7936 getMemberOfFlag(BasePointers.size() - 1); 7937 for (auto &M : CurTypes) 7938 setCorrectMemberOfFlag(M, MemberOfFlag); 7939 } 7940 7941 /// Generate all the base pointers, section pointers, sizes and map 7942 /// types for the extracted mappable expressions. Also, for each item that 7943 /// relates with a device pointer, a pair of the relevant declaration and 7944 /// index where it occurs is appended to the device pointers info array. 7945 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 7946 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7947 MapFlagsArrayTy &Types) const { 7948 // We have to process the component lists that relate with the same 7949 // declaration in a single chunk so that we can generate the map flags 7950 // correctly. Therefore, we organize all lists in a map. 7951 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 7952 7953 // Helper function to fill the information map for the different supported 7954 // clauses. 7955 auto &&InfoGen = 7956 [&Info](const ValueDecl *D, 7957 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7958 OpenMPMapClauseKind MapType, 7959 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7960 bool ReturnDevicePointer, bool IsImplicit, 7961 bool ForDeviceAddr = false) { 7962 const ValueDecl *VD = 7963 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7964 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7965 IsImplicit, ForDeviceAddr);
7966 };
7967
7968 assert(CurDir.is<const OMPExecutableDirective *>() &&
7969 "Expect an executable directive");
7970 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7971 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7972 for (const auto L : C->component_lists()) {
7973 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7974 /*ReturnDevicePointer=*/false, C->isImplicit());
7975 }
7976 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7977 for (const auto L : C->component_lists()) {
7978 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7979 /*ReturnDevicePointer=*/false, C->isImplicit());
7980 }
7981 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7982 for (const auto L : C->component_lists()) {
7983 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7984 /*ReturnDevicePointer=*/false, C->isImplicit());
7985 }
7986
7987 // Look at the use_device_ptr clause information and mark the existing map
7988 // entries as such. If there is no map information for an entry in the
7989 // use_device_ptr list, we create one with map type 'alloc' and zero size
7990 // section. It is the user's fault if that was not mapped before. If there is
7991 // no map information and the pointer is a struct member, then we defer the
7992 // emission of that entry until the whole struct has been processed.
7993 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7994 DeferredInfo;
7995
7996 for (const auto *C :
7997 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
7998 for (const auto L : C->component_lists()) {
7999 assert(!L.second.empty() && "Not expecting empty list of components!");
8000 const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8001 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8002 const Expr *IE = L.second.back().getAssociatedExpression();
8003 // If the first component is a member expression, we have to look into
8004 // 'this', which maps to null in the map of map information. Otherwise
8005 // look directly for the information.
8006 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8007
8008 // We potentially have map information for this declaration already.
8009 // Look for the first set of components that refer to it.
8010 if (It != Info.end()) {
8011 auto CI = std::find_if(
8012 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
8013 return MI.Components.back().getAssociatedDeclaration() == VD;
8014 });
8015 // If we found a map entry, signal that the pointer has to be returned
8016 // and move on to the next declaration.
8017 if (CI != It->second.end()) {
8018 CI->ReturnDevicePointer = true;
8019 continue;
8020 }
8021 }
8022
8023 // We didn't find any match in our map information, so generate a zero
8024 // size array section; if the pointer is a struct member we defer this
8025 // action until the whole struct has been processed.
8026 if (isa<MemberExpr>(IE)) {
8027 // Insert the pointer into Info to be processed by
8028 // generateInfoForComponentList. Because it is a member pointer
8029 // without a pointee, no entry will be generated for it, therefore
8030 // we need to generate one after the whole struct has been processed.
8031 // Nonetheless, generateInfoForComponentList must be called to take
8032 // the pointer into account for the calculation of the range of the
8033 // partial struct.
8034 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8035 /*ReturnDevicePointer=*/false, C->isImplicit());
8036 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8037 } else {
8038 llvm::Value *Ptr =
8039 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8040 BasePointers.emplace_back(Ptr, VD);
8041 Pointers.push_back(Ptr);
8042 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8043 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8044 }
8045 }
8046 }
8047
8048 // Look at the use_device_addr clause information and mark the existing map
8049 // entries as such. If there is no map information for an entry in the
8050 // use_device_addr list, we create one with map type 'alloc' and zero size
8051 // section. It is the user's fault if that was not mapped before. If there is
8052 // no map information and the pointer is a struct member, then we defer the
8053 // emission of that entry until the whole struct has been processed.
8054 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8055 for (const auto *C :
8056 CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
8057 for (const auto L : C->component_lists()) {
8058 assert(!L.second.empty() && "Not expecting empty list of components!");
8059 const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8060 if (!Processed.insert(VD).second)
8061 continue;
8062 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8063 const Expr *IE = L.second.back().getAssociatedExpression();
8064 // If the first component is a member expression, we have to look into
8065 // 'this', which maps to null in the map of map information. Otherwise
8066 // look directly for the information.
8067 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8068
8069 // We potentially have map information for this declaration already.
8070 // Look for the first set of components that refer to it.
8071 if (It != Info.end()) {
8072 auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
8073 return MI.Components.back().getAssociatedDeclaration() == VD;
8074 });
8075 // If we found a map entry, signal that the pointer has to be returned
8076 // and move on to the next declaration.
8077 if (CI != It->second.end()) {
8078 CI->ReturnDevicePointer = true;
8079 continue;
8080 }
8081 }
8082
8083 // We didn't find any match in our map information, so generate a zero
8084 // size array section; if the pointer is a struct member we defer this
8085 // action until the whole struct has been processed.
8086 if (isa<MemberExpr>(IE)) {
8087 // Insert the pointer into Info to be processed by
8088 // generateInfoForComponentList. Because it is a member pointer
8089 // without a pointee, no entry will be generated for it, therefore
8090 // we need to generate one after the whole struct has been processed.
8091 // Nonetheless, generateInfoForComponentList must be called to take
8092 // the pointer into account for the calculation of the range of the
8093 // partial struct.
8094 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 8095 /*ReturnDevicePointer=*/false, C->isImplicit(), 8096 /*ForDeviceAddr=*/true); 8097 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); 8098 } else { 8099 llvm::Value *Ptr; 8100 if (IE->isGLValue()) 8101 Ptr = CGF.EmitLValue(IE).getPointer(CGF); 8102 else 8103 Ptr = CGF.EmitScalarExpr(IE); 8104 BasePointers.emplace_back(Ptr, VD); 8105 Pointers.push_back(Ptr); 8106 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8107 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8108 } 8109 } 8110 } 8111 8112 for (const auto &M : Info) { 8113 // We need to know when we generate information for the first component 8114 // associated with a capture, because the mapping flags depend on it. 8115 bool IsFirstComponentList = true; 8116 8117 // Temporary versions of arrays 8118 MapBaseValuesArrayTy CurBasePointers; 8119 MapValuesArrayTy CurPointers; 8120 MapValuesArrayTy CurSizes; 8121 MapFlagsArrayTy CurTypes; 8122 StructRangeInfoTy PartialStruct; 8123 8124 for (const MapInfo &L : M.second) { 8125 assert(!L.Components.empty() && 8126 "Not expecting declaration with no component lists."); 8127 8128 // Remember the current base pointer index. 8129 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 8130 generateInfoForComponentList( 8131 L.MapType, L.MapModifiers, L.Components, CurBasePointers, 8132 CurPointers, CurSizes, CurTypes, PartialStruct, 8133 IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr); 8134 8135 // If this entry relates with a device pointer, set the relevant 8136 // declaration and add the 'return pointer' flag. 8137 if (L.ReturnDevicePointer) { 8138 assert(CurBasePointers.size() > CurrentBasePointersIdx && 8139 "Unexpected number of mapped base pointers."); 8140 8141 const ValueDecl *RelevantVD = 8142 L.Components.back().getAssociatedDeclaration(); 8143 assert(RelevantVD && 8144 "No relevant declaration related with device pointer??"); 8145 8146 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 8147 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8148 } 8149 IsFirstComponentList = false; 8150 } 8151 8152 // Append any pending zero-length pointers which are struct members and 8153 // used with use_device_ptr or use_device_addr. 8154 auto CI = DeferredInfo.find(M.first); 8155 if (CI != DeferredInfo.end()) { 8156 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8157 llvm::Value *BasePtr; 8158 llvm::Value *Ptr; 8159 if (L.ForDeviceAddr) { 8160 if (L.IE->isGLValue()) 8161 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8162 else 8163 Ptr = this->CGF.EmitScalarExpr(L.IE); 8164 BasePtr = Ptr; 8165 // Entry is RETURN_PARAM. Also, set the placeholder value 8166 // MEMBER_OF=FFFF so that the entry is later updated with the 8167 // correct value of MEMBER_OF. 8168 CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); 8169 } else { 8170 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8171 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), 8172 L.IE->getExprLoc()); 8173 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8174 // value MEMBER_OF=FFFF so that the entry is later updated with the 8175 // correct value of MEMBER_OF. 
8176 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8177 OMP_MAP_MEMBER_OF); 8178 } 8179 CurBasePointers.emplace_back(BasePtr, L.VD); 8180 CurPointers.push_back(Ptr); 8181 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8182 } 8183 } 8184 8185 // If there is an entry in PartialStruct it means we have a struct with 8186 // individual members mapped. Emit an extra combined entry. 8187 if (PartialStruct.Base.isValid()) 8188 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8189 PartialStruct); 8190 8191 // We need to append the results of this capture to what we already have. 8192 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8193 Pointers.append(CurPointers.begin(), CurPointers.end()); 8194 Sizes.append(CurSizes.begin(), CurSizes.end()); 8195 Types.append(CurTypes.begin(), CurTypes.end()); 8196 } 8197 } 8198 8199 /// Generate all the base pointers, section pointers, sizes and map types for 8200 /// the extracted map clauses of user-defined mapper. 8201 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, 8202 MapValuesArrayTy &Pointers, 8203 MapValuesArrayTy &Sizes, 8204 MapFlagsArrayTy &Types) const { 8205 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8206 "Expect a declare mapper directive"); 8207 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8208 // We have to process the component lists that relate with the same 8209 // declaration in a single chunk so that we can generate the map flags 8210 // correctly. Therefore, we organize all lists in a map. 8211 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8212 8213 // Helper function to fill the information map for the different supported 8214 // clauses. 8215 auto &&InfoGen = [&Info]( 8216 const ValueDecl *D, 8217 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8218 OpenMPMapClauseKind MapType, 8219 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8220 bool ReturnDevicePointer, bool IsImplicit) { 8221 const ValueDecl *VD = 8222 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8223 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8224 IsImplicit); 8225 }; 8226 8227 for (const auto *C : CurMapperDir->clauselists()) { 8228 const auto *MC = cast<OMPMapClause>(C); 8229 for (const auto L : MC->component_lists()) { 8230 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), 8231 /*ReturnDevicePointer=*/false, MC->isImplicit()); 8232 } 8233 } 8234 8235 for (const auto &M : Info) { 8236 // We need to know when we generate information for the first component 8237 // associated with a capture, because the mapping flags depend on it. 8238 bool IsFirstComponentList = true; 8239 8240 // Temporary versions of arrays 8241 MapBaseValuesArrayTy CurBasePointers; 8242 MapValuesArrayTy CurPointers; 8243 MapValuesArrayTy CurSizes; 8244 MapFlagsArrayTy CurTypes; 8245 StructRangeInfoTy PartialStruct; 8246 8247 for (const MapInfo &L : M.second) { 8248 assert(!L.Components.empty() && 8249 "Not expecting declaration with no component lists."); 8250 generateInfoForComponentList( 8251 L.MapType, L.MapModifiers, L.Components, CurBasePointers, 8252 CurPointers, CurSizes, CurTypes, PartialStruct, 8253 IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr); 8254 IsFirstComponentList = false; 8255 } 8256 8257 // If there is an entry in PartialStruct it means we have a struct with 8258 // individual members mapped. Emit an extra combined entry. 
8259 if (PartialStruct.Base.isValid())
8260 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8261 PartialStruct);
8262
8263 // We need to append the results of this capture to what we already have.
8264 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8265 Pointers.append(CurPointers.begin(), CurPointers.end());
8266 Sizes.append(CurSizes.begin(), CurSizes.end());
8267 Types.append(CurTypes.begin(), CurTypes.end());
8268 }
8269 }
8270
8271 /// Emit capture info for lambda variables captured by reference.
8272 void generateInfoForLambdaCaptures(
8273 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8274 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8275 MapFlagsArrayTy &Types,
8276 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8277 const auto *RD = VD->getType()
8278 .getCanonicalType()
8279 .getNonReferenceType()
8280 ->getAsCXXRecordDecl();
8281 if (!RD || !RD->isLambda())
8282 return;
8283 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8284 LValue VDLVal = CGF.MakeAddrLValue(
8285 VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8286 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8287 FieldDecl *ThisCapture = nullptr;
8288 RD->getCaptureFields(Captures, ThisCapture);
8289 if (ThisCapture) {
8290 LValue ThisLVal =
8291 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8292 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8293 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8294 VDLVal.getPointer(CGF));
8295 BasePointers.push_back(ThisLVal.getPointer(CGF));
8296 Pointers.push_back(ThisLValVal.getPointer(CGF));
8297 Sizes.push_back(
8298 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8299 CGF.Int64Ty, /*isSigned=*/true));
8300 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8301 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8302 }
8303 for (const LambdaCapture &LC : RD->captures()) {
8304 if (!LC.capturesVariable())
8305 continue;
8306 const VarDecl *VD = LC.getCapturedVar();
8307 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8308 continue;
8309 auto It = Captures.find(VD);
8310 assert(It != Captures.end() && "Found lambda capture without field.");
8311 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8312 if (LC.getCaptureKind() == LCK_ByRef) {
8313 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8314 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8315 VDLVal.getPointer(CGF));
8316 BasePointers.push_back(VarLVal.getPointer(CGF));
8317 Pointers.push_back(VarLValVal.getPointer(CGF));
8318 Sizes.push_back(CGF.Builder.CreateIntCast(
8319 CGF.getTypeSize(
8320 VD->getType().getCanonicalType().getNonReferenceType()),
8321 CGF.Int64Ty, /*isSigned=*/true));
8322 } else {
8323 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8324 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8325 VDLVal.getPointer(CGF));
8326 BasePointers.push_back(VarLVal.getPointer(CGF));
8327 Pointers.push_back(VarRVal.getScalarVal());
8328 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8329 }
8330 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8331 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8332 }
8333 }
8334
8335 /// Set correct indices for lambda captures.
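/// Entries created by generateInfoForLambdaCaptures() carry the placeholder
/// flag combination PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT; this pass
/// patches their MEMBER_OF field to the index of the matching lambda entry.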
8336 void adjustMemberOfForLambdaCaptures(
8337 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8338 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8339 MapFlagsArrayTy &Types) const {
8340 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8341 // Set correct member_of idx for all implicit lambda captures.
8342 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8343 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8344 continue;
8345 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8346 assert(BasePtr && "Unable to find base lambda address.");
8347 int TgtIdx = -1;
8348 for (unsigned J = I; J > 0; --J) {
8349 unsigned Idx = J - 1;
8350 if (Pointers[Idx] != BasePtr)
8351 continue;
8352 TgtIdx = Idx;
8353 break;
8354 }
8355 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8356 // Mark the capture entry as MEMBER_OF the parent lambda's entry
8357 // (setCorrectMemberOfFlag only patches entries that still carry the
8358 // placeholder value 0xFFFF in the MEMBER_OF field).
8359 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8360 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8361 }
8362 }
8363
8364 /// Generate the base pointers, section pointers, sizes and map types
8365 /// associated with a given capture.
8366 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8367 llvm::Value *Arg,
8368 MapBaseValuesArrayTy &BasePointers,
8369 MapValuesArrayTy &Pointers,
8370 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8371 StructRangeInfoTy &PartialStruct) const {
8372 assert(!Cap->capturesVariableArrayType() &&
8373 "Not expecting to generate map info for a variable array type!");
8374
8375 // We need to know when we are generating information for the first component associated with a capture.
8376 const ValueDecl *VD = Cap->capturesThis()
8377 ? nullptr
8378 : Cap->getCapturedVar()->getCanonicalDecl();
8379
8380 // If this declaration appears in an is_device_ptr clause we just have to
8381 // pass the pointer by value. If it is a reference to a declaration, we just
8382 // pass its value.
8383 if (DevPointersMap.count(VD)) {
8384 BasePointers.emplace_back(Arg, VD);
8385 Pointers.push_back(Arg);
8386 Sizes.push_back(
8387 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8388 CGF.Int64Ty, /*isSigned=*/true));
8389 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8390 return;
8391 }
8392
8393 using MapData =
8394 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8395 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8396 SmallVector<MapData, 4> DeclComponentLists;
8397 assert(CurDir.is<const OMPExecutableDirective *>() &&
8398 "Expect an executable directive");
8399 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8400 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8401 for (const auto L : C->decl_component_lists(VD)) {
8402 assert(L.first == VD &&
8403 "We got information for the wrong declaration??");
8404 assert(!L.second.empty() &&
8405 "Not expecting declaration with no component lists.");
8406 DeclComponentLists.emplace_back(L.second, C->getMapType(),
8407 C->getMapTypeModifiers(),
8408 C->isImplicit());
8409 }
8410 }
8411
8412 // Find overlapping elements (including the offset from the base element).
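// Two component lists overlap when, walking from the base, one is an
// initial sub-list of the other, e.g. map(s.s) and map(s.s.f[:10]) rooted
// at the same declaration.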
8413 llvm::SmallDenseMap<
8414 const MapData *,
8415 llvm::SmallVector<
8416 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8417 4>
8418 OverlappedData;
8419 size_t Count = 0;
8420 for (const MapData &L : DeclComponentLists) {
8421 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8422 OpenMPMapClauseKind MapType;
8423 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8424 bool IsImplicit;
8425 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8426 ++Count;
8427 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8428 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8429 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8430 auto CI = Components.rbegin();
8431 auto CE = Components.rend();
8432 auto SI = Components1.rbegin();
8433 auto SE = Components1.rend();
8434 for (; CI != CE && SI != SE; ++CI, ++SI) {
8435 if (CI->getAssociatedExpression()->getStmtClass() !=
8436 SI->getAssociatedExpression()->getStmtClass())
8437 break;
8438 // Are we dealing with different variables/fields?
8439 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8440 break;
8441 }
8442 // We found an overlap if, for at least one of the two lists, we reached
8443 // the end of its components.
8444 if (CI == CE || SI == SE) {
8445 assert((CI != CE || SI != SE) &&
8446 "Unexpected full match of the mapping components.");
8447 const MapData &BaseData = CI == CE ? L : L1;
8448 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8449 SI == SE ? Components : Components1;
8450 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8451 OverlappedElements.getSecond().push_back(SubData);
8452 }
8453 }
8454 }
8455 // Sort the overlapped elements for each item.
8456 llvm::SmallVector<const FieldDecl *, 4> Layout;
8457 if (!OverlappedData.empty()) {
8458 if (const auto *CRD =
8459 VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8460 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8461 else {
8462 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8463 Layout.append(RD->field_begin(), RD->field_end());
8464 }
8465 }
8466 for (auto &Pair : OverlappedData) {
8467 llvm::sort(
8468 Pair.getSecond(),
8469 [&Layout](
8470 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8471 OMPClauseMappableExprCommon::MappableExprComponentListRef
8472 Second) {
8473 auto CI = First.rbegin();
8474 auto CE = First.rend();
8475 auto SI = Second.rbegin();
8476 auto SE = Second.rend();
8477 for (; CI != CE && SI != SE; ++CI, ++SI) {
8478 if (CI->getAssociatedExpression()->getStmtClass() !=
8479 SI->getAssociatedExpression()->getStmtClass())
8480 break;
8481 // Are we dealing with different variables/fields?
8482 if (CI->getAssociatedDeclaration() !=
8483 SI->getAssociatedDeclaration())
8484 break;
8485 }
8486
8487 // Lists contain the same elements.
8488 if (CI == CE && SI == SE)
8489 return false;
8490
8491 // A list with fewer elements sorts before a list with more elements.
8492           if (CI == CE || SI == SE)
8493             return CI == CE;
8494 
8495           const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8496           const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8497           if (FD1->getParent() == FD2->getParent())
8498             return FD1->getFieldIndex() < FD2->getFieldIndex();
8499           const auto It =
8500               llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8501                 return FD == FD1 || FD == FD2;
8502               });
8503           return *It == FD1;
8504         });
8505   }
8506 
8507   // The information is associated with a capture because the mapping flags
8508   // depend on it. First, go through all of the elements that have overlapped
8509   // elements.
8510   for (const auto &Pair : OverlappedData) {
8511     const MapData &L = *Pair.getFirst();
8512     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8512     OpenMPMapClauseKind MapType;
8513     ArrayRef<OpenMPMapModifierKind> MapModifiers;
8514     bool IsImplicit;
8515     std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8516     ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8517         OverlappedComponents = Pair.getSecond();
8518     bool IsFirstComponentList = true;
8519     generateInfoForComponentList(
8520         MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
8521         Types, PartialStruct, IsFirstComponentList, IsImplicit,
8522         /*ForDeviceAddr=*/false, OverlappedComponents);
8523   }
8524   // Go through other elements without overlapped elements.
8525   bool IsFirstComponentList = OverlappedData.empty();
8526   for (const MapData &L : DeclComponentLists) {
8527     OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8528     OpenMPMapClauseKind MapType;
8529     ArrayRef<OpenMPMapModifierKind> MapModifiers;
8530     bool IsImplicit;
8531     std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8532     auto It = OverlappedData.find(&L);
8533     if (It == OverlappedData.end())
8534       generateInfoForComponentList(MapType, MapModifiers, Components,
8535                                    BasePointers, Pointers, Sizes, Types,
8536                                    PartialStruct, IsFirstComponentList,
8537                                    IsImplicit);
8538     IsFirstComponentList = false;
8539   }
8540 }
8541 
8542 /// Generate the base pointers, section pointers, sizes and map types
8543 /// associated with the declare target link variables.
8544 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8545                                       MapValuesArrayTy &Pointers,
8546                                       MapValuesArrayTy &Sizes,
8547                                       MapFlagsArrayTy &Types) const {
8548   assert(CurDir.is<const OMPExecutableDirective *>() &&
8549          "Expect an executable directive");
8550   const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8551   // Map other list items in the map clause which are not captured variables
8552   // but "declare target link" global variables.
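  // Illustrative example (hypothetical source, not from this file): with
  //   int Gbl;
  //   #pragma omp declare target link(Gbl)
  //   ...
  //   #pragma omp target map(tofrom: Gbl)
  // 'Gbl' is not a captured variable, so its map entry is produced by the
  // loop below rather than by generateInfoForCapture.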
8553 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8554 for (const auto L : C->component_lists()) { 8555 if (!L.first) 8556 continue; 8557 const auto *VD = dyn_cast<VarDecl>(L.first); 8558 if (!VD) 8559 continue; 8560 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8561 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8562 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8563 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8564 continue; 8565 StructRangeInfoTy PartialStruct; 8566 generateInfoForComponentList( 8567 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8568 Pointers, Sizes, Types, PartialStruct, 8569 /*IsFirstComponentList=*/true, C->isImplicit()); 8570 assert(!PartialStruct.Base.isValid() && 8571 "No partial structs for declare target link expected."); 8572 } 8573 } 8574 } 8575 8576 /// Generate the default map information for a given capture \a CI, 8577 /// record field declaration \a RI and captured value \a CV. 8578 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8579 const FieldDecl &RI, llvm::Value *CV, 8580 MapBaseValuesArrayTy &CurBasePointers, 8581 MapValuesArrayTy &CurPointers, 8582 MapValuesArrayTy &CurSizes, 8583 MapFlagsArrayTy &CurMapTypes) const { 8584 bool IsImplicit = true; 8585 // Do the default mapping. 8586 if (CI.capturesThis()) { 8587 CurBasePointers.push_back(CV); 8588 CurPointers.push_back(CV); 8589 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8590 CurSizes.push_back( 8591 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8592 CGF.Int64Ty, /*isSigned=*/true)); 8593 // Default map type. 8594 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8595 } else if (CI.capturesVariableByCopy()) { 8596 CurBasePointers.push_back(CV); 8597 CurPointers.push_back(CV); 8598 if (!RI.getType()->isAnyPointerType()) { 8599 // We have to signal to the runtime captures passed by value that are 8600 // not pointers. 8601 CurMapTypes.push_back(OMP_MAP_LITERAL); 8602 CurSizes.push_back(CGF.Builder.CreateIntCast( 8603 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8604 } else { 8605 // Pointers are implicitly mapped with a zero size and no flags 8606 // (other than first map that is added for all implicit maps). 8607 CurMapTypes.push_back(OMP_MAP_NONE); 8608 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8609 } 8610 const VarDecl *VD = CI.getCapturedVar(); 8611 auto I = FirstPrivateDecls.find(VD); 8612 if (I != FirstPrivateDecls.end()) 8613 IsImplicit = I->getSecond(); 8614 } else { 8615 assert(CI.capturesVariable() && "Expected captured reference."); 8616 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8617 QualType ElementType = PtrTy->getPointeeType(); 8618 CurSizes.push_back(CGF.Builder.CreateIntCast( 8619 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8620 // The default map type for a scalar/complex type is 'to' because by 8621 // default the value doesn't have to be retrieved. For an aggregate 8622 // type, the default is 'tofrom'. 8623 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8624 const VarDecl *VD = CI.getCapturedVar(); 8625 auto I = FirstPrivateDecls.find(VD); 8626 if (I != FirstPrivateDecls.end() && 8627 VD->getType().isConstant(CGF.getContext())) { 8628 llvm::Constant *Addr = 8629 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8630 // Copy the value of the original variable to the new global copy. 
8631 CGF.Builder.CreateMemCpy( 8632 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 8633 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8634 CurSizes.back(), /*IsVolatile=*/false); 8635 // Use new global variable as the base pointers. 8636 CurBasePointers.push_back(Addr); 8637 CurPointers.push_back(Addr); 8638 } else { 8639 CurBasePointers.push_back(CV); 8640 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8641 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8642 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8643 AlignmentSource::Decl)); 8644 CurPointers.push_back(PtrAddr.getPointer()); 8645 } else { 8646 CurPointers.push_back(CV); 8647 } 8648 } 8649 if (I != FirstPrivateDecls.end()) 8650 IsImplicit = I->getSecond(); 8651 } 8652 // Every default map produces a single argument which is a target parameter. 8653 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8654 8655 // Add flag stating this is an implicit map. 8656 if (IsImplicit) 8657 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8658 } 8659 }; 8660 } // anonymous namespace 8661 8662 /// Emit the arrays used to pass the captures and map information to the 8663 /// offloading runtime library. If there is no map or capture information, 8664 /// return nullptr by reference. 8665 static void 8666 emitOffloadingArrays(CodeGenFunction &CGF, 8667 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8668 MappableExprsHandler::MapValuesArrayTy &Pointers, 8669 MappableExprsHandler::MapValuesArrayTy &Sizes, 8670 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8671 CGOpenMPRuntime::TargetDataInfo &Info) { 8672 CodeGenModule &CGM = CGF.CGM; 8673 ASTContext &Ctx = CGF.getContext(); 8674 8675 // Reset the array information. 8676 Info.clearArrayInfo(); 8677 Info.NumberOfPtrs = BasePointers.size(); 8678 8679 if (Info.NumberOfPtrs) { 8680 // Detect if we have any capture size requiring runtime evaluation of the 8681 // size so that a constant array could be eventually used. 8682 bool hasRuntimeEvaluationCaptureSize = false; 8683 for (llvm::Value *S : Sizes) 8684 if (!isa<llvm::Constant>(S)) { 8685 hasRuntimeEvaluationCaptureSize = true; 8686 break; 8687 } 8688 8689 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8690 QualType PointerArrayType = Ctx.getConstantArrayType( 8691 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 8692 /*IndexTypeQuals=*/0); 8693 8694 Info.BasePointersArray = 8695 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8696 Info.PointersArray = 8697 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8698 8699 // If we don't have any VLA types or other types that require runtime 8700 // evaluation, we can use a constant array for the map sizes, otherwise we 8701 // need to fill up the arrays as we do for the pointers. 8702 QualType Int64Ty = 8703 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8704 if (hasRuntimeEvaluationCaptureSize) { 8705 QualType SizeArrayType = Ctx.getConstantArrayType( 8706 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 8707 /*IndexTypeQuals=*/0); 8708 Info.SizesArray = 8709 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8710 } else { 8711 // We expect all the sizes to be constant, so we collect them to create 8712 // a constant array. 
8713 SmallVector<llvm::Constant *, 16> ConstSizes; 8714 for (llvm::Value *S : Sizes) 8715 ConstSizes.push_back(cast<llvm::Constant>(S)); 8716 8717 auto *SizesArrayInit = llvm::ConstantArray::get( 8718 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8719 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8720 auto *SizesArrayGbl = new llvm::GlobalVariable( 8721 CGM.getModule(), SizesArrayInit->getType(), 8722 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8723 SizesArrayInit, Name); 8724 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8725 Info.SizesArray = SizesArrayGbl; 8726 } 8727 8728 // The map types are always constant so we don't need to generate code to 8729 // fill arrays. Instead, we create an array constant. 8730 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8731 llvm::copy(MapTypes, Mapping.begin()); 8732 llvm::Constant *MapTypesArrayInit = 8733 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8734 std::string MaptypesName = 8735 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8736 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8737 CGM.getModule(), MapTypesArrayInit->getType(), 8738 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8739 MapTypesArrayInit, MaptypesName); 8740 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8741 Info.MapTypesArray = MapTypesArrayGbl; 8742 8743 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8744 llvm::Value *BPVal = *BasePointers[I]; 8745 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8746 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8747 Info.BasePointersArray, 0, I); 8748 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8749 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8750 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8751 CGF.Builder.CreateStore(BPVal, BPAddr); 8752 8753 if (Info.requiresDevicePointerInfo()) 8754 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 8755 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8756 8757 llvm::Value *PVal = Pointers[I]; 8758 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8759 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8760 Info.PointersArray, 0, I); 8761 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8762 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8763 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8764 CGF.Builder.CreateStore(PVal, PAddr); 8765 8766 if (hasRuntimeEvaluationCaptureSize) { 8767 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8768 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8769 Info.SizesArray, 8770 /*Idx0=*/0, 8771 /*Idx1=*/I); 8772 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 8773 CGF.Builder.CreateStore( 8774 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 8775 SAddr); 8776 } 8777 } 8778 } 8779 } 8780 8781 /// Emit the arguments to be passed to the runtime library based on the 8782 /// arrays of pointers, sizes and map types. 
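/// As a sketch (with at least one captured pointer; the array names follow
/// the temporaries created by emitOffloadingArrays), this emits, in effect:
/// \code
///   BasePointersArrayArg = &.offload_baseptrs[0];
///   PointersArrayArg     = &.offload_ptrs[0];
///   SizesArrayArg        = &.offload_sizes[0];
///   MapTypesArrayArg     = &.offload_maptypes[0];
/// \endcode
/// and null pointers for all four arguments when there are no captures.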
8783 static void emitOffloadingArraysArgument( 8784 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8785 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8786 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8787 CodeGenModule &CGM = CGF.CGM; 8788 if (Info.NumberOfPtrs) { 8789 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8790 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8791 Info.BasePointersArray, 8792 /*Idx0=*/0, /*Idx1=*/0); 8793 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8794 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8795 Info.PointersArray, 8796 /*Idx0=*/0, 8797 /*Idx1=*/0); 8798 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8799 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8800 /*Idx0=*/0, /*Idx1=*/0); 8801 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8802 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8803 Info.MapTypesArray, 8804 /*Idx0=*/0, 8805 /*Idx1=*/0); 8806 } else { 8807 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8808 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8809 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8810 MapTypesArrayArg = 8811 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8812 } 8813 } 8814 8815 /// Check for inner distribute directive. 8816 static const OMPExecutableDirective * 8817 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8818 const auto *CS = D.getInnermostCapturedStmt(); 8819 const auto *Body = 8820 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8821 const Stmt *ChildStmt = 8822 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8823 8824 if (const auto *NestedDir = 8825 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8826 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8827 switch (D.getDirectiveKind()) { 8828 case OMPD_target: 8829 if (isOpenMPDistributeDirective(DKind)) 8830 return NestedDir; 8831 if (DKind == OMPD_teams) { 8832 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8833 /*IgnoreCaptured=*/true); 8834 if (!Body) 8835 return nullptr; 8836 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8837 if (const auto *NND = 8838 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8839 DKind = NND->getDirectiveKind(); 8840 if (isOpenMPDistributeDirective(DKind)) 8841 return NND; 8842 } 8843 } 8844 return nullptr; 8845 case OMPD_target_teams: 8846 if (isOpenMPDistributeDirective(DKind)) 8847 return NestedDir; 8848 return nullptr; 8849 case OMPD_target_parallel: 8850 case OMPD_target_simd: 8851 case OMPD_target_parallel_for: 8852 case OMPD_target_parallel_for_simd: 8853 return nullptr; 8854 case OMPD_target_teams_distribute: 8855 case OMPD_target_teams_distribute_simd: 8856 case OMPD_target_teams_distribute_parallel_for: 8857 case OMPD_target_teams_distribute_parallel_for_simd: 8858 case OMPD_parallel: 8859 case OMPD_for: 8860 case OMPD_parallel_for: 8861 case OMPD_parallel_master: 8862 case OMPD_parallel_sections: 8863 case OMPD_for_simd: 8864 case OMPD_parallel_for_simd: 8865 case OMPD_cancel: 8866 case OMPD_cancellation_point: 8867 case OMPD_ordered: 8868 case OMPD_threadprivate: 8869 case OMPD_allocate: 8870 case OMPD_task: 8871 case OMPD_simd: 8872 case OMPD_sections: 8873 case OMPD_section: 8874 case OMPD_single: 8875 case OMPD_master: 8876 case OMPD_critical: 8877 case 
OMPD_taskyield: 8878 case OMPD_barrier: 8879 case OMPD_taskwait: 8880 case OMPD_taskgroup: 8881 case OMPD_atomic: 8882 case OMPD_flush: 8883 case OMPD_depobj: 8884 case OMPD_scan: 8885 case OMPD_teams: 8886 case OMPD_target_data: 8887 case OMPD_target_exit_data: 8888 case OMPD_target_enter_data: 8889 case OMPD_distribute: 8890 case OMPD_distribute_simd: 8891 case OMPD_distribute_parallel_for: 8892 case OMPD_distribute_parallel_for_simd: 8893 case OMPD_teams_distribute: 8894 case OMPD_teams_distribute_simd: 8895 case OMPD_teams_distribute_parallel_for: 8896 case OMPD_teams_distribute_parallel_for_simd: 8897 case OMPD_target_update: 8898 case OMPD_declare_simd: 8899 case OMPD_declare_variant: 8900 case OMPD_begin_declare_variant: 8901 case OMPD_end_declare_variant: 8902 case OMPD_declare_target: 8903 case OMPD_end_declare_target: 8904 case OMPD_declare_reduction: 8905 case OMPD_declare_mapper: 8906 case OMPD_taskloop: 8907 case OMPD_taskloop_simd: 8908 case OMPD_master_taskloop: 8909 case OMPD_master_taskloop_simd: 8910 case OMPD_parallel_master_taskloop: 8911 case OMPD_parallel_master_taskloop_simd: 8912 case OMPD_requires: 8913 case OMPD_unknown: 8914 default: 8915 llvm_unreachable("Unexpected directive."); 8916 } 8917 } 8918 8919 return nullptr; 8920 } 8921 8922 /// Emit the user-defined mapper function. The code generation follows the 8923 /// pattern in the example below. 8924 /// \code 8925 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8926 /// void *base, void *begin, 8927 /// int64_t size, int64_t type) { 8928 /// // Allocate space for an array section first. 8929 /// if (size > 1 && !maptype.IsDelete) 8930 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8931 /// size*sizeof(Ty), clearToFrom(type)); 8932 /// // Map members. 8933 /// for (unsigned i = 0; i < size; i++) { 8934 /// // For each component specified by this mapper: 8935 /// for (auto c : all_components) { 8936 /// if (c.hasMapper()) 8937 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 8938 /// c.arg_type); 8939 /// else 8940 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 8941 /// c.arg_begin, c.arg_size, c.arg_type); 8942 /// } 8943 /// } 8944 /// // Delete the array section. 8945 /// if (size > 1 && maptype.IsDelete) 8946 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8947 /// size*sizeof(Ty), clearToFrom(type)); 8948 /// } 8949 /// \endcode 8950 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 8951 CodeGenFunction *CGF) { 8952 if (UDMMap.count(D) > 0) 8953 return; 8954 ASTContext &C = CGM.getContext(); 8955 QualType Ty = D->getType(); 8956 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 8957 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8958 auto *MapperVarDecl = 8959 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 8960 SourceLocation Loc = D->getLocation(); 8961 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 8962 8963 // Prepare mapper function arguments and attributes. 
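  // For reference, a user-defined mapper that reaches this codegen could look
  // like the following (illustrative only):
  //   struct Vec { int Len; double *Data; };
  //   #pragma omp declare mapper(id : Vec V) map(V, V.Data[0:V.Len])
  // The five implicit parameters built below form the fixed signature shared
  // by all generated mapper functions.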
8964   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8965                               C.VoidPtrTy, ImplicitParamDecl::Other);
8966   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
8967                             ImplicitParamDecl::Other);
8968   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8969                              C.VoidPtrTy, ImplicitParamDecl::Other);
8970   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8971                             ImplicitParamDecl::Other);
8972   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8973                             ImplicitParamDecl::Other);
8974   FunctionArgList Args;
8975   Args.push_back(&HandleArg);
8976   Args.push_back(&BaseArg);
8977   Args.push_back(&BeginArg);
8978   Args.push_back(&SizeArg);
8979   Args.push_back(&TypeArg);
8980   const CGFunctionInfo &FnInfo =
8981       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
8982   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
8983   SmallString<64> TyStr;
8984   llvm::raw_svector_ostream Out(TyStr);
8985   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
8986   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
8987   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
8988                                     Name, &CGM.getModule());
8989   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
8990   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
8991   // Start the mapper function code generation.
8992   CodeGenFunction MapperCGF(CGM);
8993   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
8994   // Compute the starting and end addresses of array elements.
8995   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
8996       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
8997       C.getPointerType(Int64Ty), Loc);
8998   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
8999       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9000       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9001   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9002   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9003       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9004       C.getPointerType(Int64Ty), Loc);
9005   // Prepare common arguments for array initialization and deletion.
9006   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9007       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9008       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9009   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9010       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9011       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9012   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9013       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9014       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9015 
9016   // Emit array initialization if this is an array section and \p MapType
9017   // indicates that memory allocation is required.
9018   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9019   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9020                              ElementSize, HeadBB, /*IsInit=*/true);
9021 
9022   // Emit a for loop to iterate through SizeArg elements and map all of them.
9023 
9024   // Emit the loop header block.
9025   MapperCGF.EmitBlock(HeadBB);
9026   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9027   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9028   // Evaluate whether the initial condition is satisfied.
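  // Conceptually, the blocks built below implement (sketch):
  //   if (begin != end)
  //     for (T *Cur = begin; Cur != end; ++Cur)
  //       <map *Cur>;
  // with the emptiness guard emitted here and the pointer increment emitted
  // after the loop body.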
9029 llvm::Value *IsEmpty = 9030 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 9031 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 9032 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 9033 9034 // Emit the loop body block. 9035 MapperCGF.EmitBlock(BodyBB); 9036 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 9037 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 9038 PtrPHI->addIncoming(PtrBegin, EntryBB); 9039 Address PtrCurrent = 9040 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 9041 .getAlignment() 9042 .alignmentOfArrayElement(ElementSize)); 9043 // Privatize the declared variable of mapper to be the current array element. 9044 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9045 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 9046 return MapperCGF 9047 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 9048 .getAddress(MapperCGF); 9049 }); 9050 (void)Scope.Privatize(); 9051 9052 // Get map clause information. Fill up the arrays with all mapped variables. 9053 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9054 MappableExprsHandler::MapValuesArrayTy Pointers; 9055 MappableExprsHandler::MapValuesArrayTy Sizes; 9056 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9057 MappableExprsHandler MEHandler(*D, MapperCGF); 9058 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 9059 9060 // Call the runtime API __tgt_mapper_num_components to get the number of 9061 // pre-existing components. 9062 llvm::Value *OffloadingArgs[] = {Handle}; 9063 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9064 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9065 OMPRTL___tgt_mapper_num_components), 9066 OffloadingArgs); 9067 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9068 PreviousSize, 9069 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9070 9071 // Fill up the runtime mapper handle for all components. 9072 for (unsigned I = 0; I < BasePointers.size(); ++I) { 9073 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9074 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9075 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9076 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9077 llvm::Value *CurSizeArg = Sizes[I]; 9078 9079 // Extract the MEMBER_OF field from the map type. 9080 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 9081 MapperCGF.EmitBlock(MemberBB); 9082 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 9083 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 9084 OriMapType, 9085 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 9086 llvm::BasicBlock *MemberCombineBB = 9087 MapperCGF.createBasicBlock("omp.member.combine"); 9088 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 9089 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 9090 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 9091 // Add the number of pre-existing components to the MEMBER_OF field if it 9092 // is valid. 9093 MapperCGF.EmitBlock(MemberCombineBB); 9094 llvm::Value *CombinedMember = 9095 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9096 // Do nothing if it is not a member of previous components. 
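    // Illustrative numbers: if the runtime reports 3 pre-existing components,
    // ShiftedPreviousSize is 3 << getFlagMemberOffset(), so an entry carrying
    // MEMBER_OF(1) is rewritten to MEMBER_OF(4) by the addition above.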
9097 MapperCGF.EmitBlock(TypeBB); 9098 llvm::PHINode *MemberMapType = 9099 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9100 MemberMapType->addIncoming(OriMapType, MemberBB); 9101 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9102 9103 // Combine the map type inherited from user-defined mapper with that 9104 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9105 // bits of the \a MapType, which is the input argument of the mapper 9106 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9107 // bits of MemberMapType. 9108 // [OpenMP 5.0], 1.2.6. map-type decay. 9109 // | alloc | to | from | tofrom | release | delete 9110 // ---------------------------------------------------------- 9111 // alloc | alloc | alloc | alloc | alloc | release | delete 9112 // to | alloc | to | alloc | to | release | delete 9113 // from | alloc | alloc | from | from | release | delete 9114 // tofrom | alloc | to | from | tofrom | release | delete 9115 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9116 MapType, 9117 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9118 MappableExprsHandler::OMP_MAP_FROM)); 9119 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9120 llvm::BasicBlock *AllocElseBB = 9121 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9122 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9123 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9124 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9125 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9126 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9127 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9128 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9129 MapperCGF.EmitBlock(AllocBB); 9130 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9131 MemberMapType, 9132 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9133 MappableExprsHandler::OMP_MAP_FROM))); 9134 MapperCGF.Builder.CreateBr(EndBB); 9135 MapperCGF.EmitBlock(AllocElseBB); 9136 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9137 LeftToFrom, 9138 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9139 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9140 // In case of to, clear OMP_MAP_FROM. 9141 MapperCGF.EmitBlock(ToBB); 9142 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9143 MemberMapType, 9144 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9145 MapperCGF.Builder.CreateBr(EndBB); 9146 MapperCGF.EmitBlock(ToElseBB); 9147 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9148 LeftToFrom, 9149 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9150 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9151 // In case of from, clear OMP_MAP_TO. 9152 MapperCGF.EmitBlock(FromBB); 9153 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9154 MemberMapType, 9155 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9156 // In case of tofrom, do nothing. 
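    // E.g. (illustrative): if the mapper was invoked from a 'from' map, a
    // member declared 'to' inside the mapper has its TO bit cleared, leaving
    // neither TO nor FROM set, i.e. 'alloc', matching the table above.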
9157     MapperCGF.EmitBlock(EndBB);
9158     llvm::PHINode *CurMapType =
9159         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9160     CurMapType->addIncoming(AllocMapType, AllocBB);
9161     CurMapType->addIncoming(ToMapType, ToBB);
9162     CurMapType->addIncoming(FromMapType, FromBB);
9163     CurMapType->addIncoming(MemberMapType, ToElseBB);
9164 
9165     // TODO: call the corresponding mapper function if a user-defined mapper is
9166     // associated with this map clause.
9167     // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9168     // data structure.
9169     llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9170                                      CurSizeArg, CurMapType};
9171     MapperCGF.EmitRuntimeCall(
9172         OMPBuilder.getOrCreateRuntimeFunction(
9173             CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9174         OffloadingArgs);
9175   }
9176 
9177   // Update the pointer to point to the next element that needs to be mapped,
9178   // and check whether we have mapped all elements.
9179   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9180       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9181   PtrPHI->addIncoming(PtrNext, BodyBB);
9182   llvm::Value *IsDone =
9183       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9184   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9185   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9186 
9187   MapperCGF.EmitBlock(ExitBB);
9188   // Emit array deletion if this is an array section and \p MapType indicates
9189   // that deletion is required.
9190   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9191                              ElementSize, DoneBB, /*IsInit=*/false);
9192 
9193   // Emit the function exit block.
9194   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9195   MapperCGF.FinishFunction();
9196   UDMMap.try_emplace(D, Fn);
9197   if (CGF) {
9198     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9199     Decls.second.push_back(D);
9200   }
9201 }
9202 
9203 /// Emit the array initialization or deletion portion for user-defined mapper
9204 /// code generation. First, it evaluates whether an array section is mapped and
9205 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9206 /// true, and \a MapType indicates not to delete this array, array
9207 /// initialization code is generated. If \a IsInit is false, and \a MapType
9208 /// indicates to delete this array, array deletion code is generated.
9209 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9210     CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9211     llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9212     CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9213   StringRef Prefix = IsInit ? ".init" : ".del";
9214 
9215   // Evaluate if this is an array section.
9216   llvm::BasicBlock *IsDeleteBB =
9217       MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
9218   llvm::BasicBlock *BodyBB =
9219       MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9220   llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9221       Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9222   MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9223 
9224   // Evaluate if we are going to delete this section.
9225 MapperCGF.EmitBlock(IsDeleteBB); 9226 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9227 MapType, 9228 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9229 llvm::Value *DeleteCond; 9230 if (IsInit) { 9231 DeleteCond = MapperCGF.Builder.CreateIsNull( 9232 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9233 } else { 9234 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9235 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9236 } 9237 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9238 9239 MapperCGF.EmitBlock(BodyBB); 9240 // Get the array size by multiplying element size and element number (i.e., \p 9241 // Size). 9242 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9243 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9244 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9245 // memory allocation/deletion purpose only. 9246 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9247 MapType, 9248 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9249 MappableExprsHandler::OMP_MAP_FROM))); 9250 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9251 // data structure. 9252 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9253 MapperCGF.EmitRuntimeCall( 9254 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 9255 OMPRTL___tgt_push_mapper_component), 9256 OffloadingArgs); 9257 } 9258 9259 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9260 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9261 llvm::Value *DeviceID, 9262 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9263 const OMPLoopDirective &D)> 9264 SizeEmitter) { 9265 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9266 const OMPExecutableDirective *TD = &D; 9267 // Get nested teams distribute kind directive, if any. 
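  // E.g. (illustrative):
  //   #pragma omp target
  //   #pragma omp teams distribute parallel for
  //   for (int i = 0; i < N; ++i) ...
  // Here the combined 'teams distribute parallel for' is the nested distribute
  // directive whose trip count is pushed to the runtime below.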
9268 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9269 TD = getNestedDistributeDirective(CGM.getContext(), D); 9270 if (!TD) 9271 return; 9272 const auto *LD = cast<OMPLoopDirective>(TD); 9273 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9274 PrePostActionTy &) { 9275 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9276 llvm::Value *Args[] = {DeviceID, NumIterations}; 9277 CGF.EmitRuntimeCall( 9278 OMPBuilder.getOrCreateRuntimeFunction( 9279 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount), 9280 Args); 9281 } 9282 }; 9283 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9284 } 9285 9286 void CGOpenMPRuntime::emitTargetCall( 9287 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9288 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9289 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 9290 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9291 const OMPLoopDirective &D)> 9292 SizeEmitter) { 9293 if (!CGF.HaveInsertPoint()) 9294 return; 9295 9296 assert(OutlinedFn && "Invalid outlined function!"); 9297 9298 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9299 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9300 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9301 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9302 PrePostActionTy &) { 9303 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9304 }; 9305 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9306 9307 CodeGenFunction::OMPTargetDataInfo InputInfo; 9308 llvm::Value *MapTypesArray = nullptr; 9309 // Fill up the pointer arrays and transfer execution to the device. 9310 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9311 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, 9312 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 9313 if (Device.getInt() == OMPC_DEVICE_ancestor) { 9314 // Reverse offloading is not supported, so just execute on the host. 9315 if (RequiresOuterTask) { 9316 CapturedVars.clear(); 9317 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9318 } 9319 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9320 return; 9321 } 9322 9323 // On top of the arrays that were filled up, the target offloading call 9324 // takes as arguments the device id as well as the host pointer. The host 9325 // pointer is used by the runtime library to identify the current target 9326 // region, so it only has to be unique and not necessarily point to 9327 // anything. It could be the pointer to the outlined function that 9328 // implements the target region, but we aren't using that so that the 9329 // compiler doesn't need to keep that, and could therefore inline the host 9330 // function if proven worthwhile during optimization. 9331 9332 // From this point on, we need to have an ID of the target region defined. 9333 assert(OutlinedFnID && "Invalid outlined function ID!"); 9334 9335 // Emit device ID if any. 
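    // E.g. (illustrative): for '#pragma omp target device(Dev)' the value of
    // 'Dev' is emitted and sign-extended to 64 bits; without a device clause
    // the OMP_DEVICEID_UNDEF sentinel is used instead.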
9336 llvm::Value *DeviceID; 9337 if (Device.getPointer()) { 9338 assert((Device.getInt() == OMPC_DEVICE_unknown || 9339 Device.getInt() == OMPC_DEVICE_device_num) && 9340 "Expected device_num modifier."); 9341 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 9342 DeviceID = 9343 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 9344 } else { 9345 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9346 } 9347 9348 // Emit the number of elements in the offloading arrays. 9349 llvm::Value *PointerNum = 9350 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9351 9352 // Return value of the runtime offloading call. 9353 llvm::Value *Return; 9354 9355 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 9356 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 9357 9358 // Emit tripcount for the target loop-based directive. 9359 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 9360 9361 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9362 // The target region is an outlined function launched by the runtime 9363 // via calls __tgt_target() or __tgt_target_teams(). 9364 // 9365 // __tgt_target() launches a target region with one team and one thread, 9366 // executing a serial region. This master thread may in turn launch 9367 // more threads within its team upon encountering a parallel region, 9368 // however, no additional teams can be launched on the device. 9369 // 9370 // __tgt_target_teams() launches a target region with one or more teams, 9371 // each with one or more threads. This call is required for target 9372 // constructs such as: 9373 // 'target teams' 9374 // 'target' / 'teams' 9375 // 'target teams distribute parallel for' 9376 // 'target parallel' 9377 // and so on. 9378 // 9379 // Note that on the host and CPU targets, the runtime implementation of 9380 // these calls simply call the outlined function without forking threads. 9381 // The outlined functions themselves have runtime calls to 9382 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 9383 // the compiler in emitTeamsCall() and emitParallelCall(). 9384 // 9385 // In contrast, on the NVPTX target, the implementation of 9386 // __tgt_target_teams() launches a GPU kernel with the requested number 9387 // of teams and threads so no additional calls to the runtime are required. 9388 if (NumTeams) { 9389 // If we have NumTeams defined this means that we have an enclosed teams 9390 // region. Therefore we also expect to have NumThreads defined. These two 9391 // values should be defined in the presence of a teams directive, 9392 // regardless of having any clauses associated. If the user is using teams 9393 // but no clauses, these two values will be the default that should be 9394 // passed to the runtime library - a 32-bit integer with the value zero. 9395 assert(NumThreads && "Thread limit expression should be available along " 9396 "with number of teams."); 9397 llvm::Value *OffloadingArgs[] = {DeviceID, 9398 OutlinedFnID, 9399 PointerNum, 9400 InputInfo.BasePointersArray.getPointer(), 9401 InputInfo.PointersArray.getPointer(), 9402 InputInfo.SizesArray.getPointer(), 9403 MapTypesArray, 9404 NumTeams, 9405 NumThreads}; 9406 Return = CGF.EmitRuntimeCall( 9407 OMPBuilder.getOrCreateRuntimeFunction( 9408 CGM.getModule(), HasNowait ? 
OMPRTL___tgt_target_teams_nowait 9409 : OMPRTL___tgt_target_teams), 9410 OffloadingArgs); 9411 } else { 9412 llvm::Value *OffloadingArgs[] = {DeviceID, 9413 OutlinedFnID, 9414 PointerNum, 9415 InputInfo.BasePointersArray.getPointer(), 9416 InputInfo.PointersArray.getPointer(), 9417 InputInfo.SizesArray.getPointer(), 9418 MapTypesArray}; 9419 Return = CGF.EmitRuntimeCall( 9420 OMPBuilder.getOrCreateRuntimeFunction( 9421 CGM.getModule(), 9422 HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target), 9423 OffloadingArgs); 9424 } 9425 9426 // Check the error code and execute the host version if required. 9427 llvm::BasicBlock *OffloadFailedBlock = 9428 CGF.createBasicBlock("omp_offload.failed"); 9429 llvm::BasicBlock *OffloadContBlock = 9430 CGF.createBasicBlock("omp_offload.cont"); 9431 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9432 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9433 9434 CGF.EmitBlock(OffloadFailedBlock); 9435 if (RequiresOuterTask) { 9436 CapturedVars.clear(); 9437 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9438 } 9439 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9440 CGF.EmitBranch(OffloadContBlock); 9441 9442 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9443 }; 9444 9445 // Notify that the host version must be executed. 9446 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 9447 RequiresOuterTask](CodeGenFunction &CGF, 9448 PrePostActionTy &) { 9449 if (RequiresOuterTask) { 9450 CapturedVars.clear(); 9451 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9452 } 9453 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9454 }; 9455 9456 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9457 &CapturedVars, RequiresOuterTask, 9458 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9459 // Fill up the arrays with all the captured variables. 9460 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9461 MappableExprsHandler::MapValuesArrayTy Pointers; 9462 MappableExprsHandler::MapValuesArrayTy Sizes; 9463 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9464 9465 // Get mappable expression information. 9466 MappableExprsHandler MEHandler(D, CGF); 9467 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 9468 9469 auto RI = CS.getCapturedRecordDecl()->field_begin(); 9470 auto CV = CapturedVars.begin(); 9471 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 9472 CE = CS.capture_end(); 9473 CI != CE; ++CI, ++RI, ++CV) { 9474 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 9475 MappableExprsHandler::MapValuesArrayTy CurPointers; 9476 MappableExprsHandler::MapValuesArrayTy CurSizes; 9477 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 9478 MappableExprsHandler::StructRangeInfoTy PartialStruct; 9479 9480 // VLA sizes are passed to the outlined region by copy and do not have map 9481 // information associated. 9482 if (CI->capturesVariableArrayType()) { 9483 CurBasePointers.push_back(*CV); 9484 CurPointers.push_back(*CV); 9485 CurSizes.push_back(CGF.Builder.CreateIntCast( 9486 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 9487 // Copy to the device as an argument. No need to retrieve it. 9488 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 9489 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 9490 MappableExprsHandler::OMP_MAP_IMPLICIT); 9491 } else { 9492 // If we have any information in the map clause, we use it, otherwise we 9493 // just do a default mapping. 
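      // E.g. (illustrative): a capture mentioned in 'map(to: A)' gets its
      // information from generateInfoForCapture below, while a scalar capture
      // with no map clause falls through to generateDefaultMapInfo and is
      // flagged as an implicit map.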
9494         MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9495                                          CurSizes, CurMapTypes, PartialStruct);
9496         if (CurBasePointers.empty())
9497           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9498                                            CurPointers, CurSizes, CurMapTypes);
9499         // Generate correct mapping for variables captured by reference in
9500         // lambdas.
9501         if (CI->capturesVariable())
9502           MEHandler.generateInfoForLambdaCaptures(
9503               CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9504               CurMapTypes, LambdaPointers);
9505       }
9506       // We expect to have at least an element of information for this capture.
9507       assert(!CurBasePointers.empty() &&
9508              "Non-existing map pointer for capture!");
9509       assert(CurBasePointers.size() == CurPointers.size() &&
9510              CurBasePointers.size() == CurSizes.size() &&
9511              CurBasePointers.size() == CurMapTypes.size() &&
9512              "Inconsistent map information sizes!");
9513 
9514       // If there is an entry in PartialStruct it means we have a struct with
9515       // individual members mapped. Emit an extra combined entry.
9516       if (PartialStruct.Base.isValid())
9517         MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
9518                                     CurMapTypes, PartialStruct);
9519 
9520       // We need to append the results of this capture to what we already have.
9521       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
9522       Pointers.append(CurPointers.begin(), CurPointers.end());
9523       Sizes.append(CurSizes.begin(), CurSizes.end());
9524       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
9525     }
9526     // Adjust MEMBER_OF flags for the lambdas captures.
9527     MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
9528                                               Pointers, MapTypes);
9529     // Map other list items in the map clause which are not captured variables
9530     // but "declare target link" global variables.
9531     MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
9532                                                MapTypes);
9533 
9534     TargetDataInfo Info;
9535     // Fill up the arrays and create the arguments.
9536     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9537     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9538                                  Info.PointersArray, Info.SizesArray,
9539                                  Info.MapTypesArray, Info);
9540     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9541     InputInfo.BasePointersArray =
9542         Address(Info.BasePointersArray, CGM.getPointerAlign());
9543     InputInfo.PointersArray =
9544         Address(Info.PointersArray, CGM.getPointerAlign());
9545     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9546     MapTypesArray = Info.MapTypesArray;
9547     if (RequiresOuterTask)
9548       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9549     else
9550       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9551   };
9552 
9553   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9554                              CodeGenFunction &CGF, PrePostActionTy &) {
9555     if (RequiresOuterTask) {
9556       CodeGenFunction::OMPTargetDataInfo InputInfo;
9557       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9558     } else {
9559       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9560     }
9561   };
9562 
9563   // If we have a target function ID it means that we need to support
9564   // offloading; otherwise, just execute on the host. We need to execute on the
9565   // host regardless of the conditional in the if clause if, e.g., the user
9566   // does not specify target triples.
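  // E.g. (illustrative): compiling without '-fopenmp-targets=<triple>' yields
  // no outlined function ID, so only the host path (TargetElseGen) runs.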
9567 if (OutlinedFnID) { 9568 if (IfCond) { 9569 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 9570 } else { 9571 RegionCodeGenTy ThenRCG(TargetThenGen); 9572 ThenRCG(CGF); 9573 } 9574 } else { 9575 RegionCodeGenTy ElseRCG(TargetElseGen); 9576 ElseRCG(CGF); 9577 } 9578 } 9579 9580 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 9581 StringRef ParentName) { 9582 if (!S) 9583 return; 9584 9585 // Codegen OMP target directives that offload compute to the device. 9586 bool RequiresDeviceCodegen = 9587 isa<OMPExecutableDirective>(S) && 9588 isOpenMPTargetExecutionDirective( 9589 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9590 9591 if (RequiresDeviceCodegen) { 9592 const auto &E = *cast<OMPExecutableDirective>(S); 9593 unsigned DeviceID; 9594 unsigned FileID; 9595 unsigned Line; 9596 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 9597 FileID, Line); 9598 9599 // Is this a target region that should not be emitted as an entry point? If 9600 // so just signal we are done with this target region. 9601 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9602 ParentName, Line)) 9603 return; 9604 9605 switch (E.getDirectiveKind()) { 9606 case OMPD_target: 9607 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9608 cast<OMPTargetDirective>(E)); 9609 break; 9610 case OMPD_target_parallel: 9611 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9612 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9613 break; 9614 case OMPD_target_teams: 9615 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9616 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9617 break; 9618 case OMPD_target_teams_distribute: 9619 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9620 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9621 break; 9622 case OMPD_target_teams_distribute_simd: 9623 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9624 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9625 break; 9626 case OMPD_target_parallel_for: 9627 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9628 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9629 break; 9630 case OMPD_target_parallel_for_simd: 9631 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9632 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9633 break; 9634 case OMPD_target_simd: 9635 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9636 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9637 break; 9638 case OMPD_target_teams_distribute_parallel_for: 9639 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9640 CGM, ParentName, 9641 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9642 break; 9643 case OMPD_target_teams_distribute_parallel_for_simd: 9644 CodeGenFunction:: 9645 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9646 CGM, ParentName, 9647 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9648 break; 9649 case OMPD_parallel: 9650 case OMPD_for: 9651 case OMPD_parallel_for: 9652 case OMPD_parallel_master: 9653 case OMPD_parallel_sections: 9654 case OMPD_for_simd: 9655 case OMPD_parallel_for_simd: 9656 case OMPD_cancel: 9657 case OMPD_cancellation_point: 9658 case OMPD_ordered: 9659 case OMPD_threadprivate: 9660 case OMPD_allocate: 9661 case OMPD_task: 9662 case OMPD_simd: 9663 case OMPD_sections: 9664 case OMPD_section: 9665 case OMPD_single: 9666 case OMPD_master: 9667 case OMPD_critical: 9668 case 
OMPD_taskyield:
9669     case OMPD_barrier:
9670     case OMPD_taskwait:
9671     case OMPD_taskgroup:
9672     case OMPD_atomic:
9673     case OMPD_flush:
9674     case OMPD_depobj:
9675     case OMPD_scan:
9676     case OMPD_teams:
9677     case OMPD_target_data:
9678     case OMPD_target_exit_data:
9679     case OMPD_target_enter_data:
9680     case OMPD_distribute:
9681     case OMPD_distribute_simd:
9682     case OMPD_distribute_parallel_for:
9683     case OMPD_distribute_parallel_for_simd:
9684     case OMPD_teams_distribute:
9685     case OMPD_teams_distribute_simd:
9686     case OMPD_teams_distribute_parallel_for:
9687     case OMPD_teams_distribute_parallel_for_simd:
9688     case OMPD_target_update:
9689     case OMPD_declare_simd:
9690     case OMPD_declare_variant:
9691     case OMPD_begin_declare_variant:
9692     case OMPD_end_declare_variant:
9693     case OMPD_declare_target:
9694     case OMPD_end_declare_target:
9695     case OMPD_declare_reduction:
9696     case OMPD_declare_mapper:
9697     case OMPD_taskloop:
9698     case OMPD_taskloop_simd:
9699     case OMPD_master_taskloop:
9700     case OMPD_master_taskloop_simd:
9701     case OMPD_parallel_master_taskloop:
9702     case OMPD_parallel_master_taskloop_simd:
9703     case OMPD_requires:
9704     case OMPD_unknown:
9705     default:
9706       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9707     }
9708     return;
9709   }
9710 
9711   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9712     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9713       return;
9714 
9715     scanForTargetRegionsFunctions(
9716         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9717     return;
9718   }
9719 
9720   // If this is a lambda function, look into its body.
9721   if (const auto *L = dyn_cast<LambdaExpr>(S))
9722     S = L->getBody();
9723 
9724   // Keep looking for target regions recursively.
9725   for (const Stmt *II : S->children())
9726     scanForTargetRegionsFunctions(II, ParentName);
9727 }
9728 
9729 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9730   // If emitting code for the host, we do not process FD here. Instead we do
9731   // the normal code generation.
9732   if (!CGM.getLangOpts().OpenMPIsDevice) {
9733     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9734       Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9735           OMPDeclareTargetDeclAttr::getDeviceType(FD);
9736       // Do not emit device_type(nohost) functions for the host.
9737       if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9738         return true;
9739     }
9740     return false;
9741   }
9742 
9743   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9744   // Try to detect target regions in the function.
9745   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9746     StringRef Name = CGM.getMangledName(GD);
9747     scanForTargetRegionsFunctions(FD->getBody(), Name);
9748     Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9749         OMPDeclareTargetDeclAttr::getDeviceType(FD);
9750     // Do not emit device_type(host) functions for the device.
9751     if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9752       return true;
9753   }
9754 
9755   // Do not emit the function if it is not marked as declare target.
9756   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9757          AlreadyEmittedTargetDecls.count(VD) == 0;
9758 }
9759 
9760 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9761   if (!CGM.getLangOpts().OpenMPIsDevice)
9762     return false;
9763 
9764   // Check if there are Ctors/Dtors in this declaration and look for target
9765   // regions in it. We use the complete variant to produce the kernel name
9766   // mangling.
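  // E.g. (illustrative): for a global 'S GblS;' of class type, target regions
  // inside S::S() and S::~S() are discovered using the mangled names of the
  // complete constructor and destructor as the parent names.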
9767   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9768   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9769     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9770       StringRef ParentName =
9771           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9772       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9773     }
9774     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9775       StringRef ParentName =
9776           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9777       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9778     }
9779   }
9780 
9781   // Do not emit the variable if it is not marked as declare target.
9782   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9783       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9784           cast<VarDecl>(GD.getDecl()));
9785   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9786       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9787        HasRequiresUnifiedSharedMemory)) {
9788     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9789     return true;
9790   }
9791   return false;
9792 }
9793 
9794 llvm::Constant *
9795 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9796                                                 const VarDecl *VD) {
9797   assert(VD->getType().isConstant(CGM.getContext()) &&
9798          "Expected constant variable.");
9799   StringRef VarName;
9800   llvm::Constant *Addr;
9801   llvm::GlobalValue::LinkageTypes Linkage;
9802   QualType Ty = VD->getType();
9803   SmallString<128> Buffer;
9804   {
9805     unsigned DeviceID;
9806     unsigned FileID;
9807     unsigned Line;
9808     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9809                              FileID, Line);
9810     llvm::raw_svector_ostream OS(Buffer);
9811     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9812        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9813     VarName = OS.str();
9814   }
9815   Linkage = llvm::GlobalValue::InternalLinkage;
9816   Addr =
9817       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9818                                   getDefaultFirstprivateAddressSpace());
9819   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9820   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9821   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9822   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9823       VarName, Addr, VarSize,
9824       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9825   return Addr;
9826 }
9827 
9828 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9829                                                    llvm::Constant *Addr) {
9830   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9831       !CGM.getLangOpts().OpenMPIsDevice)
9832     return;
9833   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9834       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9835   if (!Res) {
9836     if (CGM.getLangOpts().OpenMPIsDevice) {
9837       // Register non-target variables being emitted in device code (debug info
9838       // may cause this).
9839       StringRef VarName = CGM.getMangledName(VD);
9840       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9841     }
9842     return;
9843   }
9844   // Register declare target variables.
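  // E.g. (illustrative):
  //   #pragma omp declare target
  //   int DevGbl = 0;
  //   #pragma omp end declare target
  // registers 'DevGbl' with the 'to' entry kind below, unless unified shared
  // memory is required, in which case it is handled like a 'link' entry.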
9845 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9846 StringRef VarName; 9847 CharUnits VarSize; 9848 llvm::GlobalValue::LinkageTypes Linkage; 9849 9850 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9851 !HasRequiresUnifiedSharedMemory) { 9852 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9853 VarName = CGM.getMangledName(VD); 9854 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9855 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9856 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9857 } else { 9858 VarSize = CharUnits::Zero(); 9859 } 9860 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9861 // Temporary solution to prevent optimization of the internal variables. 9862 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9863 std::string RefName = getName({VarName, "ref"}); 9864 if (!CGM.GetGlobalValue(RefName)) { 9865 llvm::Constant *AddrRef = 9866 getOrCreateInternalVariable(Addr->getType(), RefName); 9867 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9868 GVAddrRef->setConstant(/*Val=*/true); 9869 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9870 GVAddrRef->setInitializer(Addr); 9871 CGM.addCompilerUsedGlobal(GVAddrRef); 9872 } 9873 } 9874 } else { 9875 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9876 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9877 HasRequiresUnifiedSharedMemory)) && 9878 "Declare target attribute must be 'link', or 'to' with unified memory."); 9879 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9880 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9881 else 9882 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9883 9884 if (CGM.getLangOpts().OpenMPIsDevice) { 9885 VarName = Addr->getName(); 9886 Addr = nullptr; 9887 } else { 9888 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9889 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9890 } 9891 VarSize = CGM.getPointerSize(); 9892 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9893 } 9894 9895 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9896 VarName, Addr, VarSize, Flags, Linkage); 9897 } 9898 9899 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9900 if (isa<FunctionDecl>(GD.getDecl()) || 9901 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9902 return emitTargetFunctions(GD); 9903 9904 return emitTargetGlobalVariable(GD); 9905 } 9906 9907 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9908 for (const VarDecl *VD : DeferredGlobalVariables) { 9909 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9910 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9911 if (!Res) 9912 continue; 9913 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9914 !HasRequiresUnifiedSharedMemory) { 9915 CGM.EmitGlobal(VD); 9916 } else { 9917 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9918 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9919 HasRequiresUnifiedSharedMemory)) && 9920 "Expected link clause or to clause with unified memory."); 9921 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9922 } 9923 } 9924 } 9925 9926 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9927 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9928 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9929 "Expected a target-based directive."); 9930 } 9931 9932 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 9933 for (const OMPClause
*Clause : D->clauselists()) { 9934 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9935 HasRequiresUnifiedSharedMemory = true; 9936 } else if (const auto *AC = 9937 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 9938 switch (AC->getAtomicDefaultMemOrderKind()) { 9939 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 9940 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 9941 break; 9942 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 9943 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 9944 break; 9945 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 9946 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 9947 break; 9948 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 9949 break; 9950 } 9951 } 9952 } 9953 } 9954 9955 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 9956 return RequiresAtomicOrdering; 9957 } 9958 9959 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9960 LangAS &AS) { 9961 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9962 return false; 9963 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9964 switch (A->getAllocatorType()) { 9965 case OMPAllocateDeclAttr::OMPNullMemAlloc: 9966 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9967 // Not supported, fallback to the default mem space. 9968 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 9969 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 9970 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 9971 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 9972 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 9973 case OMPAllocateDeclAttr::OMPConstMemAlloc: 9974 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 9975 AS = LangAS::Default; 9976 return true; 9977 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 9978 llvm_unreachable("Expected a predefined allocator for variables with " 9979 "static storage."); 9980 } 9981 return false; 9982 } 9983 9984 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 9985 return HasRequiresUnifiedSharedMemory; 9986 } 9987 9988 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 9989 CodeGenModule &CGM) 9990 : CGM(CGM) { 9991 if (CGM.getLangOpts().OpenMPIsDevice) { 9992 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 9993 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 9994 } 9995 } 9996 9997 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 9998 if (CGM.getLangOpts().OpenMPIsDevice) 9999 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10000 } 10001 10002 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10003 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10004 return true; 10005 10006 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10007 // Do not emit the function if it is marked as declare target, as it was 10008 // already emitted. 10009 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10010 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10011 if (auto *F = dyn_cast_or_null<llvm::Function>( 10012 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10013 return !F->isDeclaration(); 10014 return false; 10015 } 10016 return true; 10017 } 10018 10019 return !AlreadyEmittedTargetDecls.insert(D).second; 10020 } 10021 10022 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10023 // If we don't have entries or if we are emitting code for the device, we 10024 // don't need to do anything.
10025 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10026 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10027 (OffloadEntriesInfoManager.empty() && 10028 !HasEmittedDeclareTargetRegion && 10029 !HasEmittedTargetRegion)) 10030 return nullptr; 10031 10032 // Create and register the function that handles the requires directives. 10033 ASTContext &C = CGM.getContext(); 10034 10035 llvm::Function *RequiresRegFn; 10036 { 10037 CodeGenFunction CGF(CGM); 10038 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10039 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10040 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10041 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10042 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10043 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10044 // TODO: check for other requires clauses. 10045 // The requires directive takes effect only when a target region is 10046 // present in the compilation unit. Otherwise it is ignored and not 10047 // passed to the runtime. This prevents the runtime from raising an error 10048 // for mismatched requires clauses across compilation units that don't 10049 // contain at least one target region. 10050 assert((HasEmittedTargetRegion || 10051 HasEmittedDeclareTargetRegion || 10052 !OffloadEntriesInfoManager.empty()) && 10053 "Target or declare target region expected."); 10054 if (HasRequiresUnifiedSharedMemory) 10055 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10056 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10057 CGM.getModule(), OMPRTL___tgt_register_requires), 10058 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10059 CGF.FinishFunction(); 10060 } 10061 return RequiresRegFn; 10062 } 10063 10064 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10065 const OMPExecutableDirective &D, 10066 SourceLocation Loc, 10067 llvm::Function *OutlinedFn, 10068 ArrayRef<llvm::Value *> CapturedVars) { 10069 if (!CGF.HaveInsertPoint()) 10070 return; 10071 10072 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10073 CodeGenFunction::RunCleanupsScope Scope(CGF); 10074 10075 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10076 llvm::Value *Args[] = { 10077 RTLoc, 10078 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10079 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10080 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10081 RealArgs.append(std::begin(Args), std::end(Args)); 10082 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10083 10084 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10085 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10086 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10087 } 10088 10089 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10090 const Expr *NumTeams, 10091 const Expr *ThreadLimit, 10092 SourceLocation Loc) { 10093 if (!CGF.HaveInsertPoint()) 10094 return; 10095 10096 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10097 10098 llvm::Value *NumTeamsVal = 10099 NumTeams 10100 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10101 CGF.CGM.Int32Ty, /* isSigned = */ true) 10102 : CGF.Builder.getInt32(0); 10103 10104 llvm::Value *ThreadLimitVal = 10105 ThreadLimit 10106 ?
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10107 CGF.CGM.Int32Ty, /* isSigned = */ true) 10108 : CGF.Builder.getInt32(0); 10109 10110 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit) 10111 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10112 ThreadLimitVal}; 10113 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10114 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 10115 PushNumTeamsArgs); 10116 } 10117 10118 void CGOpenMPRuntime::emitTargetDataCalls( 10119 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10120 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10121 if (!CGF.HaveInsertPoint()) 10122 return; 10123 10124 // Action used to replace the default codegen action and turn privatization 10125 // off. 10126 PrePostActionTy NoPrivAction; 10127 10128 // Generate the code for the opening of the data environment. Capture all the 10129 // arguments of the runtime call by reference because they are used in the 10130 // closing of the region. 10131 auto &&BeginThenGen = [this, &D, Device, &Info, 10132 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10133 // Fill up the arrays with all the mapped variables. 10134 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10135 MappableExprsHandler::MapValuesArrayTy Pointers; 10136 MappableExprsHandler::MapValuesArrayTy Sizes; 10137 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10138 10139 // Get map clause information. 10140 MappableExprsHandler MCHandler(D, CGF); 10141 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10142 10143 // Fill up the arrays and create the arguments. 10144 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10145 10146 llvm::Value *BasePointersArrayArg = nullptr; 10147 llvm::Value *PointersArrayArg = nullptr; 10148 llvm::Value *SizesArrayArg = nullptr; 10149 llvm::Value *MapTypesArrayArg = nullptr; 10150 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10151 SizesArrayArg, MapTypesArrayArg, Info); 10152 10153 // Emit device ID if any. 10154 llvm::Value *DeviceID = nullptr; 10155 if (Device) { 10156 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10157 CGF.Int64Ty, /*isSigned=*/true); 10158 } else { 10159 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10160 } 10161 10162 // Emit the number of elements in the offloading arrays. 10163 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10164 10165 llvm::Value *OffloadingArgs[] = { 10166 DeviceID, PointerNum, BasePointersArrayArg, 10167 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10168 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10169 CGM.getModule(), OMPRTL___tgt_target_data_begin), 10170 OffloadingArgs); 10171 10172 // If device pointer privatization is required, emit the body of the region 10173 // here. It will have to be duplicated: with and without privatization. 10174 if (!Info.CaptureDeviceAddrMap.empty()) 10175 CodeGen(CGF); 10176 }; 10177 10178 // Generate code for the closing of the data region.
10179 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10180 PrePostActionTy &) { 10181 assert(Info.isValid() && "Invalid data environment closing arguments."); 10182 10183 llvm::Value *BasePointersArrayArg = nullptr; 10184 llvm::Value *PointersArrayArg = nullptr; 10185 llvm::Value *SizesArrayArg = nullptr; 10186 llvm::Value *MapTypesArrayArg = nullptr; 10187 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10188 SizesArrayArg, MapTypesArrayArg, Info); 10189 10190 // Emit device ID if any. 10191 llvm::Value *DeviceID = nullptr; 10192 if (Device) { 10193 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10194 CGF.Int64Ty, /*isSigned=*/true); 10195 } else { 10196 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10197 } 10198 10199 // Emit the number of elements in the offloading arrays. 10200 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10201 10202 llvm::Value *OffloadingArgs[] = { 10203 DeviceID, PointerNum, BasePointersArrayArg, 10204 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10205 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10206 CGM.getModule(), OMPRTL___tgt_target_data_end), 10207 OffloadingArgs); 10208 }; 10209 10210 // If we need device pointer privatization, we need to emit the body of the 10211 // region with no privatization in the 'else' branch of the conditional. 10212 // Otherwise, we don't have to do anything. 10213 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10214 PrePostActionTy &) { 10215 if (!Info.CaptureDeviceAddrMap.empty()) { 10216 CodeGen.setAction(NoPrivAction); 10217 CodeGen(CGF); 10218 } 10219 }; 10220 10221 // We don't have to do anything to close the region if the if clause evaluates 10222 // to false. 10223 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10224 10225 if (IfCond) { 10226 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10227 } else { 10228 RegionCodeGenTy RCG(BeginThenGen); 10229 RCG(CGF); 10230 } 10231 10232 // If we don't require privatization of device pointers, we emit the body in 10233 // between the runtime calls. This avoids duplicating the body code. 10234 if (Info.CaptureDeviceAddrMap.empty()) { 10235 CodeGen.setAction(NoPrivAction); 10236 CodeGen(CGF); 10237 } 10238 10239 if (IfCond) { 10240 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10241 } else { 10242 RegionCodeGenTy RCG(EndThenGen); 10243 RCG(CGF); 10244 } 10245 } 10246 10247 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10248 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10249 const Expr *Device) { 10250 if (!CGF.HaveInsertPoint()) 10251 return; 10252 10253 assert((isa<OMPTargetEnterDataDirective>(D) || 10254 isa<OMPTargetExitDataDirective>(D) || 10255 isa<OMPTargetUpdateDirective>(D)) && 10256 "Expecting either target enter, exit data, or update directives."); 10257 10258 CodeGenFunction::OMPTargetDataInfo InputInfo; 10259 llvm::Value *MapTypesArray = nullptr; 10260 // Generate the code for the opening of the data environment. 10261 auto &&ThenGen = [this, &D, Device, &InputInfo, 10262 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10263 // Emit device ID if any. 
10264 llvm::Value *DeviceID = nullptr; 10265 if (Device) { 10266 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10267 CGF.Int64Ty, /*isSigned=*/true); 10268 } else { 10269 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10270 } 10271 10272 // Emit the number of elements in the offloading arrays. 10273 llvm::Constant *PointerNum = 10274 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10275 10276 llvm::Value *OffloadingArgs[] = {DeviceID, 10277 PointerNum, 10278 InputInfo.BasePointersArray.getPointer(), 10279 InputInfo.PointersArray.getPointer(), 10280 InputInfo.SizesArray.getPointer(), 10281 MapTypesArray}; 10282 10283 // Select the right runtime function call for each expected standalone 10284 // directive. 10285 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10286 RuntimeFunction RTLFn; 10287 switch (D.getDirectiveKind()) { 10288 case OMPD_target_enter_data: 10289 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait 10290 : OMPRTL___tgt_target_data_begin; 10291 break; 10292 case OMPD_target_exit_data: 10293 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait 10294 : OMPRTL___tgt_target_data_end; 10295 break; 10296 case OMPD_target_update: 10297 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait 10298 : OMPRTL___tgt_target_data_update; 10299 break; 10300 case OMPD_parallel: 10301 case OMPD_for: 10302 case OMPD_parallel_for: 10303 case OMPD_parallel_master: 10304 case OMPD_parallel_sections: 10305 case OMPD_for_simd: 10306 case OMPD_parallel_for_simd: 10307 case OMPD_cancel: 10308 case OMPD_cancellation_point: 10309 case OMPD_ordered: 10310 case OMPD_threadprivate: 10311 case OMPD_allocate: 10312 case OMPD_task: 10313 case OMPD_simd: 10314 case OMPD_sections: 10315 case OMPD_section: 10316 case OMPD_single: 10317 case OMPD_master: 10318 case OMPD_critical: 10319 case OMPD_taskyield: 10320 case OMPD_barrier: 10321 case OMPD_taskwait: 10322 case OMPD_taskgroup: 10323 case OMPD_atomic: 10324 case OMPD_flush: 10325 case OMPD_depobj: 10326 case OMPD_scan: 10327 case OMPD_teams: 10328 case OMPD_target_data: 10329 case OMPD_distribute: 10330 case OMPD_distribute_simd: 10331 case OMPD_distribute_parallel_for: 10332 case OMPD_distribute_parallel_for_simd: 10333 case OMPD_teams_distribute: 10334 case OMPD_teams_distribute_simd: 10335 case OMPD_teams_distribute_parallel_for: 10336 case OMPD_teams_distribute_parallel_for_simd: 10337 case OMPD_declare_simd: 10338 case OMPD_declare_variant: 10339 case OMPD_begin_declare_variant: 10340 case OMPD_end_declare_variant: 10341 case OMPD_declare_target: 10342 case OMPD_end_declare_target: 10343 case OMPD_declare_reduction: 10344 case OMPD_declare_mapper: 10345 case OMPD_taskloop: 10346 case OMPD_taskloop_simd: 10347 case OMPD_master_taskloop: 10348 case OMPD_master_taskloop_simd: 10349 case OMPD_parallel_master_taskloop: 10350 case OMPD_parallel_master_taskloop_simd: 10351 case OMPD_target: 10352 case OMPD_target_simd: 10353 case OMPD_target_teams_distribute: 10354 case OMPD_target_teams_distribute_simd: 10355 case OMPD_target_teams_distribute_parallel_for: 10356 case OMPD_target_teams_distribute_parallel_for_simd: 10357 case OMPD_target_teams: 10358 case OMPD_target_parallel: 10359 case OMPD_target_parallel_for: 10360 case OMPD_target_parallel_for_simd: 10361 case OMPD_requires: 10362 case OMPD_unknown: 10363 default: 10364 llvm_unreachable("Unexpected standalone target data directive."); 10365 break; 10366 } 10367 CGF.EmitRuntimeCall( 10368 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 
RTLFn), 10369 OffloadingArgs); 10370 }; 10371 10372 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10373 CodeGenFunction &CGF, PrePostActionTy &) { 10374 // Fill up the arrays with all the mapped variables. 10375 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10376 MappableExprsHandler::MapValuesArrayTy Pointers; 10377 MappableExprsHandler::MapValuesArrayTy Sizes; 10378 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10379 10380 // Get map clause information. 10381 MappableExprsHandler MEHandler(D, CGF); 10382 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10383 10384 TargetDataInfo Info; 10385 // Fill up the arrays and create the arguments. 10386 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10387 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10388 Info.PointersArray, Info.SizesArray, 10389 Info.MapTypesArray, Info); 10390 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10391 InputInfo.BasePointersArray = 10392 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10393 InputInfo.PointersArray = 10394 Address(Info.PointersArray, CGM.getPointerAlign()); 10395 InputInfo.SizesArray = 10396 Address(Info.SizesArray, CGM.getPointerAlign()); 10397 MapTypesArray = Info.MapTypesArray; 10398 if (D.hasClausesOfKind<OMPDependClause>()) 10399 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10400 else 10401 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10402 }; 10403 10404 if (IfCond) { 10405 emitIfClause(CGF, IfCond, TargetThenGen, 10406 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10407 } else { 10408 RegionCodeGenTy ThenRCG(TargetThenGen); 10409 ThenRCG(CGF); 10410 } 10411 } 10412 10413 namespace { 10414 /// Kind of parameter in a function with 'declare simd' directive. 10415 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10416 /// Attribute set of the parameter. 10417 struct ParamAttrTy { 10418 ParamKindTy Kind = Vector; 10419 llvm::APSInt StrideOrArg; 10420 llvm::APSInt Alignment; 10421 }; 10422 } // namespace 10423 10424 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10425 ArrayRef<ParamAttrTy> ParamAttrs) { 10426 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10427 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10428 // of that clause. The VLEN value must be a power of 2. 10429 // Otherwise the notion of the function's "characteristic data type" (CDT) 10430 // is used to compute the vector length. 10431 // CDT is defined in the following order: 10432 // a) For non-void function, the CDT is the return type. 10433 // b) If the function has any non-uniform, non-linear parameters, then the 10434 // CDT is the type of the first such parameter. 10435 // c) If the CDT determined by a) or b) above is struct, union, or class 10436 // type which is pass-by-value (except for the type that maps to the 10437 // built-in complex data type), the characteristic data type is int. 10438 // d) If none of the above three cases is applicable, the CDT is int. 10439 // The VLEN is then determined based on the CDT and the size of vector 10440 // register of that ISA for which current vector version is generated. The 10441 // VLEN is computed using the formula below: 10442 // VLEN = sizeof(vector_register) / sizeof(CDT), 10443 // where the vector register size is specified in section 3.2.1 "Registers and 10444 // the Stack Frame" of the original AMD64 ABI document.
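// Illustrative example (not part of the original source): for
//   #pragma omp declare simd notinbranch
//   double foo(double x);
// the CDT is 'double' (rule a), so this function returns 64 (bits); an SSE
// ('b', 128-bit) variant then gets VLEN = 128 / 64 = 2 and an AVX512
// ('e', 512-bit) variant gets VLEN = 512 / 64 = 8.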
10445 QualType RetType = FD->getReturnType(); 10446 if (RetType.isNull()) 10447 return 0; 10448 ASTContext &C = FD->getASTContext(); 10449 QualType CDT; 10450 if (!RetType.isNull() && !RetType->isVoidType()) { 10451 CDT = RetType; 10452 } else { 10453 unsigned Offset = 0; 10454 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10455 if (ParamAttrs[Offset].Kind == Vector) 10456 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10457 ++Offset; 10458 } 10459 if (CDT.isNull()) { 10460 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10461 if (ParamAttrs[I + Offset].Kind == Vector) { 10462 CDT = FD->getParamDecl(I)->getType(); 10463 break; 10464 } 10465 } 10466 } 10467 } 10468 if (CDT.isNull()) 10469 CDT = C.IntTy; 10470 CDT = CDT->getCanonicalTypeUnqualified(); 10471 if (CDT->isRecordType() || CDT->isUnionType()) 10472 CDT = C.IntTy; 10473 return C.getTypeSize(CDT); 10474 } 10475 10476 static void 10477 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10478 const llvm::APSInt &VLENVal, 10479 ArrayRef<ParamAttrTy> ParamAttrs, 10480 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10481 struct ISADataTy { 10482 char ISA; 10483 unsigned VecRegSize; 10484 }; 10485 ISADataTy ISAData[] = { 10486 { 10487 'b', 128 10488 }, // SSE 10489 { 10490 'c', 256 10491 }, // AVX 10492 { 10493 'd', 256 10494 }, // AVX2 10495 { 10496 'e', 512 10497 }, // AVX512 10498 }; 10499 llvm::SmallVector<char, 2> Masked; 10500 switch (State) { 10501 case OMPDeclareSimdDeclAttr::BS_Undefined: 10502 Masked.push_back('N'); 10503 Masked.push_back('M'); 10504 break; 10505 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10506 Masked.push_back('N'); 10507 break; 10508 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10509 Masked.push_back('M'); 10510 break; 10511 } 10512 for (char Mask : Masked) { 10513 for (const ISADataTy &Data : ISAData) { 10514 SmallString<256> Buffer; 10515 llvm::raw_svector_ostream Out(Buffer); 10516 Out << "_ZGV" << Data.ISA << Mask; 10517 if (!VLENVal) { 10518 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10519 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10520 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10521 } else { 10522 Out << VLENVal; 10523 } 10524 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10525 switch (ParamAttr.Kind) { 10526 case LinearWithVarStride: 10527 Out << 's' << ParamAttr.StrideOrArg; 10528 break; 10529 case Linear: 10530 Out << 'l'; 10531 if (ParamAttr.StrideOrArg != 1) 10532 Out << ParamAttr.StrideOrArg; 10533 break; 10534 case Uniform: 10535 Out << 'u'; 10536 break; 10537 case Vector: 10538 Out << 'v'; 10539 break; 10540 } 10541 if (!!ParamAttr.Alignment) 10542 Out << 'a' << ParamAttr.Alignment; 10543 } 10544 Out << '_' << Fn->getName(); 10545 Fn->addFnAttr(Out.str()); 10546 } 10547 } 10548 } 10549 10550 // These are the functions needed to mangle the names of the 10551 // vector functions generated by the compiler, according to the rules 10552 // defined in the "Vector Function ABI specifications for AArch64", 10553 // available at 10554 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10555 10556 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10557 /// 10558 /// TODO: Need to implement the behavior for references marked with a 10559 /// var or no linear modifiers (1.b in the section). For this, we 10560 /// need to extend ParamKindTy to support the linear modifiers.
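/// Illustrative example (assumed, not from the original source): in
/// 'void f(double *p, int n)' with 'uniform(n)', the parameter 'p' is
/// classified Vector and maps to vector, while 'n' (Uniform) does not.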
10561 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10562 QT = QT.getCanonicalType(); 10563 10564 if (QT->isVoidType()) 10565 return false; 10566 10567 if (Kind == ParamKindTy::Uniform) 10568 return false; 10569 10570 if (Kind == ParamKindTy::Linear) 10571 return false; 10572 10573 // TODO: Handle linear references with modifiers 10574 10575 if (Kind == ParamKindTy::LinearWithVarStride) 10576 return false; 10577 10578 return true; 10579 } 10580 10581 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10582 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10583 QT = QT.getCanonicalType(); 10584 unsigned Size = C.getTypeSize(QT); 10585 10586 // Only scalars and complex types at most 16 bytes wide set PBV to true. 10587 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10588 return false; 10589 10590 if (QT->isFloatingType()) 10591 return true; 10592 10593 if (QT->isIntegerType()) 10594 return true; 10595 10596 if (QT->isPointerType()) 10597 return true; 10598 10599 // TODO: Add support for complex types (section 3.1.2, item 2). 10600 10601 return false; 10602 } 10603 10604 /// Computes the lane size (LS) of a return type or of an input parameter, 10605 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10606 /// TODO: Add support for references, section 3.2.1, item 1. 10607 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10608 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10609 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10610 if (getAArch64PBV(PTy, C)) 10611 return C.getTypeSize(PTy); 10612 } 10613 if (getAArch64PBV(QT, C)) 10614 return C.getTypeSize(QT); 10615 10616 return C.getTypeSize(C.getUIntPtrType()); 10617 } 10618 10619 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10620 // signature of the scalar function, as defined in 3.2.2 of the 10621 // AAVFABI. 10622 static std::tuple<unsigned, unsigned, bool> 10623 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10624 QualType RetType = FD->getReturnType().getCanonicalType(); 10625 10626 ASTContext &C = FD->getASTContext(); 10627 10628 bool OutputBecomesInput = false; 10629 10630 llvm::SmallVector<unsigned, 8> Sizes; 10631 if (!RetType->isVoidType()) { 10632 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10633 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10634 OutputBecomesInput = true; 10635 } 10636 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10637 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10638 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10639 } 10640 10641 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10642 // The LS of a function parameter / return value can only be a power 10643 // of 2, starting from 8 bits, up to 128. 10644 assert(std::all_of(Sizes.begin(), Sizes.end(), 10645 [](unsigned Size) { 10646 return Size == 8 || Size == 16 || Size == 32 || 10647 Size == 64 || Size == 128; 10648 }) && 10649 "Invalid size"); 10650 10651 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10652 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10653 OutputBecomesInput); 10654 } 10655 10656 /// Mangle the parameter part of the vector function name according to 10657 /// the parameters' OpenMP classification. The mangling function is defined in 10658 /// section 3.5 of the AAVFABI.
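/// Illustrative example (assumed, not from the original source): a parameter
/// sequence (uniform; linear with constant step 4; vector aligned to 16
/// bytes) mangles to "ul4va16".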
10659 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10660 SmallString<256> Buffer; 10661 llvm::raw_svector_ostream Out(Buffer); 10662 for (const auto &ParamAttr : ParamAttrs) { 10663 switch (ParamAttr.Kind) { 10664 case LinearWithVarStride: 10665 Out << "ls" << ParamAttr.StrideOrArg; 10666 break; 10667 case Linear: 10668 Out << 'l'; 10669 // Don't print the step value if it is not present or if it is 10670 // equal to 1. 10671 if (ParamAttr.StrideOrArg != 1) 10672 Out << ParamAttr.StrideOrArg; 10673 break; 10674 case Uniform: 10675 Out << 'u'; 10676 break; 10677 case Vector: 10678 Out << 'v'; 10679 break; 10680 } 10681 10682 if (!!ParamAttr.Alignment) 10683 Out << 'a' << ParamAttr.Alignment; 10684 } 10685 10686 return std::string(Out.str()); 10687 } 10688 10689 // Function used to add the attribute. The parameter `VLEN` is 10690 // templated to allow the use of "x" when targeting scalable functions 10691 // for SVE. 10692 template <typename T> 10693 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10694 char ISA, StringRef ParSeq, 10695 StringRef MangledName, bool OutputBecomesInput, 10696 llvm::Function *Fn) { 10697 SmallString<256> Buffer; 10698 llvm::raw_svector_ostream Out(Buffer); 10699 Out << Prefix << ISA << LMask << VLEN; 10700 if (OutputBecomesInput) 10701 Out << "v"; 10702 Out << ParSeq << "_" << MangledName; 10703 Fn->addFnAttr(Out.str()); 10704 } 10705 10706 // Helper function to generate the Advanced SIMD names depending on 10707 // the value of the NDS when simdlen is not present. 10708 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10709 StringRef Prefix, char ISA, 10710 StringRef ParSeq, StringRef MangledName, 10711 bool OutputBecomesInput, 10712 llvm::Function *Fn) { 10713 switch (NDS) { 10714 case 8: 10715 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10716 OutputBecomesInput, Fn); 10717 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10718 OutputBecomesInput, Fn); 10719 break; 10720 case 16: 10721 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10722 OutputBecomesInput, Fn); 10723 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10724 OutputBecomesInput, Fn); 10725 break; 10726 case 32: 10727 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10728 OutputBecomesInput, Fn); 10729 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10730 OutputBecomesInput, Fn); 10731 break; 10732 case 64: 10733 case 128: 10734 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10735 OutputBecomesInput, Fn); 10736 break; 10737 default: 10738 llvm_unreachable("Scalar type is too wide."); 10739 } 10740 } 10741 10742 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 10743 static void emitAArch64DeclareSimdFunction( 10744 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10745 ArrayRef<ParamAttrTy> ParamAttrs, 10746 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10747 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10748 10749 // Get basic data for building the vector signature. 10750 const auto Data = getNDSWDS(FD, ParamAttrs); 10751 const unsigned NDS = std::get<0>(Data); 10752 const unsigned WDS = std::get<1>(Data); 10753 const bool OutputBecomesInput = std::get<2>(Data); 10754 10755 // Check the values provided via `simdlen` by the user. 10756 // 1. 
A `simdlen(1)` doesn't produce vector signatures. 10757 if (UserVLEN == 1) { 10758 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10759 DiagnosticsEngine::Warning, 10760 "The clause simdlen(1) has no effect when targeting aarch64."); 10761 CGM.getDiags().Report(SLoc, DiagID); 10762 return; 10763 } 10764 10765 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10766 // Advanced SIMD output. 10767 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10768 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10769 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10770 "power of 2 when targeting Advanced SIMD."); 10771 CGM.getDiags().Report(SLoc, DiagID); 10772 return; 10773 } 10774 10775 // 3. Section 3.4.1. SVE fixed length must obey the architectural 10776 // limits. 10777 if (ISA == 's' && UserVLEN != 0) { 10778 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10779 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10780 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10781 "lanes in the architectural constraints " 10782 "for SVE (min is 128-bit, max is " 10783 "2048-bit, by steps of 128-bit)"); 10784 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10785 return; 10786 } 10787 } 10788 10789 // Sort out the parameter sequence. 10790 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10791 StringRef Prefix = "_ZGV"; 10792 // Generate simdlen from user input (if any). 10793 if (UserVLEN) { 10794 if (ISA == 's') { 10795 // SVE generates only a masked function. 10796 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10797 OutputBecomesInput, Fn); 10798 } else { 10799 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10800 // Advanced SIMD generates one or two functions, depending on 10801 // the `[not]inbranch` clause. 10802 switch (State) { 10803 case OMPDeclareSimdDeclAttr::BS_Undefined: 10804 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10805 OutputBecomesInput, Fn); 10806 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10807 OutputBecomesInput, Fn); 10808 break; 10809 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10810 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10811 OutputBecomesInput, Fn); 10812 break; 10813 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10814 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10815 OutputBecomesInput, Fn); 10816 break; 10817 } 10818 } 10819 } else { 10820 // If no user simdlen is provided, follow the AAVFABI rules for 10821 // generating the vector length. 10822 if (ISA == 's') { 10823 // SVE, section 3.4.1, item 1. 10824 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10825 OutputBecomesInput, Fn); 10826 } else { 10827 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10828 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10829 // two vector names depending on the use of the clause 10830 // `[not]inbranch`.
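// Illustrative example (not part of the original source): with NDS == 32
// (narrowest lane of 32 bits) and no '[not]inbranch' clause, both unmasked
// and masked variants are emitted at VLEN 2 and 4, i.e. names of the form
// "_ZGVnN2..._foo", "_ZGVnN4..._foo", "_ZGVnM2..._foo" and "_ZGVnM4..._foo".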
10831 switch (State) { 10832 case OMPDeclareSimdDeclAttr::BS_Undefined: 10833 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10834 OutputBecomesInput, Fn); 10835 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10836 OutputBecomesInput, Fn); 10837 break; 10838 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10839 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10840 OutputBecomesInput, Fn); 10841 break; 10842 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10843 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10844 OutputBecomesInput, Fn); 10845 break; 10846 } 10847 } 10848 } 10849 } 10850 10851 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10852 llvm::Function *Fn) { 10853 ASTContext &C = CGM.getContext(); 10854 FD = FD->getMostRecentDecl(); 10855 // Map params to their positions in function decl. 10856 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10857 if (isa<CXXMethodDecl>(FD)) 10858 ParamPositions.try_emplace(FD, 0); 10859 unsigned ParamPos = ParamPositions.size(); 10860 for (const ParmVarDecl *P : FD->parameters()) { 10861 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10862 ++ParamPos; 10863 } 10864 while (FD) { 10865 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10866 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10867 // Mark uniform parameters. 10868 for (const Expr *E : Attr->uniforms()) { 10869 E = E->IgnoreParenImpCasts(); 10870 unsigned Pos; 10871 if (isa<CXXThisExpr>(E)) { 10872 Pos = ParamPositions[FD]; 10873 } else { 10874 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10875 ->getCanonicalDecl(); 10876 Pos = ParamPositions[PVD]; 10877 } 10878 ParamAttrs[Pos].Kind = Uniform; 10879 } 10880 // Get alignment info. 10881 auto NI = Attr->alignments_begin(); 10882 for (const Expr *E : Attr->aligneds()) { 10883 E = E->IgnoreParenImpCasts(); 10884 unsigned Pos; 10885 QualType ParmTy; 10886 if (isa<CXXThisExpr>(E)) { 10887 Pos = ParamPositions[FD]; 10888 ParmTy = E->getType(); 10889 } else { 10890 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10891 ->getCanonicalDecl(); 10892 Pos = ParamPositions[PVD]; 10893 ParmTy = PVD->getType(); 10894 } 10895 ParamAttrs[Pos].Alignment = 10896 (*NI) 10897 ? (*NI)->EvaluateKnownConstInt(C) 10898 : llvm::APSInt::getUnsigned( 10899 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10900 .getQuantity()); 10901 ++NI; 10902 } 10903 // Mark linear parameters. 10904 auto SI = Attr->steps_begin(); 10905 auto MI = Attr->modifiers_begin(); 10906 for (const Expr *E : Attr->linears()) { 10907 E = E->IgnoreParenImpCasts(); 10908 unsigned Pos; 10909 // Rescaling factor needed to compute the linear parameter 10910 // value in the mangled name. 10911 unsigned PtrRescalingFactor = 1; 10912 if (isa<CXXThisExpr>(E)) { 10913 Pos = ParamPositions[FD]; 10914 } else { 10915 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10916 ->getCanonicalDecl(); 10917 Pos = ParamPositions[PVD]; 10918 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 10919 PtrRescalingFactor = CGM.getContext() 10920 .getTypeSizeInChars(P->getPointeeType()) 10921 .getQuantity(); 10922 } 10923 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10924 ParamAttr.Kind = Linear; 10925 // Assuming a stride of 1, for `linear` without modifiers. 
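// Illustrative example (not part of the original source): for 'linear(p)'
// with 'double *p', this implicit step of 1 is rescaled below by
// sizeof(double), so the parameter is mangled as 'l8'.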
10926 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 10927 if (*SI) { 10928 Expr::EvalResult Result; 10929 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10930 if (const auto *DRE = 10931 dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10932 if (const auto *StridePVD = dyn_cast<ParmVarDecl>(DRE->getDecl())) { 10933 ParamAttr.Kind = LinearWithVarStride; 10934 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10935 ParamPositions[StridePVD->getCanonicalDecl()]); 10936 } 10937 } 10938 } else { 10939 ParamAttr.StrideOrArg = Result.Val.getInt(); 10940 } 10941 } 10942 // If we are using a linear clause on a pointer, we need to 10943 // rescale the value of linear_step with the byte size of the 10944 // pointee type. 10945 if (Linear == ParamAttr.Kind) 10946 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 10947 ++SI; 10948 ++MI; 10949 } 10950 llvm::APSInt VLENVal; 10951 SourceLocation ExprLoc; 10952 const Expr *VLENExpr = Attr->getSimdlen(); 10953 if (VLENExpr) { 10954 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10955 ExprLoc = VLENExpr->getExprLoc(); 10956 } 10957 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10958 if (CGM.getTriple().isX86()) { 10959 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10960 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10961 unsigned VLEN = VLENVal.getExtValue(); 10962 StringRef MangledName = Fn->getName(); 10963 if (CGM.getTarget().hasFeature("sve")) 10964 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10965 MangledName, 's', 128, Fn, ExprLoc); 10966 if (CGM.getTarget().hasFeature("neon")) 10967 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10968 MangledName, 'n', 128, Fn, ExprLoc); 10969 } 10970 } 10971 FD = FD->getPreviousDecl(); 10972 } 10973 } 10974 10975 namespace { 10976 /// Cleanup action for doacross support.
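/// Pushed as a normal-and-EH cleanup (see emitDoacrossInit below), so the
/// __kmpc_doacross_fini call is emitted on both regular and exceptional
/// exits from the region.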
10977 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10978 public: 10979 static const int DoacrossFinArgs = 2; 10980 10981 private: 10982 llvm::FunctionCallee RTLFn; 10983 llvm::Value *Args[DoacrossFinArgs]; 10984 10985 public: 10986 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10987 ArrayRef<llvm::Value *> CallArgs) 10988 : RTLFn(RTLFn) { 10989 assert(CallArgs.size() == DoacrossFinArgs); 10990 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10991 } 10992 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10993 if (!CGF.HaveInsertPoint()) 10994 return; 10995 CGF.EmitRuntimeCall(RTLFn, Args); 10996 } 10997 }; 10998 } // namespace 10999 11000 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11001 const OMPLoopDirective &D, 11002 ArrayRef<Expr *> NumIterations) { 11003 if (!CGF.HaveInsertPoint()) 11004 return; 11005 11006 ASTContext &C = CGM.getContext(); 11007 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11008 RecordDecl *RD; 11009 if (KmpDimTy.isNull()) { 11010 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11011 // kmp_int64 lo; // lower 11012 // kmp_int64 up; // upper 11013 // kmp_int64 st; // stride 11014 // }; 11015 RD = C.buildImplicitRecord("kmp_dim"); 11016 RD->startDefinition(); 11017 addFieldToRecordDecl(C, RD, Int64Ty); 11018 addFieldToRecordDecl(C, RD, Int64Ty); 11019 addFieldToRecordDecl(C, RD, Int64Ty); 11020 RD->completeDefinition(); 11021 KmpDimTy = C.getRecordType(RD); 11022 } else { 11023 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11024 } 11025 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11026 QualType ArrayTy = 11027 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11028 11029 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11030 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11031 enum { LowerFD = 0, UpperFD, StrideFD }; 11032 // Fill dims with data. 
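// Illustrative example (not part of the original source): for
//   #pragma omp for ordered(2)
// two kmp_dim entries are initialized below; each 'up' field receives the
// corresponding loop's iteration count converted to kmp_int64, each 'st'
// field is set to 1, and 'lo' stays zero from the null-initialization above.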
11033 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11034 LValue DimsLVal = CGF.MakeAddrLValue( 11035 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11036 // dims.upper = num_iterations; 11037 LValue UpperLVal = CGF.EmitLValueForField( 11038 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11039 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11040 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11041 Int64Ty, NumIterations[I]->getExprLoc()); 11042 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11043 // dims.stride = 1; 11044 LValue StrideLVal = CGF.EmitLValueForField( 11045 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11046 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11047 StrideLVal); 11048 } 11049 11050 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11051 // kmp_int32 num_dims, struct kmp_dim * dims); 11052 llvm::Value *Args[] = { 11053 emitUpdateLocation(CGF, D.getBeginLoc()), 11054 getThreadID(CGF, D.getBeginLoc()), 11055 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11056 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11057 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11058 CGM.VoidPtrTy)}; 11059 11060 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11061 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11062 CGF.EmitRuntimeCall(RTLFn, Args); 11063 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11064 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11065 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11066 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11067 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11068 llvm::makeArrayRef(FiniArgs)); 11069 } 11070 11071 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11072 const OMPDependClause *C) { 11073 QualType Int64Ty = 11074 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11075 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11076 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11077 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11078 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11079 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11080 const Expr *CounterVal = C->getLoopData(I); 11081 assert(CounterVal); 11082 llvm::Value *CntVal = CGF.EmitScalarConversion( 11083 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11084 CounterVal->getExprLoc()); 11085 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11086 /*Volatile=*/false, Int64Ty); 11087 } 11088 llvm::Value *Args[] = { 11089 emitUpdateLocation(CGF, C->getBeginLoc()), 11090 getThreadID(CGF, C->getBeginLoc()), 11091 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11092 llvm::FunctionCallee RTLFn; 11093 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11094 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11095 OMPRTL___kmpc_doacross_post); 11096 } else { 11097 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11098 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11099 OMPRTL___kmpc_doacross_wait); 11100 } 11101 CGF.EmitRuntimeCall(RTLFn, Args); 11102 } 11103 11104 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 11105 llvm::FunctionCallee Callee, 11106 ArrayRef<llvm::Value *> Args) const { 11107 assert(Loc.isValid() && "Outlined function call location 
must be valid."); 11108 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11109 11110 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11111 if (Fn->doesNotThrow()) { 11112 CGF.EmitNounwindRuntimeCall(Fn, Args); 11113 return; 11114 } 11115 } 11116 CGF.EmitRuntimeCall(Callee, Args); 11117 } 11118 11119 void CGOpenMPRuntime::emitOutlinedFunctionCall( 11120 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 11121 ArrayRef<llvm::Value *> Args) const { 11122 emitCall(CGF, Loc, OutlinedFn, Args); 11123 } 11124 11125 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11126 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11127 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11128 HasEmittedDeclareTargetRegion = true; 11129 } 11130 11131 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11132 const VarDecl *NativeParam, 11133 const VarDecl *TargetParam) const { 11134 return CGF.GetAddrOfLocalVar(NativeParam); 11135 } 11136 11137 namespace { 11138 /// Cleanup action for allocate support. 11139 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11140 public: 11141 static const int CleanupArgs = 3; 11142 11143 private: 11144 llvm::FunctionCallee RTLFn; 11145 llvm::Value *Args[CleanupArgs]; 11146 11147 public: 11148 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 11149 ArrayRef<llvm::Value *> CallArgs) 11150 : RTLFn(RTLFn) { 11151 assert(CallArgs.size() == CleanupArgs && 11152 "Size of arguments does not match."); 11153 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11154 } 11155 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11156 if (!CGF.HaveInsertPoint()) 11157 return; 11158 CGF.EmitRuntimeCall(RTLFn, Args); 11159 } 11160 }; 11161 } // namespace 11162 11163 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11164 const VarDecl *VD) { 11165 if (!VD) 11166 return Address::invalid(); 11167 const VarDecl *CVD = VD->getCanonicalDecl(); 11168 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 11169 return Address::invalid(); 11170 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11171 // Use the default allocation. 11172 if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 11173 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 11174 !AA->getAllocator()) 11175 return Address::invalid(); 11176 llvm::Value *Size; 11177 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11178 if (CVD->getType()->isVariablyModifiedType()) { 11179 Size = CGF.getTypeSize(CVD->getType()); 11180 // Align the size: ((size + align - 1) / align) * align 11181 Size = CGF.Builder.CreateNUWAdd( 11182 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11183 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11184 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11185 } else { 11186 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11187 Size = CGM.getSize(Sz.alignTo(Align)); 11188 } 11189 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11190 assert(AA->getAllocator() && 11191 "Expected allocator expression for non-default allocator."); 11192 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11193 // According to the standard, the original allocator type is an enum (integer). 11194 // Convert to pointer type, if required.
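// After the conversion below, the emitted sequence is roughly (illustrative
// sketch, not part of the original source):
//   %ptr = call i8* @__kmpc_alloc(i32 %gtid, i64 %size, i8* %allocator)
//   ... uses of the variable ...
//   call void @__kmpc_free(i32 %gtid, i8* %ptr, i8* %allocator) ; scope exit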
11195 if (Allocator->getType()->isIntegerTy()) 11196 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11197 else if (Allocator->getType()->isPointerTy()) 11198 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11199 CGM.VoidPtrTy); 11200 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11201 11202 llvm::Value *Addr = 11203 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11204 CGM.getModule(), OMPRTL___kmpc_alloc), 11205 Args, getName({CVD->getName(), ".void.addr"})); 11206 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11207 Allocator}; 11208 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11209 CGM.getModule(), OMPRTL___kmpc_free); 11210 11211 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11212 llvm::makeArrayRef(FiniArgs)); 11213 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11214 Addr, 11215 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11216 getName({CVD->getName(), ".addr"})); 11217 return Address(Addr, Align); 11218 } 11219 11220 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11221 CodeGenModule &CGM, const OMPLoopDirective &S) 11222 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11223 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11224 if (!NeedToPush) 11225 return; 11226 NontemporalDeclsSet &DS = 11227 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11228 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11229 for (const Stmt *Ref : C->private_refs()) { 11230 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11231 const ValueDecl *VD; 11232 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11233 VD = DRE->getDecl(); 11234 } else { 11235 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11236 assert((ME->isImplicitCXXThis() || 11237 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11238 "Expected member of current class."); 11239 VD = ME->getMemberDecl(); 11240 } 11241 DS.insert(VD); 11242 } 11243 } 11244 } 11245 11246 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11247 if (!NeedToPush) 11248 return; 11249 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11250 } 11251 11252 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11253 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11254 11255 return llvm::any_of( 11256 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11257 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11258 } 11259 11260 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11261 const OMPExecutableDirective &S, 11262 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11263 const { 11264 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11265 // Vars in target/task regions must be excluded completely. 
11266 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11267 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11268 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11269 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11270 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11271 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11272 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11273 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11274 } 11275 } 11276 // Exclude vars in private clauses. 11277 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11278 for (const Expr *Ref : C->varlists()) { 11279 if (!Ref->getType()->isScalarType()) 11280 continue; 11281 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11282 if (!DRE) 11283 continue; 11284 NeedToCheckForLPCs.insert(DRE->getDecl()); 11285 } 11286 } 11287 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11288 for (const Expr *Ref : C->varlists()) { 11289 if (!Ref->getType()->isScalarType()) 11290 continue; 11291 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11292 if (!DRE) 11293 continue; 11294 NeedToCheckForLPCs.insert(DRE->getDecl()); 11295 } 11296 } 11297 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11298 for (const Expr *Ref : C->varlists()) { 11299 if (!Ref->getType()->isScalarType()) 11300 continue; 11301 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11302 if (!DRE) 11303 continue; 11304 NeedToCheckForLPCs.insert(DRE->getDecl()); 11305 } 11306 } 11307 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11308 for (const Expr *Ref : C->varlists()) { 11309 if (!Ref->getType()->isScalarType()) 11310 continue; 11311 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11312 if (!DRE) 11313 continue; 11314 NeedToCheckForLPCs.insert(DRE->getDecl()); 11315 } 11316 } 11317 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 11318 for (const Expr *Ref : C->varlists()) { 11319 if (!Ref->getType()->isScalarType()) 11320 continue; 11321 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11322 if (!DRE) 11323 continue; 11324 NeedToCheckForLPCs.insert(DRE->getDecl()); 11325 } 11326 } 11327 for (const Decl *VD : NeedToCheckForLPCs) { 11328 for (const LastprivateConditionalData &Data : 11329 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 11330 if (Data.DeclToUniqueName.count(VD) > 0) { 11331 if (!Data.Disabled) 11332 NeedToAddForLPCsAsDisabled.insert(VD); 11333 break; 11334 } 11335 } 11336 } 11337 } 11338 11339 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11340 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 11341 : CGM(CGF.CGM), 11342 Action((CGM.getLangOpts().OpenMP >= 50 && 11343 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11344 [](const OMPLastprivateClause *C) { 11345 return C->getKind() == 11346 OMPC_LASTPRIVATE_conditional; 11347 })) 11348 ? 
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

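// For illustration only: the OpenMP 5.0 construct the RAII above tracks
// (hypothetical source):
//
//   #pragma omp parallel for lastprivate(conditional: a)
//   for (int i = 0; i < N; ++i)
//     if (c[i])
//       a = i; // 'a' must end up with the value from the last iteration
//              // that actually assigned to it.
//
// Each listed variable is mapped to a unique name generated with the
// "pl_cond" prefix; that name keys the shadow globals emitted by the update
// code below.
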
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

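// For illustration only: the implicit record built above is roughly (sketch,
// names are hypothetical)
//
//   struct lastprivate.conditional { T val; char Fired; } priv_a;
//   priv_a.Fired = 0; // Reset on init; set to 1 when 'a' is assigned from a
//                     // nested outlined region.
//
// The returned address points at the 'val' field, which the rest of codegen
// then uses as the private copy of the variable.
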
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

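// For illustration only: per update site this emits roughly (sketch, assuming
// the usual libomp critical entry points)
//
//   __kmpc_critical(...);                   // lock named after UniqueDeclName
//   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
//   __kmpc_end_critical(...);
//
// with the critical section elided in simd-only mode, where no parallel
// region can exist.
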
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

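// For illustration only (hypothetical source): with
// '#pragma omp parallel for lastprivate(conditional: a)', each assignment
// whose LHS names 'a', e.g.
//
//   if (c[i])
//     a = i; // This LHS is passed to checkAndEmitLastprivateConditional.
//
// is followed either by the critical update above (when emitted in the same
// function) or, from a nested outlined region, by an atomic store of 1 to the
// wrapper record's Fired flag.
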
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

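// For illustration only: on the lastprivate copy-out path the final update
// above amounts to (sketch)
//
//   priv_a = last_a; // Copy the globally tracked last value back, but only
//                    // if some iteration actually assigned to 'a' (i.e. the
//                    // shadow global was created at all).
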
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}