//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing the global thread id
  /// inside an OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
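// Illustrative note (simplified sketch; the actual IR is produced by the
// runtime-call emission code later in this file): a standalone
// '#pragma omp parallel' region is outlined into a "microtask" and launched
// via the KMP runtime roughly as
//
//   void .omp_outlined.(i32 *global_tid, i32 *bound_tid, <captures>...) {
//     <region body>
//   }
//   __kmpc_fork_call(&loc, <num captures>, .omp_outlined., <captures>...);
//
// The region-info classes below carry the bookkeeping (thread-id variable,
// helper name, cancellation state) needed while emitting such bodies.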
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing the global thread id
  /// inside an OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing the global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
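        // An untied task body is split into parts; a sketch (simplified) of
        // the dispatch code emitted here:
        //
        //   switch (*part_id) {
        //   case 0: goto .untied.jmp.0;   // first part
        //   case 1: goto .untied.jmp.1;   // after the first switching point
        //   ...
        //   default: goto .untied.done.;  // finished
        //   }
        //
        // Each call to emitUntiedSwitch below stores the next part id and
        // adds the corresponding case.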
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing the global thread id
  /// inside an OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing the global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing the global thread id
  /// inside an OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about the outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application, so it is provided by the client, because only the
/// client has the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing the global thread id
  /// inside an OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs a region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
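    // The previous CapturedStmtInfo is remembered via OldCSI inside the new
    // region info and restored in the destructor below. Lambda-capture and
    // block bookkeeping are stashed and cleared so the inlined region does
    // not pick up variable mappings from the enclosing function.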
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which 'requires' clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// Flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// No 'requires' clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID used when the device was not explicitly defined; per the
  /// spec, the runtime should obtain it from environment variables.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
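// For illustration (simplified): a translation unit containing
//
//   #pragma omp requires unified_shared_memory
//
// causes OMP_REQ_UNIFIED_SHARED_MEMORY to be recorded and later passed to
// __tgt_register_requires so the device runtime can verify that the
// requirement is supported.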
/// Describes the ident structure that encodes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register(ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
  // int64_t *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
  // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check whether the combiner is a call to a UDR combiner and, if so, return
/// the UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
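// For context, a simplified source-level example (the identifiers 'MyTy' and
// 'append' are hypothetical): the UDR machinery above is driven by
// declarations such as
//
//   #pragma omp declare reduction(merge : MyTy : omp_out.append(omp_in)) \
//       initializer(omp_priv = MyTy())
//
// The combiner expression references omp_in/omp_out and the optional
// initializer references omp_priv/omp_orig; both are lowered to helper
// functions via emitCombinerOrInitializer later in this file.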
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
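  // At this point the emitted control flow is, as a sketch:
  //   entry: br (DestBegin == DestEnd) ? done : body
  //   body:  PHIs for dest (and src for UDRs); init one element; advance
  //   done:  continue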
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in the current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress());
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  auto IPriv = Privates.begin();
  auto IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IPriv, *IRed);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}
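// For reference (a sketch; exact IR naming may differ by target): the record
// built in the constructor above lowers to the LLVM type
//
//   %struct.ident_t = type { i32, i32, i32, i32, i8* }
//
// matching the kmp.h layout documented earlier in this file.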
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str();
}
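// Example (assuming both separators are "."): getName({"omp", "reduction"})
// yields ".omp.reduction". Targets may install different first/rest
// separators, which is why the two are kept distinct.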
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}
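// A note on the outlining helpers below (a sketch; the authoritative
// signature is produced by GenerateOpenMPCapturedStmtFunction): a
// parallel/teams region is outlined as a microtask of the form
//
//   void <helper-name>(i32 *global_tid, i32 *bound_tid, <captures>...);
//
// whereas a task region receives its thread id by value, which is why the
// assertions below check the ThreadIDVar type differently for the two cases.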
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}
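
// Worked example for buildStructValue (illustrative only): for a record
// lowered to the LLVM type
//   { i32, [4 x i8], i64 }
// whose two AST fields sit at LLVM indices 0 and 2, the loop above emits
// Data[0], a zeroinitializer for the [4 x i8] padding slot at index 1, and
// then Data[1], keeping the constant in step with the CGRecordLayout.
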
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize the default psource field of the ident_t structure; it is
      // shared by all default ident_t objects. Format is
      // ";file;function;line;column;;". Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
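
// For reference (a sketch matching the Data array built above): the emitted
// global is equivalent to
//   static ident_t DefaultLoc = {0, Flags, Reserved2Flags, 0,
//                                ";unknown;unknown;0;0;;"};
// i.e. an anonymous, unnamed_addr ident_t memoized per (Flags, Reserved2Flags)
// pair in OpenMPDefaultLocMap.
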
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated, return the global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
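
// Concrete shape of the string stored above (illustrative): for a construct
// at test.c:10:5 inside foo(), *psource becomes ";test.c;foo;10;5;;" -- the
// same ";file;function;line;column;;" format the runtime parses for
// location-based diagnostics.
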
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this is an outlined function with the thread id passed as
        // an argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value is loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return IdentTy->getPointerTo();
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
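
// End-to-end sketch (assuming the standard libomp entry points): a directive
//   #pragma omp parallel
//   { body(); }
// lowers to roughly
//   __kmpc_fork_call(&loc, /*argc=*/N, (kmpc_micro).omp_outlined., captures...);
// where .omp_outlined. receives the global and bound thread ids through the
// two kmp_int32* parameters built in getKmpc_MicroPointerTy() above.
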
llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
  llvm::FunctionCallee RTLFn = nullptr;
  switch (static_cast<OpenMPRTLFunction>(Function)) {
  case OMPRTL__kmpc_fork_call: {
    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
    if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
      if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
        llvm::LLVMContext &Ctx = F->getContext();
        llvm::MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the __kmpc_fork_call:
        // - The callback callee is argument number 2 (microtask).
        // - The first two arguments of the callback callee are unknown (-1).
        // - All variadic arguments to the __kmpc_fork_call are passed to the
        //   callback callee.
        F->addMetadata(
            llvm::LLVMContext::MD_callback,
            *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                        2, {-1, -1},
                                        /* VarArgsArePassed */ true)}));
      }
    }
    break;
  }
  case OMPRTL__kmpc_global_thread_num: {
    // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
    break;
  }
  case OMPRTL__kmpc_threadprivate_cached: {
    // Build void *__kmpc_threadprivate_cached(ident_t *loc,
    // kmp_int32 global_tid, void *data, size_t size, void ***cache);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy, CGM.SizeTy,
                                CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
    break;
  }
  case OMPRTL__kmpc_critical: {
    // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
    break;
  }
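
  // Usage sketch (illustrative): '#pragma omp critical(name)' brackets its
  // body with
  //   __kmpc_critical(&loc, gtid, &lock);
  //   <body>
  //   __kmpc_end_critical(&loc, gtid, &lock);
  // where 'lock' is a kmp_critical_name global derived from the given name.
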
  case OMPRTL__kmpc_critical_with_hint: {
    // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit, uintptr_t hint);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                llvm::PointerType::getUnqual(KmpCriticalNameTy),
                                CGM.IntPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
    break;
  }
  case OMPRTL__kmpc_threadprivate_register: {
    // Build void __kmpc_threadprivate_register(ident_t *, void *data,
    // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
    // typedef void *(*kmpc_ctor)(void *);
    auto *KmpcCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                /*isVarArg*/ false)->getPointerTo();
    // typedef void *(*kmpc_cctor)(void *, void *);
    llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *KmpcCopyCtorTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
                                /*isVarArg*/ false)
            ->getPointerTo();
    // typedef void (*kmpc_dtor)(void *);
    auto *KmpcDtorTy =
        llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
            ->getPointerTo();
    llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
                              KmpcCopyCtorTy, KmpcDtorTy};
    auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
                                         /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
    break;
  }
  case OMPRTL__kmpc_end_critical: {
    // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *crit);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
    break;
  }
  case OMPRTL__kmpc_cancel_barrier: {
    // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
    break;
  }
  case OMPRTL__kmpc_barrier: {
    // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
    break;
  }
  case OMPRTL__kmpc_for_static_fini: {
    // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
    break;
  }
  case OMPRTL__kmpc_push_num_threads: {
    // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_threads)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
    break;
  }
  case OMPRTL__kmpc_serialized_parallel: {
    // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
    break;
  }
  case OMPRTL__kmpc_end_serialized_parallel: {
    // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
    break;
  }
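
  // Sketch of how the pair above is used (illustrative): for
  //   #pragma omp parallel if(cond)
  // the 'false' arm serializes the region on the encountering thread, roughly:
  //   __kmpc_serialized_parallel(&loc, gtid);
  //   .omp_outlined.(&gtid, &btid, captures...);
  //   __kmpc_end_serialized_parallel(&loc, gtid);
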
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1885 break; 1886 } 1887 case OMPRTL__kmpc_omp_taskyield: { 1888 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1889 // int end_part); 1890 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1891 auto *FnTy = 1892 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1893 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1894 break; 1895 } 1896 case OMPRTL__kmpc_single: { 1897 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1898 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1899 auto *FnTy = 1900 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1901 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1902 break; 1903 } 1904 case OMPRTL__kmpc_end_single: { 1905 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1906 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1907 auto *FnTy = 1908 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1909 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1910 break; 1911 } 1912 case OMPRTL__kmpc_omp_task_alloc: { 1913 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1914 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1915 // kmp_routine_entry_t *task_entry); 1916 assert(KmpRoutineEntryPtrTy != nullptr && 1917 "Type kmp_routine_entry_t must be created."); 1918 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1919 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1920 // Return void * and then cast to particular kmp_task_t type. 1921 auto *FnTy = 1922 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1923 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1924 break; 1925 } 1926 case OMPRTL__kmpc_omp_target_task_alloc: { 1927 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 1928 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1929 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 1930 assert(KmpRoutineEntryPtrTy != nullptr && 1931 "Type kmp_routine_entry_t must be created."); 1932 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1933 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 1934 CGM.Int64Ty}; 1935 // Return void * and then cast to particular kmp_task_t type. 
  case OMPRTL__kmpc_copyprivate: {
    // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
    // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
    // kmp_int32 didit);
    llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CpyFnTy =
        llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
                                CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
    break;
  }
  case OMPRTL__kmpc_reduce: {
    // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
    // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
    break;
  }
  case OMPRTL__kmpc_reduce_nowait: {
    // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
    // *lck);
    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
                                               /*isVarArg=*/false);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
    break;
  }
  case OMPRTL__kmpc_end_reduce: {
    // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
    break;
  }
  case OMPRTL__kmpc_end_reduce_nowait: {
    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
    // kmp_critical_name *lck);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty,
        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
    break;
  }
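
  // Reduction protocol sketch (illustrative; matches the prototypes above):
  //   switch (__kmpc_reduce(&loc, gtid, n, size, data, reduce_func, &lck)) {
  //   case 1: <apply combiners>; __kmpc_end_reduce(&loc, gtid, &lck); break;
  //   case 2: <apply combiners atomically>; break;
  //   default: break; // nothing to do on this thread
  //   }
  // The nowait variants follow the same shape without the closing barrier.
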
  case OMPRTL__kmpc_omp_task_begin_if0: {
    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
    break;
  }
  case OMPRTL__kmpc_omp_task_complete_if0: {
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy,
                                      /*Name=*/"__kmpc_omp_task_complete_if0");
    break;
  }
  case OMPRTL__kmpc_ordered: {
    // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
    break;
  }
  case OMPRTL__kmpc_end_ordered: {
    // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
    break;
  }
  case OMPRTL__kmpc_omp_taskwait: {
    // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
    break;
  }
  case OMPRTL__kmpc_taskgroup: {
    // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
    break;
  }
  case OMPRTL__kmpc_end_taskgroup: {
    // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
    break;
  }
  case OMPRTL__kmpc_push_proc_bind: {
    // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
    // int proc_bind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
    break;
  }
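
  // Undeferred task sketch (for 'task' with an if clause evaluating to false;
  // illustrative):
  //   __kmpc_omp_task_begin_if0(&loc, gtid, t);
  //   <call the task entry directly, running the body immediately>
  //   __kmpc_omp_task_complete_if0(&loc, gtid, t);
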
  case OMPRTL__kmpc_omp_task_with_deps: {
    // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
    // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {
        getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
        CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
    break;
  }
  case OMPRTL__kmpc_omp_wait_deps: {
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
    // kmp_depend_info_t *noalias_dep_list);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int32Ty, CGM.VoidPtrTy,
                                CGM.Int32Ty, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
    break;
  }
  case OMPRTL__kmpc_cancellationpoint: {
    // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
    // global_tid, kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
    break;
  }
  case OMPRTL__kmpc_cancel: {
    // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 cncl_kind)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
    break;
  }
  case OMPRTL__kmpc_push_num_teams: {
    // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
    // kmp_int32 num_teams, kmp_int32 num_threads)
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
    break;
  }
  case OMPRTL__kmpc_fork_teams: {
    // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
    if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
      if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
        llvm::LLVMContext &Ctx = F->getContext();
        llvm::MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the __kmpc_fork_teams:
        // - The callback callee is argument number 2 (microtask).
        // - The first two arguments of the callback callee are unknown (-1).
        // - All variadic arguments to the __kmpc_fork_teams are passed to the
        //   callback callee.
        F->addMetadata(
            llvm::LLVMContext::MD_callback,
            *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                        2, {-1, -1},
                                        /* VarArgsArePassed */ true)}));
      }
    }
    break;
  }
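
  // Cancellation sketch (illustrative): '#pragma omp cancel for' becomes
  //   if (__kmpc_cancel(&loc, gtid, cncl_kind) != 0)
  //     <branch to the cancellation exit of the innermost region>;
  // and '#pragma omp cancellation point' tests __kmpc_cancellationpoint the
  // same way.
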
  case OMPRTL__kmpc_taskloop: {
    // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
    // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
    // sched, kmp_uint64 grainsize, void *task_dup);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.IntTy,
                                CGM.VoidPtrTy,
                                CGM.IntTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty,
                                CGM.IntTy,
                                CGM.IntTy,
                                CGM.Int64Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
    break;
  }
  case OMPRTL__kmpc_doacross_init: {
    // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
    // num_dims, struct kmp_dim *dims);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.Int32Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
    break;
  }
  case OMPRTL__kmpc_doacross_fini: {
    // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
    break;
  }
  case OMPRTL__kmpc_doacross_post: {
    // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
    // *vec);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
    break;
  }
  case OMPRTL__kmpc_doacross_wait: {
    // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
    // *vec);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
    break;
  }
  case OMPRTL__kmpc_task_reduction_init: {
    // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
    // *data);
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
    break;
  }
  case OMPRTL__kmpc_task_reduction_get_th_data: {
    // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
    // *d);
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(
        FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
    break;
  }
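
  // Doacross sketch (illustrative): for a loop with 'ordered(1)' clauses,
  //   __kmpc_doacross_post(&loc, gtid, vec) publishes this iteration
  //   ('depend(source)') and __kmpc_doacross_wait(&loc, gtid, vec) blocks on
  //   a 'depend(sink: ...)' vector, with __kmpc_doacross_init/fini bracketing
  //   the loop.
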
  case OMPRTL__kmpc_alloc: {
    // Build void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
    // al); omp_allocator_handle_t type is void *.
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
    break;
  }
  case OMPRTL__kmpc_free: {
    // Build void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
    // al); omp_allocator_handle_t type is void *.
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
    break;
  }
  case OMPRTL__kmpc_push_target_tripcount: {
    // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
    // size);
    llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
    break;
  }
  case OMPRTL__tgt_target: {
    // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
    break;
  }
  case OMPRTL__tgt_target_nowait: {
    // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
    // int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
    break;
  }
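
  // Offload fallback sketch (illustrative): a 'target' region is emitted as
  //   if (__tgt_target(device_id, host_ptr, n, bases, ptrs, sizes, types) != 0)
  //     <host fallback: run the target region body on the CPU>;
  // i.e. a nonzero return means the offload failed and the host version runs.
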
  case OMPRTL__tgt_target_teams: {
    // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
    // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
    break;
  }
  case OMPRTL__tgt_target_teams_nowait: {
    // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
    // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
    // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
    break;
  }
  case OMPRTL__tgt_register_requires: {
    // Build void __tgt_register_requires(int64_t flags);
    llvm::Type *TypeParams[] = {CGM.Int64Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
    break;
  }
  case OMPRTL__tgt_register_lib: {
    // Build void __tgt_register_lib(__tgt_bin_desc *desc);
    QualType ParamTy =
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
    break;
  }
  case OMPRTL__tgt_unregister_lib: {
    // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
    QualType ParamTy =
        CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
    llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
    break;
  }
  case OMPRTL__tgt_target_data_begin: {
    // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
    break;
  }
  case OMPRTL__tgt_target_data_begin_nowait: {
    // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
    break;
  }
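
  // Data-mapping sketch (illustrative): '#pragma omp target data map(...)'
  // brackets its body with
  //   __tgt_target_data_begin(device_id, n, bases, ptrs, sizes, types);
  //   <body>
  //   __tgt_target_data_end(device_id, n, bases, ptrs, sizes, types);
  // using one parallel-array entry per mapped variable.
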
  case OMPRTL__tgt_target_data_end: {
    // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
    break;
  }
  case OMPRTL__tgt_target_data_end_nowait: {
    // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
    break;
  }
  case OMPRTL__tgt_target_data_update: {
    // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
    break;
  }
  case OMPRTL__tgt_target_data_update_nowait: {
    // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
    break;
  }
  }
  assert(RTLFn && "Unable to find OpenMP runtime function");
  return RTLFn;
}

llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
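
// Static schedule sketch (illustrative): a statically scheduled 'for' is
// emitted as
//   __kmpc_for_static_init_4(&loc, gtid, schedtype, &last, &lb, &ub, &stride,
//                            incr, chunk);
//   for (i = lb; i <= ub; i += incr) <body>;
//   __kmpc_for_static_fini(&loc, gtid);
// with the runtime clamping lb/ub to this thread's share of the iteration
// space.
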
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2523 llvm::Type *TypeParams[] = { 2524 getIdentTyPointerTy(), // loc 2525 CGM.Int32Ty, // tid 2526 }; 2527 auto *FnTy = 2528 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2529 return CGM.CreateRuntimeFunction(FnTy, Name); 2530 } 2531 2532 llvm::FunctionCallee 2533 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2534 assert((IVSize == 32 || IVSize == 64) && 2535 "IV size is not compatible with the omp runtime"); 2536 StringRef Name = 2537 IVSize == 32 2538 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2539 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2540 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2541 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2542 llvm::Type *TypeParams[] = { 2543 getIdentTyPointerTy(), // loc 2544 CGM.Int32Ty, // tid 2545 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2546 PtrTy, // p_lower 2547 PtrTy, // p_upper 2548 PtrTy // p_stride 2549 }; 2550 auto *FnTy = 2551 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2552 return CGM.CreateRuntimeFunction(FnTy, Name); 2553 } 2554 2555 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2556 if (CGM.getLangOpts().OpenMPSimd) 2557 return Address::invalid(); 2558 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2559 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2560 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2561 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2562 HasRequiresUnifiedSharedMemory))) { 2563 SmallString<64> PtrName; 2564 { 2565 llvm::raw_svector_ostream OS(PtrName); 2566 OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr"; 2567 } 2568 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2569 if (!Ptr) { 2570 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2571 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2572 PtrName); 2573 if (!CGM.getLangOpts().OpenMPIsDevice) { 2574 auto *GV = cast<llvm::GlobalVariable>(Ptr); 2575 GV->setLinkage(llvm::GlobalValue::ExternalLinkage); 2576 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2577 } 2578 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr)); 2579 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2580 } 2581 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2582 } 2583 return Address::invalid(); 2584 } 2585 2586 llvm::Constant * 2587 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2588 assert(!CGM.getLangOpts().OpenMPUseTLS || 2589 !CGM.getContext().getTargetInfo().isTLSSupported()); 2590 // Lookup the entry, lazily creating it if necessary. 
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);
      if (!CGM.getLangOpts().OpenMPIsDevice) {
        auto *GV = cast<llvm::GlobalVariable>(Ptr);
        GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      }
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}
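
// Threadprivate access sketch (illustrative): each use of a threadprivate
// 'T v;' loads the per-thread copy via roughly
//   T *p = (T *)__kmpc_threadprivate_cached(&loc, gtid, &v, sizeof(T), &cache);
// where 'cache' is the per-variable internal global created above, which the
// runtime uses to memoize the thread-local lookup.
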
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL: the parameter is reserved by the runtime, which currently
    // asserts that it is always NULL.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should always be valid and have a file ID (the user cannot use
  // #pragma directives in macros).
  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}
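
// Naming sketch (illustrative): for a declare-target variable 'V' defined at
// test.c:3, the offload entries registered below use names of the form
//   __omp_offloading_<device-id>_<file-id>_V_l3_ctor / ..._dtor
// so host and device agree on a unique entry per definition.
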
2797   unsigned DeviceID;
2798   unsigned FileID;
2799   unsigned Line;
2800   getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2801   SmallString<128> Buffer, Out;
2802   {
2803     llvm::raw_svector_ostream OS(Buffer);
2804     OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2805        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2806   }
2807
2808   const Expr *Init = VD->getAnyInitializer();
2809   if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2810     llvm::Constant *Ctor;
2811     llvm::Constant *ID;
2812     if (CGM.getLangOpts().OpenMPIsDevice) {
2813       // Generate a function that re-emits the declaration's initializer into
2814       // the device copy of the variable VD.
2815       CodeGenFunction CtorCGF(CGM);
2816
2817       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2818       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2819       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2820           FTy, Twine(Buffer, "_ctor"), FI, Loc);
2821       auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2822       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2823                             FunctionArgList(), Loc, Loc);
2824       auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2825       CtorCGF.EmitAnyExprToMem(Init,
2826                                Address(Addr, CGM.getContext().getDeclAlign(VD)),
2827                                Init->getType().getQualifiers(),
2828                                /*IsInitializer=*/true);
2829       CtorCGF.FinishFunction();
2830       Ctor = Fn;
2831       ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2832       CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2833     } else {
2834       Ctor = new llvm::GlobalVariable(
2835           CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2836           llvm::GlobalValue::PrivateLinkage,
2837           llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2838       ID = Ctor;
2839     }
2840
2841     // Register the information for the entry associated with the constructor.
2842     Out.clear();
2843     OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2844         DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2845         ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2846   }
2847   if (VD->getType().isDestructedType() != QualType::DK_none) {
2848     llvm::Constant *Dtor;
2849     llvm::Constant *ID;
2850     if (CGM.getLangOpts().OpenMPIsDevice) {
2851       // Generate a function that emits the destructor call for the device
2852       // copy of the variable VD.
2853       CodeGenFunction DtorCGF(CGM);
2854
2855       const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2856       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2857       llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2858           FTy, Twine(Buffer, "_dtor"), FI, Loc);
2859       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2860       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2861                             FunctionArgList(), Loc, Loc);
2862       // Create a scope with an artificial location for the body of this
2863       // function.
2864 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2865 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2866 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2867 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2868 DtorCGF.FinishFunction(); 2869 Dtor = Fn; 2870 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2871 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2872 } else { 2873 Dtor = new llvm::GlobalVariable( 2874 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2875 llvm::GlobalValue::PrivateLinkage, 2876 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2877 ID = Dtor; 2878 } 2879 // Register the information for the entry associated with the destructor. 2880 Out.clear(); 2881 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2882 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2883 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2884 } 2885 return CGM.getLangOpts().OpenMPIsDevice; 2886 } 2887 2888 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2889 QualType VarType, 2890 StringRef Name) { 2891 std::string Suffix = getName({"artificial", ""}); 2892 std::string CacheSuffix = getName({"cache", ""}); 2893 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2894 llvm::Value *GAddr = 2895 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2896 llvm::Value *Args[] = { 2897 emitUpdateLocation(CGF, SourceLocation()), 2898 getThreadID(CGF, SourceLocation()), 2899 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2900 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2901 /*isSigned=*/false), 2902 getOrCreateInternalVariable( 2903 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2904 return Address( 2905 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2906 CGF.EmitRuntimeCall( 2907 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2908 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2909 CGM.getPointerAlign()); 2910 } 2911 2912 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 2913 const RegionCodeGenTy &ThenGen, 2914 const RegionCodeGenTy &ElseGen) { 2915 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2916 2917 // If the condition constant folds and can be elided, try to avoid emitting 2918 // the condition and the dead arm of the if/else. 2919 bool CondConstant; 2920 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2921 if (CondConstant) 2922 ThenGen(CGF); 2923 else 2924 ElseGen(CGF); 2925 return; 2926 } 2927 2928 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2929 // emit the conditional branch. 2930 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2931 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2932 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2933 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2934 2935 // Emit the 'then' code. 2936 CGF.EmitBlock(ThenBlock); 2937 ThenGen(CGF); 2938 CGF.EmitBranch(ContBlock); 2939 // Emit the 'else' code if present. 2940 // There is no need to emit line number for unconditional branch. 2941 (void)ApplyDebugLocation::CreateEmpty(CGF); 2942 CGF.EmitBlock(ElseBlock); 2943 ElseGen(CGF); 2944 // There is no need to emit line number for unconditional branch. 
2945   (void)ApplyDebugLocation::CreateEmpty(CGF);
2946   CGF.EmitBranch(ContBlock);
2947   // Emit the continuation block for code after the if.
2948   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2949 }
2950
2951 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2952                                        llvm::Function *OutlinedFn,
2953                                        ArrayRef<llvm::Value *> CapturedVars,
2954                                        const Expr *IfCond) {
2955   if (!CGF.HaveInsertPoint())
2956     return;
2957   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2958   auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2959                                                      PrePostActionTy &) {
2960     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2961     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2962     llvm::Value *Args[] = {
2963         RTLoc,
2964         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2965         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2966     llvm::SmallVector<llvm::Value *, 16> RealArgs;
2967     RealArgs.append(std::begin(Args), std::end(Args));
2968     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2969
2970     llvm::FunctionCallee RTLFn =
2971         RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2972     CGF.EmitRuntimeCall(RTLFn, RealArgs);
2973   };
2974   auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2975                                                           PrePostActionTy &) {
2976     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2977     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2978     // Build calls:
2979     // __kmpc_serialized_parallel(&Loc, GTid);
2980     llvm::Value *Args[] = {RTLoc, ThreadID};
2981     CGF.EmitRuntimeCall(
2982         RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2983
2984     // OutlinedFn(&gtid, &zero, CapturedStruct);
2985     Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2986                                                         /*Name*/ ".zero.addr");
2987     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2988     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2989     // ThreadId for serialized parallels is 0.
2990     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2991     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2992     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2993     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2994
2995     // __kmpc_end_serialized_parallel(&Loc, GTid);
2996     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2997     CGF.EmitRuntimeCall(
2998         RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2999         EndArgs);
3000   };
3001   if (IfCond) {
3002     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
3003   } else {
3004     RegionCodeGenTy ThenRCG(ThenGen);
3005     ThenRCG(CGF);
3006   }
3007 }
3008
3009 // If we're inside an (outlined) parallel region, use the region info's
3010 // thread-ID variable (it is passed as the first argument of the outlined
3011 // function, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
3012 // region but in a regular serial code region, get the thread ID by calling
3013 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash it in a temporary
3014 // and return the address of that temporary.
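// Illustratively, the serial-path code emitted below has the IR shape
// (a sketch; value names are invented):
//   %tid = call i32 @__kmpc_global_thread_num(%struct.ident_t* %loc)
//   %.threadid_temp. = alloca i32
//   store i32 %tid, i32* %.threadid_temp.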
3015 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 3016 SourceLocation Loc) { 3017 if (auto *OMPRegionInfo = 3018 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3019 if (OMPRegionInfo->getThreadIDVariable()) 3020 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 3021 3022 llvm::Value *ThreadID = getThreadID(CGF, Loc); 3023 QualType Int32Ty = 3024 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 3025 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 3026 CGF.EmitStoreOfScalar(ThreadID, 3027 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 3028 3029 return ThreadIDTemp; 3030 } 3031 3032 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 3033 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 3034 SmallString<256> Buffer; 3035 llvm::raw_svector_ostream Out(Buffer); 3036 Out << Name; 3037 StringRef RuntimeName = Out.str(); 3038 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 3039 if (Elem.second) { 3040 assert(Elem.second->getType()->getPointerElementType() == Ty && 3041 "OMP internal variable has different type than requested"); 3042 return &*Elem.second; 3043 } 3044 3045 return Elem.second = new llvm::GlobalVariable( 3046 CGM.getModule(), Ty, /*IsConstant*/ false, 3047 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 3048 Elem.first(), /*InsertBefore=*/nullptr, 3049 llvm::GlobalValue::NotThreadLocal, AddressSpace); 3050 } 3051 3052 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 3053 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 3054 std::string Name = getName({Prefix, "var"}); 3055 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 3056 } 3057 3058 namespace { 3059 /// Common pre(post)-action for different OpenMP constructs. 
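/// Illustratively, for a conditional construct the emitted shape is
///   if (EnterCallee(EnterArgs)) { <region body>; ExitCallee(ExitArgs); }
/// (a sketch; see the 'master' and 'single' emitters below).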
3060 class CommonActionTy final : public PrePostActionTy { 3061 llvm::FunctionCallee EnterCallee; 3062 ArrayRef<llvm::Value *> EnterArgs; 3063 llvm::FunctionCallee ExitCallee; 3064 ArrayRef<llvm::Value *> ExitArgs; 3065 bool Conditional; 3066 llvm::BasicBlock *ContBlock = nullptr; 3067 3068 public: 3069 CommonActionTy(llvm::FunctionCallee EnterCallee, 3070 ArrayRef<llvm::Value *> EnterArgs, 3071 llvm::FunctionCallee ExitCallee, 3072 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 3073 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 3074 ExitArgs(ExitArgs), Conditional(Conditional) {} 3075 void Enter(CodeGenFunction &CGF) override { 3076 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 3077 if (Conditional) { 3078 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 3079 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3080 ContBlock = CGF.createBasicBlock("omp_if.end"); 3081 // Generate the branch (If-stmt) 3082 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 3083 CGF.EmitBlock(ThenBlock); 3084 } 3085 } 3086 void Done(CodeGenFunction &CGF) { 3087 // Emit the rest of blocks/branches 3088 CGF.EmitBranch(ContBlock); 3089 CGF.EmitBlock(ContBlock, true); 3090 } 3091 void Exit(CodeGenFunction &CGF) override { 3092 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 3093 } 3094 }; 3095 } // anonymous namespace 3096 3097 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 3098 StringRef CriticalName, 3099 const RegionCodeGenTy &CriticalOpGen, 3100 SourceLocation Loc, const Expr *Hint) { 3101 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 3102 // CriticalOpGen(); 3103 // __kmpc_end_critical(ident_t *, gtid, Lock); 3104 // Prepare arguments and build a call to __kmpc_critical 3105 if (!CGF.HaveInsertPoint()) 3106 return; 3107 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3108 getCriticalRegionLock(CriticalName)}; 3109 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 3110 std::end(Args)); 3111 if (Hint) { 3112 EnterArgs.push_back(CGF.Builder.CreateIntCast( 3113 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 3114 } 3115 CommonActionTy Action( 3116 createRuntimeFunction(Hint ? 
OMPRTL__kmpc_critical_with_hint 3117 : OMPRTL__kmpc_critical), 3118 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 3119 CriticalOpGen.setAction(Action); 3120 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 3121 } 3122 3123 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 3124 const RegionCodeGenTy &MasterOpGen, 3125 SourceLocation Loc) { 3126 if (!CGF.HaveInsertPoint()) 3127 return; 3128 // if(__kmpc_master(ident_t *, gtid)) { 3129 // MasterOpGen(); 3130 // __kmpc_end_master(ident_t *, gtid); 3131 // } 3132 // Prepare arguments and build a call to __kmpc_master 3133 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3134 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 3135 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 3136 /*Conditional=*/true); 3137 MasterOpGen.setAction(Action); 3138 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 3139 Action.Done(CGF); 3140 } 3141 3142 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 3143 SourceLocation Loc) { 3144 if (!CGF.HaveInsertPoint()) 3145 return; 3146 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 3147 llvm::Value *Args[] = { 3148 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3149 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 3150 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 3151 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3152 Region->emitUntiedSwitch(CGF); 3153 } 3154 3155 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 3156 const RegionCodeGenTy &TaskgroupOpGen, 3157 SourceLocation Loc) { 3158 if (!CGF.HaveInsertPoint()) 3159 return; 3160 // __kmpc_taskgroup(ident_t *, gtid); 3161 // TaskgroupOpGen(); 3162 // __kmpc_end_taskgroup(ident_t *, gtid); 3163 // Prepare arguments and build a call to __kmpc_taskgroup 3164 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3165 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 3166 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 3167 Args); 3168 TaskgroupOpGen.setAction(Action); 3169 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 3170 } 3171 3172 /// Given an array of pointers to variables, project the address of a 3173 /// given variable. 3174 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 3175 unsigned Index, const VarDecl *Var) { 3176 // Pull out the pointer to the variable. 
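  // Conceptually (sketch): Ptr = Array[Index]; Addr = (VarElemTy *)Ptr,
  // using the variable's natural alignment.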
3177 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 3178 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 3179 3180 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 3181 Addr = CGF.Builder.CreateElementBitCast( 3182 Addr, CGF.ConvertTypeForMem(Var->getType())); 3183 return Addr; 3184 } 3185 3186 static llvm::Value *emitCopyprivateCopyFunction( 3187 CodeGenModule &CGM, llvm::Type *ArgsType, 3188 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 3189 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 3190 SourceLocation Loc) { 3191 ASTContext &C = CGM.getContext(); 3192 // void copy_func(void *LHSArg, void *RHSArg); 3193 FunctionArgList Args; 3194 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3195 ImplicitParamDecl::Other); 3196 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3197 ImplicitParamDecl::Other); 3198 Args.push_back(&LHSArg); 3199 Args.push_back(&RHSArg); 3200 const auto &CGFI = 3201 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3202 std::string Name = 3203 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 3204 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 3205 llvm::GlobalValue::InternalLinkage, Name, 3206 &CGM.getModule()); 3207 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 3208 Fn->setDoesNotRecurse(); 3209 CodeGenFunction CGF(CGM); 3210 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 3211 // Dest = (void*[n])(LHSArg); 3212 // Src = (void*[n])(RHSArg); 3213 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3214 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3215 ArgsType), CGF.getPointerAlign()); 3216 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3217 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3218 ArgsType), CGF.getPointerAlign()); 3219 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 3220 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 3221 // ... 
3222 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 3223 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 3224 const auto *DestVar = 3225 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 3226 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 3227 3228 const auto *SrcVar = 3229 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 3230 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 3231 3232 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 3233 QualType Type = VD->getType(); 3234 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 3235 } 3236 CGF.FinishFunction(); 3237 return Fn; 3238 } 3239 3240 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 3241 const RegionCodeGenTy &SingleOpGen, 3242 SourceLocation Loc, 3243 ArrayRef<const Expr *> CopyprivateVars, 3244 ArrayRef<const Expr *> SrcExprs, 3245 ArrayRef<const Expr *> DstExprs, 3246 ArrayRef<const Expr *> AssignmentOps) { 3247 if (!CGF.HaveInsertPoint()) 3248 return; 3249 assert(CopyprivateVars.size() == SrcExprs.size() && 3250 CopyprivateVars.size() == DstExprs.size() && 3251 CopyprivateVars.size() == AssignmentOps.size()); 3252 ASTContext &C = CGM.getContext(); 3253 // int32 did_it = 0; 3254 // if(__kmpc_single(ident_t *, gtid)) { 3255 // SingleOpGen(); 3256 // __kmpc_end_single(ident_t *, gtid); 3257 // did_it = 1; 3258 // } 3259 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3260 // <copy_func>, did_it); 3261 3262 Address DidIt = Address::invalid(); 3263 if (!CopyprivateVars.empty()) { 3264 // int32 did_it = 0; 3265 QualType KmpInt32Ty = 3266 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3267 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 3268 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 3269 } 3270 // Prepare arguments and build a call to __kmpc_single 3271 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3272 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 3273 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 3274 /*Conditional=*/true); 3275 SingleOpGen.setAction(Action); 3276 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 3277 if (DidIt.isValid()) { 3278 // did_it = 1; 3279 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 3280 } 3281 Action.Done(CGF); 3282 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3283 // <copy_func>, did_it); 3284 if (DidIt.isValid()) { 3285 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 3286 QualType CopyprivateArrayTy = 3287 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 3288 /*IndexTypeQuals=*/0); 3289 // Create a list of all private variables for copyprivate. 3290 Address CopyprivateList = 3291 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 3292 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 3293 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 3294 CGF.Builder.CreateStore( 3295 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3296 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 3297 Elem); 3298 } 3299 // Build function that copies private values from single region to all other 3300 // threads in the corresponding parallel region. 
3301 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 3302 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 3303 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 3304 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 3305 Address CL = 3306 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 3307 CGF.VoidPtrTy); 3308 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 3309 llvm::Value *Args[] = { 3310 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 3311 getThreadID(CGF, Loc), // i32 <gtid> 3312 BufSize, // size_t <buf_size> 3313 CL.getPointer(), // void *<copyprivate list> 3314 CpyFn, // void (*) (void *, void *) <copy_func> 3315 DidItVal // i32 did_it 3316 }; 3317 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 3318 } 3319 } 3320 3321 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 3322 const RegionCodeGenTy &OrderedOpGen, 3323 SourceLocation Loc, bool IsThreads) { 3324 if (!CGF.HaveInsertPoint()) 3325 return; 3326 // __kmpc_ordered(ident_t *, gtid); 3327 // OrderedOpGen(); 3328 // __kmpc_end_ordered(ident_t *, gtid); 3329 // Prepare arguments and build a call to __kmpc_ordered 3330 if (IsThreads) { 3331 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3332 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 3333 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 3334 Args); 3335 OrderedOpGen.setAction(Action); 3336 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3337 return; 3338 } 3339 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3340 } 3341 3342 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 3343 unsigned Flags; 3344 if (Kind == OMPD_for) 3345 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3346 else if (Kind == OMPD_sections) 3347 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3348 else if (Kind == OMPD_single) 3349 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3350 else if (Kind == OMPD_barrier) 3351 Flags = OMP_IDENT_BARRIER_EXPL; 3352 else 3353 Flags = OMP_IDENT_BARRIER_IMPL; 3354 return Flags; 3355 } 3356 3357 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 3358 CodeGenFunction &CGF, const OMPLoopDirective &S, 3359 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 3360 // Check if the loop directive is actually a doacross loop directive. In this 3361 // case choose static, 1 schedule. 3362 if (llvm::any_of( 3363 S.getClausesOfKind<OMPOrderedClause>(), 3364 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 3365 ScheduleKind = OMPC_SCHEDULE_static; 3366 // Chunk size is 1 in this case. 
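  // E.g. a doacross loop such as '#pragma omp for ordered(1)' is emitted as
  // if schedule(static, 1) had been written (illustrative).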
3367     llvm::APInt ChunkSize(32, 1);
3368     ChunkExpr = IntegerLiteral::Create(
3369         CGF.getContext(), ChunkSize,
3370         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3371         SourceLocation());
3372   }
3373 }
3374
3375 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3376                                       OpenMPDirectiveKind Kind, bool EmitChecks,
3377                                       bool ForceSimpleCall) {
3378   if (!CGF.HaveInsertPoint())
3379     return;
3380   unsigned Flags = getDefaultFlagsForBarriers(Kind);
3383   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3384   // thread_id);
3385   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3386                          getThreadID(CGF, Loc)};
3387   if (auto *OMPRegionInfo =
3388           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3389     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3390       llvm::Value *Result = CGF.EmitRuntimeCall(
3391           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3392       if (EmitChecks) {
3393         // if (__kmpc_cancel_barrier()) {
3394         //   exit from construct;
3395         // }
3396         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3397         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3398         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3399         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3400         CGF.EmitBlock(ExitBB);
3401         // exit from construct;
3402         CodeGenFunction::JumpDest CancelDestination =
3403             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3404         CGF.EmitBranchThroughCleanup(CancelDestination);
3405         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3406       }
3407       return;
3408     }
3409   }
3410   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3411 }
3412
3413 /// Map the OpenMP loop schedule to the runtime enumeration.
3414 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3415                                           bool Chunked, bool Ordered) {
3416   switch (ScheduleKind) {
3417   case OMPC_SCHEDULE_static:
3418     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3419                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3420   case OMPC_SCHEDULE_dynamic:
3421     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3422   case OMPC_SCHEDULE_guided:
3423     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3424   case OMPC_SCHEDULE_runtime:
3425     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3426   case OMPC_SCHEDULE_auto:
3427     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3428   case OMPC_SCHEDULE_unknown:
3429     assert(!Chunked && "chunk was specified but schedule kind not known");
3430     return Ordered ? OMP_ord_static : OMP_sch_static;
3431   }
3432   llvm_unreachable("Unexpected runtime schedule");
3433 }
3434
3435 /// Map the OpenMP distribute schedule to the runtime enumeration.
3436 static OpenMPSchedType
3437 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3438   // Only 'static' is allowed for dist_schedule.
3439   return Chunked ?
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3440 } 3441 3442 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3443 bool Chunked) const { 3444 OpenMPSchedType Schedule = 3445 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3446 return Schedule == OMP_sch_static; 3447 } 3448 3449 bool CGOpenMPRuntime::isStaticNonchunked( 3450 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3451 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3452 return Schedule == OMP_dist_sch_static; 3453 } 3454 3455 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 3456 bool Chunked) const { 3457 OpenMPSchedType Schedule = 3458 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3459 return Schedule == OMP_sch_static_chunked; 3460 } 3461 3462 bool CGOpenMPRuntime::isStaticChunked( 3463 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3464 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3465 return Schedule == OMP_dist_sch_static_chunked; 3466 } 3467 3468 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3469 OpenMPSchedType Schedule = 3470 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3471 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3472 return Schedule != OMP_sch_static; 3473 } 3474 3475 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 3476 OpenMPScheduleClauseModifier M1, 3477 OpenMPScheduleClauseModifier M2) { 3478 int Modifier = 0; 3479 switch (M1) { 3480 case OMPC_SCHEDULE_MODIFIER_monotonic: 3481 Modifier = OMP_sch_modifier_monotonic; 3482 break; 3483 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3484 Modifier = OMP_sch_modifier_nonmonotonic; 3485 break; 3486 case OMPC_SCHEDULE_MODIFIER_simd: 3487 if (Schedule == OMP_sch_static_chunked) 3488 Schedule = OMP_sch_static_balanced_chunked; 3489 break; 3490 case OMPC_SCHEDULE_MODIFIER_last: 3491 case OMPC_SCHEDULE_MODIFIER_unknown: 3492 break; 3493 } 3494 switch (M2) { 3495 case OMPC_SCHEDULE_MODIFIER_monotonic: 3496 Modifier = OMP_sch_modifier_monotonic; 3497 break; 3498 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3499 Modifier = OMP_sch_modifier_nonmonotonic; 3500 break; 3501 case OMPC_SCHEDULE_MODIFIER_simd: 3502 if (Schedule == OMP_sch_static_chunked) 3503 Schedule = OMP_sch_static_balanced_chunked; 3504 break; 3505 case OMPC_SCHEDULE_MODIFIER_last: 3506 case OMPC_SCHEDULE_MODIFIER_unknown: 3507 break; 3508 } 3509 return Schedule | Modifier; 3510 } 3511 3512 void CGOpenMPRuntime::emitForDispatchInit( 3513 CodeGenFunction &CGF, SourceLocation Loc, 3514 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3515 bool Ordered, const DispatchRTInput &DispatchValues) { 3516 if (!CGF.HaveInsertPoint()) 3517 return; 3518 OpenMPSchedType Schedule = getRuntimeSchedule( 3519 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3520 assert(Ordered || 3521 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3522 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3523 Schedule != OMP_sch_static_balanced_chunked)); 3524 // Call __kmpc_dispatch_init( 3525 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3526 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3527 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3528 3529 // If the Chunk was not specified in the clause - use default value 1. 3530 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 3531 : CGF.Builder.getIntN(IVSize, 1); 3532 llvm::Value *Args[] = { 3533 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3534 CGF.Builder.getInt32(addMonoNonMonoModifier( 3535 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3536 DispatchValues.LB, // Lower 3537 DispatchValues.UB, // Upper 3538 CGF.Builder.getIntN(IVSize, 1), // Stride 3539 Chunk // Chunk 3540 }; 3541 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3542 } 3543 3544 static void emitForStaticInitCall( 3545 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3546 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 3547 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3548 const CGOpenMPRuntime::StaticRTInput &Values) { 3549 if (!CGF.HaveInsertPoint()) 3550 return; 3551 3552 assert(!Values.Ordered); 3553 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3554 Schedule == OMP_sch_static_balanced_chunked || 3555 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3556 Schedule == OMP_dist_sch_static || 3557 Schedule == OMP_dist_sch_static_chunked); 3558 3559 // Call __kmpc_for_static_init( 3560 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3561 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3562 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3563 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3564 llvm::Value *Chunk = Values.Chunk; 3565 if (Chunk == nullptr) { 3566 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3567 Schedule == OMP_dist_sch_static) && 3568 "expected static non-chunked schedule"); 3569 // If the Chunk was not specified in the clause - use default value 1. 3570 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3571 } else { 3572 assert((Schedule == OMP_sch_static_chunked || 3573 Schedule == OMP_sch_static_balanced_chunked || 3574 Schedule == OMP_ord_static_chunked || 3575 Schedule == OMP_dist_sch_static_chunked) && 3576 "expected static chunked schedule"); 3577 } 3578 llvm::Value *Args[] = { 3579 UpdateLocation, 3580 ThreadId, 3581 CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, 3582 M2)), // Schedule type 3583 Values.IL.getPointer(), // &isLastIter 3584 Values.LB.getPointer(), // &LB 3585 Values.UB.getPointer(), // &UB 3586 Values.ST.getPointer(), // &Stride 3587 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3588 Chunk // Chunk 3589 }; 3590 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3591 } 3592 3593 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3594 SourceLocation Loc, 3595 OpenMPDirectiveKind DKind, 3596 const OpenMPScheduleTy &ScheduleKind, 3597 const StaticRTInput &Values) { 3598 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3599 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3600 assert(isOpenMPWorksharingDirective(DKind) && 3601 "Expected loop-based or sections-based directive."); 3602 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3603 isOpenMPLoopDirective(DKind) 3604 ? 
OMP_IDENT_WORK_LOOP 3605 : OMP_IDENT_WORK_SECTIONS); 3606 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3607 llvm::FunctionCallee StaticInitFunction = 3608 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3609 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3610 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3611 } 3612 3613 void CGOpenMPRuntime::emitDistributeStaticInit( 3614 CodeGenFunction &CGF, SourceLocation Loc, 3615 OpenMPDistScheduleClauseKind SchedKind, 3616 const CGOpenMPRuntime::StaticRTInput &Values) { 3617 OpenMPSchedType ScheduleNum = 3618 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3619 llvm::Value *UpdatedLocation = 3620 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3621 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3622 llvm::FunctionCallee StaticInitFunction = 3623 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3624 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3625 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3626 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3627 } 3628 3629 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3630 SourceLocation Loc, 3631 OpenMPDirectiveKind DKind) { 3632 if (!CGF.HaveInsertPoint()) 3633 return; 3634 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3635 llvm::Value *Args[] = { 3636 emitUpdateLocation(CGF, Loc, 3637 isOpenMPDistributeDirective(DKind) 3638 ? OMP_IDENT_WORK_DISTRIBUTE 3639 : isOpenMPLoopDirective(DKind) 3640 ? OMP_IDENT_WORK_LOOP 3641 : OMP_IDENT_WORK_SECTIONS), 3642 getThreadID(CGF, Loc)}; 3643 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3644 Args); 3645 } 3646 3647 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3648 SourceLocation Loc, 3649 unsigned IVSize, 3650 bool IVSigned) { 3651 if (!CGF.HaveInsertPoint()) 3652 return; 3653 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3654 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3655 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3656 } 3657 3658 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3659 SourceLocation Loc, unsigned IVSize, 3660 bool IVSigned, Address IL, 3661 Address LB, Address UB, 3662 Address ST) { 3663 // Call __kmpc_dispatch_next( 3664 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3665 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3666 // kmp_int[32|64] *p_stride); 3667 llvm::Value *Args[] = { 3668 emitUpdateLocation(CGF, Loc), 3669 getThreadID(CGF, Loc), 3670 IL.getPointer(), // &isLastIter 3671 LB.getPointer(), // &Lower 3672 UB.getPointer(), // &Upper 3673 ST.getPointer() // &Stride 3674 }; 3675 llvm::Value *Call = 3676 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3677 return CGF.EmitScalarConversion( 3678 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3679 CGF.getContext().BoolTy, Loc); 3680 } 3681 3682 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3683 llvm::Value *NumThreads, 3684 SourceLocation Loc) { 3685 if (!CGF.HaveInsertPoint()) 3686 return; 3687 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3688 llvm::Value *Args[] = { 3689 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3690 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3691 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3692 Args); 3693 } 3694 3695 void 
CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3696 OpenMPProcBindClauseKind ProcBind, 3697 SourceLocation Loc) { 3698 if (!CGF.HaveInsertPoint()) 3699 return; 3700 // Constants for proc bind value accepted by the runtime. 3701 enum ProcBindTy { 3702 ProcBindFalse = 0, 3703 ProcBindTrue, 3704 ProcBindMaster, 3705 ProcBindClose, 3706 ProcBindSpread, 3707 ProcBindIntel, 3708 ProcBindDefault 3709 } RuntimeProcBind; 3710 switch (ProcBind) { 3711 case OMPC_PROC_BIND_master: 3712 RuntimeProcBind = ProcBindMaster; 3713 break; 3714 case OMPC_PROC_BIND_close: 3715 RuntimeProcBind = ProcBindClose; 3716 break; 3717 case OMPC_PROC_BIND_spread: 3718 RuntimeProcBind = ProcBindSpread; 3719 break; 3720 case OMPC_PROC_BIND_unknown: 3721 llvm_unreachable("Unsupported proc_bind value."); 3722 } 3723 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3724 llvm::Value *Args[] = { 3725 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3726 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3727 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3728 } 3729 3730 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3731 SourceLocation Loc) { 3732 if (!CGF.HaveInsertPoint()) 3733 return; 3734 // Build call void __kmpc_flush(ident_t *loc) 3735 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3736 emitUpdateLocation(CGF, Loc)); 3737 } 3738 3739 namespace { 3740 /// Indexes of fields for type kmp_task_t. 3741 enum KmpTaskTFields { 3742 /// List of shared variables. 3743 KmpTaskTShareds, 3744 /// Task routine. 3745 KmpTaskTRoutine, 3746 /// Partition id for the untied tasks. 3747 KmpTaskTPartId, 3748 /// Function with call of destructors for private variables. 3749 Data1, 3750 /// Task priority. 3751 Data2, 3752 /// (Taskloops only) Lower bound. 3753 KmpTaskTLowerBound, 3754 /// (Taskloops only) Upper bound. 3755 KmpTaskTUpperBound, 3756 /// (Taskloops only) Stride. 3757 KmpTaskTStride, 3758 /// (Taskloops only) Is last iteration flag. 3759 KmpTaskTLastIter, 3760 /// (Taskloops only) Reduction data. 3761 KmpTaskTReductions, 3762 }; 3763 } // anonymous namespace 3764 3765 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3766 return OffloadEntriesTargetRegion.empty() && 3767 OffloadEntriesDeviceGlobalVar.empty(); 3768 } 3769 3770 /// Initialize target region entry. 3771 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3772 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3773 StringRef ParentName, unsigned LineNum, 3774 unsigned Order) { 3775 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3776 "only required for the device " 3777 "code generation."); 3778 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3779 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3780 OMPTargetRegionEntryTargetRegion); 3781 ++OffloadingEntriesNum; 3782 } 3783 3784 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3785 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3786 StringRef ParentName, unsigned LineNum, 3787 llvm::Constant *Addr, llvm::Constant *ID, 3788 OMPTargetRegionEntryKind Flags) { 3789 // If we are emitting code for a target, the entry is already initialized, 3790 // only has to be registered. 
3791 if (CGM.getLangOpts().OpenMPIsDevice) { 3792 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3793 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3794 DiagnosticsEngine::Error, 3795 "Unable to find target region on line '%0' in the device code."); 3796 CGM.getDiags().Report(DiagID) << LineNum; 3797 return; 3798 } 3799 auto &Entry = 3800 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3801 assert(Entry.isValid() && "Entry not initialized!"); 3802 Entry.setAddress(Addr); 3803 Entry.setID(ID); 3804 Entry.setFlags(Flags); 3805 } else { 3806 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3807 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3808 ++OffloadingEntriesNum; 3809 } 3810 } 3811 3812 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3813 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3814 unsigned LineNum) const { 3815 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3816 if (PerDevice == OffloadEntriesTargetRegion.end()) 3817 return false; 3818 auto PerFile = PerDevice->second.find(FileID); 3819 if (PerFile == PerDevice->second.end()) 3820 return false; 3821 auto PerParentName = PerFile->second.find(ParentName); 3822 if (PerParentName == PerFile->second.end()) 3823 return false; 3824 auto PerLine = PerParentName->second.find(LineNum); 3825 if (PerLine == PerParentName->second.end()) 3826 return false; 3827 // Fail if this entry is already registered. 3828 if (PerLine->second.getAddress() || PerLine->second.getID()) 3829 return false; 3830 return true; 3831 } 3832 3833 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3834 const OffloadTargetRegionEntryInfoActTy &Action) { 3835 // Scan all target region entries and perform the provided action. 
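  // The loop nesting below mirrors the map structure:
  //   DeviceID -> FileID -> ParentName -> LineNum -> entry.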
3836   for (const auto &D : OffloadEntriesTargetRegion)
3837     for (const auto &F : D.second)
3838       for (const auto &P : F.second)
3839         for (const auto &L : P.second)
3840           Action(D.first, F.first, P.first(), L.first, L.second);
3841 }
3842
3843 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3844     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3845                                        OMPTargetGlobalVarEntryKind Flags,
3846                                        unsigned Order) {
3847   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3848                                              "only required for the device "
3849                                              "code generation.");
3850   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3851   ++OffloadingEntriesNum;
3852 }
3853
3854 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3855     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3856                                      CharUnits VarSize,
3857                                      OMPTargetGlobalVarEntryKind Flags,
3858                                      llvm::GlobalValue::LinkageTypes Linkage) {
3859   if (CGM.getLangOpts().OpenMPIsDevice) {
3860     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3861     assert(Entry.isValid() && Entry.getFlags() == Flags &&
3862            "Entry not initialized!");
3863     assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3864            "Resetting with the new address.");
3865     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3866       if (Entry.getVarSize().isZero()) {
3867         Entry.setVarSize(VarSize);
3868         Entry.setLinkage(Linkage);
3869       }
3870       return;
3871     }
3872     Entry.setVarSize(VarSize);
3873     Entry.setLinkage(Linkage);
3874     Entry.setAddress(Addr);
3875   } else {
3876     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3877       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3878       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3879              "Entry not initialized!");
3880       assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3881              "Resetting with the new address.");
3882       if (Entry.getVarSize().isZero()) {
3883         Entry.setVarSize(VarSize);
3884         Entry.setLinkage(Linkage);
3885       }
3886       return;
3887     }
3888     OffloadEntriesDeviceGlobalVar.try_emplace(
3889         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3890     ++OffloadingEntriesNum;
3891   }
3892 }
3893
3894 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3895     actOnDeviceGlobalVarEntriesInfo(
3896         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3897   // Scan all device global variable entries and perform the provided action.
3898   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3899     Action(E.getKey(), E.getValue());
3900 }
3901
3902 llvm::Function *
3903 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3904   // If we don't have entries or if we are emitting code for the device, we
3905   // don't need to do anything.
3906   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3907     return nullptr;
3908
3909   llvm::Module &M = CGM.getModule();
3910   ASTContext &C = CGM.getContext();
3911
3912   // Get the list of devices we care about.
3913   const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
3914
3915   // We should be creating an offloading descriptor only if there are devices
3916   // specified.
3917   assert(!Devices.empty() && "No OpenMP offloading devices??");
3918
3919   // Create the external variables that will point to the begin and end of the
3920   // host entries section. These will be defined by the linker.
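  // (Each entry emitted by createOffloadEntry below is placed in a dedicated
  // 'omp_offloading.entries'-style section; the begin/end symbols bracket
  // that section at link time. The exact symbol and section names come from
  // getName.)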
3921   llvm::Type *OffloadEntryTy =
3922       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
3923   std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
3924   auto *HostEntriesBegin = new llvm::GlobalVariable(
3925       M, OffloadEntryTy, /*isConstant=*/true,
3926       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3927       EntriesBeginName);
3928   std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
3929   auto *HostEntriesEnd =
3930       new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
3931                                llvm::GlobalValue::ExternalLinkage,
3932                                /*Initializer=*/nullptr, EntriesEndName);
3933
3934   // Create all device images.
3935   auto *DeviceImageTy = cast<llvm::StructType>(
3936       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
3937   ConstantInitBuilder DeviceImagesBuilder(CGM);
3938   ConstantArrayBuilder DeviceImagesEntries =
3939       DeviceImagesBuilder.beginArray(DeviceImageTy);
3940
3941   for (const llvm::Triple &Device : Devices) {
3942     StringRef T = Device.getTriple();
3943     std::string BeginName = getName({"omp_offloading", "img_start", ""});
3944     auto *ImgBegin = new llvm::GlobalVariable(
3945         M, CGM.Int8Ty, /*isConstant=*/true,
3946         llvm::GlobalValue::ExternalWeakLinkage,
3947         /*Initializer=*/nullptr, Twine(BeginName).concat(T));
3948     std::string EndName = getName({"omp_offloading", "img_end", ""});
3949     auto *ImgEnd = new llvm::GlobalVariable(
3950         M, CGM.Int8Ty, /*isConstant=*/true,
3951         llvm::GlobalValue::ExternalWeakLinkage,
3952         /*Initializer=*/nullptr, Twine(EndName).concat(T));
3953
3954     llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
3955                               HostEntriesEnd};
3956     createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
3957                                              DeviceImagesEntries);
3958   }
3959
3960   // Create device images global array.
3961   std::string ImagesName = getName({"omp_offloading", "device_images"});
3962   llvm::GlobalVariable *DeviceImages =
3963       DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
3964                                                 CGM.getPointerAlign(),
3965                                                 /*isConstant=*/true);
3966   DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3967
3968   // This is a zero array to be used in the creation of the constant expressions.
3969   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3970                              llvm::Constant::getNullValue(CGM.Int32Ty)};
3971
3972   // Create the target region descriptor.
3973   llvm::Constant *Data[] = {
3974       llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
3975       llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3976                                            DeviceImages, Index),
3977       HostEntriesBegin, HostEntriesEnd};
3978   std::string Descriptor = getName({"omp_offloading", "descriptor"});
3979   llvm::GlobalVariable *Desc = createGlobalStruct(
3980       CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
3981
3982   // Emit code to register or unregister the descriptor at program startup
3983   // or shutdown, respectively.
3984
3985   llvm::Function *UnRegFn;
3986   {
3987     FunctionArgList Args;
3988     ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
3989     Args.push_back(&DummyPtr);
3990
3991     CodeGenFunction CGF(CGM);
3992     // Disable debug info for the global (de-)initializers because they are
3993     // not part of any particular construct.
3994     CGF.disableDebugInfo();
3995     const auto &FI =
3996         CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3997     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3998     std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
3999     UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
4000     CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
4001     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
4002                         Desc);
4003     CGF.FinishFunction();
4004   }
4005   llvm::Function *RegFn;
4006   {
4007     CodeGenFunction CGF(CGM);
4008     // Disable debug info for the global (de-)initializers because they are
4009     // not part of any particular construct.
4010     CGF.disableDebugInfo();
4011     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
4012     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
4013
4014     // Encode offload target triples into the registration function name. It
4015     // will serve as a comdat key for the registration/unregistration code for
4016     // this particular combination of offloading targets.
4017     SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
4018     RegFnNameParts[0] = "omp_offloading";
4019     RegFnNameParts[1] = "descriptor_reg";
4020     llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
4021                     [](const llvm::Triple &T) -> const std::string& {
4022                       return T.getTriple();
4023                     });
4024     llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
4025     std::string Descriptor = getName(RegFnNameParts);
4026     RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
4027     CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
4028     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
4029     // Create a variable to drive the registration and unregistration of the
4030     // descriptor, so we can reuse the logic that emits Ctors and Dtors.
4031     ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
4032                                   SourceLocation(), nullptr, C.CharTy,
4033                                   ImplicitParamDecl::Other);
4034     CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
4035     CGF.FinishFunction();
4036   }
4037   if (CGM.supportsCOMDAT()) {
4038     // It is sufficient to call the registration function only once, so create
4039     // a COMDAT group for the registration/unregistration functions and
4040     // associated data. That reduces startup time and code size. The
4041     // registration function serves as the COMDAT group key.
4042     llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
4043     RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
4044     RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
4045     RegFn->setComdat(ComdatKey);
4046     UnRegFn->setComdat(ComdatKey);
4047     DeviceImages->setComdat(ComdatKey);
4048     Desc->setComdat(ComdatKey);
4049   }
4050   return RegFn;
4051 }
4052
4053 void CGOpenMPRuntime::createOffloadEntry(
4054     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4055     llvm::GlobalValue::LinkageTypes Linkage) {
4056   StringRef Name = Addr->getName();
4057   llvm::Module &M = CGM.getModule();
4058   llvm::LLVMContext &C = M.getContext();
4059
4060   // Create constant string with the name.
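  // The initializer built below follows the __tgt_offload_entry layout from
  // getTgtOffloadEntryQTy(): { addr, name, size, flags, reserved }.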
4061   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4062
4063   std::string StringName = getName({"omp_offloading", "entry_name"});
4064   auto *Str = new llvm::GlobalVariable(
4065       M, StrPtrInit->getType(), /*isConstant=*/true,
4066       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4067   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4068
4069   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4070                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4071                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4072                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4073                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4074   std::string EntryName = getName({"omp_offloading", "entry", ""});
4075   llvm::GlobalVariable *Entry = createGlobalStruct(
4076       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4077       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4078
4079   // The entry has to be created in the section the linker expects it to be.
4080   std::string Section = getName({"omp_offloading", "entries"});
4081   Entry->setSection(Section);
4082 }
4083
4084 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
4085   // Emit the offloading entries and metadata so that the device codegen side
4086   // can easily figure out what to emit. The produced metadata looks like
4087   // this:
4088   //
4089   // !omp_offload.info = !{!1, ...}
4090   //
4091   // Right now we only generate metadata for functions that contain target
4092   // regions.
4093
4094   // If we do not have entries, we don't need to do anything.
4095   if (OffloadEntriesInfoManager.empty())
4096     return;
4097
4098   llvm::Module &M = CGM.getModule();
4099   llvm::LLVMContext &C = M.getContext();
4100   SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
4101       OrderedEntries(OffloadEntriesInfoManager.size());
4102   llvm::SmallVector<StringRef, 16> ParentFunctions(
4103       OffloadEntriesInfoManager.size());
4104
4105   // Auxiliary methods to create metadata values and strings.
4106   auto &&GetMDInt = [this](unsigned V) {
4107     return llvm::ConstantAsMetadata::get(
4108         llvm::ConstantInt::get(CGM.Int32Ty, V));
4109   };
4110
4111   auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4112
4113   // Create the offloading info metadata node.
4114   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4115
4116   // Create a function that emits metadata for each target region entry.
4117   auto &&TargetRegionMetadataEmitter =
4118       [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
4119           unsigned DeviceID, unsigned FileID, StringRef ParentName,
4120           unsigned Line,
4121           const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
4122         // Generate metadata for target regions. Each entry of this metadata
4123         // contains:
4124         // - Entry 0 -> Kind of this type of metadata (0).
4125         // - Entry 1 -> Device ID of the file where the entry was identified.
4126         // - Entry 2 -> File ID of the file where the entry was identified.
4127         // - Entry 3 -> Mangled name of the function where the entry was
4128         //   identified.
4129         // - Entry 4 -> Line in the file where the entry was identified.
4130         // - Entry 5 -> Order the entry was created.
4131         // The first element of the metadata node is the kind.
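        // Illustratively, one such operand looks like (values invented):
        //   !{i32 0, i32 <device-id>, i32 <file-id>, !"parent_name",
        //     i32 <line>, i32 <order>}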
4132         llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4133                                  GetMDInt(FileID), GetMDString(ParentName),
4134                                  GetMDInt(Line), GetMDInt(E.getOrder())};
4135
4136         // Save this entry in the right position of the ordered entries array.
4137         OrderedEntries[E.getOrder()] = &E;
4138         ParentFunctions[E.getOrder()] = ParentName;
4139
4140         // Add metadata to the named metadata node.
4141         MD->addOperand(llvm::MDNode::get(C, Ops));
4142       };
4143
4144   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
4145       TargetRegionMetadataEmitter);
4146
4147   // Create a function that emits metadata for each device global variable entry.
4148   auto &&DeviceGlobalVarMetadataEmitter =
4149       [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4150        MD](StringRef MangledName,
4151            const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
4152                &E) {
4153         // Generate metadata for global variables. Each entry of this metadata
4154         // contains:
4155         // - Entry 0 -> Kind of this type of metadata (1).
4156         // - Entry 1 -> Mangled name of the variable.
4157         // - Entry 2 -> Declare target kind.
4158         // - Entry 3 -> Order the entry was created.
4159         // The first element of the metadata node is the kind.
4160         llvm::Metadata *Ops[] = {
4161             GetMDInt(E.getKind()), GetMDString(MangledName),
4162             GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4163
4164         // Save this entry in the right position of the ordered entries array.
4165         OrderedEntries[E.getOrder()] = &E;
4166
4167         // Add metadata to the named metadata node.
4168         MD->addOperand(llvm::MDNode::get(C, Ops));
4169       };
4170
4171   OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4172       DeviceGlobalVarMetadataEmitter);
4173
4174   for (const auto *E : OrderedEntries) {
4175     assert(E && "All ordered entries must exist!");
4176     if (const auto *CE =
4177             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4178                 E)) {
4179       if (!CE->getID() || !CE->getAddress()) {
4180         // Do not blame the entry if the parent function is not emitted.
4181         StringRef FnName = ParentFunctions[CE->getOrder()];
4182         if (!CGM.GetGlobalValue(FnName))
4183           continue;
4184         unsigned DiagID = CGM.getDiags().getCustomDiagID(
4185             DiagnosticsEngine::Error,
4186             "Offloading entry for target region is incorrect: either the "
4187             "address or the ID is invalid.");
4188         CGM.getDiags().Report(DiagID);
4189         continue;
4190       }
4191       createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4192                          CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4193     } else if (const auto *CE =
4194                    dyn_cast<OffloadEntriesInfoManagerTy::
4195                                 OffloadEntryInfoDeviceGlobalVar>(E)) {
4196       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4197           static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4198               CE->getFlags());
4199       switch (Flags) {
4200       case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4201         if (CGM.getLangOpts().OpenMPIsDevice &&
4202             CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
4203           continue;
4204         if (!CE->getAddress()) {
4205           unsigned DiagID = CGM.getDiags().getCustomDiagID(
4206               DiagnosticsEngine::Error,
4207               "Offloading entry for declare target variable is incorrect: the "
4208               "address is invalid.");
4209           CGM.getDiags().Report(DiagID);
4210           continue;
4211         }
4212         // The variable has no definition, so there is no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declare target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in
  // createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void    *addr;     // Pointer to the offload entry info.
  //                      // (function or global)
  //   char    *name;     // Name of the function or global.
  //   size_t   size;     // Size of the entry info (0 if it is a function).
  //   int32_t  flags;    // Flags associated with the entry, e.g. 'link'.
  //   int32_t  reserved; // Reserved, for use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  // These are the types we need to build:
  // struct __tgt_device_image{
  //   void *ImageStart; // Pointer to the target code start.
  //   void *ImageEnd;   // Pointer to the target code end.
  //   // We also add the host entries to the device image, as it may be useful
  //   // for the target runtime to have access to that information.
  //   __tgt_offload_entry *EntriesBegin; // Begin of the table with all
  //                                      // the entries.
  //   __tgt_offload_entry *EntriesEnd;   // End of the table with all the
  //                                      // entries (non inclusive).
  // };
  if (TgtDeviceImageQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtDeviceImageQTy = C.getRecordType(RD);
  }
  return TgtDeviceImageQTy;
}

QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  // struct __tgt_bin_desc{
  //   int32_t              NumDevices;    // Number of devices supported.
  //   __tgt_device_image  *DeviceImages;  // Array of device images
  //                                       // (one per device).
  //   __tgt_offload_entry *EntriesBegin;  // Begin of the table with all the
  //                                       // entries.
  //   __tgt_offload_entry *EntriesEnd;    // End of the table with all the
  //                                       // entries (non inclusive).
  // };
  if (TgtBinaryDescriptorQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
    RD->startDefinition();
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtBinaryDescriptorQTy = C.getRecordType(RD);
  }
  return TgtBinaryDescriptorQTy;
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const VarDecl *Original;
  const VarDecl *PrivateCopy;
  const VarDecl *PrivateElemInit;
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //   /* private vars */
    // };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //   void *              shareds;
  //   kmp_routine_entry_t routine;
  //   kmp_int32           part_id;
  //   kmp_cmplrdata_t     data1;
  //   kmp_cmplrdata_t     data2;
  //   For taskloops additional fields:
  //   kmp_uint64          lb;
  //   kmp_uint64          ub;
  //   kmp_int64           st;
  //   kmp_int32           liter;
  //   void *              reductions;
  // };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //   kmp_task_t task_data;
  //   .kmp_privates_t. privates;
  // };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,..., <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               ArrayRef<const Expr *> LastprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
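  // As a sketch of the non-target case (illustrative only): for
  // 'firstprivate(a)', SrcBase is set up below to point at the task's shareds
  // block, and the loop that follows emits roughly 'priv.a = shareds->a',
  // using a plain memcpy for trivially copyable arrays and the copy
  // constructor otherwise.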
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops, i.e. whether any
/// private copy requires non-trivial (copy constructor) initialization.
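/// For example (illustrative only):
/// \code
/// struct S { S(); S(const S &); };
/// S s;
/// #pragma omp taskloop firstprivate(s) // non-trivial copy-init => task dup
/// for (int i = 0; i < n; ++i)
///   ;
/// \endcode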
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
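  // The values come from the source task: KmpTaskSharedsPtr below is loaded
  // from task_src's shareds field so that emitPrivatesInit() can read the
  // original firstprivate values from there (a restatement of the flow below,
  // not extra semantics).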
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  for (const FieldDecl *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by alignment, highest first.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any, otherwise use the default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
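  // Conceptually (a sketch of the runtime-side view, mirroring kmp.h, not
  // generated code):
  //   union kmp_cmplrdata_t {
  //     kmp_int32           priority;    // written via data2 below
  //     kmp_routine_entry_t destructors; // written via data1 below
  //   };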
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
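    // A sketch of how 'depend' clauses map to the flag values declared just
    // below (the numeric values mirror the kmp_depend_info flags in the
    // runtime; 'out' and 'inout' intentionally share one encoding):
    //   depend(in: x)            -> 0x1
    //   depend(out: x)           -> 0x3
    //   depend(inout: x)         -> 0x3
    //   depend(mutexinoutset: x) -> 0x4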
    enum RTLDependenceKindTy {
      DepIn = 0x01,
      DepInOut = 0x3,
      DepMutexInOutSet = 0x4
    };
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_mutexinoutset:
        DepKind = DepMutexInOutSet;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
  // dependence list is not empty.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
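    // As an overall sketch (illustrative pseudo-code), the undeferred path
    // emitted by this lambda behaves like:
    //   if (ndeps) __kmpc_omp_wait_deps(loc, gtid, ndeps, deps, 0, nullptr);
    //   __kmpc_omp_task_begin_if0(loc, gtid, new_task);
    //   .omp_task_entry.(gtid, new_task);
    //   __kmpc_omp_task_complete_if0(loc, gtid, new_task);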
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup is emitted by the
                         // compiler.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform the reduction operation element by element.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
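  // Sketch of the emitted control flow (block names match those created
  // above; the IR shape is illustrative only):
  //   omp.arraycpy.body:
  //     %src = phi [ rhs.begin, entry ], [ %src.next, body ]
  //     %dst = phi [ lhs.begin, entry ], [ %dst.next, body ]
  //     <RedOpGen(dst, src)>
  //     %dst.next = getelementptr %dst, 1
  //     %src.next = getelementptr %src, 1
  //     %done = icmp eq %dst.next, %lhs.end
  //     br %done, omp.arraycpy.done, omp.arraycpy.body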
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
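/// For example (illustrative only), given a user-defined reduction such as
/// \code
/// #pragma omp declare reduction(merge : T : omp_out = combine(omp_out, omp_in))
/// \endcode
/// the UDR case below rebinds the opaque callee to the generated combiner
/// function and emits the call with the LHS/RHS elements as arguments.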
5562 static void emitReductionCombiner(CodeGenFunction &CGF, 5563 const Expr *ReductionOp) { 5564 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5565 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5566 if (const auto *DRE = 5567 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5568 if (const auto *DRD = 5569 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5570 std::pair<llvm::Function *, llvm::Function *> Reduction = 5571 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5572 RValue Func = RValue::get(Reduction.first); 5573 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5574 CGF.EmitIgnoredExpr(ReductionOp); 5575 return; 5576 } 5577 CGF.EmitIgnoredExpr(ReductionOp); 5578 } 5579 5580 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5581 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5582 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5583 ArrayRef<const Expr *> ReductionOps) { 5584 ASTContext &C = CGM.getContext(); 5585 5586 // void reduction_func(void *LHSArg, void *RHSArg); 5587 FunctionArgList Args; 5588 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5589 ImplicitParamDecl::Other); 5590 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5591 ImplicitParamDecl::Other); 5592 Args.push_back(&LHSArg); 5593 Args.push_back(&RHSArg); 5594 const auto &CGFI = 5595 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5596 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5597 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5598 llvm::GlobalValue::InternalLinkage, Name, 5599 &CGM.getModule()); 5600 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5601 Fn->setDoesNotRecurse(); 5602 CodeGenFunction CGF(CGM); 5603 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5604 5605 // Dst = (void*[n])(LHSArg); 5606 // Src = (void*[n])(RHSArg); 5607 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5608 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5609 ArgsType), CGF.getPointerAlign()); 5610 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5611 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5612 ArgsType), CGF.getPointerAlign()); 5613 5614 // ... 5615 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5616 // ... 5617 CodeGenFunction::OMPPrivateScope Scope(CGF); 5618 auto IPriv = Privates.begin(); 5619 unsigned Idx = 0; 5620 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5621 const auto *RHSVar = 5622 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5623 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5624 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5625 }); 5626 const auto *LHSVar = 5627 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5628 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5629 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5630 }); 5631 QualType PrivTy = (*IPriv)->getType(); 5632 if (PrivTy->isVariablyModifiedType()) { 5633 // Get array size and emit VLA type. 
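// A VLA private occupies two consecutive slots in the reduction list: the
// element pointer and, next to it, its run-time element count smuggled
// through a void* slot, e.g. (illustrative sketch):
//
//   void *red_list[3] = { &vla_priv, (void *)(uintptr_t)n, &scalar_priv };
//
// The ptrtoint load below rebinds the VLA's size expression to that count
// before re-emitting the variably modified type.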
5634 ++Idx; 5635 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5636 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5637 const VariableArrayType *VLA = 5638 CGF.getContext().getAsVariableArrayType(PrivTy); 5639 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5640 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5641 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5642 CGF.EmitVariablyModifiedType(PrivTy); 5643 } 5644 } 5645 Scope.Privatize(); 5646 IPriv = Privates.begin(); 5647 auto ILHS = LHSExprs.begin(); 5648 auto IRHS = RHSExprs.begin(); 5649 for (const Expr *E : ReductionOps) { 5650 if ((*IPriv)->getType()->isArrayType()) { 5651 // Emit reduction for array section. 5652 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5653 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5654 EmitOMPAggregateReduction( 5655 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5656 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5657 emitReductionCombiner(CGF, E); 5658 }); 5659 } else { 5660 // Emit reduction for array subscript or single variable. 5661 emitReductionCombiner(CGF, E); 5662 } 5663 ++IPriv; 5664 ++ILHS; 5665 ++IRHS; 5666 } 5667 Scope.ForceCleanup(); 5668 CGF.FinishFunction(); 5669 return Fn; 5670 } 5671 5672 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5673 const Expr *ReductionOp, 5674 const Expr *PrivateRef, 5675 const DeclRefExpr *LHS, 5676 const DeclRefExpr *RHS) { 5677 if (PrivateRef->getType()->isArrayType()) { 5678 // Emit reduction for array section. 5679 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5680 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5681 EmitOMPAggregateReduction( 5682 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5683 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5684 emitReductionCombiner(CGF, ReductionOp); 5685 }); 5686 } else { 5687 // Emit reduction for array subscript or single variable. 5688 emitReductionCombiner(CGF, ReductionOp); 5689 } 5690 } 5691 5692 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5693 ArrayRef<const Expr *> Privates, 5694 ArrayRef<const Expr *> LHSExprs, 5695 ArrayRef<const Expr *> RHSExprs, 5696 ArrayRef<const Expr *> ReductionOps, 5697 ReductionOptionsTy Options) { 5698 if (!CGF.HaveInsertPoint()) 5699 return; 5700 5701 bool WithNowait = Options.WithNowait; 5702 bool SimpleReduction = Options.SimpleReduction; 5703 5704 // Next code should be emitted for reduction: 5705 // 5706 // static kmp_critical_name lock = { 0 }; 5707 // 5708 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5709 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5710 // ... 5711 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5712 // *(Type<n>-1*)rhs[<n>-1]); 5713 // } 5714 // 5715 // ... 5716 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5717 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5718 // RedList, reduce_func, &<lock>)) { 5719 // case 1: 5720 // ... 5721 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5722 // ... 5723 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5724 // break; 5725 // case 2: 5726 // ... 5727 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5728 // ... 
5729 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5730 // break; 5731 // default:; 5732 // } 5733 // 5734 // if SimpleReduction is true, only the next code is generated: 5735 // ... 5736 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5737 // ... 5738 5739 ASTContext &C = CGM.getContext(); 5740 5741 if (SimpleReduction) { 5742 CodeGenFunction::RunCleanupsScope Scope(CGF); 5743 auto IPriv = Privates.begin(); 5744 auto ILHS = LHSExprs.begin(); 5745 auto IRHS = RHSExprs.begin(); 5746 for (const Expr *E : ReductionOps) { 5747 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5748 cast<DeclRefExpr>(*IRHS)); 5749 ++IPriv; 5750 ++ILHS; 5751 ++IRHS; 5752 } 5753 return; 5754 } 5755 5756 // 1. Build a list of reduction variables. 5757 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5758 auto Size = RHSExprs.size(); 5759 for (const Expr *E : Privates) { 5760 if (E->getType()->isVariablyModifiedType()) 5761 // Reserve place for array size. 5762 ++Size; 5763 } 5764 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5765 QualType ReductionArrayTy = 5766 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 5767 /*IndexTypeQuals=*/0); 5768 Address ReductionList = 5769 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5770 auto IPriv = Privates.begin(); 5771 unsigned Idx = 0; 5772 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5773 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5774 CGF.Builder.CreateStore( 5775 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5776 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 5777 Elem); 5778 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5779 // Store array size. 5780 ++Idx; 5781 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5782 llvm::Value *Size = CGF.Builder.CreateIntCast( 5783 CGF.getVLASize( 5784 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5785 .NumElts, 5786 CGF.SizeTy, /*isSigned=*/false); 5787 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5788 Elem); 5789 } 5790 } 5791 5792 // 2. Emit reduce_func(). 5793 llvm::Function *ReductionFn = emitReductionFunction( 5794 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5795 LHSExprs, RHSExprs, ReductionOps); 5796 5797 // 3. Create static kmp_critical_name lock = { 0 }; 5798 std::string Name = getName({"reduction"}); 5799 llvm::Value *Lock = getCriticalRegionLock(Name); 5800 5801 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5802 // RedList, reduce_func, &<lock>); 5803 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5804 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5805 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5806 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5807 ReductionList.getPointer(), CGF.VoidPtrTy); 5808 llvm::Value *Args[] = { 5809 IdentTLoc, // ident_t *<loc> 5810 ThreadId, // i32 <gtid> 5811 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5812 ReductionArrayTySize, // size_type sizeof(RedList) 5813 RL, // void *RedList 5814 ReductionFn, // void (*) (void *, void *) <reduce_func> 5815 Lock // kmp_critical_name *&<lock> 5816 }; 5817 llvm::Value *Res = CGF.EmitRuntimeCall( 5818 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 5819 : OMPRTL__kmpc_reduce), 5820 Args); 5821 5822 // 5. 
Build switch(res) 5823 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5824 llvm::SwitchInst *SwInst = 5825 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5826 5827 // 6. Build case 1: 5828 // ... 5829 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5830 // ... 5831 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5832 // break; 5833 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5834 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5835 CGF.EmitBlock(Case1BB); 5836 5837 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5838 llvm::Value *EndArgs[] = { 5839 IdentTLoc, // ident_t *<loc> 5840 ThreadId, // i32 <gtid> 5841 Lock // kmp_critical_name *&<lock> 5842 }; 5843 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5844 CodeGenFunction &CGF, PrePostActionTy &Action) { 5845 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5846 auto IPriv = Privates.begin(); 5847 auto ILHS = LHSExprs.begin(); 5848 auto IRHS = RHSExprs.begin(); 5849 for (const Expr *E : ReductionOps) { 5850 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5851 cast<DeclRefExpr>(*IRHS)); 5852 ++IPriv; 5853 ++ILHS; 5854 ++IRHS; 5855 } 5856 }; 5857 RegionCodeGenTy RCG(CodeGen); 5858 CommonActionTy Action( 5859 nullptr, llvm::None, 5860 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 5861 : OMPRTL__kmpc_end_reduce), 5862 EndArgs); 5863 RCG.setAction(Action); 5864 RCG(CGF); 5865 5866 CGF.EmitBranch(DefaultBB); 5867 5868 // 7. Build case 2: 5869 // ... 5870 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5871 // ... 5872 // break; 5873 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5874 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5875 CGF.EmitBlock(Case2BB); 5876 5877 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5878 CodeGenFunction &CGF, PrePostActionTy &Action) { 5879 auto ILHS = LHSExprs.begin(); 5880 auto IRHS = RHSExprs.begin(); 5881 auto IPriv = Privates.begin(); 5882 for (const Expr *E : ReductionOps) { 5883 const Expr *XExpr = nullptr; 5884 const Expr *EExpr = nullptr; 5885 const Expr *UpExpr = nullptr; 5886 BinaryOperatorKind BO = BO_Comma; 5887 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5888 if (BO->getOpcode() == BO_Assign) { 5889 XExpr = BO->getLHS(); 5890 UpExpr = BO->getRHS(); 5891 } 5892 } 5893 // Try to emit update expression as a simple atomic. 5894 const Expr *RHSExpr = UpExpr; 5895 if (RHSExpr) { 5896 // Analyze RHS part of the whole expression. 5897 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5898 RHSExpr->IgnoreParenImpCasts())) { 5899 // If this is a conditional operator, analyze its condition for 5900 // min/max reduction operator. 
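// E.g. for a min reduction the update is 'x = x < e ? x : e': the update
// expression is a conditional operator, so the comparison 'x < e' extracted
// below supplies both the opcode (BO_LT) and the 'e' operand for the atomic
// min/max update ('x' and 'e' are illustrative names).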
5901 RHSExpr = ACO->getCond(); 5902 } 5903 if (const auto *BORHS = 5904 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5905 EExpr = BORHS->getRHS(); 5906 BO = BORHS->getOpcode(); 5907 } 5908 } 5909 if (XExpr) { 5910 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5911 auto &&AtomicRedGen = [BO, VD, 5912 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5913 const Expr *EExpr, const Expr *UpExpr) { 5914 LValue X = CGF.EmitLValue(XExpr); 5915 RValue E; 5916 if (EExpr) 5917 E = CGF.EmitAnyExpr(EExpr); 5918 CGF.EmitOMPAtomicSimpleUpdateExpr( 5919 X, E, BO, /*IsXLHSInRHSPart=*/true, 5920 llvm::AtomicOrdering::Monotonic, Loc, 5921 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5922 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5923 PrivateScope.addPrivate( 5924 VD, [&CGF, VD, XRValue, Loc]() { 5925 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5926 CGF.emitOMPSimpleStore( 5927 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5928 VD->getType().getNonReferenceType(), Loc); 5929 return LHSTemp; 5930 }); 5931 (void)PrivateScope.Privatize(); 5932 return CGF.EmitAnyExpr(UpExpr); 5933 }); 5934 }; 5935 if ((*IPriv)->getType()->isArrayType()) { 5936 // Emit atomic reduction for array section. 5937 const auto *RHSVar = 5938 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5939 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5940 AtomicRedGen, XExpr, EExpr, UpExpr); 5941 } else { 5942 // Emit atomic reduction for array subscript or single variable. 5943 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5944 } 5945 } else { 5946 // Emit as a critical region. 5947 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5948 const Expr *, const Expr *) { 5949 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5950 std::string Name = RT.getName({"atomic_reduction"}); 5951 RT.emitCriticalRegion( 5952 CGF, Name, 5953 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5954 Action.Enter(CGF); 5955 emitReductionCombiner(CGF, E); 5956 }, 5957 Loc); 5958 }; 5959 if ((*IPriv)->getType()->isArrayType()) { 5960 const auto *LHSVar = 5961 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5962 const auto *RHSVar = 5963 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5964 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5965 CritRedGen); 5966 } else { 5967 CritRedGen(CGF, nullptr, nullptr, nullptr); 5968 } 5969 } 5970 ++ILHS; 5971 ++IRHS; 5972 ++IPriv; 5973 } 5974 }; 5975 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5976 if (!WithNowait) { 5977 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5978 llvm::Value *EndArgs[] = { 5979 IdentTLoc, // ident_t *<loc> 5980 ThreadId, // i32 <gtid> 5981 Lock // kmp_critical_name *&<lock> 5982 }; 5983 CommonActionTy Action(nullptr, llvm::None, 5984 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 5985 EndArgs); 5986 AtomicRCG.setAction(Action); 5987 AtomicRCG(CGF); 5988 } else { 5989 AtomicRCG(CGF); 5990 } 5991 5992 CGF.EmitBranch(DefaultBB); 5993 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5994 } 5995 5996 /// Generates unique name for artificial threadprivate variables. 5997 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5998 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5999 const Expr *Ref) { 6000 SmallString<256> Buffer; 6001 llvm::raw_svector_ostream Out(Buffer); 6002 const clang::DeclRefExpr *DE; 6003 const VarDecl *D = ::getBaseDecl(Ref, DE); 6004 if (!D) 6005 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 6006 D = D->getCanonicalDecl(); 6007 std::string Name = CGM.getOpenMPRuntime().getName( 6008 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 6009 Out << Prefix << Name << "_" 6010 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 6011 return Out.str(); 6012 } 6013 6014 /// Emits reduction initializer function: 6015 /// \code 6016 /// void @.red_init(void* %arg) { 6017 /// %0 = bitcast void* %arg to <type>* 6018 /// store <type> <init>, <type>* %0 6019 /// ret void 6020 /// } 6021 /// \endcode 6022 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 6023 SourceLocation Loc, 6024 ReductionCodeGen &RCG, unsigned N) { 6025 ASTContext &C = CGM.getContext(); 6026 FunctionArgList Args; 6027 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6028 ImplicitParamDecl::Other); 6029 Args.emplace_back(&Param); 6030 const auto &FnInfo = 6031 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6032 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6033 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 6034 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6035 Name, &CGM.getModule()); 6036 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6037 Fn->setDoesNotRecurse(); 6038 CodeGenFunction CGF(CGM); 6039 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6040 Address PrivateAddr = CGF.EmitLoadOfPointer( 6041 CGF.GetAddrOfLocalVar(&Param), 6042 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6043 llvm::Value *Size = nullptr; 6044 // If the size of the reduction item is non-constant, load it from global 6045 // threadprivate variable. 
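// (That threadprivate variable is created and filled by
// emitTaskReductionFixups below, under a name derived from
// generateUniqueName(CGM, "reduction_size", ...), so the initializer,
// combiner and finalizer all reload the same dynamically computed size.)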
6046 if (RCG.getSizes(N).second) { 6047 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6048 CGF, CGM.getContext().getSizeType(), 6049 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6050 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6051 CGM.getContext().getSizeType(), Loc); 6052 } 6053 RCG.emitAggregateType(CGF, N, Size); 6054 LValue SharedLVal; 6055 // If the initializer uses the initializer from the declare reduction 6056 // construct, emit a pointer to the address of the original reduction item 6057 // (required by the reduction initializer). 6058 if (RCG.usesReductionInitializer(N)) { 6059 Address SharedAddr = 6060 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6061 CGF, CGM.getContext().VoidPtrTy, 6062 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6063 SharedAddr = CGF.EmitLoadOfPointer( 6064 SharedAddr, 6065 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 6066 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 6067 } else { 6068 SharedLVal = CGF.MakeNaturalAlignAddrLValue( 6069 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 6070 CGM.getContext().VoidPtrTy); 6071 } 6072 // Emit the initializer: 6073 // %0 = bitcast void* %arg to <type>* 6074 // store <type> <init>, <type>* %0 6075 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, 6076 [](CodeGenFunction &) { return false; }); 6077 CGF.FinishFunction(); 6078 return Fn; 6079 } 6080 6081 /// Emits reduction combiner function: 6082 /// \code 6083 /// void @.red_comb(void* %arg0, void* %arg1) { 6084 /// %lhs = bitcast void* %arg0 to <type>* 6085 /// %rhs = bitcast void* %arg1 to <type>* 6086 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 6087 /// store <type> %2, <type>* %lhs 6088 /// ret void 6089 /// } 6090 /// \endcode 6091 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 6092 SourceLocation Loc, 6093 ReductionCodeGen &RCG, unsigned N, 6094 const Expr *ReductionOp, 6095 const Expr *LHS, const Expr *RHS, 6096 const Expr *PrivateRef) { 6097 ASTContext &C = CGM.getContext(); 6098 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 6099 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 6100 FunctionArgList Args; 6101 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 6102 C.VoidPtrTy, ImplicitParamDecl::Other); 6103 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6104 ImplicitParamDecl::Other); 6105 Args.emplace_back(&ParamInOut); 6106 Args.emplace_back(&ParamIn); 6107 const auto &FnInfo = 6108 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6109 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6110 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 6111 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6112 Name, &CGM.getModule()); 6113 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6114 Fn->setDoesNotRecurse(); 6115 CodeGenFunction CGF(CGM); 6116 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6117 llvm::Value *Size = nullptr; 6118 // If the size of the reduction item is non-constant, load it from global 6119 // threadprivate variable.
6120 if (RCG.getSizes(N).second) { 6121 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6122 CGF, CGM.getContext().getSizeType(), 6123 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6124 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6125 CGM.getContext().getSizeType(), Loc); 6126 } 6127 RCG.emitAggregateType(CGF, N, Size); 6128 // Remap lhs and rhs variables to the addresses of the function arguments. 6129 // %lhs = bitcast void* %arg0 to <type>* 6130 // %rhs = bitcast void* %arg1 to <type>* 6131 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6132 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6133 // Pull out the pointer to the variable. 6134 Address PtrAddr = CGF.EmitLoadOfPointer( 6135 CGF.GetAddrOfLocalVar(&ParamInOut), 6136 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6137 return CGF.Builder.CreateElementBitCast( 6138 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6139 }); 6140 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6141 // Pull out the pointer to the variable. 6142 Address PtrAddr = CGF.EmitLoadOfPointer( 6143 CGF.GetAddrOfLocalVar(&ParamIn), 6144 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6145 return CGF.Builder.CreateElementBitCast( 6146 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6147 }); 6148 PrivateScope.Privatize(); 6149 // Emit the combiner body: 6150 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6151 // store <type> %2, <type>* %lhs 6152 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6153 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6154 cast<DeclRefExpr>(RHS)); 6155 CGF.FinishFunction(); 6156 return Fn; 6157 } 6158 6159 /// Emits reduction finalizer function: 6160 /// \code 6161 /// void @.red_fini(void* %arg) { 6162 /// %0 = bitcast void* %arg to <type>* 6163 /// <destroy>(<type>* %0) 6164 /// ret void 6165 /// } 6166 /// \endcode 6167 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6168 SourceLocation Loc, 6169 ReductionCodeGen &RCG, unsigned N) { 6170 if (!RCG.needCleanups(N)) 6171 return nullptr; 6172 ASTContext &C = CGM.getContext(); 6173 FunctionArgList Args; 6174 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6175 ImplicitParamDecl::Other); 6176 Args.emplace_back(&Param); 6177 const auto &FnInfo = 6178 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6179 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6180 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6181 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6182 Name, &CGM.getModule()); 6183 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6184 Fn->setDoesNotRecurse(); 6185 CodeGenFunction CGF(CGM); 6186 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6187 Address PrivateAddr = CGF.EmitLoadOfPointer( 6188 CGF.GetAddrOfLocalVar(&Param), 6189 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6190 llvm::Value *Size = nullptr; 6191 // If the size of the reduction item is non-constant, load it from global 6192 // threadprivate variable. 
6193 if (RCG.getSizes(N).second) { 6194 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6195 CGF, CGM.getContext().getSizeType(), 6196 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6197 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6198 CGM.getContext().getSizeType(), Loc); 6199 } 6200 RCG.emitAggregateType(CGF, N, Size); 6201 // Emit the finalizer body: 6202 // <destroy>(<type>* %0) 6203 RCG.emitCleanups(CGF, N, PrivateAddr); 6204 CGF.FinishFunction(); 6205 return Fn; 6206 } 6207 6208 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6209 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6210 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6211 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6212 return nullptr; 6213 6214 // Build typedef struct: 6215 // kmp_task_red_input { 6216 // void *reduce_shar; // shared reduction item 6217 // size_t reduce_size; // size of data item 6218 // void *reduce_init; // data initialization routine 6219 // void *reduce_fini; // data finalization routine 6220 // void *reduce_comb; // data combiner routine 6221 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6222 // } kmp_task_red_input_t; 6223 ASTContext &C = CGM.getContext(); 6224 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 6225 RD->startDefinition(); 6226 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6227 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6228 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6229 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6230 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6231 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6232 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6233 RD->completeDefinition(); 6234 QualType RDType = C.getRecordType(RD); 6235 unsigned Size = Data.ReductionVars.size(); 6236 llvm::APInt ArraySize(/*numBits=*/64, Size); 6237 QualType ArrayRDType = C.getConstantArrayType( 6238 RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); 6239 // kmp_task_red_input_t .rd_input.[Size]; 6240 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6241 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 6242 Data.ReductionOps); 6243 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6244 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6245 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6246 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6247 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6248 TaskRedInput.getPointer(), Idxs, 6249 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6250 ".rd_input.gep."); 6251 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6252 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6253 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6254 RCG.emitSharedLValue(CGF, Cnt); 6255 llvm::Value *CastedShared = 6256 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); 6257 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6258 RCG.emitAggregateType(CGF, Cnt); 6259 llvm::Value *SizeValInChars; 6260 llvm::Value *SizeVal; 6261 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6262 // We use delayed creation/initialization for VLAs, array sections and 6263 // custom reduction initializations. 
It is required because the runtime does not 6264 // provide a way to pass the sizes of VLAs/array sections to the 6265 // initializer/combiner/finalizer functions and does not pass the pointer to 6266 // the original reduction item to the initializer. Instead, threadprivate 6267 // global variables are used to store these values for use in those functions. 6268 bool DelayedCreation = !!SizeVal; 6269 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6270 /*isSigned=*/false); 6271 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6272 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6273 // ElemLVal.reduce_init = init; 6274 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6275 llvm::Value *InitAddr = 6276 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6277 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6278 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); 6279 // ElemLVal.reduce_fini = fini; 6280 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6281 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6282 llvm::Value *FiniAddr = Fini 6283 ? CGF.EmitCastToVoidPtr(Fini) 6284 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6285 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6286 // ElemLVal.reduce_comb = comb; 6287 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6288 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6289 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6290 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6291 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6292 // ElemLVal.flags = 0; 6293 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6294 if (DelayedCreation) { 6295 CGF.EmitStoreOfScalar( 6296 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6297 FlagsLVal); 6298 } else 6299 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); 6300 } 6301 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void 6302 // *data); 6303 llvm::Value *Args[] = { 6304 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6305 /*isSigned=*/true), 6306 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6307 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6308 CGM.VoidPtrTy)}; 6309 return CGF.EmitRuntimeCall( 6310 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); 6311 } 6312 6313 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6314 SourceLocation Loc, 6315 ReductionCodeGen &RCG, 6316 unsigned N) { 6317 auto Sizes = RCG.getSizes(N); 6318 // Emit the threadprivate global variable if the size is non-constant 6319 // (Sizes.second != nullptr). 6320 if (Sizes.second) { 6321 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6322 /*isSigned=*/false); 6323 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6324 CGF, CGM.getContext().getSizeType(), 6325 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6326 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6327 } 6328 // Store the address of the original reduction item if a custom initializer is used.
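// Taken together with the emitReduce*Function helpers above, the handshake
// looks like this in C-like pseudocode (names invented for exposition):
//
//   // at the reduction site, per item with dynamic size / custom init:
//   tp_red_size = <size of item in bytes>;   // artificial threadprivate var
//   tp_red_orig = &original_item;            // artificial threadprivate var
//   // inside .red_init/.red_comb/.red_fini, which only receive void*:
//   size_t sz = tp_red_size;                 // recover the dynamic size
//   void *orig = tp_red_orig;                // recover the original item
//
// because the callbacks registered with __kmpc_task_reduction_init carry no
// extra parameters beyond the item pointers.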
6329 if (RCG.usesReductionInitializer(N)) { 6330 Address SharedAddr = getAddrOfArtificialThreadPrivate( 6331 CGF, CGM.getContext().VoidPtrTy, 6332 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6333 CGF.Builder.CreateStore( 6334 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6335 RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), 6336 SharedAddr, /*IsVolatile=*/false); 6337 } 6338 } 6339 6340 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6341 SourceLocation Loc, 6342 llvm::Value *ReductionsPtr, 6343 LValue SharedLVal) { 6344 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6345 // *d); 6346 llvm::Value *Args[] = { 6347 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6348 /*isSigned=*/true), 6349 ReductionsPtr, 6350 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), 6351 CGM.VoidPtrTy)}; 6352 return Address( 6353 CGF.EmitRuntimeCall( 6354 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 6355 SharedLVal.getAlignment()); 6356 } 6357 6358 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6359 SourceLocation Loc) { 6360 if (!CGF.HaveInsertPoint()) 6361 return; 6362 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6363 // global_tid); 6364 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6365 // Ignore return result until untied tasks are supported. 6366 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 6367 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6368 Region->emitUntiedSwitch(CGF); 6369 } 6370 6371 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6372 OpenMPDirectiveKind InnerKind, 6373 const RegionCodeGenTy &CodeGen, 6374 bool HasCancel) { 6375 if (!CGF.HaveInsertPoint()) 6376 return; 6377 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6378 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6379 } 6380 6381 namespace { 6382 enum RTCancelKind { 6383 CancelNoreq = 0, 6384 CancelParallel = 1, 6385 CancelLoop = 2, 6386 CancelSections = 3, 6387 CancelTaskgroup = 4 6388 }; 6389 } // anonymous namespace 6390 6391 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6392 RTCancelKind CancelKind = CancelNoreq; 6393 if (CancelRegion == OMPD_parallel) 6394 CancelKind = CancelParallel; 6395 else if (CancelRegion == OMPD_for) 6396 CancelKind = CancelLoop; 6397 else if (CancelRegion == OMPD_sections) 6398 CancelKind = CancelSections; 6399 else { 6400 assert(CancelRegion == OMPD_taskgroup); 6401 CancelKind = CancelTaskgroup; 6402 } 6403 return CancelKind; 6404 } 6405 6406 void CGOpenMPRuntime::emitCancellationPointCall( 6407 CodeGenFunction &CGF, SourceLocation Loc, 6408 OpenMPDirectiveKind CancelRegion) { 6409 if (!CGF.HaveInsertPoint()) 6410 return; 6411 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6412 // global_tid, kmp_int32 cncl_kind); 6413 if (auto *OMPRegionInfo = 6414 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6415 // For 'cancellation point taskgroup', the task region info may not have a 6416 // cancel. This may instead happen in another adjacent task. 6417 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6418 llvm::Value *Args[] = { 6419 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6420 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6421 // The call returns a nonzero value if cancellation has been requested; use the result to exit the construct.
6422 llvm::Value *Result = CGF.EmitRuntimeCall( 6423 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 6424 // if (__kmpc_cancellationpoint()) { 6425 // exit from construct; 6426 // } 6427 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6428 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6429 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6430 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6431 CGF.EmitBlock(ExitBB); 6432 // exit from construct; 6433 CodeGenFunction::JumpDest CancelDest = 6434 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6435 CGF.EmitBranchThroughCleanup(CancelDest); 6436 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6437 } 6438 } 6439 } 6440 6441 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6442 const Expr *IfCond, 6443 OpenMPDirectiveKind CancelRegion) { 6444 if (!CGF.HaveInsertPoint()) 6445 return; 6446 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6447 // kmp_int32 cncl_kind); 6448 if (auto *OMPRegionInfo = 6449 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6450 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 6451 PrePostActionTy &) { 6452 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6453 llvm::Value *Args[] = { 6454 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6455 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6456 // The call returns a nonzero value if cancellation has been requested; use the result to exit the construct. 6457 llvm::Value *Result = CGF.EmitRuntimeCall( 6458 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 6459 // if (__kmpc_cancel()) { 6460 // exit from construct; 6461 // } 6462 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6463 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6464 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6465 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6466 CGF.EmitBlock(ExitBB); 6467 // exit from construct; 6468 CodeGenFunction::JumpDest CancelDest = 6469 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6470 CGF.EmitBranchThroughCleanup(CancelDest); 6471 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6472 }; 6473 if (IfCond) { 6474 emitOMPIfClause(CGF, IfCond, ThenGen, 6475 [](CodeGenFunction &, PrePostActionTy &) {}); 6476 } else { 6477 RegionCodeGenTy ThenRCG(ThenGen); 6478 ThenRCG(CGF); 6479 } 6480 } 6481 } 6482 6483 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6484 const OMPExecutableDirective &D, StringRef ParentName, 6485 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6486 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6487 assert(!ParentName.empty() && "Invalid target region parent name!"); 6488 HasEmittedTargetRegion = true; 6489 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6490 IsOffloadEntry, CodeGen); 6491 } 6492 6493 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6494 const OMPExecutableDirective &D, StringRef ParentName, 6495 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6496 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6497 // Create a unique name for the entry function using the source location 6498 // information of the current target region.
The name will be something like: 6499 // 6500 // __omp_offloading_DD_FFFF_PP_lBB 6501 // 6502 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6503 // mangled name of the function that encloses the target region and BB is the 6504 // line number of the target region. 6505 6506 unsigned DeviceID; 6507 unsigned FileID; 6508 unsigned Line; 6509 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6510 Line); 6511 SmallString<64> EntryFnName; 6512 { 6513 llvm::raw_svector_ostream OS(EntryFnName); 6514 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6515 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6516 } 6517 6518 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6519 6520 CodeGenFunction CGF(CGM, true); 6521 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6522 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6523 6524 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 6525 6526 // If this target outlined function is not an offload entry, we don't need to 6527 // register it. 6528 if (!IsOffloadEntry) 6529 return; 6530 6531 // The target region ID is used by the runtime library to identify the current 6532 // target region, so it only has to be unique and not necessarily point to 6533 // anything. It could be the pointer to the outlined function that implements 6534 // the target region, but we aren't using that, so the compiler doesn't need 6535 // to keep it alive and can therefore inline the host function if proven 6536 // worthwhile during optimization. On the other hand, if emitting code for the 6537 // device, the ID has to be the function address so that it can be retrieved 6538 // from the offloading entry and launched by the runtime library. We also mark 6539 // the outlined function to have external linkage in case we are emitting code 6540 // for the device, because these functions will be entry points to the device. 6541 6542 if (CGM.getLangOpts().OpenMPIsDevice) { 6543 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6544 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6545 OutlinedFn->setDSOLocal(false); 6546 } else { 6547 std::string Name = getName({EntryFnName, "region_id"}); 6548 OutlinedFnID = new llvm::GlobalVariable( 6549 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6550 llvm::GlobalValue::WeakAnyLinkage, 6551 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6552 } 6553 6554 // Register the information for the entry associated with this target region. 6555 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6556 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6557 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6558 } 6559 6560 /// Checks if the expression is constant or does not have non-trivial function 6561 /// calls. 6562 static bool isTrivial(ASTContext &Ctx, const Expr *E) { 6563 // We can skip constant expressions. 6564 // We can skip expressions with trivial calls or simple expressions.
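// For instance, 'int x = 3 + 4;' has a trivial initializer (it folds to a
// constant), while 'int x = f();' does not unless the call itself folds
// away; an expression with side effects is never considered trivial.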
6565 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6566 !E->hasNonTrivialCall(Ctx)) && 6567 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6568 } 6569 6570 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6571 const Stmt *Body) { 6572 const Stmt *Child = Body->IgnoreContainers(); 6573 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6574 Child = nullptr; 6575 for (const Stmt *S : C->body()) { 6576 if (const auto *E = dyn_cast<Expr>(S)) { 6577 if (isTrivial(Ctx, E)) 6578 continue; 6579 } 6580 // Some of the statements can be ignored. 6581 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6582 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6583 continue; 6584 // Analyze declarations. 6585 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6586 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6587 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6588 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6589 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6590 isa<UsingDirectiveDecl>(D) || 6591 isa<OMPDeclareReductionDecl>(D) || 6592 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6593 return true; 6594 const auto *VD = dyn_cast<VarDecl>(D); 6595 if (!VD) 6596 return false; 6597 return VD->isConstexpr() || 6598 ((VD->getType().isTrivialType(Ctx) || 6599 VD->getType()->isReferenceType()) && 6600 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6601 })) 6602 continue; 6603 } 6604 // Found multiple children - cannot get the one child only. 6605 if (Child) 6606 return nullptr; 6607 Child = S; 6608 } 6609 if (Child) 6610 Child = Child->IgnoreContainers(); 6611 } 6612 return Child; 6613 } 6614 6615 /// Emit the number of teams for a target directive. Inspect the num_teams 6616 /// clause associated with a teams construct combined or closely nested 6617 /// with the target directive. 6618 /// 6619 /// Emit a team of size one for directives such as 'target parallel' that 6620 /// have no associated teams construct. 6621 /// 6622 /// Otherwise, return nullptr. 
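/// For example (illustrative):
/// \code
/// #pragma omp target teams num_teams(8)   // emits 8
/// #pragma omp target parallel             // emits 1 (a single team)
/// #pragma omp target                      // over plain statements: nullptr
/// \endcode
/// where nullptr means the runtime library chooses the number of teams.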
6623 static llvm::Value * 6624 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6625 const OMPExecutableDirective &D) { 6626 assert(!CGF.getLangOpts().OpenMPIsDevice && 6627 "Clauses associated with the teams directive expected to be emitted " 6628 "only for the host!"); 6629 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6630 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6631 "Expected target-based executable directive."); 6632 CGBuilderTy &Bld = CGF.Builder; 6633 switch (DirectiveKind) { 6634 case OMPD_target: { 6635 const auto *CS = D.getInnermostCapturedStmt(); 6636 const auto *Body = 6637 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6638 const Stmt *ChildStmt = 6639 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6640 if (const auto *NestedDir = 6641 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6642 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6643 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6644 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6645 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6646 const Expr *NumTeams = 6647 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6648 llvm::Value *NumTeamsVal = 6649 CGF.EmitScalarExpr(NumTeams, 6650 /*IgnoreResultAssign*/ true); 6651 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6652 /*isSigned=*/true); 6653 } 6654 return Bld.getInt32(0); 6655 } 6656 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6657 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6658 return Bld.getInt32(1); 6659 return Bld.getInt32(0); 6660 } 6661 return nullptr; 6662 } 6663 case OMPD_target_teams: 6664 case OMPD_target_teams_distribute: 6665 case OMPD_target_teams_distribute_simd: 6666 case OMPD_target_teams_distribute_parallel_for: 6667 case OMPD_target_teams_distribute_parallel_for_simd: { 6668 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6669 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 6670 const Expr *NumTeams = 6671 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6672 llvm::Value *NumTeamsVal = 6673 CGF.EmitScalarExpr(NumTeams, 6674 /*IgnoreResultAssign*/ true); 6675 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6676 /*isSigned=*/true); 6677 } 6678 return Bld.getInt32(0); 6679 } 6680 case OMPD_target_parallel: 6681 case OMPD_target_parallel_for: 6682 case OMPD_target_parallel_for_simd: 6683 case OMPD_target_simd: 6684 return Bld.getInt32(1); 6685 case OMPD_parallel: 6686 case OMPD_for: 6687 case OMPD_parallel_for: 6688 case OMPD_parallel_sections: 6689 case OMPD_for_simd: 6690 case OMPD_parallel_for_simd: 6691 case OMPD_cancel: 6692 case OMPD_cancellation_point: 6693 case OMPD_ordered: 6694 case OMPD_threadprivate: 6695 case OMPD_allocate: 6696 case OMPD_task: 6697 case OMPD_simd: 6698 case OMPD_sections: 6699 case OMPD_section: 6700 case OMPD_single: 6701 case OMPD_master: 6702 case OMPD_critical: 6703 case OMPD_taskyield: 6704 case OMPD_barrier: 6705 case OMPD_taskwait: 6706 case OMPD_taskgroup: 6707 case OMPD_atomic: 6708 case OMPD_flush: 6709 case OMPD_teams: 6710 case OMPD_target_data: 6711 case OMPD_target_exit_data: 6712 case OMPD_target_enter_data: 6713 case OMPD_distribute: 6714 case OMPD_distribute_simd: 6715 case OMPD_distribute_parallel_for: 6716 case OMPD_distribute_parallel_for_simd: 6717 case OMPD_teams_distribute: 6718 case OMPD_teams_distribute_simd: 6719 case OMPD_teams_distribute_parallel_for: 6720 case OMPD_teams_distribute_parallel_for_simd: 6721 case 
OMPD_target_update: 6722 case OMPD_declare_simd: 6723 case OMPD_declare_target: 6724 case OMPD_end_declare_target: 6725 case OMPD_declare_reduction: 6726 case OMPD_declare_mapper: 6727 case OMPD_taskloop: 6728 case OMPD_taskloop_simd: 6729 case OMPD_requires: 6730 case OMPD_unknown: 6731 break; 6732 } 6733 llvm_unreachable("Unexpected directive kind."); 6734 } 6735 6736 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6737 llvm::Value *DefaultThreadLimitVal) { 6738 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6739 CGF.getContext(), CS->getCapturedStmt()); 6740 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6741 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6742 llvm::Value *NumThreads = nullptr; 6743 llvm::Value *CondVal = nullptr; 6744 // Handle the if clause. If an if clause is present, the number of threads 6745 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1. 6746 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6747 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6748 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6749 const OMPIfClause *IfClause = nullptr; 6750 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6751 if (C->getNameModifier() == OMPD_unknown || 6752 C->getNameModifier() == OMPD_parallel) { 6753 IfClause = C; 6754 break; 6755 } 6756 } 6757 if (IfClause) { 6758 const Expr *Cond = IfClause->getCondition(); 6759 bool Result; 6760 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6761 if (!Result) 6762 return CGF.Builder.getInt32(1); 6763 } else { 6764 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6765 if (const auto *PreInit = 6766 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6767 for (const auto *I : PreInit->decls()) { 6768 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6769 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6770 } else { 6771 CodeGenFunction::AutoVarEmission Emission = 6772 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6773 CGF.EmitAutoVarCleanups(Emission); 6774 } 6775 } 6776 } 6777 CondVal = CGF.EvaluateExprAsBool(Cond); 6778 } 6779 } 6780 } 6781 // Check the value of the num_threads clause iff the if clause was not 6782 // specified or did not evaluate to false. 6783 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6784 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6785 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6786 const auto *NumThreadsClause = 6787 Dir->getSingleClause<OMPNumThreadsClause>(); 6788 CodeGenFunction::LexicalScope Scope( 6789 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6790 if (const auto *PreInit = 6791 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6792 for (const auto *I : PreInit->decls()) { 6793 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6794 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6795 } else { 6796 CodeGenFunction::AutoVarEmission Emission = 6797 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6798 CGF.EmitAutoVarCleanups(Emission); 6799 } 6800 } 6801 } 6802 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6803 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6804 /*isSigned=*/false); 6805 if (DefaultThreadLimitVal) 6806 NumThreads = CGF.Builder.CreateSelect( 6807 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6808 DefaultThreadLimitVal, NumThreads); 6809 } else { 6810 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6811 : CGF.Builder.getInt32(0); 6812 } 6813 // Process the condition of the if clause.
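// I.e. the final value is 'cond ? NumThreads : 1': an if clause that
// evaluates to false at run time forces a single thread.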
6814 if (CondVal) { 6815 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6816 CGF.Builder.getInt32(1)); 6817 } 6818 return NumThreads; 6819 } 6820 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6821 return CGF.Builder.getInt32(1); 6822 return DefaultThreadLimitVal; 6823 } 6824 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6825 : CGF.Builder.getInt32(0); 6826 } 6827 6828 /// Emit the number of threads for a target directive. Inspect the 6829 /// thread_limit clause associated with a teams construct combined or closely 6830 /// nested with the target directive. 6831 /// 6832 /// Emit the num_threads clause for directives such as 'target parallel' that 6833 /// have no associated teams construct. 6834 /// 6835 /// Otherwise, return nullptr. 6836 static llvm::Value * 6837 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6838 const OMPExecutableDirective &D) { 6839 assert(!CGF.getLangOpts().OpenMPIsDevice && 6840 "Clauses associated with the teams directive expected to be emitted " 6841 "only for the host!"); 6842 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6843 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6844 "Expected target-based executable directive."); 6845 CGBuilderTy &Bld = CGF.Builder; 6846 llvm::Value *ThreadLimitVal = nullptr; 6847 llvm::Value *NumThreadsVal = nullptr; 6848 switch (DirectiveKind) { 6849 case OMPD_target: { 6850 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6851 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6852 return NumThreads; 6853 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6854 CGF.getContext(), CS->getCapturedStmt()); 6855 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6856 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6857 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6858 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6859 const auto *ThreadLimitClause = 6860 Dir->getSingleClause<OMPThreadLimitClause>(); 6861 CodeGenFunction::LexicalScope Scope( 6862 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6863 if (const auto *PreInit = 6864 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6865 for (const auto *I : PreInit->decls()) { 6866 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6867 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6868 } else { 6869 CodeGenFunction::AutoVarEmission Emission = 6870 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6871 CGF.EmitAutoVarCleanups(Emission); 6872 } 6873 } 6874 } 6875 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6876 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6877 ThreadLimitVal = 6878 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6879 } 6880 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6881 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6882 CS = Dir->getInnermostCapturedStmt(); 6883 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6884 CGF.getContext(), CS->getCapturedStmt()); 6885 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6886 } 6887 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6888 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6889 CS = Dir->getInnermostCapturedStmt(); 6890 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6891 return NumThreads; 6892 } 6893 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6894 return Bld.getInt32(1); 6895 } 6896 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6897 } 6898 case OMPD_target_teams: { 6899 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6900 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6901 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6902 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6903 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6904 ThreadLimitVal = 6905 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6906 } 6907 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6908 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6909 return NumThreads; 6910 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6911 CGF.getContext(), CS->getCapturedStmt()); 6912 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6913 if (Dir->getDirectiveKind() == OMPD_distribute) { 6914 CS = Dir->getInnermostCapturedStmt(); 6915 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6916 return NumThreads; 6917 } 6918 } 6919 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6920 } 6921 case OMPD_target_teams_distribute: 6922 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6923 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6924 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6925 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6926 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6927 ThreadLimitVal = 6928 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6929 } 6930 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6931 case OMPD_target_parallel: 6932 case OMPD_target_parallel_for: 6933 case OMPD_target_parallel_for_simd: 6934 case OMPD_target_teams_distribute_parallel_for: 6935 case OMPD_target_teams_distribute_parallel_for_simd: { 6936 llvm::Value *CondVal = nullptr; 6937 // Handle the if clause. If an if clause is present, the number of threads is 6938 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
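// For example (illustrative sketch of the clause handling that follows), for
//   #pragma omp target teams distribute parallel for \
//       if(parallel: c) num_threads(n) thread_limit(t)
// the emitted value is 'c ? min_unsigned(n, t) : 1'; with neither
// num_threads nor thread_limit present it degenerates to 0, i.e. let the
// runtime decide.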
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags.
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have
    /// the use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map.
    OMP_MAP_IMPLICIT = 0x200,
    /// The 16 MSBs of the flags indicate whether the entry is a member of
    /// some struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
  };

private:
  /// Map information for a single expression: its components plus the map
  /// type, map modifiers and related flags.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };

  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };

  /// Directive from where the map clauses were extracted.
  const OMPExecutableDirective &CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression, that means we
      // are using the whole length of the base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength())
        return ElemSize;

      llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
      LengthVal =
          CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library:
      // if we don't pass any bits, alloc/release is what the runtime is going
      // to do. Therefore, we don't need to signal anything for these two type
      // modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) !=
        MapModifiers.end())
      Bits |= OMP_MAP_ALWAYS;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLoc().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is a pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //     MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF; all subsequent
    // entries do not belong to the base struct. E.g.
    //   struct S2 s;
    //   s.ps->ps->ps->f[:]
    //     (1)  (2)  (3)  (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2), which is not a member of struct s, so it should
    // not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
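    // A sketch of the MEMBER_OF encoding itself (derived from the enum above;
    // getMemberOfFlag is the helper defined later in this class): MEMBER_OF(n)
    // stores n in the 16 MSBs of the 64-bit flag, e.g. for the 3rd argument
    //   getMemberOfFlag(/*Position=*/2) == ((uint64_t)2 + 1) << 48
    //                                   == 0x0003000000000000 // MEMBER_OF(3)
    // while the full mask OMP_MAP_MEMBER_OF == 0xffff000000000000 doubles as
    // the placeholder that setCorrectMemberOfFlag later patches with the real
    // index.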
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have
    // a pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member
    // expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (the parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress();
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
        return MappableExprsHandler::OMP_MAP_ALWAYS |
               MappableExprsHandler::OMP_MAP_TO;
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // The MEMBER_OF field is stored in the 16 MSBs of the flag, so shift left
    // by 48 bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << 48);
  }

  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
                         MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                         MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct) const {
    // Base is the base of the struct.
    BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element.
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    Pointers.push_back(LB);
    // Size is (addr of {highest+1} element) - (addr of lowest element).
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    Sizes.push_back(Size);
    // Map type is always TARGET_PARAM.
    Types.push_back(OMP_MAP_TARGET_PARAM);
    // Remove TARGET_PARAM flag from the first element.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
      for (const auto &L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero-size
    // section. It is the user's fault if it was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C :
         this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto &L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information; generate a
        // zero-size array section. If the pointer is a struct member, we
        // defer this action until the whole struct has been processed.
        // FIXME: MSVC 2013 seems to require this-> to find member CGF.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
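          // For example (a hypothetical clause pair, shown only to illustrate
          // the deferral): with 'map(tofrom: s.f[:22]) use_device_ptr(s.p)'
          // on the same directive, s.p has no map entry of its own; the
          // InfoGen call below records a placeholder component list so the
          // partial-struct range still accounts for s.p, and the actual
          // PTR_AND_OBJ | RETURN_PARAM entry is appended only after the
          // struct's combined entry has been emitted.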
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays.
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        // FIXME: MSVC 2013 seems to require this-> to find the member method.
        this->generateInfoForComponentList(
            L.MapType, L.MapModifiers, L.Components, CurBasePointers,
            CurPointers, CurSizes, CurTypes, PartialStruct,
            IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }

  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(ThisLVal.getPointer());
      Pointers.push_back(ThisLValVal.getPointer());
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
        BasePointers.push_back(VarLVal.getPointer());
        Pointers.push_back(VarLValVal.getPointer());
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
        BasePointers.push_back(VarLVal.getPointer());
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }

  /// Set correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set the correct MEMBER_OF index for all implicit lambda captures.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component associated with a capture.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
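    // A sketch of what counts as an overlap (hypothetical clauses, for
    // illustration only): given 'map(tofrom: s) map(to: s.f[:22])' for the
    // same capture, walking both component lists from the base exhausts the
    // list of 's' first, so 's' becomes the base ("overlapped") datum and the
    // components of 's.f[:22]' are recorded against it in OverlappedData.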
8201 llvm::SmallDenseMap< 8202 const MapData *, 8203 llvm::SmallVector< 8204 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8205 4> 8206 OverlappedData; 8207 size_t Count = 0; 8208 for (const MapData &L : DeclComponentLists) { 8209 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8210 OpenMPMapClauseKind MapType; 8211 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8212 bool IsImplicit; 8213 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8214 ++Count; 8215 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8216 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8217 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1; 8218 auto CI = Components.rbegin(); 8219 auto CE = Components.rend(); 8220 auto SI = Components1.rbegin(); 8221 auto SE = Components1.rend(); 8222 for (; CI != CE && SI != SE; ++CI, ++SI) { 8223 if (CI->getAssociatedExpression()->getStmtClass() != 8224 SI->getAssociatedExpression()->getStmtClass()) 8225 break; 8226 // Are we dealing with different variables/fields? 8227 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8228 break; 8229 } 8230 // Found overlapping if, at least for one component, reached the head of 8231 // the components list. 8232 if (CI == CE || SI == SE) { 8233 assert((CI != CE || SI != SE) && 8234 "Unexpected full match of the mapping components."); 8235 const MapData &BaseData = CI == CE ? L : L1; 8236 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8237 SI == SE ? Components : Components1; 8238 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8239 OverlappedElements.getSecond().push_back(SubData); 8240 } 8241 } 8242 } 8243 // Sort the overlapped elements for each item. 8244 llvm::SmallVector<const FieldDecl *, 4> Layout; 8245 if (!OverlappedData.empty()) { 8246 if (const auto *CRD = 8247 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8248 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8249 else { 8250 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8251 Layout.append(RD->field_begin(), RD->field_end()); 8252 } 8253 } 8254 for (auto &Pair : OverlappedData) { 8255 llvm::sort( 8256 Pair.getSecond(), 8257 [&Layout]( 8258 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8259 OMPClauseMappableExprCommon::MappableExprComponentListRef 8260 Second) { 8261 auto CI = First.rbegin(); 8262 auto CE = First.rend(); 8263 auto SI = Second.rbegin(); 8264 auto SE = Second.rend(); 8265 for (; CI != CE && SI != SE; ++CI, ++SI) { 8266 if (CI->getAssociatedExpression()->getStmtClass() != 8267 SI->getAssociatedExpression()->getStmtClass()) 8268 break; 8269 // Are we dealing with different variables/fields? 8270 if (CI->getAssociatedDeclaration() != 8271 SI->getAssociatedDeclaration()) 8272 break; 8273 } 8274 8275 // Lists contain the same elements. 8276 if (CI == CE && SI == SE) 8277 return false; 8278 8279 // List with less elements is less than list with more elements. 
8280 if (CI == CE || SI == SE) 8281 return CI == CE; 8282 8283 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8284 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8285 if (FD1->getParent() == FD2->getParent()) 8286 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8287 const auto It = 8288 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8289 return FD == FD1 || FD == FD2; 8290 }); 8291 return *It == FD1; 8292 }); 8293 } 8294 8295 // Associated with a capture, because the mapping flags depend on it. 8296 // Go through all of the elements with the overlapped elements. 8297 for (const auto &Pair : OverlappedData) { 8298 const MapData &L = *Pair.getFirst(); 8299 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8300 OpenMPMapClauseKind MapType; 8301 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8302 bool IsImplicit; 8303 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8304 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8305 OverlappedComponents = Pair.getSecond(); 8306 bool IsFirstComponentList = true; 8307 generateInfoForComponentList(MapType, MapModifiers, Components, 8308 BasePointers, Pointers, Sizes, Types, 8309 PartialStruct, IsFirstComponentList, 8310 IsImplicit, OverlappedComponents); 8311 } 8312 // Go through other elements without overlapped elements. 8313 bool IsFirstComponentList = OverlappedData.empty(); 8314 for (const MapData &L : DeclComponentLists) { 8315 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8316 OpenMPMapClauseKind MapType; 8317 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8318 bool IsImplicit; 8319 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8320 auto It = OverlappedData.find(&L); 8321 if (It == OverlappedData.end()) 8322 generateInfoForComponentList(MapType, MapModifiers, Components, 8323 BasePointers, Pointers, Sizes, Types, 8324 PartialStruct, IsFirstComponentList, 8325 IsImplicit); 8326 IsFirstComponentList = false; 8327 } 8328 } 8329 8330 /// Generate the base pointers, section pointers, sizes and map types 8331 /// associated with the declare target link variables. 8332 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 8333 MapValuesArrayTy &Pointers, 8334 MapValuesArrayTy &Sizes, 8335 MapFlagsArrayTy &Types) const { 8336 // Map other list items in the map clause which are not captured variables 8337 // but "declare target link" global variables. 8338 for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { 8339 for (const auto &L : C->component_lists()) { 8340 if (!L.first) 8341 continue; 8342 const auto *VD = dyn_cast<VarDecl>(L.first); 8343 if (!VD) 8344 continue; 8345 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8346 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8347 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8348 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8349 continue; 8350 StructRangeInfoTy PartialStruct; 8351 generateInfoForComponentList( 8352 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8353 Pointers, Sizes, Types, PartialStruct, 8354 /*IsFirstComponentList=*/true, C->isImplicit()); 8355 assert(!PartialStruct.Base.isValid() && 8356 "No partial structs for declare target link expected."); 8357 } 8358 } 8359 } 8360 8361 /// Generate the default map information for a given capture \a CI, 8362 /// record field declaration \a RI and captured value \a CV. 
8363 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8364 const FieldDecl &RI, llvm::Value *CV, 8365 MapBaseValuesArrayTy &CurBasePointers, 8366 MapValuesArrayTy &CurPointers, 8367 MapValuesArrayTy &CurSizes, 8368 MapFlagsArrayTy &CurMapTypes) const { 8369 bool IsImplicit = true; 8370 // Do the default mapping. 8371 if (CI.capturesThis()) { 8372 CurBasePointers.push_back(CV); 8373 CurPointers.push_back(CV); 8374 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8375 CurSizes.push_back( 8376 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8377 CGF.Int64Ty, /*isSigned=*/true)); 8378 // Default map type. 8379 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8380 } else if (CI.capturesVariableByCopy()) { 8381 CurBasePointers.push_back(CV); 8382 CurPointers.push_back(CV); 8383 if (!RI.getType()->isAnyPointerType()) { 8384 // We have to signal to the runtime captures passed by value that are 8385 // not pointers. 8386 CurMapTypes.push_back(OMP_MAP_LITERAL); 8387 CurSizes.push_back(CGF.Builder.CreateIntCast( 8388 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8389 } else { 8390 // Pointers are implicitly mapped with a zero size and no flags 8391 // (other than first map that is added for all implicit maps). 8392 CurMapTypes.push_back(OMP_MAP_NONE); 8393 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8394 } 8395 const VarDecl *VD = CI.getCapturedVar(); 8396 auto I = FirstPrivateDecls.find(VD); 8397 if (I != FirstPrivateDecls.end()) 8398 IsImplicit = I->getSecond(); 8399 } else { 8400 assert(CI.capturesVariable() && "Expected captured reference."); 8401 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8402 QualType ElementType = PtrTy->getPointeeType(); 8403 CurSizes.push_back(CGF.Builder.CreateIntCast( 8404 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8405 // The default map type for a scalar/complex type is 'to' because by 8406 // default the value doesn't have to be retrieved. For an aggregate 8407 // type, the default is 'tofrom'. 8408 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8409 const VarDecl *VD = CI.getCapturedVar(); 8410 auto I = FirstPrivateDecls.find(VD); 8411 if (I != FirstPrivateDecls.end() && 8412 VD->getType().isConstant(CGF.getContext())) { 8413 llvm::Constant *Addr = 8414 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8415 // Copy the value of the original variable to the new global copy. 8416 CGF.Builder.CreateMemCpy( 8417 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), 8418 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8419 CurSizes.back(), /*IsVolatile=*/false); 8420 // Use new global variable as the base pointers. 8421 CurBasePointers.push_back(Addr); 8422 CurPointers.push_back(Addr); 8423 } else { 8424 CurBasePointers.push_back(CV); 8425 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8426 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8427 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8428 AlignmentSource::Decl)); 8429 CurPointers.push_back(PtrAddr.getPointer()); 8430 } else { 8431 CurPointers.push_back(CV); 8432 } 8433 } 8434 if (I != FirstPrivateDecls.end()) 8435 IsImplicit = I->getSecond(); 8436 } 8437 // Every default map produces a single argument which is a target parameter. 8438 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8439 8440 // Add flag stating this is an implicit map. 
8441 if (IsImplicit) 8442 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8443 } 8444 }; 8445 } // anonymous namespace 8446 8447 /// Emit the arrays used to pass the captures and map information to the 8448 /// offloading runtime library. If there is no map or capture information, 8449 /// return nullptr by reference. 8450 static void 8451 emitOffloadingArrays(CodeGenFunction &CGF, 8452 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8453 MappableExprsHandler::MapValuesArrayTy &Pointers, 8454 MappableExprsHandler::MapValuesArrayTy &Sizes, 8455 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8456 CGOpenMPRuntime::TargetDataInfo &Info) { 8457 CodeGenModule &CGM = CGF.CGM; 8458 ASTContext &Ctx = CGF.getContext(); 8459 8460 // Reset the array information. 8461 Info.clearArrayInfo(); 8462 Info.NumberOfPtrs = BasePointers.size(); 8463 8464 if (Info.NumberOfPtrs) { 8465 // Detect if we have any capture size requiring runtime evaluation of the 8466 // size so that a constant array could be eventually used. 8467 bool hasRuntimeEvaluationCaptureSize = false; 8468 for (llvm::Value *S : Sizes) 8469 if (!isa<llvm::Constant>(S)) { 8470 hasRuntimeEvaluationCaptureSize = true; 8471 break; 8472 } 8473 8474 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8475 QualType PointerArrayType = 8476 Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 8477 /*IndexTypeQuals=*/0); 8478 8479 Info.BasePointersArray = 8480 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8481 Info.PointersArray = 8482 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8483 8484 // If we don't have any VLA types or other types that require runtime 8485 // evaluation, we can use a constant array for the map sizes, otherwise we 8486 // need to fill up the arrays as we do for the pointers. 8487 QualType Int64Ty = 8488 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8489 if (hasRuntimeEvaluationCaptureSize) { 8490 QualType SizeArrayType = 8491 Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal, 8492 /*IndexTypeQuals=*/0); 8493 Info.SizesArray = 8494 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8495 } else { 8496 // We expect all the sizes to be constant, so we collect them to create 8497 // a constant array. 8498 SmallVector<llvm::Constant *, 16> ConstSizes; 8499 for (llvm::Value *S : Sizes) 8500 ConstSizes.push_back(cast<llvm::Constant>(S)); 8501 8502 auto *SizesArrayInit = llvm::ConstantArray::get( 8503 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8504 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8505 auto *SizesArrayGbl = new llvm::GlobalVariable( 8506 CGM.getModule(), SizesArrayInit->getType(), 8507 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8508 SizesArrayInit, Name); 8509 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8510 Info.SizesArray = SizesArrayGbl; 8511 } 8512 8513 // The map types are always constant so we don't need to generate code to 8514 // fill arrays. Instead, we create an array constant. 
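    // For a concrete picture (illustrative; the exact names come from
    // getName and may differ per target): mapping a single 4-byte 'int'
    // with a compile-time-known size might produce module-level constants
    // along the lines of
    //   @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4]
    //   @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 800]
    // where 800 == OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM | OMP_MAP_IMPLICIT.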
8515     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8516     llvm::copy(MapTypes, Mapping.begin());
8517     llvm::Constant *MapTypesArrayInit =
8518         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8519     std::string MaptypesName =
8520         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8521     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8522         CGM.getModule(), MapTypesArrayInit->getType(),
8523         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8524         MapTypesArrayInit, MaptypesName);
8525     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8526     Info.MapTypesArray = MapTypesArrayGbl;
8527
8528     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8529       llvm::Value *BPVal = *BasePointers[I];
8530       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8531           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8532           Info.BasePointersArray, 0, I);
8533       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8534           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8535       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8536       CGF.Builder.CreateStore(BPVal, BPAddr);
8537
8538       if (Info.requiresDevicePointerInfo())
8539         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8540           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8541
8542       llvm::Value *PVal = Pointers[I];
8543       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8544           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8545           Info.PointersArray, 0, I);
8546       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8547           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8548       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8549       CGF.Builder.CreateStore(PVal, PAddr);
8550
8551       if (hasRuntimeEvaluationCaptureSize) {
8552         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8553             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8554             Info.SizesArray,
8555             /*Idx0=*/0,
8556             /*Idx1=*/I);
8557         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8558         CGF.Builder.CreateStore(
8559             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8560             SAddr);
8561       }
8562     }
8563   }
8564 }
8565 /// Emit the arguments to be passed to the runtime library based on the
8566 /// arrays of pointers, sizes and map types.
8567 static void emitOffloadingArraysArgument(
8568     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8569     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8570     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8571   CodeGenModule &CGM = CGF.CGM;
8572   if (Info.NumberOfPtrs) {
8573     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8574         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8575         Info.BasePointersArray,
8576         /*Idx0=*/0, /*Idx1=*/0);
8577     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8578         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8579         Info.PointersArray,
8580         /*Idx0=*/0,
8581         /*Idx1=*/0);
8582     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8583         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8584         /*Idx0=*/0, /*Idx1=*/0);
8585     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8586         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8587         Info.MapTypesArray,
8588         /*Idx0=*/0,
8589         /*Idx1=*/0);
8590   } else {
8591     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8592     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8593     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8594     MapTypesArrayArg =
8595         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8596   }
8597 }
8598
8599 /// Check for inner distribute directive.
8600 static const OMPExecutableDirective *
8601 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8602   const auto *CS = D.getInnermostCapturedStmt();
8603   const auto *Body =
8604       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8605   const Stmt *ChildStmt =
8606       CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
8607
8608   if (const auto *NestedDir =
8609           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8610     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8611     switch (D.getDirectiveKind()) {
8612     case OMPD_target:
8613       if (isOpenMPDistributeDirective(DKind))
8614         return NestedDir;
8615       if (DKind == OMPD_teams) {
8616         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8617             /*IgnoreCaptured=*/true);
8618         if (!Body)
8619           return nullptr;
8620         ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
8621         if (const auto *NND =
8622                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8623           DKind = NND->getDirectiveKind();
8624           if (isOpenMPDistributeDirective(DKind))
8625             return NND;
8626         }
8627       }
8628       return nullptr;
8629     case OMPD_target_teams:
8630       if (isOpenMPDistributeDirective(DKind))
8631         return NestedDir;
8632       return nullptr;
8633     case OMPD_target_parallel:
8634     case OMPD_target_simd:
8635     case OMPD_target_parallel_for:
8636     case OMPD_target_parallel_for_simd:
8637       return nullptr;
8638     case OMPD_target_teams_distribute:
8639     case OMPD_target_teams_distribute_simd:
8640     case OMPD_target_teams_distribute_parallel_for:
8641     case OMPD_target_teams_distribute_parallel_for_simd:
8642     case OMPD_parallel:
8643     case OMPD_for:
8644     case OMPD_parallel_for:
8645     case OMPD_parallel_sections:
8646     case OMPD_for_simd:
8647     case OMPD_parallel_for_simd:
8648     case OMPD_cancel:
8649     case OMPD_cancellation_point:
8650     case OMPD_ordered:
8651     case OMPD_threadprivate:
8652     case OMPD_allocate:
8653     case OMPD_task:
8654     case OMPD_simd:
8655     case OMPD_sections:
8656     case OMPD_section:
8657     case OMPD_single:
8658     case OMPD_master:
8659     case OMPD_critical:
8660     case OMPD_taskyield:
8661     case OMPD_barrier:
8662 case OMPD_taskwait: 8663 case OMPD_taskgroup: 8664 case OMPD_atomic: 8665 case OMPD_flush: 8666 case OMPD_teams: 8667 case OMPD_target_data: 8668 case OMPD_target_exit_data: 8669 case OMPD_target_enter_data: 8670 case OMPD_distribute: 8671 case OMPD_distribute_simd: 8672 case OMPD_distribute_parallel_for: 8673 case OMPD_distribute_parallel_for_simd: 8674 case OMPD_teams_distribute: 8675 case OMPD_teams_distribute_simd: 8676 case OMPD_teams_distribute_parallel_for: 8677 case OMPD_teams_distribute_parallel_for_simd: 8678 case OMPD_target_update: 8679 case OMPD_declare_simd: 8680 case OMPD_declare_target: 8681 case OMPD_end_declare_target: 8682 case OMPD_declare_reduction: 8683 case OMPD_declare_mapper: 8684 case OMPD_taskloop: 8685 case OMPD_taskloop_simd: 8686 case OMPD_requires: 8687 case OMPD_unknown: 8688 llvm_unreachable("Unexpected directive."); 8689 } 8690 } 8691 8692 return nullptr; 8693 } 8694 8695 void CGOpenMPRuntime::emitTargetNumIterationsCall( 8696 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, 8697 const llvm::function_ref<llvm::Value *( 8698 CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) { 8699 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 8700 const OMPExecutableDirective *TD = &D; 8701 // Get nested teams distribute kind directive, if any. 8702 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 8703 TD = getNestedDistributeDirective(CGM.getContext(), D); 8704 if (!TD) 8705 return; 8706 const auto *LD = cast<OMPLoopDirective>(TD); 8707 auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF, 8708 PrePostActionTy &) { 8709 llvm::Value *NumIterations = SizeEmitter(CGF, *LD); 8710 8711 // Emit device ID if any. 8712 llvm::Value *DeviceID; 8713 if (Device) 8714 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 8715 CGF.Int64Ty, /*isSigned=*/true); 8716 else 8717 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 8718 8719 llvm::Value *Args[] = {DeviceID, NumIterations}; 8720 CGF.EmitRuntimeCall( 8721 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 8722 }; 8723 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 8724 } 8725 8726 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 8727 const OMPExecutableDirective &D, 8728 llvm::Function *OutlinedFn, 8729 llvm::Value *OutlinedFnID, 8730 const Expr *IfCond, const Expr *Device) { 8731 if (!CGF.HaveInsertPoint()) 8732 return; 8733 8734 assert(OutlinedFn && "Invalid outlined function!"); 8735 8736 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 8737 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 8738 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 8739 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 8740 PrePostActionTy &) { 8741 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8742 }; 8743 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 8744 8745 CodeGenFunction::OMPTargetDataInfo InputInfo; 8746 llvm::Value *MapTypesArray = nullptr; 8747 // Fill up the pointer arrays and transfer execution to the device. 8748 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 8749 &MapTypesArray, &CS, RequiresOuterTask, 8750 &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { 8751 // On top of the arrays that were filled up, the target offloading call 8752 // takes as arguments the device id as well as the host pointer. 
The host
8753     // pointer is used by the runtime library to identify the current target
8754     // region, so it only has to be unique and not necessarily point to
8755     // anything. It could be the pointer to the outlined function that
8756     // implements the target region, but we aren't using it, so the
8757     // compiler doesn't need to keep it around and can therefore inline the
8758     // host function if proven worthwhile during optimization.
8759
8760     // From this point on, we need to have an ID of the target region defined.
8761     assert(OutlinedFnID && "Invalid outlined function ID!");
8762
8763     // Emit device ID if any.
8764     llvm::Value *DeviceID;
8765     if (Device) {
8766       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8767                                            CGF.Int64Ty, /*isSigned=*/true);
8768     } else {
8769       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8770     }
8771
8772     // Emit the number of elements in the offloading arrays.
8773     llvm::Value *PointerNum =
8774         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
8775
8776     // Return value of the runtime offloading call.
8777     llvm::Value *Return;
8778
8779     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
8780     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
8781
8782     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
8783     // The target region is an outlined function launched by the runtime
8784     // via calls to __tgt_target() or __tgt_target_teams().
8785     //
8786     // __tgt_target() launches a target region with one team and one thread,
8787     // executing a serial region. This master thread may in turn launch
8788     // more threads within its team upon encountering a parallel region;
8789     // however, no additional teams can be launched on the device.
8790     //
8791     // __tgt_target_teams() launches a target region with one or more teams,
8792     // each with one or more threads. This call is required for target
8793     // constructs such as:
8794     //     'target teams'
8795     //     'target' / 'teams'
8796     //     'target teams distribute parallel for'
8797     //     'target parallel'
8798     // and so on.
8799     //
8800     // Note that on the host and CPU targets, the runtime implementation of
8801     // these calls simply calls the outlined function without forking threads.
8802     // The outlined functions themselves have runtime calls to
8803     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
8804     // the compiler in emitTeamsCall() and emitParallelCall().
8805     //
8806     // In contrast, on the NVPTX target, the implementation of
8807     // __tgt_target_teams() launches a GPU kernel with the requested number
8808     // of teams and threads so no additional calls to the runtime are required.
8809     if (NumTeams) {
8810       // If we have NumTeams defined this means that we have an enclosed teams
8811       // region. Therefore we also expect to have NumThreads defined. These two
8812       // values should be defined in the presence of a teams directive,
8813       // regardless of having any clauses associated. If the user is using teams
8814       // but no clauses, these two values will be the default that should be
8815       // passed to the runtime library - a 32-bit integer with the value zero.
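    // By way of example (illustrative): with these entry points,
    //   #pragma omp target
    // is launched through __tgt_target() (or __tgt_target_nowait() with a
    // 'nowait' clause), while
    //   #pragma omp target teams num_teams(4) thread_limit(64)
    // goes through __tgt_target_teams() with NumTeams==4 and NumThreads==64;
    // without those clauses both values default to 0, leaving the choice to
    // the runtime.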
8816 assert(NumThreads && "Thread limit expression should be available along " 8817 "with number of teams."); 8818 llvm::Value *OffloadingArgs[] = {DeviceID, 8819 OutlinedFnID, 8820 PointerNum, 8821 InputInfo.BasePointersArray.getPointer(), 8822 InputInfo.PointersArray.getPointer(), 8823 InputInfo.SizesArray.getPointer(), 8824 MapTypesArray, 8825 NumTeams, 8826 NumThreads}; 8827 Return = CGF.EmitRuntimeCall( 8828 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 8829 : OMPRTL__tgt_target_teams), 8830 OffloadingArgs); 8831 } else { 8832 llvm::Value *OffloadingArgs[] = {DeviceID, 8833 OutlinedFnID, 8834 PointerNum, 8835 InputInfo.BasePointersArray.getPointer(), 8836 InputInfo.PointersArray.getPointer(), 8837 InputInfo.SizesArray.getPointer(), 8838 MapTypesArray}; 8839 Return = CGF.EmitRuntimeCall( 8840 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait 8841 : OMPRTL__tgt_target), 8842 OffloadingArgs); 8843 } 8844 8845 // Check the error code and execute the host version if required. 8846 llvm::BasicBlock *OffloadFailedBlock = 8847 CGF.createBasicBlock("omp_offload.failed"); 8848 llvm::BasicBlock *OffloadContBlock = 8849 CGF.createBasicBlock("omp_offload.cont"); 8850 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 8851 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 8852 8853 CGF.EmitBlock(OffloadFailedBlock); 8854 if (RequiresOuterTask) { 8855 CapturedVars.clear(); 8856 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8857 } 8858 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 8859 CGF.EmitBranch(OffloadContBlock); 8860 8861 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 8862 }; 8863 8864 // Notify that the host version must be executed. 8865 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 8866 RequiresOuterTask](CodeGenFunction &CGF, 8867 PrePostActionTy &) { 8868 if (RequiresOuterTask) { 8869 CapturedVars.clear(); 8870 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8871 } 8872 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 8873 }; 8874 8875 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 8876 &CapturedVars, RequiresOuterTask, 8877 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 8878 // Fill up the arrays with all the captured variables. 8879 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8880 MappableExprsHandler::MapValuesArrayTy Pointers; 8881 MappableExprsHandler::MapValuesArrayTy Sizes; 8882 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8883 8884 // Get mappable expression information. 8885 MappableExprsHandler MEHandler(D, CGF); 8886 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 8887 8888 auto RI = CS.getCapturedRecordDecl()->field_begin(); 8889 auto CV = CapturedVars.begin(); 8890 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 8891 CE = CS.capture_end(); 8892 CI != CE; ++CI, ++RI, ++CV) { 8893 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 8894 MappableExprsHandler::MapValuesArrayTy CurPointers; 8895 MappableExprsHandler::MapValuesArrayTy CurSizes; 8896 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 8897 MappableExprsHandler::StructRangeInfoTy PartialStruct; 8898 8899 // VLA sizes are passed to the outlined region by copy and do not have map 8900 // information associated. 
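      // For instance (a sketch): in
      //   void foo(int N) {
      //     double A[N];
      //   #pragma omp target
      //     A[0] = 1.0;
      //   }
      // the VLA bound 'N' is captured solely so the device can reconstruct
      // the type of 'A', and is sent as a by-value literal with the flags
      // pushed below; the array 'A' itself is mapped as a separate, ordinary
      // capture.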
8901 if (CI->capturesVariableArrayType()) { 8902 CurBasePointers.push_back(*CV); 8903 CurPointers.push_back(*CV); 8904 CurSizes.push_back(CGF.Builder.CreateIntCast( 8905 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 8906 // Copy to the device as an argument. No need to retrieve it. 8907 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 8908 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 8909 MappableExprsHandler::OMP_MAP_IMPLICIT); 8910 } else { 8911 // If we have any information in the map clause, we use it, otherwise we 8912 // just do a default mapping. 8913 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 8914 CurSizes, CurMapTypes, PartialStruct); 8915 if (CurBasePointers.empty()) 8916 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 8917 CurPointers, CurSizes, CurMapTypes); 8918 // Generate correct mapping for variables captured by reference in 8919 // lambdas. 8920 if (CI->capturesVariable()) 8921 MEHandler.generateInfoForLambdaCaptures( 8922 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, 8923 CurMapTypes, LambdaPointers); 8924 } 8925 // We expect to have at least an element of information for this capture. 8926 assert(!CurBasePointers.empty() && 8927 "Non-existing map pointer for capture!"); 8928 assert(CurBasePointers.size() == CurPointers.size() && 8929 CurBasePointers.size() == CurSizes.size() && 8930 CurBasePointers.size() == CurMapTypes.size() && 8931 "Inconsistent map information sizes!"); 8932 8933 // If there is an entry in PartialStruct it means we have a struct with 8934 // individual members mapped. Emit an extra combined entry. 8935 if (PartialStruct.Base.isValid()) 8936 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, 8937 CurMapTypes, PartialStruct); 8938 8939 // We need to append the results of this capture to what we already have. 8940 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8941 Pointers.append(CurPointers.begin(), CurPointers.end()); 8942 Sizes.append(CurSizes.begin(), CurSizes.end()); 8943 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 8944 } 8945 // Adjust MEMBER_OF flags for the lambdas captures. 8946 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, 8947 Pointers, MapTypes); 8948 // Map other list items in the map clause which are not captured variables 8949 // but "declare target link" global variables. 8950 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, 8951 MapTypes); 8952 8953 TargetDataInfo Info; 8954 // Fill up the arrays and create the arguments. 
8955     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
8956     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
8957                                  Info.PointersArray, Info.SizesArray,
8958                                  Info.MapTypesArray, Info);
8959     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
8960     InputInfo.BasePointersArray =
8961         Address(Info.BasePointersArray, CGM.getPointerAlign());
8962     InputInfo.PointersArray =
8963         Address(Info.PointersArray, CGM.getPointerAlign());
8964     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
8965     MapTypesArray = Info.MapTypesArray;
8966     if (RequiresOuterTask)
8967       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
8968     else
8969       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
8970   };
8971
8972   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
8973                              CodeGenFunction &CGF, PrePostActionTy &) {
8974     if (RequiresOuterTask) {
8975       CodeGenFunction::OMPTargetDataInfo InputInfo;
8976       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
8977     } else {
8978       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
8979     }
8980   };
8981
8982   // If we have a target function ID it means that we need to support
8983   // offloading; otherwise, we just execute on the host. We need to execute
8984   // on the host regardless of the condition in the if clause if, e.g., the
8985   // user does not specify target triples.
8986   if (OutlinedFnID) {
8987     if (IfCond) {
8988       emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
8989     } else {
8990       RegionCodeGenTy ThenRCG(TargetThenGen);
8991       ThenRCG(CGF);
8992     }
8993   } else {
8994     RegionCodeGenTy ElseRCG(TargetElseGen);
8995     ElseRCG(CGF);
8996   }
8997 }
8998
8999 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9000                                                     StringRef ParentName) {
9001   if (!S)
9002     return;
9003
9004   // Codegen OMP target directives that offload compute to the device.
9005   bool RequiresDeviceCodegen =
9006       isa<OMPExecutableDirective>(S) &&
9007       isOpenMPTargetExecutionDirective(
9008           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9009
9010   if (RequiresDeviceCodegen) {
9011     const auto &E = *cast<OMPExecutableDirective>(S);
9012     unsigned DeviceID;
9013     unsigned FileID;
9014     unsigned Line;
9015     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9016                              FileID, Line);
9017
9018     // Is this a target region that should not be emitted as an entry point? If
9019     // so just signal we are done with this target region.
9020 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9021 ParentName, Line)) 9022 return; 9023 9024 switch (E.getDirectiveKind()) { 9025 case OMPD_target: 9026 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9027 cast<OMPTargetDirective>(E)); 9028 break; 9029 case OMPD_target_parallel: 9030 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9031 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9032 break; 9033 case OMPD_target_teams: 9034 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9035 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9036 break; 9037 case OMPD_target_teams_distribute: 9038 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9039 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9040 break; 9041 case OMPD_target_teams_distribute_simd: 9042 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9043 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9044 break; 9045 case OMPD_target_parallel_for: 9046 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9047 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9048 break; 9049 case OMPD_target_parallel_for_simd: 9050 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9051 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9052 break; 9053 case OMPD_target_simd: 9054 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9055 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9056 break; 9057 case OMPD_target_teams_distribute_parallel_for: 9058 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9059 CGM, ParentName, 9060 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9061 break; 9062 case OMPD_target_teams_distribute_parallel_for_simd: 9063 CodeGenFunction:: 9064 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9065 CGM, ParentName, 9066 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9067 break; 9068 case OMPD_parallel: 9069 case OMPD_for: 9070 case OMPD_parallel_for: 9071 case OMPD_parallel_sections: 9072 case OMPD_for_simd: 9073 case OMPD_parallel_for_simd: 9074 case OMPD_cancel: 9075 case OMPD_cancellation_point: 9076 case OMPD_ordered: 9077 case OMPD_threadprivate: 9078 case OMPD_allocate: 9079 case OMPD_task: 9080 case OMPD_simd: 9081 case OMPD_sections: 9082 case OMPD_section: 9083 case OMPD_single: 9084 case OMPD_master: 9085 case OMPD_critical: 9086 case OMPD_taskyield: 9087 case OMPD_barrier: 9088 case OMPD_taskwait: 9089 case OMPD_taskgroup: 9090 case OMPD_atomic: 9091 case OMPD_flush: 9092 case OMPD_teams: 9093 case OMPD_target_data: 9094 case OMPD_target_exit_data: 9095 case OMPD_target_enter_data: 9096 case OMPD_distribute: 9097 case OMPD_distribute_simd: 9098 case OMPD_distribute_parallel_for: 9099 case OMPD_distribute_parallel_for_simd: 9100 case OMPD_teams_distribute: 9101 case OMPD_teams_distribute_simd: 9102 case OMPD_teams_distribute_parallel_for: 9103 case OMPD_teams_distribute_parallel_for_simd: 9104 case OMPD_target_update: 9105 case OMPD_declare_simd: 9106 case OMPD_declare_target: 9107 case OMPD_end_declare_target: 9108 case OMPD_declare_reduction: 9109 case OMPD_declare_mapper: 9110 case OMPD_taskloop: 9111 case OMPD_taskloop_simd: 9112 case OMPD_requires: 9113 case OMPD_unknown: 9114 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 9115 } 9116 return; 9117 } 9118 9119 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9120 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9121 
return;
9122
9123     scanForTargetRegionsFunctions(
9124         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9125     return;
9126   }
9127
9128   // If this is a lambda function, look into its body.
9129   if (const auto *L = dyn_cast<LambdaExpr>(S))
9130     S = L->getBody();
9131
9132   // Keep looking for target regions recursively.
9133   for (const Stmt *II : S->children())
9134     scanForTargetRegionsFunctions(II, ParentName);
9135 }
9136
9137 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9138   // If emitting code for the host, we do not process FD here. Instead we do
9139   // the normal code generation.
9140   if (!CGM.getLangOpts().OpenMPIsDevice)
9141     return false;
9142
9143   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9144   StringRef Name = CGM.getMangledName(GD);
9145   // Try to detect target regions in the function.
9146   if (const auto *FD = dyn_cast<FunctionDecl>(VD))
9147     scanForTargetRegionsFunctions(FD->getBody(), Name);
9148
9149   // Do not emit the function if it is not marked as declare target.
9150   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9151          AlreadyEmittedTargetFunctions.count(Name) == 0;
9152 }
9153
9154 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9155   if (!CGM.getLangOpts().OpenMPIsDevice)
9156     return false;
9157
9158   // Check if there are Ctors/Dtors in this declaration and look for target
9159   // regions in it. We use the complete variant to produce the kernel name
9160   // mangling.
9161   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9162   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9163     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9164       StringRef ParentName =
9165           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9166       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9167     }
9168     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9169       StringRef ParentName =
9170           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9171       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9172     }
9173   }
9174
9175   // Do not emit the variable if it is not marked as declare target.
9176 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9177 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9178 cast<VarDecl>(GD.getDecl())); 9179 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9180 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9181 HasRequiresUnifiedSharedMemory)) { 9182 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9183 return true; 9184 } 9185 return false; 9186 } 9187 9188 llvm::Constant * 9189 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9190 const VarDecl *VD) { 9191 assert(VD->getType().isConstant(CGM.getContext()) && 9192 "Expected constant variable."); 9193 StringRef VarName; 9194 llvm::Constant *Addr; 9195 llvm::GlobalValue::LinkageTypes Linkage; 9196 QualType Ty = VD->getType(); 9197 SmallString<128> Buffer; 9198 { 9199 unsigned DeviceID; 9200 unsigned FileID; 9201 unsigned Line; 9202 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9203 FileID, Line); 9204 llvm::raw_svector_ostream OS(Buffer); 9205 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9206 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9207 VarName = OS.str(); 9208 } 9209 Linkage = llvm::GlobalValue::InternalLinkage; 9210 Addr = 9211 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9212 getDefaultFirstprivateAddressSpace()); 9213 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9214 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9215 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9216 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9217 VarName, Addr, VarSize, 9218 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9219 return Addr; 9220 } 9221 9222 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9223 llvm::Constant *Addr) { 9224 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9225 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9226 if (!Res) { 9227 if (CGM.getLangOpts().OpenMPIsDevice) { 9228 // Register non-target variables being emitted in device code (debug info 9229 // may cause this). 9230 StringRef VarName = CGM.getMangledName(VD); 9231 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9232 } 9233 return; 9234 } 9235 // Register declare target variables. 9236 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9237 StringRef VarName; 9238 CharUnits VarSize; 9239 llvm::GlobalValue::LinkageTypes Linkage; 9240 9241 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9242 !HasRequiresUnifiedSharedMemory) { 9243 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9244 VarName = CGM.getMangledName(VD); 9245 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9246 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9247 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9248 } else { 9249 VarSize = CharUnits::Zero(); 9250 } 9251 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9252 // Temp solution to prevent optimizations of the internal variables. 
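    // Roughly (an illustrative sketch; the actual mangling comes from
    // getName): for an internal declare-target variable @X this creates a
    // companion constant along the lines of
    //   @X.ref = internal constant i32* @X
    // and marks it compiler-used, so @X itself cannot be stripped by the
    // optimizer even though it is not externally visible.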
9253 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9254 std::string RefName = getName({VarName, "ref"}); 9255 if (!CGM.GetGlobalValue(RefName)) { 9256 llvm::Constant *AddrRef = 9257 getOrCreateInternalVariable(Addr->getType(), RefName); 9258 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9259 GVAddrRef->setConstant(/*Val=*/true); 9260 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9261 GVAddrRef->setInitializer(Addr); 9262 CGM.addCompilerUsedGlobal(GVAddrRef); 9263 } 9264 } 9265 } else { 9266 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9267 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9268 HasRequiresUnifiedSharedMemory)) && 9269 "Declare target attribute must link or to with unified memory."); 9270 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9271 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9272 else 9273 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9274 9275 if (CGM.getLangOpts().OpenMPIsDevice) { 9276 VarName = Addr->getName(); 9277 Addr = nullptr; 9278 } else { 9279 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9280 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9281 } 9282 VarSize = CGM.getPointerSize(); 9283 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9284 } 9285 9286 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9287 VarName, Addr, VarSize, Flags, Linkage); 9288 } 9289 9290 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9291 if (isa<FunctionDecl>(GD.getDecl()) || 9292 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9293 return emitTargetFunctions(GD); 9294 9295 return emitTargetGlobalVariable(GD); 9296 } 9297 9298 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9299 for (const VarDecl *VD : DeferredGlobalVariables) { 9300 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9301 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9302 if (!Res) 9303 continue; 9304 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9305 !HasRequiresUnifiedSharedMemory) { 9306 CGM.EmitGlobal(VD); 9307 } else { 9308 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9309 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9310 HasRequiresUnifiedSharedMemory)) && 9311 "Expected link clause or to clause with unified memory."); 9312 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9313 } 9314 } 9315 } 9316 9317 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9318 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9319 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9320 " Expected target-based directive."); 9321 } 9322 9323 void CGOpenMPRuntime::checkArchForUnifiedAddressing( 9324 const OMPRequiresDecl *D) { 9325 for (const OMPClause *Clause : D->clauselists()) { 9326 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9327 HasRequiresUnifiedSharedMemory = true; 9328 break; 9329 } 9330 } 9331 } 9332 9333 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9334 LangAS &AS) { 9335 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9336 return false; 9337 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9338 switch(A->getAllocatorType()) { 9339 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9340 // Not supported, fallback to the default mem space. 
9341   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9342   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9343   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9344   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9345   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9346   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9347   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9348     AS = LangAS::Default;
9349     return true;
9350   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9351     llvm_unreachable("Expected predefined allocator for the variables with the "
9352                      "static storage.");
9353   }
9354   return false;
9355 }
9356
9357 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
9358   return HasRequiresUnifiedSharedMemory;
9359 }
9360
9361 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9362     CodeGenModule &CGM)
9363     : CGM(CGM) {
9364   if (CGM.getLangOpts().OpenMPIsDevice) {
9365     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9366     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9367   }
9368 }
9369
9370 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9371   if (CGM.getLangOpts().OpenMPIsDevice)
9372     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9373 }
9374
9375 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9376   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9377     return true;
9378
9379   StringRef Name = CGM.getMangledName(GD);
9380   const auto *D = cast<FunctionDecl>(GD.getDecl());
9381   // Do not emit the function if it is marked as declare target, as it was
9382   // already emitted.
9383   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9384     if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
9385       if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
9386         return !F->isDeclaration();
9387       return false;
9388     }
9389     return true;
9390   }
9391
9392   return !AlreadyEmittedTargetFunctions.insert(Name).second;
9393 }
9394
9395 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
9396   // If we don't have entries or if we are emitting code for the device, we
9397   // don't need to do anything.
9398   if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9399       CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9400       (OffloadEntriesInfoManager.empty() &&
9401        !HasEmittedDeclareTargetRegion &&
9402        !HasEmittedTargetRegion))
9403     return nullptr;
9404
9405   // Create and register the function that handles the requires directives.
9406   ASTContext &C = CGM.getContext();
9407
9408   llvm::Function *RequiresRegFn;
9409   {
9410     CodeGenFunction CGF(CGM);
9411     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9412     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9413     std::string ReqName = getName({"omp_offloading", "requires_reg"});
9414     RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9415     CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9416     OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9417     // TODO: check for other requires clauses.
9418     // The requires directive takes effect only when a target region is
9419     // present in the compilation unit. Otherwise it is ignored and not
9420     // passed to the runtime. This prevents the runtime from throwing an
9421     // error for mismatched requires clauses across compilation units that
9422     // don't contain at least one target region.
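    // For example (illustrative): a compilation unit containing
    //   #pragma omp requires unified_shared_memory
    // together with at least one target region gets a registration function
    // that executes, in effect,
    //   __tgt_register_requires(OMP_REQ_UNIFIED_SHARED_MEMORY);
    // so the runtime can reject device images whose requirements do not
    // match.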
9423     assert((HasEmittedTargetRegion ||
9424             HasEmittedDeclareTargetRegion ||
9425             !OffloadEntriesInfoManager.empty()) &&
9426            "Target or declare target region expected.");
9427     if (HasRequiresUnifiedSharedMemory)
9428       Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9429     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
9430                         llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9431     CGF.FinishFunction();
9432   }
9433   return RequiresRegFn;
9434 }
9435
9436 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
9437   // If we have offloading in the current module, we need to emit the entries
9438   // now and register the offloading descriptor.
9439   createOffloadEntriesAndInfoMetadata();
9440
9441   // Create and register the offloading binary descriptors. This is the main
9442   // entity that captures all the information about offloading in the current
9443   // compilation unit.
9444   return createOffloadingBinaryDescriptorRegistration();
9445 }
9446
9447 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9448                                     const OMPExecutableDirective &D,
9449                                     SourceLocation Loc,
9450                                     llvm::Function *OutlinedFn,
9451                                     ArrayRef<llvm::Value *> CapturedVars) {
9452   if (!CGF.HaveInsertPoint())
9453     return;
9454
9455   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9456   CodeGenFunction::RunCleanupsScope Scope(CGF);
9457
9458   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9459   llvm::Value *Args[] = {
9460       RTLoc,
9461       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9462       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9463   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9464   RealArgs.append(std::begin(Args), std::end(Args));
9465   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9466
9467   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9468   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9469 }
9470
9471 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9472                                          const Expr *NumTeams,
9473                                          const Expr *ThreadLimit,
9474                                          SourceLocation Loc) {
9475   if (!CGF.HaveInsertPoint())
9476     return;
9477
9478   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9479
9480   llvm::Value *NumTeamsVal =
9481       NumTeams
9482           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9483                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9484           : CGF.Builder.getInt32(0);
9485
9486   llvm::Value *ThreadLimitVal =
9487       ThreadLimit
9488           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9489                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9490           : CGF.Builder.getInt32(0);
9491
9492   // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
9493   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9494                                      ThreadLimitVal};
9495   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9496                       PushNumTeamsArgs);
9497 }
9498
9499 void CGOpenMPRuntime::emitTargetDataCalls(
9500     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9501     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
9502   if (!CGF.HaveInsertPoint())
9503     return;
9504
9505   // Action used to replace the default codegen action and turn privatization
9506   // off.
9507   PrePostActionTy NoPrivAction;
9508
9509   // Generate the code for the opening of the data environment. Capture all the
9510   // arguments of the runtime call by reference because they are used in the
9511   // closing of the region.
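  // Schematically (a sketch of the emitted control flow, not literal code):
  //   #pragma omp target data map(tofrom: A) if(Cond)
  // becomes
  //   if (Cond)
  //     __tgt_target_data_begin(DeviceID, 1, &A_base, &A_ptr, &A_size, &A_type);
  //   <body>
  //   if (Cond)
  //     __tgt_target_data_end(DeviceID, 1, &A_base, &A_ptr, &A_size, &A_type);
  // with the body emitted twice only when use_device_ptr privatization
  // forces separate privatized and non-privatized versions.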
9512 auto &&BeginThenGen = [this, &D, Device, &Info, 9513 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 9514 // Fill up the arrays with all the mapped variables. 9515 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9516 MappableExprsHandler::MapValuesArrayTy Pointers; 9517 MappableExprsHandler::MapValuesArrayTy Sizes; 9518 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9519 9520 // Get map clause information. 9521 MappableExprsHandler MCHandler(D, CGF); 9522 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 9523 9524 // Fill up the arrays and create the arguments. 9525 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9526 9527 llvm::Value *BasePointersArrayArg = nullptr; 9528 llvm::Value *PointersArrayArg = nullptr; 9529 llvm::Value *SizesArrayArg = nullptr; 9530 llvm::Value *MapTypesArrayArg = nullptr; 9531 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 9532 SizesArrayArg, MapTypesArrayArg, Info); 9533 9534 // Emit device ID if any. 9535 llvm::Value *DeviceID = nullptr; 9536 if (Device) { 9537 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9538 CGF.Int64Ty, /*isSigned=*/true); 9539 } else { 9540 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9541 } 9542 9543 // Emit the number of elements in the offloading arrays. 9544 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 9545 9546 llvm::Value *OffloadingArgs[] = { 9547 DeviceID, PointerNum, BasePointersArrayArg, 9548 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 9549 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 9550 OffloadingArgs); 9551 9552 // If device pointer privatization is required, emit the body of the region 9553 // here. It will have to be duplicated: with and without privatization. 9554 if (!Info.CaptureDeviceAddrMap.empty()) 9555 CodeGen(CGF); 9556 }; 9557 9558 // Generate code for the closing of the data region. 9559 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 9560 PrePostActionTy &) { 9561 assert(Info.isValid() && "Invalid data environment closing arguments."); 9562 9563 llvm::Value *BasePointersArrayArg = nullptr; 9564 llvm::Value *PointersArrayArg = nullptr; 9565 llvm::Value *SizesArrayArg = nullptr; 9566 llvm::Value *MapTypesArrayArg = nullptr; 9567 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 9568 SizesArrayArg, MapTypesArrayArg, Info); 9569 9570 // Emit device ID if any. 9571 llvm::Value *DeviceID = nullptr; 9572 if (Device) { 9573 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9574 CGF.Int64Ty, /*isSigned=*/true); 9575 } else { 9576 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9577 } 9578 9579 // Emit the number of elements in the offloading arrays. 9580 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 9581 9582 llvm::Value *OffloadingArgs[] = { 9583 DeviceID, PointerNum, BasePointersArrayArg, 9584 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 9585 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 9586 OffloadingArgs); 9587 }; 9588 9589 // If we need device pointer privatization, we need to emit the body of the 9590 // region with no privatization in the 'else' branch of the conditional. 9591 // Otherwise, we don't have to do anything. 
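  // E.g. (illustrative):
  //   #pragma omp target data map(X) use_device_ptr(X) if(C)
  // must run the body with the translated device address of 'X' when 'C' is
  // true, but with the original host address when it is false, so the body
  // is emitted once under each assumption.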
9592 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 9593 PrePostActionTy &) { 9594 if (!Info.CaptureDeviceAddrMap.empty()) { 9595 CodeGen.setAction(NoPrivAction); 9596 CodeGen(CGF); 9597 } 9598 }; 9599 9600 // We don't have to do anything to close the region if the if clause evaluates 9601 // to false. 9602 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 9603 9604 if (IfCond) { 9605 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 9606 } else { 9607 RegionCodeGenTy RCG(BeginThenGen); 9608 RCG(CGF); 9609 } 9610 9611 // If we don't require privatization of device pointers, we emit the body in 9612 // between the runtime calls. This avoids duplicating the body code. 9613 if (Info.CaptureDeviceAddrMap.empty()) { 9614 CodeGen.setAction(NoPrivAction); 9615 CodeGen(CGF); 9616 } 9617 9618 if (IfCond) { 9619 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 9620 } else { 9621 RegionCodeGenTy RCG(EndThenGen); 9622 RCG(CGF); 9623 } 9624 } 9625 9626 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 9627 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9628 const Expr *Device) { 9629 if (!CGF.HaveInsertPoint()) 9630 return; 9631 9632 assert((isa<OMPTargetEnterDataDirective>(D) || 9633 isa<OMPTargetExitDataDirective>(D) || 9634 isa<OMPTargetUpdateDirective>(D)) && 9635 "Expecting either target enter, exit data, or update directives."); 9636 9637 CodeGenFunction::OMPTargetDataInfo InputInfo; 9638 llvm::Value *MapTypesArray = nullptr; 9639 // Generate the code for the opening of the data environment. 9640 auto &&ThenGen = [this, &D, Device, &InputInfo, 9641 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 9642 // Emit device ID if any. 9643 llvm::Value *DeviceID = nullptr; 9644 if (Device) { 9645 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9646 CGF.Int64Ty, /*isSigned=*/true); 9647 } else { 9648 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9649 } 9650 9651 // Emit the number of elements in the offloading arrays. 9652 llvm::Constant *PointerNum = 9653 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9654 9655 llvm::Value *OffloadingArgs[] = {DeviceID, 9656 PointerNum, 9657 InputInfo.BasePointersArray.getPointer(), 9658 InputInfo.PointersArray.getPointer(), 9659 InputInfo.SizesArray.getPointer(), 9660 MapTypesArray}; 9661 9662 // Select the right runtime function call for each expected standalone 9663 // directive. 9664 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9665 OpenMPRTLFunction RTLFn; 9666 switch (D.getDirectiveKind()) { 9667 case OMPD_target_enter_data: 9668 RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait 9669 : OMPRTL__tgt_target_data_begin; 9670 break; 9671 case OMPD_target_exit_data: 9672 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 9673 : OMPRTL__tgt_target_data_end; 9674 break; 9675 case OMPD_target_update: 9676 RTLFn = HasNowait ? 
OMPRTL__tgt_target_data_update_nowait 9677 : OMPRTL__tgt_target_data_update; 9678 break; 9679 case OMPD_parallel: 9680 case OMPD_for: 9681 case OMPD_parallel_for: 9682 case OMPD_parallel_sections: 9683 case OMPD_for_simd: 9684 case OMPD_parallel_for_simd: 9685 case OMPD_cancel: 9686 case OMPD_cancellation_point: 9687 case OMPD_ordered: 9688 case OMPD_threadprivate: 9689 case OMPD_allocate: 9690 case OMPD_task: 9691 case OMPD_simd: 9692 case OMPD_sections: 9693 case OMPD_section: 9694 case OMPD_single: 9695 case OMPD_master: 9696 case OMPD_critical: 9697 case OMPD_taskyield: 9698 case OMPD_barrier: 9699 case OMPD_taskwait: 9700 case OMPD_taskgroup: 9701 case OMPD_atomic: 9702 case OMPD_flush: 9703 case OMPD_teams: 9704 case OMPD_target_data: 9705 case OMPD_distribute: 9706 case OMPD_distribute_simd: 9707 case OMPD_distribute_parallel_for: 9708 case OMPD_distribute_parallel_for_simd: 9709 case OMPD_teams_distribute: 9710 case OMPD_teams_distribute_simd: 9711 case OMPD_teams_distribute_parallel_for: 9712 case OMPD_teams_distribute_parallel_for_simd: 9713 case OMPD_declare_simd: 9714 case OMPD_declare_target: 9715 case OMPD_end_declare_target: 9716 case OMPD_declare_reduction: 9717 case OMPD_declare_mapper: 9718 case OMPD_taskloop: 9719 case OMPD_taskloop_simd: 9720 case OMPD_target: 9721 case OMPD_target_simd: 9722 case OMPD_target_teams_distribute: 9723 case OMPD_target_teams_distribute_simd: 9724 case OMPD_target_teams_distribute_parallel_for: 9725 case OMPD_target_teams_distribute_parallel_for_simd: 9726 case OMPD_target_teams: 9727 case OMPD_target_parallel: 9728 case OMPD_target_parallel_for: 9729 case OMPD_target_parallel_for_simd: 9730 case OMPD_requires: 9731 case OMPD_unknown: 9732 llvm_unreachable("Unexpected standalone target data directive."); 9733 break; 9734 } 9735 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 9736 }; 9737 9738 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 9739 CodeGenFunction &CGF, PrePostActionTy &) { 9740 // Fill up the arrays with all the mapped variables. 9741 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9742 MappableExprsHandler::MapValuesArrayTy Pointers; 9743 MappableExprsHandler::MapValuesArrayTy Sizes; 9744 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9745 9746 // Get map clause information. 9747 MappableExprsHandler MEHandler(D, CGF); 9748 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 9749 9750 TargetDataInfo Info; 9751 // Fill up the arrays and create the arguments. 
9752     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9753     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9754                                  Info.PointersArray, Info.SizesArray,
9755                                  Info.MapTypesArray, Info);
9756     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9757     InputInfo.BasePointersArray =
9758         Address(Info.BasePointersArray, CGM.getPointerAlign());
9759     InputInfo.PointersArray =
9760         Address(Info.PointersArray, CGM.getPointerAlign());
9761     InputInfo.SizesArray =
9762         Address(Info.SizesArray, CGM.getPointerAlign());
9763     MapTypesArray = Info.MapTypesArray;
9764     if (D.hasClausesOfKind<OMPDependClause>())
9765       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9766     else
9767       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9768   };
9769
9770   if (IfCond) {
9771     emitOMPIfClause(CGF, IfCond, TargetThenGen,
9772                     [](CodeGenFunction &CGF, PrePostActionTy &) {});
9773   } else {
9774     RegionCodeGenTy ThenRCG(TargetThenGen);
9775     ThenRCG(CGF);
9776   }
9777 }
9778
9779 namespace {
9780 /// Kind of parameter in a function with 'declare simd' directive.
9781 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
9782 /// Attribute set of the parameter.
9783 struct ParamAttrTy {
9784   ParamKindTy Kind = Vector;
9785   llvm::APSInt StrideOrArg;
9786   llvm::APSInt Alignment;
9787 };
9788 } // namespace
9789
9790 static unsigned evaluateCDTSize(const FunctionDecl *FD,
9791                                 ArrayRef<ParamAttrTy> ParamAttrs) {
9792   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
9793   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
9794   // of that clause. The VLEN value must be a power of 2.
9795   // Otherwise the notion of the function's "characteristic data type" (CDT)
9796   // is used to compute the vector length.
9797   // CDT is defined in the following order:
9798   //   a) For non-void function, the CDT is the return type.
9799   //   b) If the function has any non-uniform, non-linear parameters, then the
9800   //      CDT is the type of the first such parameter.
9801   //   c) If the CDT determined by a) or b) above is a struct, union, or class
9802   //      type which is pass-by-value (except for the type that maps to the
9803   //      built-in complex data type), the characteristic data type is int.
9804   //   d) If none of the above three cases is applicable, the CDT is int.
9805   // The VLEN is then determined based on the CDT and the size of vector
9806   // register of that ISA for which current vector version is generated. The
9807   // VLEN is computed using the formula below:
9808   //   VLEN = sizeof(vector_register) / sizeof(CDT),
9809   // where the vector register size is specified in section 3.2.1 "Registers
9810   // and the Stack Frame" of the original AMD64 ABI document.
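  // Worked example (illustrative, assuming C linkage for the name): for
  //   #pragma omp declare simd notinbranch
  //   double foo(double X);
  // no simdlen is given, so the CDT is the return type 'double' (64 bits).
  // For the 128-bit SSE variant below this yields VLEN = 128 / 64 = 2, and
  // emitX86DeclareSimdFunction() attaches the mangled variant name
  // "_ZGVbN2v_foo" to the function.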
9811   QualType RetType = FD->getReturnType();
9812   if (RetType.isNull())
9813     return 0;
9814   ASTContext &C = FD->getASTContext();
9815   QualType CDT;
9816   if (!RetType.isNull() && !RetType->isVoidType()) {
9817     CDT = RetType;
9818   } else {
9819     unsigned Offset = 0;
9820     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
9821       if (ParamAttrs[Offset].Kind == Vector)
9822         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
9823       ++Offset;
9824     }
9825     if (CDT.isNull()) {
9826       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9827         if (ParamAttrs[I + Offset].Kind == Vector) {
9828           CDT = FD->getParamDecl(I)->getType();
9829           break;
9830         }
9831       }
9832     }
9833   }
9834   if (CDT.isNull())
9835     CDT = C.IntTy;
9836   CDT = CDT->getCanonicalTypeUnqualified();
9837   if (CDT->isRecordType() || CDT->isUnionType())
9838     CDT = C.IntTy;
9839   return C.getTypeSize(CDT);
9840 }
9841
9842 static void
9843 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
9844                            const llvm::APSInt &VLENVal,
9845                            ArrayRef<ParamAttrTy> ParamAttrs,
9846                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
9847   struct ISADataTy {
9848     char ISA;
9849     unsigned VecRegSize;
9850   };
9851   ISADataTy ISAData[] = {
9852       {
9853           'b', 128
9854       }, // SSE
9855       {
9856           'c', 256
9857       }, // AVX
9858       {
9859           'd', 256
9860       }, // AVX2
9861       {
9862           'e', 512
9863       }, // AVX512
9864   };
9865   llvm::SmallVector<char, 2> Masked;
9866   switch (State) {
9867   case OMPDeclareSimdDeclAttr::BS_Undefined:
9868     Masked.push_back('N');
9869     Masked.push_back('M');
9870     break;
9871   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
9872     Masked.push_back('N');
9873     break;
9874   case OMPDeclareSimdDeclAttr::BS_Inbranch:
9875     Masked.push_back('M');
9876     break;
9877   }
9878   for (char Mask : Masked) {
9879     for (const ISADataTy &Data : ISAData) {
9880       SmallString<256> Buffer;
9881       llvm::raw_svector_ostream Out(Buffer);
9882       Out << "_ZGV" << Data.ISA << Mask;
9883       if (!VLENVal) {
9884         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
9885         assert(NumElts && "Non-zero simdlen/cdtsize expected");
9886         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
9887       } else {
9888         Out << VLENVal;
9889       }
9890       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
9891         switch (ParamAttr.Kind) {
9892         case LinearWithVarStride:
9893           Out << 's' << ParamAttr.StrideOrArg;
9894           break;
9895         case Linear:
9896           Out << 'l';
9897           if (!!ParamAttr.StrideOrArg)
9898             Out << ParamAttr.StrideOrArg;
9899           break;
9900         case Uniform:
9901           Out << 'u';
9902           break;
9903         case Vector:
9904           Out << 'v';
9905           break;
9906         }
9907         if (!!ParamAttr.Alignment)
9908           Out << 'a' << ParamAttr.Alignment;
9909       }
9910       Out << '_' << Fn->getName();
9911       Fn->addFnAttr(Out.str());
9912     }
9913   }
9914 }
9915
9916 // These are the functions needed to mangle the names of the
9917 // vector functions generated by the compiler, according to the rules
9918 // defined in the "Vector Function ABI specifications for AArch64",
9919 // available at
9920 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
9921
9922 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
9923 ///
9924 /// TODO: Need to implement the behavior for references marked with a
9925 /// var or no linear modifiers (1.b in the section). For this, we
9926 /// need to extend ParamKindTy to support the linear modifiers.
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::Linear)
    return false;

  // TODO: Handle linear references with modifiers.

  if (Kind == ParamKindTy::LinearWithVarStride)
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalar and complex types at most 16 bytes (128 bits) wide set PBV
  // to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(std::all_of(Sizes.begin(), Sizes.end(),
                     [](unsigned Size) {
                       return Size == 8 || Size == 16 || Size == 32 ||
                              Size == 64 || Size == 128;
                     }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

/// Mangle the parameter part of the vector function name according to the
/// parameters' OpenMP classification. The mangling function is defined in
/// section 3.5 of the AAVFABI.
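///
/// For example, the parameter sequence (uniform, vector, linear with
/// constant step 2) mangles to "uvl2".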
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case LinearWithVarStride:
      Out << "ls" << ParamAttr.StrideOrArg;
      break;
    case Linear:
      Out << 'l';
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return Out.str();
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1: an SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out the parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
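      // For example, for a hypothetical function 'foo' with a single vector
      // parameter and NDS == 32, 'notinbranch' produces the unmasked
      // variants "_ZGVnN2v_foo" and "_ZGVnN4v_foo".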
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
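      // Note that a linear step that is itself a parameter of the function
      // is recorded as LinearWithVarStride, with StrideOrArg holding the
      // position of the stride parameter rather than a constant step.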
      auto SI = Attr->steps_begin();
      auto MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
          CGM.getTriple().getArch() == llvm::Triple::x86_64) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
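/// The cleanup is pushed as a normal-and-EH cleanup, so the
/// __kmpc_doacross_fini call is emitted on both normal exits from the loop
/// region and exceptional unwinding out of it.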
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //   kmp_int64 lo; // lower
    //   kmp_int64 up; // upper
    //   kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
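  // Only the 'up' and 'st' fields are written below; 'lo' keeps the zero
  // value set by the null initialization above.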
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

namespace {
/// Cleanup action for allocate support.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int CleanupArgs = 3;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[CleanupArgs];

public:
  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                       ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == CleanupArgs &&
           "Size of arguments does not match.");
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
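  // For example, a predefined allocator such as omp_large_cap_mem_alloc
  // reaches this point as its integer enumeration value and is converted
  // below to the pointer type expected by the runtime.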
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Function *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}