xref: /freebsd-src/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
25 #include "clang/Basic/BitmaskEnum.h"
26 #include "clang/Basic/FileManager.h"
27 #include "clang/Basic/OpenMPKinds.h"
28 #include "clang/Basic/SourceManager.h"
29 #include "clang/CodeGen/ConstantInitBuilder.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/StringExtras.h"
33 #include "llvm/Bitcode/BitcodeReader.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/GlobalValue.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/Support/AtomicOrdering.h"
39 #include "llvm/Support/Format.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include <cassert>
42 #include <numeric>
43 
44 using namespace clang;
45 using namespace CodeGen;
46 using namespace llvm::omp;
47 
48 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for a directive with an associated captured
  /// statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info with no associated captured statement (used for
  /// inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the next task-switching point for 'untied' tasks. No-op by
  /// default; overridden by task/inlined regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region body may be exited via an OpenMP 'cancel' directive.
  bool hasCancel() const { return HasCancel; }

  /// RTTI support: matches any captured-statement info created for an
  /// OpenMP region.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
110 
/// API for captured statement code generation in OpenMP constructs
/// outlined for a standalone 'parallel' directive.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function, supplied by the caller.
  StringRef HelperName;
};
143 
/// API for captured statement code generation in OpenMP constructs
/// outlined for a standalone 'task' directive.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing the switch-based resume machinery for
  /// 'untied' tasks: every task part gets its own case in a switch over the
  /// stored part id, so a resumed task jumps straight to the next part.
  class UntiedTaskActionTy final : public PrePostActionTy {
    // True when the task is untied (constructor receives 'Tied').
    bool Untied;
    // Pointer variable holding the current part id of the untied task.
    const VarDecl *PartIDVar;
    // Extra codegen run at each switching point, before returning.
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        // Unknown part ids fall through to the 'done' block, which simply
        // returns from the task.
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part id 0 resumes at the very beginning of the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task-switching point: store the next part id, run the
    /// user-supplied codegen, return from the task, and register the resume
    /// block as a fresh case of the untied switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next part id equals the number of cases emitted so far.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
232 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries delegate to the enclosing (outer) OpenMP
/// region info when one exists.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // Unlike the other delegating methods, this queries getOldCSI()
    // directly (deliberately shadowing the OuterRegionInfo member) so that
    // any enclosing captured-statement info — not just an OpenMP one — can
    // supply the helper name.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
315 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the
/// client has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name of the target region, supplied by the client.
  StringRef HelperName;
};
344 
/// Placeholder RegionCodeGenTy callback for regions that must never emit a
/// body (used by CGOpenMPInnerExprInfo, which only privatizes captures).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      // Locals and parameters need no privatization; only non-local
      // (e.g. global) variables are redirected.
      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    // Variables privatized in the constructor are intentionally not
    // reported as captured fields.
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
407 
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo for the construct's lifetime and restores the
/// previous captured-statement info on destruction.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved lambda/block capture state; only meaningful when NoInheritance
  // was requested, in which case it is restored by the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true (the default), temporarily clear the
  /// enclosing lambda/block capture state so the inlined region does not
  /// inherit it.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
450 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL, mirroring kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
479 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device IDs understood by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
505 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
/// Field indices into the ident_t structure above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
546 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
578 
579 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
580 /// region.
581 class CleanupTy final : public EHScopeStack::Cleanup {
582   PrePostActionTy *Action;
583 
584 public:
585   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
586   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
587     if (!CGF.HaveInsertPoint())
588       return;
589     Action->Exit(CGF);
590   }
591 };
592 
593 } // anonymous namespace
594 
595 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
596   CodeGenFunction::RunCleanupsScope Scope(CGF);
597   if (PrePostAction) {
598     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
599     Callback(CodeGen, CGF, *PrePostAction);
600   } else {
601     PrePostActionTy Action;
602     Callback(CodeGen, CGF, Action);
603   }
604 }
605 
606 /// Check if the combiner is a call to UDR combiner and if it is so return the
607 /// UDR decl used for reduction.
608 static const OMPDeclareReductionDecl *
609 getReductionInit(const Expr *ReductionOp) {
610   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
611     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
612       if (const auto *DRE =
613               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
614         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
615           return DRD;
616   return nullptr;
617 }
618 
/// Emit initialization of a reduction private copy.
/// \param DRD The user-defined reduction declaration.
/// \param InitOp The initializer expression; when \p DRD has an initializer
///        this must be a call whose callee is an OpaqueValueExpr.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) variable.
/// \param Ty Type of the reduction item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Call the user-provided initializer function (Reduction.second).
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Bind the decl behind argument 0 to the private copy and argument 1 to
    // the original value (presumably the omp_priv/omp_orig placeholders of
    // the UDR initializer — confirm against Sema).
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the initializer function for the opaque callee and emit
    // the call for its side effects only.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No user initializer: copy from a private constant global holding the
    // null value of Ty.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied straight into the private storage; no scalar
      // RValue is produced.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
674 
/// Emit element-by-element initialization of an array of complex types.
/// \param DestAddr Address of the array to initialize.
/// \param Type Type of the array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
///        the user-defined reduction initializer; otherwise \p Init is an
///        ordinary initializer expression.
/// \param Init Initial expression for each array element.
/// \param DRD User-defined reduction declaration, or null.
/// \param SrcAddr Address of the original array; only used when \p DRD is
///        non-null (the UDR initializer is given the original element).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers from
  // one iteration to the next.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope so that any cleanups from the element initializer run once per
    // iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name below says "dest.element" although it
    // names the advanced *source* pointer; it is only a label, but it looks
    // like a copy-paste from the block below.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
764 
765 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
766   return CGF.EmitOMPSharedLValue(E);
767 }
768 
769 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
770                                             const Expr *E) {
771   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
772     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
773   return LValue();
774 }
775 
776 void ReductionCodeGen::emitAggregateInitialization(
777     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
778     const OMPDeclareReductionDecl *DRD) {
779   // Emit VarDecl with copy init for arrays.
780   // Get the address of the original variable captured in current
781   // captured region.
782   const auto *PrivateVD =
783       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
784   bool EmitDeclareReductionInit =
785       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
786   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
787                        EmitDeclareReductionInit,
788                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
789                                                 : PrivateVD->getInit(),
790                        DRD, SharedAddr);
791 }
792 
793 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
794                                    ArrayRef<const Expr *> Origs,
795                                    ArrayRef<const Expr *> Privates,
796                                    ArrayRef<const Expr *> ReductionOps) {
797   ClausesData.reserve(Shareds.size());
798   SharedAddresses.reserve(Shareds.size());
799   Sizes.reserve(Shareds.size());
800   BaseDecls.reserve(Shareds.size());
801   const auto *IOrig = Origs.begin();
802   const auto *IPriv = Privates.begin();
803   const auto *IRed = ReductionOps.begin();
804   for (const Expr *Ref : Shareds) {
805     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
806     std::advance(IOrig, 1);
807     std::advance(IPriv, 1);
808     std::advance(IRed, 1);
809   }
810 }
811 
812 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
813   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
814          "Number of generated lvalues must be exactly N.");
815   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
816   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
817   SharedAddresses.emplace_back(First, Second);
818   if (ClausesData[N].Shared == ClausesData[N].Ref) {
819     OrigAddresses.emplace_back(First, Second);
820   } else {
821     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
822     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
823     OrigAddresses.emplace_back(First, Second);
824   }
825 }
826 
// Compute the size of reduction item N (total bytes and, for variably
// modified types, the element count), record it in Sizes[N], and emit the
// private VLA type with the size expression bound to the computed count.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: total size is statically known; the element-count
    // slot stays null.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  // Element type of the original item, obtained from its pointer type.
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1, using the bound lvalues emitted earlier
    // into OrigAddresses.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: derive the element count from the total byte size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so
  // EmitVariablyModifiedType can emit the private (VLA) type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
863 
// Re-emit the variably modified private type of reduction item N with the
// externally provided element count \p Size bound to its VLA size
// expression. No-op (with a sanity assert) for constant-sized items.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // The mapping must stay alive while EmitVariablyModifiedType runs so the
  // size expression resolves to Size.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
882 
// Emit the initializer for the private copy of reduction item N.
// Strategy, in priority order:
//  1. array-typed items: element-wise init via emitAggregateInitialization;
//  2. a declare-reduction initializer (or a DRD when the private variable has
//     no initializer of its own);
//  3. otherwise, the private variable's own non-trivial initializer — unless
//     DefaultInit returned true (which suppresses the fallback; presumably it
//     performed the initialization itself — confirm against callers).
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Retype the destination address to the private copy's memory type.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Note: DefaultInit is invoked (result ignored) before the aggregate init
    // when a declare-reduction initializer exists.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
910 
911 bool ReductionCodeGen::needCleanups(unsigned N) {
912   const auto *PrivateVD =
913       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
914   QualType PrivateType = PrivateVD->getType();
915   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
916   return DTorKind != QualType::DK_none;
917 }
918 
919 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
920                                     Address PrivateAddr) {
921   const auto *PrivateVD =
922       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
923   QualType PrivateType = PrivateVD->getType();
924   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
925   if (needCleanups(N)) {
926     PrivateAddr = CGF.Builder.CreateElementBitCast(
927         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
928     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
929   }
930 }
931 
// Starting from BaseLV (of type BaseTy), repeatedly load through pointers and
// references until the type matches ElTy, then bitcast the resulting address
// to ElTy's memory representation. The returned lvalue keeps BaseLV's type,
// base info and TBAA metadata.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference: load the referenced address.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
951 
// Wrap Addr in the same pointer/reference structure as BaseTy (peeled down to
// ElTy): one memory temporary per indirection level is created, each inner
// temporary's pointer is stored into the enclosing one, and Addr — cast to
// the innermost type — is stored at the bottom. Returns the outermost
// temporary, or Addr itself (with BaseLVAlignment) when BaseTy has no
// indirection to rebuild.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      // Chain: the enclosing temporary points at this one.
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      // First iteration: remember the outermost temporary to return.
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
979 
980 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
981   const VarDecl *OrigVD = nullptr;
982   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
983     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
984     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
985       Base = TempOASE->getBase()->IgnoreParenImpCasts();
986     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
987       Base = TempASE->getBase()->IgnoreParenImpCasts();
988     DE = cast<DeclRefExpr>(Base);
989     OrigVD = cast<VarDecl>(DE->getDecl());
990   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
991     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
992     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
993       Base = TempASE->getBase()->IgnoreParenImpCasts();
994     DE = cast<DeclRefExpr>(Base);
995     OrigVD = cast<VarDecl>(DE->getDecl());
996   }
997   return OrigVD;
998 }
999 
1000 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1001                                                Address PrivateAddr) {
1002   const DeclRefExpr *DE;
1003   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1004     BaseDecls.emplace_back(OrigVD);
1005     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1006     LValue BaseLValue =
1007         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1008                     OriginalBaseLValue);
1009     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
1010     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1011         BaseLValue.getPointer(CGF), SharedAddr.getPointer());
1012     llvm::Value *PrivatePointer =
1013         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1014             PrivateAddr.getPointer(), SharedAddr.getType());
1015     llvm::Value *Ptr = CGF.Builder.CreateGEP(
1016         SharedAddr.getElementType(), PrivatePointer, Adjustment);
1017     return castToBase(CGF, OrigVD->getType(),
1018                       SharedAddresses[N].first.getType(),
1019                       OriginalBaseLValue.getAddress(CGF).getType(),
1020                       OriginalBaseLValue.getAlignment(), Ptr);
1021   }
1022   BaseDecls.emplace_back(
1023       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1024   return PrivateAddr;
1025 }
1026 
1027 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1028   const OMPDeclareReductionDecl *DRD =
1029       getReductionInit(ClausesData[N].ReductionOp);
1030   return DRD && DRD->getInitializer();
1031 }
1032 
1033 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1034   return CGF.EmitLoadOfPointerLValue(
1035       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1036       getThreadIDVariable()->getType()->castAs<PointerType>());
1037 }
1038 
// Emit the body of the outlined OpenMP region under a terminate scope so any
// exception attempting to escape the structured block terminates instead.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  // Run the region's code generation callback between push/popTerminate.
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1053 
1054 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1055     CodeGenFunction &CGF) {
1056   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1057                             getThreadIDVariable()->getType(),
1058                             AlignmentSource::Decl);
1059 }
1060 
1061 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1062                                        QualType FieldTy) {
1063   auto *Field = FieldDecl::Create(
1064       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1065       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1066       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1067   Field->setAccess(AS_public);
1068   DC->addDecl(Field);
1069   return Field;
1070 }
1071 
// Construct the runtime helper: set up separators used for mangling runtime
// entity names, create the critical-name type, initialize the
// OpenMPIRBuilder and load any offloading metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The critical-section name type is an array of 8 i32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1082 
1083 void CGOpenMPRuntime::clear() {
1084   InternalVars.clear();
1085   // Clean non-target variable declarations possibly used only in debug info.
1086   for (const auto &Data : EmittedNonTargetVariables) {
1087     if (!Data.getValue().pointsToAliveValue())
1088       continue;
1089     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1090     if (!GV)
1091       continue;
1092     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1093       continue;
1094     GV->eraseFromParent();
1095   }
1096 }
1097 
1098 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1099   SmallString<128> Buffer;
1100   llvm::raw_svector_ostream OS(Buffer);
1101   StringRef Sep = FirstSeparator;
1102   for (StringRef Part : Parts) {
1103     OS << Sep << Part;
1104     Sep = Separator;
1105   }
1106   return std::string(OS.str());
1107 }
1108 
// Emit the helper function for a 'declare reduction' combiner or initializer:
//   void .omp_combiner.(Ty *omp_out, Ty *omp_in)  (resp. .omp_initializer.).
// \p In / \p Out are the declaration's omp_in/omp_out (or orig/priv)
// variables, which are privatized to the loaded parameter addresses before
// the body expression is emitted. When \p CombinerInitializer is null and
// this is an initializer, Out's own non-trivial initializer is emitted
// instead.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, make the helper always-inline so the reduction body is
  // folded into its callers.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // Initializer without an explicit expression: emit Out's own initializer.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1165 
// Emit (once per declaration) the combiner and optional initializer helper
// functions for a 'declare reduction' and cache them in UDRMap. If emitted
// inside a function, the declaration is also recorded in FunctionUDRMap
// against that function.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only CallInit initializers are passed through as an expression; for the
    // other kinds emitCombinerOrInitializer falls back to the priv variable's
    // own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1191 
1192 std::pair<llvm::Function *, llvm::Function *>
1193 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1194   auto I = UDRMap.find(D);
1195   if (I != UDRMap.end())
1196     return I->second;
1197   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1198   return UDRMap.lookup(D);
1199 }
1200 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback for directive \p Kind onto the builder's
  // finalization stack (no-op when \p OMPBuilder is null); the destructor
  // pops it again.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Temporarily move clang's builder to IP to emit the branch through
      // the active cleanups to the cancellation destination.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Builder whose finalization stack is managed; null disables the RAII.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1245 
1246 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1247     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1248     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1249     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1250   assert(ThreadIDVar->getType()->isPointerType() &&
1251          "thread id variable must be of type kmp_int32 *");
1252   CodeGenFunction CGF(CGM, true);
1253   bool HasCancel = false;
1254   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1255     HasCancel = OPD->hasCancel();
1256   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1257     HasCancel = OPD->hasCancel();
1258   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1259     HasCancel = OPSD->hasCancel();
1260   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1261     HasCancel = OPFD->hasCancel();
1262   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1263     HasCancel = OPFD->hasCancel();
1264   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1265     HasCancel = OPFD->hasCancel();
1266   else if (const auto *OPFD =
1267                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1268     HasCancel = OPFD->hasCancel();
1269   else if (const auto *OPFD =
1270                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1271     HasCancel = OPFD->hasCancel();
1272 
1273   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1274   //       parallel region to make cancellation barriers work properly.
1275   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1276   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1277   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1278                                     HasCancel, OutlinedHelperName);
1279   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1280   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1281 }
1282 
1283 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1284     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1285     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1286   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1287   return emitParallelOrTeamsOutlinedFunction(
1288       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1289 }
1290 
1291 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1292     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1293     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1294   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1295   return emitParallelOrTeamsOutlinedFunction(
1296       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1297 }
1298 
// Outline the captured statement of a task/taskloop directive. For untied
// tasks an additional codegen callback re-enqueues the task via
// __kmpc_omp_task at each part boundary, and NumberOfParts is set to the
// number of generated parts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Re-entry codegen for untied tasks: call __kmpc_omp_task with the loaded
  // kmp_task_t pointer.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Detect a 'cancel' in any task flavor that supports it.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The number of parts is only meaningful for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1345 
1346 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1347                              const RecordDecl *RD, const CGRecordLayout &RL,
1348                              ArrayRef<llvm::Constant *> Data) {
1349   llvm::StructType *StructTy = RL.getLLVMType();
1350   unsigned PrevIdx = 0;
1351   ConstantInitBuilder CIBuilder(CGM);
1352   auto DI = Data.begin();
1353   for (const FieldDecl *FD : RD->fields()) {
1354     unsigned Idx = RL.getLLVMFieldNo(FD);
1355     // Fill the alignment.
1356     for (unsigned I = PrevIdx; I < Idx; ++I)
1357       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1358     PrevIdx = Idx + 1;
1359     Fields.add(*DI);
1360     ++DI;
1361   }
1362 }
1363 
1364 template <class... As>
1365 static llvm::GlobalVariable *
1366 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1367                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1368                    As &&... Args) {
1369   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1370   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1371   ConstantInitBuilder CIBuilder(CGM);
1372   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1373   buildStructValue(Fields, CGM, RD, RL, Data);
1374   return Fields.finishAndCreateGlobal(
1375       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1376       std::forward<As>(Args)...);
1377 }
1378 
1379 template <typename T>
1380 static void
1381 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1382                                          ArrayRef<llvm::Constant *> Data,
1383                                          T &Parent) {
1384   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1385   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1386   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1387   buildStructValue(Fields, CGM, RD, RL, Data);
1388   Fields.finishAndAddTo(Parent);
1389 }
1390 
// Create the per-function placeholder instruction marking where service
// (location/thread-id) code should be inserted. The placeholder is a no-op
// bitcast of undef, placed either at the current insertion point or right
// after the function's alloca insertion point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    // Append the marker to the block currently being emitted.
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Otherwise anchor it in the entry block, just after the allocas.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1406 
1407 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1408   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1409   if (Elem.second.ServiceInsertPt) {
1410     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1411     Elem.second.ServiceInsertPt = nullptr;
1412     Ptr->eraseFromParent();
1413   }
1414 }
1415 
// Render \p Loc as the ";file;function;line;column;;" string used for the
// source-location field of an ident_t. The returned StringRef points into
// \p Buffer, which must outlive any use of the result.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  // Function name is emitted only when the current declaration is a function.
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
1428 
1429 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1430                                                  SourceLocation Loc,
1431                                                  unsigned Flags) {
1432   llvm::Constant *SrcLocStr;
1433   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1434       Loc.isInvalid()) {
1435     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1436   } else {
1437     std::string FunctionName = "";
1438     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1439       FunctionName = FD->getQualifiedNameAsString();
1440     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1441     const char *FileName = PLoc.getFilename();
1442     unsigned Line = PLoc.getLine();
1443     unsigned Column = PLoc.getColumn();
1444     SrcLocStr =
1445         OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
1446   }
1447   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1448   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1449                                      Reserved2Flags);
1450 }
1451 
/// Return the OpenMP thread id for the current function, emitting a
/// __kmpc_global_thread_num call (cached per function) when it cannot be
/// recovered from an outlined region's thread-id argument.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only reuse the thread-id argument when no C++ EH landing pad is
      // pending, or when the value's defining instruction is in the entry
      // block or the current block (so the load cannot observe a clobbered
      // slot on an exceptional path).
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insert point (entry block) so the cached
  // thread id dominates all uses; the guard restores the builder afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1519 
1520 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1521   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1522   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1523     clearLocThreadIdInsertPt(CGF);
1524     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1525   }
1526   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1527     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1528       UDRMap.erase(D);
1529     FunctionUDRMap.erase(CGF.CurFn);
1530   }
1531   auto I = FunctionUDMMap.find(CGF.CurFn);
1532   if (I != FunctionUDMMap.end()) {
1533     for(const auto *D : I->second)
1534       UDMMap.erase(D);
1535     FunctionUDMMap.erase(I);
1536   }
1537   LastprivateConditionalToTypes.erase(CGF.CurFn);
1538   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1539 }
1540 
/// Return the ident_t* type; it is owned by the shared OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1544 
1545 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1546   if (!Kmpc_MicroTy) {
1547     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1548     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1549                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1550     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1551   }
1552   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1553 }
1554 
1555 llvm::FunctionCallee
1556 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1557                                              bool IsGPUDistribute) {
1558   assert((IVSize == 32 || IVSize == 64) &&
1559          "IV size is not compatible with the omp runtime");
1560   StringRef Name;
1561   if (IsGPUDistribute)
1562     Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1563                                     : "__kmpc_distribute_static_init_4u")
1564                         : (IVSigned ? "__kmpc_distribute_static_init_8"
1565                                     : "__kmpc_distribute_static_init_8u");
1566   else
1567     Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1568                                     : "__kmpc_for_static_init_4u")
1569                         : (IVSigned ? "__kmpc_for_static_init_8"
1570                                     : "__kmpc_for_static_init_8u");
1571 
1572   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1573   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1574   llvm::Type *TypeParams[] = {
1575     getIdentTyPointerTy(),                     // loc
1576     CGM.Int32Ty,                               // tid
1577     CGM.Int32Ty,                               // schedtype
1578     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1579     PtrTy,                                     // p_lower
1580     PtrTy,                                     // p_upper
1581     PtrTy,                                     // p_stride
1582     ITy,                                       // incr
1583     ITy                                        // chunk
1584   };
1585   auto *FnTy =
1586       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1587   return CGM.CreateRuntimeFunction(FnTy, Name);
1588 }
1589 
1590 llvm::FunctionCallee
1591 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1592   assert((IVSize == 32 || IVSize == 64) &&
1593          "IV size is not compatible with the omp runtime");
1594   StringRef Name =
1595       IVSize == 32
1596           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1597           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1598   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1599   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1600                                CGM.Int32Ty,           // tid
1601                                CGM.Int32Ty,           // schedtype
1602                                ITy,                   // lower
1603                                ITy,                   // upper
1604                                ITy,                   // stride
1605                                ITy                    // chunk
1606   };
1607   auto *FnTy =
1608       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1609   return CGM.CreateRuntimeFunction(FnTy, Name);
1610 }
1611 
1612 llvm::FunctionCallee
1613 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1614   assert((IVSize == 32 || IVSize == 64) &&
1615          "IV size is not compatible with the omp runtime");
1616   StringRef Name =
1617       IVSize == 32
1618           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1619           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1620   llvm::Type *TypeParams[] = {
1621       getIdentTyPointerTy(), // loc
1622       CGM.Int32Ty,           // tid
1623   };
1624   auto *FnTy =
1625       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1626   return CGM.CreateRuntimeFunction(FnTy, Name);
1627 }
1628 
1629 llvm::FunctionCallee
1630 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1631   assert((IVSize == 32 || IVSize == 64) &&
1632          "IV size is not compatible with the omp runtime");
1633   StringRef Name =
1634       IVSize == 32
1635           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1636           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1637   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1638   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1639   llvm::Type *TypeParams[] = {
1640     getIdentTyPointerTy(),                     // loc
1641     CGM.Int32Ty,                               // tid
1642     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1643     PtrTy,                                     // p_lower
1644     PtrTy,                                     // p_upper
1645     PtrTy                                      // p_stride
1646   };
1647   auto *FnTy =
1648       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1649   return CGM.CreateRuntimeFunction(FnTy, Name);
1650 }
1651 
1652 /// Obtain information that uniquely identifies a target entry. This
1653 /// consists of the file and device IDs as well as line number associated with
1654 /// the relevant entry source location.
1655 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1656                                      unsigned &DeviceID, unsigned &FileID,
1657                                      unsigned &LineNum) {
1658   SourceManager &SM = C.getSourceManager();
1659 
1660   // The loc should be always valid and have a file ID (the user cannot use
1661   // #pragma directives in macros)
1662 
1663   assert(Loc.isValid() && "Source location is expected to be always valid.");
1664 
1665   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1666   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1667 
1668   llvm::sys::fs::UniqueID ID;
1669   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1670     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1671     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1672     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1673       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1674           << PLoc.getFilename() << EC.message();
1675   }
1676 
1677   DeviceID = ID.getDevice();
1678   FileID = ID.getFile();
1679   LineNum = PLoc.getLine();
1680 }
1681 
/// Return the address of the reference pointer ("<name>_decl_tgt_ref_ptr")
/// used to access a declare-target variable that is mapped via a link
/// pointer (MT_Link, or MT_To under unified shared memory); otherwise an
/// invalid Address.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // No device codegen in simd-only mode.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the pointer's name: mangled variable name, a file-unique suffix
    // for internal-linkage variables, then "_decl_tgt_ref_ptr".
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      // First use: create the pointer variable and register it with the
      // offloading machinery.
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the variable's address;
      // on the device the runtime fills it in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1720 
1721 llvm::Constant *
1722 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1723   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1724          !CGM.getContext().getTargetInfo().isTLSSupported());
1725   // Lookup the entry, lazily creating it if necessary.
1726   std::string Suffix = getName({"cache", ""});
1727   return getOrCreateInternalVariable(
1728       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1729 }
1730 
1731 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1732                                                 const VarDecl *VD,
1733                                                 Address VDAddr,
1734                                                 SourceLocation Loc) {
1735   if (CGM.getLangOpts().OpenMPUseTLS &&
1736       CGM.getContext().getTargetInfo().isTLSSupported())
1737     return VDAddr;
1738 
1739   llvm::Type *VarTy = VDAddr.getElementType();
1740   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1741                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1742                                                        CGM.Int8PtrTy),
1743                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1744                          getOrCreateThreadPrivateCache(VD)};
1745   return Address(CGF.EmitRuntimeCall(
1746                      OMPBuilder.getOrCreateRuntimeFunction(
1747                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1748                      Args),
1749                  VDAddr.getAlignment());
1750 }
1751 
1752 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1753     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1754     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1755   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1756   // library.
1757   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1758   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1759                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1760                       OMPLoc);
1761   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1762   // to register constructor/destructor for variable.
1763   llvm::Value *Args[] = {
1764       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1765       Ctor, CopyCtor, Dtor};
1766   CGF.EmitRuntimeCall(
1767       OMPBuilder.getOrCreateRuntimeFunction(
1768           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1769       Args);
1770 }
1771 
/// Emit (once per variable) the ctor/dtor helpers for a threadprivate
/// variable and register them with the runtime. Returns the generated
/// initialization function when \p CGF is null (caller must schedule it),
/// nullptr otherwise.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS needs no runtime registration.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The insert() guard ensures the helpers are emitted only once per
  // mangled name.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor takes a single void* pointing at the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the (unchanged) argument pointer.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor also takes a single void* pointing at the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // The runtime requires non-null function pointers of the right type even
    // when a hook is absent, so substitute typed null constants.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No current function: wrap the registration in a standalone
      // "__omp_threadprivate_init_" function and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the current function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1891 
/// Emit ctor/dtor offload entries for a declare-target variable definition.
/// Returns true when the variable's own emission should be suppressed
/// (i.e. on the device side).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do without any offload targets and outside device codegen.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Link-mapped variables (and to-mapped ones under unified shared memory)
  // are accessed through a reference pointer instead; no entries here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the entries only once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive; only the offload entry references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host the entry only needs a unique address to serve as an ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive; only the offload entry references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // On the host the entry only needs a unique address to serve as an ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2006 
/// Return a per-thread address for a compiler-generated ("artificial")
/// threadprivate variable named "<Name>.artificial.".
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // With native TLS support it suffices to mark the global thread-local.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise obtain the thread's copy from the runtime:
  //   __kmpc_threadprivate_cached(loc, tid, &var, size, &cache)
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime returns void*; cast it back to the variable's pointer type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2038 
2039 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2040                                    const RegionCodeGenTy &ThenGen,
2041                                    const RegionCodeGenTy &ElseGen) {
2042   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2043 
2044   // If the condition constant folds and can be elided, try to avoid emitting
2045   // the condition and the dead arm of the if/else.
2046   bool CondConstant;
2047   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2048     if (CondConstant)
2049       ThenGen(CGF);
2050     else
2051       ElseGen(CGF);
2052     return;
2053   }
2054 
2055   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2056   // emit the conditional branch.
2057   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2058   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2059   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2060   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2061 
2062   // Emit the 'then' code.
2063   CGF.EmitBlock(ThenBlock);
2064   ThenGen(CGF);
2065   CGF.EmitBranch(ContBlock);
2066   // Emit the 'else' code if present.
2067   // There is no need to emit line number for unconditional branch.
2068   (void)ApplyDebugLocation::CreateEmpty(CGF);
2069   CGF.EmitBlock(ElseBlock);
2070   ElseGen(CGF);
2071   // There is no need to emit line number for unconditional branch.
2072   (void)ApplyDebugLocation::CreateEmpty(CGF);
2073   CGF.EmitBranch(ContBlock);
2074   // Emit the continuation block for code after the if.
2075   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2076 }
2077 
/// Emit an invocation of \p OutlinedFn as the body of an OpenMP 'parallel'
/// region.
///
/// Without an if-clause this lowers to a single
/// __kmpc_fork_call(loc, n, microtask, var1, ..., varn).  With an if-clause,
/// emitIfClause produces a runtime branch between that call and a serialized
/// fallback that runs the outlined function on the current thread bracketed
/// by __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
/// NOTE(review): \p NumThreads is not referenced anywhere in this body;
/// presumably it is consumed by a caller or a different code path -- confirm.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: hand the outlined microtask and its captures to the
  // runtime, which forks the team and runs it.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run the region body on the current thread only.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No if-clause: unconditionally take the parallel path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2149 
2150 // If we're inside an (outlined) parallel region, use the region info's
2151 // thread-ID variable (it is passed in a first argument of the outlined function
2152 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2153 // regular serial code region, get thread ID by calling kmp_int32
2154 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2155 // return the address of that temp.
2156 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2157                                              SourceLocation Loc) {
2158   if (auto *OMPRegionInfo =
2159           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2160     if (OMPRegionInfo->getThreadIDVariable())
2161       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2162 
2163   llvm::Value *ThreadID = getThreadID(CGF, Loc);
2164   QualType Int32Ty =
2165       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2166   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2167   CGF.EmitStoreOfScalar(ThreadID,
2168                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2169 
2170   return ThreadIDTemp;
2171 }
2172 
2173 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2174     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2175   SmallString<256> Buffer;
2176   llvm::raw_svector_ostream Out(Buffer);
2177   Out << Name;
2178   StringRef RuntimeName = Out.str();
2179   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2180   if (Elem.second) {
2181     assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2182            "OMP internal variable has different type than requested");
2183     return &*Elem.second;
2184   }
2185 
2186   return Elem.second = new llvm::GlobalVariable(
2187              CGM.getModule(), Ty, /*IsConstant*/ false,
2188              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2189              Elem.first(), /*InsertBefore=*/nullptr,
2190              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2191 }
2192 
2193 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2194   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2195   std::string Name = getName({Prefix, "var"});
2196   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2197 }
2198 
2199 namespace {
2200 /// Common pre(post)-action for different OpenMP constructs.
2201 class CommonActionTy final : public PrePostActionTy {
2202   llvm::FunctionCallee EnterCallee;
2203   ArrayRef<llvm::Value *> EnterArgs;
2204   llvm::FunctionCallee ExitCallee;
2205   ArrayRef<llvm::Value *> ExitArgs;
2206   bool Conditional;
2207   llvm::BasicBlock *ContBlock = nullptr;
2208 
2209 public:
2210   CommonActionTy(llvm::FunctionCallee EnterCallee,
2211                  ArrayRef<llvm::Value *> EnterArgs,
2212                  llvm::FunctionCallee ExitCallee,
2213                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2214       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2215         ExitArgs(ExitArgs), Conditional(Conditional) {}
2216   void Enter(CodeGenFunction &CGF) override {
2217     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2218     if (Conditional) {
2219       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2220       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2221       ContBlock = CGF.createBasicBlock("omp_if.end");
2222       // Generate the branch (If-stmt)
2223       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2224       CGF.EmitBlock(ThenBlock);
2225     }
2226   }
2227   void Done(CodeGenFunction &CGF) {
2228     // Emit the rest of blocks/branches
2229     CGF.EmitBranch(ContBlock);
2230     CGF.EmitBlock(ContBlock, true);
2231   }
2232   void Exit(CodeGenFunction &CGF) override {
2233     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2234   }
2235 };
2236 } // anonymous namespace
2237 
/// Emit an OpenMP 'critical' region guarded by the named lock.
///
/// The body is bracketed by __kmpc_critical(_with_hint) and
/// __kmpc_end_critical; the optional \p Hint expression is appended only to
/// the enter call's arguments, while the exit call always takes the plain
/// (loc, gtid, lock) triple.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // Enter arguments start as a copy of Args so the hint (if any) can be
  // appended without affecting the exit call.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
2267 
/// Emit an OpenMP 'master' region.
///
/// Lowering shape:
///   if (__kmpc_master(&loc, gtid)) {
///     MasterOpGen();
///     __kmpc_end_master(&loc, gtid);
///   }
/// The body only runs when __kmpc_master returns non-zero; Action.Done()
/// emits the merge point of that conditional.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // The same (loc, gtid) pair serves both the enter and the exit call.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}
2290 
/// Emit an OpenMP 'masked' region.
///
/// Lowering shape:
///   if (__kmpc_masked(&loc, gtid, filter)) {
///     MaskedOpGen();
///     __kmpc_end_masked(&loc, gtid);
///   }
/// When no filter clause is present, the filter value defaults to 0.
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(iden_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  // The exit call takes only (loc, gtid); the filter is enter-only.
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}
2319 
2320 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2321                                         SourceLocation Loc) {
2322   if (!CGF.HaveInsertPoint())
2323     return;
2324   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2325     OMPBuilder.createTaskyield(CGF.Builder);
2326   } else {
2327     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2328     llvm::Value *Args[] = {
2329         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2330         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2331     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2332                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2333                         Args);
2334   }
2335 
2336   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2337     Region->emitUntiedSwitch(CGF);
2338 }
2339 
/// Emit an OpenMP 'taskgroup' region:
///   __kmpc_taskgroup(&loc, gtid);
///   TaskgroupOpGen();
///   __kmpc_end_taskgroup(&loc, gtid);
/// The enter/exit calls are unconditional (the same Args serve both).
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
2359 
2360 /// Given an array of pointers to variables, project the address of a
2361 /// given variable.
2362 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2363                                       unsigned Index, const VarDecl *Var) {
2364   // Pull out the pointer to the variable.
2365   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2366   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2367 
2368   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2369   Addr = CGF.Builder.CreateElementBitCast(
2370       Addr, CGF.ConvertTypeForMem(Var->getType()));
2371   return Addr;
2372 }
2373 
/// Build the helper function passed to __kmpc_copyprivate as <copy_func>.
///
/// The generated function has the C signature
///   void .omp.copyprivate.copy_func(void *LHSArg, void *RHSArg);
/// where both arguments point to arrays of void* (\p ArgsType), one slot per
/// copyprivate variable.  For each variable I it projects DestExprs[I] out of
/// the LHS array and SrcExprs[I] out of the RHS array, then emits the
/// user-level assignment AssignmentOps[I] between them via EmitOMPCopy.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // A fresh CodeGenFunction is used because we are emitting a whole new
  // function, independent of the one currently being generated.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2427 
/// Emit an OpenMP 'single' region, optionally with 'copyprivate' support.
///
/// The region body is guarded by __kmpc_single / __kmpc_end_single so only
/// one thread executes it.  When copyprivate variables are present, a
/// did_it flag records whether this thread was the one that ran the region,
/// a void*[] list of the variables' addresses is built, and
/// __kmpc_copyprivate broadcasts the values via the generated copy helper.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four clause arrays are parallel: one entry per copyprivate variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the guarded region, so only the executing
    // thread sets the flag)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2515 
/// Emit an OpenMP 'ordered' region.
///
/// When \p IsThreads is set, the body is bracketed by __kmpc_ordered /
/// __kmpc_end_ordered; otherwise the body is emitted inline with no runtime
/// calls at all.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Action (and the Args it references) must stay alive across the
    // emitInlinedDirective call, hence the early return from this branch.
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}
2539 
2540 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2541   unsigned Flags;
2542   if (Kind == OMPD_for)
2543     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2544   else if (Kind == OMPD_sections)
2545     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2546   else if (Kind == OMPD_single)
2547     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2548   else if (Kind == OMPD_barrier)
2549     Flags = OMP_IDENT_BARRIER_EXPL;
2550   else
2551     Flags = OMP_IDENT_BARRIER_IMPL;
2552   return Flags;
2553 }
2554 
/// Pick the default schedule/chunk for a worksharing loop.
///
/// The outputs are only modified when the directive carries an ordered(n)
/// clause with a loop count (i.e. it is a doacross loop); then the schedule
/// is forced to 'static' with a chunk literal of 1.  Otherwise \p
/// ScheduleKind and \p ChunkExpr are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    // Synthesize an unsigned 32-bit literal '1' as the chunk expression.
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
2572 
/// Emit an explicit or implicit OpenMP barrier.
///
/// With the OpenMPIRBuilder enabled the whole lowering is delegated to it.
/// Otherwise, inside a cancellable region (unless \p ForceSimpleCall) the
/// barrier becomes __kmpc_cancel_barrier and, when \p EmitChecks is set, a
/// non-zero result branches to the region's cancellation destination.  In
/// all remaining cases a plain __kmpc_barrier is emitted.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2622 
2623 /// Map the OpenMP loop schedule to the runtime enumeration.
2624 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2625                                           bool Chunked, bool Ordered) {
2626   switch (ScheduleKind) {
2627   case OMPC_SCHEDULE_static:
2628     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2629                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2630   case OMPC_SCHEDULE_dynamic:
2631     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2632   case OMPC_SCHEDULE_guided:
2633     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2634   case OMPC_SCHEDULE_runtime:
2635     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2636   case OMPC_SCHEDULE_auto:
2637     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2638   case OMPC_SCHEDULE_unknown:
2639     assert(!Chunked && "chunk was specified but schedule kind not known");
2640     return Ordered ? OMP_ord_static : OMP_sch_static;
2641   }
2642   llvm_unreachable("Unexpected runtime schedule");
2643 }
2644 
2645 /// Map the OpenMP distribute schedule to the runtime enumeration.
2646 static OpenMPSchedType
2647 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2648   // only static is allowed for dist_schedule
2649   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2650 }
2651 
2652 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2653                                          bool Chunked) const {
2654   OpenMPSchedType Schedule =
2655       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2656   return Schedule == OMP_sch_static;
2657 }
2658 
2659 bool CGOpenMPRuntime::isStaticNonchunked(
2660     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2661   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2662   return Schedule == OMP_dist_sch_static;
2663 }
2664 
2665 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2666                                       bool Chunked) const {
2667   OpenMPSchedType Schedule =
2668       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2669   return Schedule == OMP_sch_static_chunked;
2670 }
2671 
2672 bool CGOpenMPRuntime::isStaticChunked(
2673     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2674   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2675   return Schedule == OMP_dist_sch_static_chunked;
2676 }
2677 
2678 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2679   OpenMPSchedType Schedule =
2680       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2681   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2682   return Schedule != OMP_sch_static;
2683 }
2684 
2685 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2686                                   OpenMPScheduleClauseModifier M1,
2687                                   OpenMPScheduleClauseModifier M2) {
2688   int Modifier = 0;
2689   switch (M1) {
2690   case OMPC_SCHEDULE_MODIFIER_monotonic:
2691     Modifier = OMP_sch_modifier_monotonic;
2692     break;
2693   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2694     Modifier = OMP_sch_modifier_nonmonotonic;
2695     break;
2696   case OMPC_SCHEDULE_MODIFIER_simd:
2697     if (Schedule == OMP_sch_static_chunked)
2698       Schedule = OMP_sch_static_balanced_chunked;
2699     break;
2700   case OMPC_SCHEDULE_MODIFIER_last:
2701   case OMPC_SCHEDULE_MODIFIER_unknown:
2702     break;
2703   }
2704   switch (M2) {
2705   case OMPC_SCHEDULE_MODIFIER_monotonic:
2706     Modifier = OMP_sch_modifier_monotonic;
2707     break;
2708   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2709     Modifier = OMP_sch_modifier_nonmonotonic;
2710     break;
2711   case OMPC_SCHEDULE_MODIFIER_simd:
2712     if (Schedule == OMP_sch_static_chunked)
2713       Schedule = OMP_sch_static_balanced_chunked;
2714     break;
2715   case OMPC_SCHEDULE_MODIFIER_last:
2716   case OMPC_SCHEDULE_MODIFIER_unknown:
2717     break;
2718   }
2719   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2720   // If the static schedule kind is specified or if the ordered clause is
2721   // specified, and if the nonmonotonic modifier is not specified, the effect is
2722   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2723   // modifier is specified, the effect is as if the nonmonotonic modifier is
2724   // specified.
2725   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2726     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2727           Schedule == OMP_sch_static_balanced_chunked ||
2728           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2729           Schedule == OMP_dist_sch_static_chunked ||
2730           Schedule == OMP_dist_sch_static))
2731       Modifier = OMP_sch_modifier_nonmonotonic;
2732   }
2733   return Schedule | Modifier;
2734 }
2735 
2736 void CGOpenMPRuntime::emitForDispatchInit(
2737     CodeGenFunction &CGF, SourceLocation Loc,
2738     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2739     bool Ordered, const DispatchRTInput &DispatchValues) {
2740   if (!CGF.HaveInsertPoint())
2741     return;
2742   OpenMPSchedType Schedule = getRuntimeSchedule(
2743       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2744   assert(Ordered ||
2745          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2746           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2747           Schedule != OMP_sch_static_balanced_chunked));
2748   // Call __kmpc_dispatch_init(
2749   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2750   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2751   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2752 
2753   // If the Chunk was not specified in the clause - use default value 1.
2754   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2755                                             : CGF.Builder.getIntN(IVSize, 1);
2756   llvm::Value *Args[] = {
2757       emitUpdateLocation(CGF, Loc),
2758       getThreadID(CGF, Loc),
2759       CGF.Builder.getInt32(addMonoNonMonoModifier(
2760           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2761       DispatchValues.LB,                                     // Lower
2762       DispatchValues.UB,                                     // Upper
2763       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2764       Chunk                                                  // Chunk
2765   };
2766   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2767 }
2768 
2769 static void emitForStaticInitCall(
2770     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2771     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2772     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2773     const CGOpenMPRuntime::StaticRTInput &Values) {
2774   if (!CGF.HaveInsertPoint())
2775     return;
2776 
2777   assert(!Values.Ordered);
2778   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2779          Schedule == OMP_sch_static_balanced_chunked ||
2780          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2781          Schedule == OMP_dist_sch_static ||
2782          Schedule == OMP_dist_sch_static_chunked);
2783 
2784   // Call __kmpc_for_static_init(
2785   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2786   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2787   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2788   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2789   llvm::Value *Chunk = Values.Chunk;
2790   if (Chunk == nullptr) {
2791     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2792             Schedule == OMP_dist_sch_static) &&
2793            "expected static non-chunked schedule");
2794     // If the Chunk was not specified in the clause - use default value 1.
2795     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2796   } else {
2797     assert((Schedule == OMP_sch_static_chunked ||
2798             Schedule == OMP_sch_static_balanced_chunked ||
2799             Schedule == OMP_ord_static_chunked ||
2800             Schedule == OMP_dist_sch_static_chunked) &&
2801            "expected static chunked schedule");
2802   }
2803   llvm::Value *Args[] = {
2804       UpdateLocation,
2805       ThreadId,
2806       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2807                                                   M2)), // Schedule type
2808       Values.IL.getPointer(),                           // &isLastIter
2809       Values.LB.getPointer(),                           // &LB
2810       Values.UB.getPointer(),                           // &UB
2811       Values.ST.getPointer(),                           // &Stride
2812       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2813       Chunk                                             // Chunk
2814   };
2815   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2816 }
2817 
2818 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2819                                         SourceLocation Loc,
2820                                         OpenMPDirectiveKind DKind,
2821                                         const OpenMPScheduleTy &ScheduleKind,
2822                                         const StaticRTInput &Values) {
2823   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2824       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2825   assert(isOpenMPWorksharingDirective(DKind) &&
2826          "Expected loop-based or sections-based directive.");
2827   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2828                                              isOpenMPLoopDirective(DKind)
2829                                                  ? OMP_IDENT_WORK_LOOP
2830                                                  : OMP_IDENT_WORK_SECTIONS);
2831   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2832   llvm::FunctionCallee StaticInitFunction =
2833       createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2834   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2835   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2836                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2837 }
2838 
2839 void CGOpenMPRuntime::emitDistributeStaticInit(
2840     CodeGenFunction &CGF, SourceLocation Loc,
2841     OpenMPDistScheduleClauseKind SchedKind,
2842     const CGOpenMPRuntime::StaticRTInput &Values) {
2843   OpenMPSchedType ScheduleNum =
2844       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2845   llvm::Value *UpdatedLocation =
2846       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2847   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2848   llvm::FunctionCallee StaticInitFunction;
2849   bool isGPUDistribute =
2850       CGM.getLangOpts().OpenMPIsDevice &&
2851       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2852   StaticInitFunction = createForStaticInitFunction(
2853       Values.IVSize, Values.IVSigned, isGPUDistribute);
2854 
2855   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2856                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2857                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2858 }
2859 
2860 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2861                                           SourceLocation Loc,
2862                                           OpenMPDirectiveKind DKind) {
2863   if (!CGF.HaveInsertPoint())
2864     return;
2865   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2866   llvm::Value *Args[] = {
2867       emitUpdateLocation(CGF, Loc,
2868                          isOpenMPDistributeDirective(DKind)
2869                              ? OMP_IDENT_WORK_DISTRIBUTE
2870                              : isOpenMPLoopDirective(DKind)
2871                                    ? OMP_IDENT_WORK_LOOP
2872                                    : OMP_IDENT_WORK_SECTIONS),
2873       getThreadID(CGF, Loc)};
2874   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2875   if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2876       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2877     CGF.EmitRuntimeCall(
2878         OMPBuilder.getOrCreateRuntimeFunction(
2879             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2880         Args);
2881   else
2882     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2883                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2884                         Args);
2885 }
2886 
2887 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2888                                                  SourceLocation Loc,
2889                                                  unsigned IVSize,
2890                                                  bool IVSigned) {
2891   if (!CGF.HaveInsertPoint())
2892     return;
2893   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2894   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2895   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2896 }
2897 
2898 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2899                                           SourceLocation Loc, unsigned IVSize,
2900                                           bool IVSigned, Address IL,
2901                                           Address LB, Address UB,
2902                                           Address ST) {
2903   // Call __kmpc_dispatch_next(
2904   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2905   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2906   //          kmp_int[32|64] *p_stride);
2907   llvm::Value *Args[] = {
2908       emitUpdateLocation(CGF, Loc),
2909       getThreadID(CGF, Loc),
2910       IL.getPointer(), // &isLastIter
2911       LB.getPointer(), // &Lower
2912       UB.getPointer(), // &Upper
2913       ST.getPointer()  // &Stride
2914   };
2915   llvm::Value *Call =
2916       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2917   return CGF.EmitScalarConversion(
2918       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2919       CGF.getContext().BoolTy, Loc);
2920 }
2921 
2922 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2923                                            llvm::Value *NumThreads,
2924                                            SourceLocation Loc) {
2925   if (!CGF.HaveInsertPoint())
2926     return;
2927   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2928   llvm::Value *Args[] = {
2929       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2930       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2931   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2932                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2933                       Args);
2934 }
2935 
2936 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2937                                          ProcBindKind ProcBind,
2938                                          SourceLocation Loc) {
2939   if (!CGF.HaveInsertPoint())
2940     return;
2941   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2942   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2943   llvm::Value *Args[] = {
2944       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2945       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2946   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2947                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2948                       Args);
2949 }
2950 
2951 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2952                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2953   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2954     OMPBuilder.createFlush(CGF.Builder);
2955   } else {
2956     if (!CGF.HaveInsertPoint())
2957       return;
2958     // Build call void __kmpc_flush(ident_t *loc)
2959     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2960                             CGM.getModule(), OMPRTL___kmpc_flush),
2961                         emitUpdateLocation(CGF, Loc));
2962   }
2963 }
2964 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the enumerator order is assumed to match the field layout
/// used when the kmp_task_t record type is built elsewhere in this file —
/// verify before reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2990 
/// Return true if no target-region and no device-global-variable offload
/// entries have been recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}
2995 
2996 /// Initialize target region entry.
2997 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2998     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2999                                     StringRef ParentName, unsigned LineNum,
3000                                     unsigned Order) {
3001   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3002                                              "only required for the device "
3003                                              "code generation.");
3004   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3005       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3006                                    OMPTargetRegionEntryTargetRegion);
3007   ++OffloadingEntriesNum;
3008 }
3009 
3010 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3011     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3012                                   StringRef ParentName, unsigned LineNum,
3013                                   llvm::Constant *Addr, llvm::Constant *ID,
3014                                   OMPTargetRegionEntryKind Flags) {
3015   // If we are emitting code for a target, the entry is already initialized,
3016   // only has to be registered.
3017   if (CGM.getLangOpts().OpenMPIsDevice) {
3018     // This could happen if the device compilation is invoked standalone.
3019     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3020       return;
3021     auto &Entry =
3022         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3023     Entry.setAddress(Addr);
3024     Entry.setID(ID);
3025     Entry.setFlags(Flags);
3026   } else {
3027     if (Flags ==
3028             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3029         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3030                                  /*IgnoreAddressId*/ true))
3031       return;
3032     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3033            "Target region entry already registered!");
3034     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3035     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3036     ++OffloadingEntriesNum;
3037   }
3038 }
3039 
3040 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3041     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3042     bool IgnoreAddressId) const {
3043   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3044   if (PerDevice == OffloadEntriesTargetRegion.end())
3045     return false;
3046   auto PerFile = PerDevice->second.find(FileID);
3047   if (PerFile == PerDevice->second.end())
3048     return false;
3049   auto PerParentName = PerFile->second.find(ParentName);
3050   if (PerParentName == PerFile->second.end())
3051     return false;
3052   auto PerLine = PerParentName->second.find(LineNum);
3053   if (PerLine == PerParentName->second.end())
3054     return false;
3055   // Fail if this entry is already registered.
3056   if (!IgnoreAddressId &&
3057       (PerLine->second.getAddress() || PerLine->second.getID()))
3058     return false;
3059   return true;
3060 }
3061 
3062 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3063     const OffloadTargetRegionEntryInfoActTy &Action) {
3064   // Scan all target region entries and perform the provided action.
3065   for (const auto &D : OffloadEntriesTargetRegion)
3066     for (const auto &F : D.second)
3067       for (const auto &P : F.second)
3068         for (const auto &L : P.second)
3069           Action(D.first, F.first, P.first(), L.first, L.second);
3070 }
3071 
/// Initialize a device global variable entry with its flags and creation
/// order; the address, size and linkage are filled in later by
/// registerDeviceGlobalVarEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3082 
3083 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3084     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3085                                      CharUnits VarSize,
3086                                      OMPTargetGlobalVarEntryKind Flags,
3087                                      llvm::GlobalValue::LinkageTypes Linkage) {
3088   if (CGM.getLangOpts().OpenMPIsDevice) {
3089     // This could happen if the device compilation is invoked standalone.
3090     if (!hasDeviceGlobalVarEntryInfo(VarName))
3091       return;
3092     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3093     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3094       if (Entry.getVarSize().isZero()) {
3095         Entry.setVarSize(VarSize);
3096         Entry.setLinkage(Linkage);
3097       }
3098       return;
3099     }
3100     Entry.setVarSize(VarSize);
3101     Entry.setLinkage(Linkage);
3102     Entry.setAddress(Addr);
3103   } else {
3104     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3105       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3106       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3107              "Entry not initialized!");
3108       if (Entry.getVarSize().isZero()) {
3109         Entry.setVarSize(VarSize);
3110         Entry.setLinkage(Linkage);
3111       }
3112       return;
3113     }
3114     OffloadEntriesDeviceGlobalVar.try_emplace(
3115         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3116     ++OffloadingEntriesNum;
3117   }
3118 }
3119 
3120 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3121     actOnDeviceGlobalVarEntriesInfo(
3122         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3123   // Scan all target region entries and perform the provided action.
3124   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3125     Action(E.getKey(), E.getValue());
3126 }
3127 
3128 void CGOpenMPRuntime::createOffloadEntry(
3129     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3130     llvm::GlobalValue::LinkageTypes Linkage) {
3131   StringRef Name = Addr->getName();
3132   llvm::Module &M = CGM.getModule();
3133   llvm::LLVMContext &C = M.getContext();
3134 
3135   // Create constant string with the name.
3136   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3137 
3138   std::string StringName = getName({"omp_offloading", "entry_name"});
3139   auto *Str = new llvm::GlobalVariable(
3140       M, StrPtrInit->getType(), /*isConstant=*/true,
3141       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3142   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3143 
3144   llvm::Constant *Data[] = {
3145       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3146       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3147       llvm::ConstantInt::get(CGM.SizeTy, Size),
3148       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3149       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3150   std::string EntryName = getName({"omp_offloading", "entry", ""});
3151   llvm::GlobalVariable *Entry = createGlobalStruct(
3152       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3153       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3154 
3155   // The entry has to be created in the section the linker expects it to be.
3156   Entry->setSection("omp_offloading_entries");
3157 }
3158 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by creation order; filled in by the emitter lambdas below
  // and post-processed afterwards to create the __tgt_offload_entry globals.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Try to recover a source location for the (DeviceID, FileID) pair so
        // the diagnostics emitted later can point at the target region.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Materialize the __tgt_offload_entry descriptors in creation order,
  // diagnosing entries with missing address/ID information.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3332 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata node
  // is inspected below.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers decoding a single operand of the current metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operand layout depends on it
    // (see the emitters in createOffloadEntriesAndInfoMetadata()).
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3401 
3402 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3403   if (!KmpRoutineEntryPtrTy) {
3404     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3405     ASTContext &C = CGM.getContext();
3406     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3407     FunctionProtoType::ExtProtoInfo EPI;
3408     KmpRoutineEntryPtrQTy = C.getPointerType(
3409         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3410     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3411   }
3412 }
3413 
3414 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3415   // Make sure the type of the entry is already created. This is the type we
3416   // have to create:
3417   // struct __tgt_offload_entry{
3418   //   void      *addr;       // Pointer to the offload entry info.
3419   //                          // (function or global)
3420   //   char      *name;       // Name of the function or global.
3421   //   size_t     size;       // Size of the entry info (0 if it a function).
3422   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3423   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3424   // };
3425   if (TgtOffloadEntryQTy.isNull()) {
3426     ASTContext &C = CGM.getContext();
3427     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3428     RD->startDefinition();
3429     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3430     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3431     addFieldToRecordDecl(C, RD, C.getSizeType());
3432     addFieldToRecordDecl(
3433         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3434     addFieldToRecordDecl(
3435         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3436     RD->completeDefinition();
3437     RD->addAttr(PackedAttr::CreateImplicit(C));
3438     TgtOffloadEntryQTy = C.getRecordType(RD);
3439   }
3440   return TgtOffloadEntryQTy;
3441 }
3442 
3443 namespace {
3444 struct PrivateHelpersTy {
3445   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3446                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3447       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3448         PrivateElemInit(PrivateElemInit) {}
3449   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3450   const Expr *OriginalRef = nullptr;
3451   const VarDecl *Original = nullptr;
3452   const VarDecl *PrivateCopy = nullptr;
3453   const VarDecl *PrivateElemInit = nullptr;
3454   bool isLocalPrivate() const {
3455     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3456   }
3457 };
3458 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3459 } // anonymous namespace
3460 
3461 static bool isAllocatableDecl(const VarDecl *VD) {
3462   const VarDecl *CVD = VD->getCanonicalDecl();
3463   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3464     return false;
3465   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3466   // Use the default allocation.
3467   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3468             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3469            !AA->getAllocator());
3470 }
3471 
3472 static RecordDecl *
3473 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3474   if (!Privates.empty()) {
3475     ASTContext &C = CGM.getContext();
3476     // Build struct .kmp_privates_t. {
3477     //         /*  private vars  */
3478     //       };
3479     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3480     RD->startDefinition();
3481     for (const auto &Pair : Privates) {
3482       const VarDecl *VD = Pair.second.Original;
3483       QualType Type = VD->getType().getNonReferenceType();
3484       // If the private variable is a local variable with lvalue ref type,
3485       // allocate the pointer instead of the pointee type.
3486       if (Pair.second.isLocalPrivate()) {
3487         if (VD->getType()->isLValueReferenceType())
3488           Type = C.getPointerType(Type);
3489         if (isAllocatableDecl(VD))
3490           Type = C.getPointerType(Type);
3491       }
3492       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3493       if (VD->hasAttrs()) {
3494         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3495              E(VD->getAttrs().end());
3496              I != E; ++I)
3497           FD->addAttr(*I);
3498       }
3499     }
3500     RD->completeDefinition();
3501     return RD;
3502   }
3503   return nullptr;
3504 }
3505 
3506 static RecordDecl *
3507 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3508                          QualType KmpInt32Ty,
3509                          QualType KmpRoutineEntryPointerQTy) {
3510   ASTContext &C = CGM.getContext();
3511   // Build struct kmp_task_t {
3512   //         void *              shareds;
3513   //         kmp_routine_entry_t routine;
3514   //         kmp_int32           part_id;
3515   //         kmp_cmplrdata_t data1;
3516   //         kmp_cmplrdata_t data2;
3517   // For taskloops additional fields:
3518   //         kmp_uint64          lb;
3519   //         kmp_uint64          ub;
3520   //         kmp_int64           st;
3521   //         kmp_int32           liter;
3522   //         void *              reductions;
3523   //       };
3524   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3525   UD->startDefinition();
3526   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3527   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3528   UD->completeDefinition();
3529   QualType KmpCmplrdataTy = C.getRecordType(UD);
3530   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3531   RD->startDefinition();
3532   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3533   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3534   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3535   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3536   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3537   if (isOpenMPTaskLoopDirective(Kind)) {
3538     QualType KmpUInt64Ty =
3539         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3540     QualType KmpInt64Ty =
3541         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3542     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3543     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3544     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3545     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3546     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3547   }
3548   RD->completeDefinition();
3549   return RD;
3550 }
3551 
3552 static RecordDecl *
3553 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3554                                      ArrayRef<PrivateDataTy> Privates) {
3555   ASTContext &C = CGM.getContext();
3556   // Build struct kmp_task_t_with_privates {
3557   //         kmp_task_t task_data;
3558   //         .kmp_privates_t. privates;
3559   //       };
3560   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3561   RD->startDefinition();
3562   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3563   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3564     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3565   RD->completeDefinition();
3566   return RD;
3567 }
3568 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Formal parameters of the proxy: the global thread id and the task
  // descriptor (restrict-qualified kmp_task_t_with_privates pointer).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base is the embedded kmp_task_t (first field of the wrapper record).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address (the lvalue's pointer, not its value).
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the stored `void *shareds` and cast it to the concrete shareds type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates block is the optional second field of the wrapper record;
  // pass a null pointer when no privates were generated.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally receive lb, ub, st, liter and reductions,
    // each loaded by value from the task descriptor.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0 to its caller.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3683 
/// Emit the task destructor function: for every field of the privates record
/// whose type requires destruction, a destroy cleanup is pushed so the field
/// is destroyed when the function finishes.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Same signature shape as the task entry: (kmp_int32 gtid, descriptor *).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3732 
3733 /// Emit a privates mapping function for correct handling of private and
3734 /// firstprivate variables.
3735 /// \code
3736 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3737 /// **noalias priv1,...,  <tyn> **noalias privn) {
3738 ///   *priv1 = &.privates.priv1;
3739 ///   ...;
3740 ///   *privn = &.privates.privn;
3741 /// }
3742 /// \endcode
3743 static llvm::Value *
3744 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3745                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3746                                ArrayRef<PrivateDataTy> Privates) {
3747   ASTContext &C = CGM.getContext();
3748   FunctionArgList Args;
3749   ImplicitParamDecl TaskPrivatesArg(
3750       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3751       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3752       ImplicitParamDecl::Other);
3753   Args.push_back(&TaskPrivatesArg);
3754   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3755   unsigned Counter = 1;
3756   for (const Expr *E : Data.PrivateVars) {
3757     Args.push_back(ImplicitParamDecl::Create(
3758         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3759         C.getPointerType(C.getPointerType(E->getType()))
3760             .withConst()
3761             .withRestrict(),
3762         ImplicitParamDecl::Other));
3763     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3764     PrivateVarsPos[VD] = Counter;
3765     ++Counter;
3766   }
3767   for (const Expr *E : Data.FirstprivateVars) {
3768     Args.push_back(ImplicitParamDecl::Create(
3769         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3770         C.getPointerType(C.getPointerType(E->getType()))
3771             .withConst()
3772             .withRestrict(),
3773         ImplicitParamDecl::Other));
3774     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3775     PrivateVarsPos[VD] = Counter;
3776     ++Counter;
3777   }
3778   for (const Expr *E : Data.LastprivateVars) {
3779     Args.push_back(ImplicitParamDecl::Create(
3780         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3781         C.getPointerType(C.getPointerType(E->getType()))
3782             .withConst()
3783             .withRestrict(),
3784         ImplicitParamDecl::Other));
3785     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3786     PrivateVarsPos[VD] = Counter;
3787     ++Counter;
3788   }
3789   for (const VarDecl *VD : Data.PrivateLocals) {
3790     QualType Ty = VD->getType().getNonReferenceType();
3791     if (VD->getType()->isLValueReferenceType())
3792       Ty = C.getPointerType(Ty);
3793     if (isAllocatableDecl(VD))
3794       Ty = C.getPointerType(Ty);
3795     Args.push_back(ImplicitParamDecl::Create(
3796         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3797         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3798         ImplicitParamDecl::Other));
3799     PrivateVarsPos[VD] = Counter;
3800     ++Counter;
3801   }
3802   const auto &TaskPrivatesMapFnInfo =
3803       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3804   llvm::FunctionType *TaskPrivatesMapTy =
3805       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3806   std::string Name =
3807       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3808   auto *TaskPrivatesMap = llvm::Function::Create(
3809       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3810       &CGM.getModule());
3811   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3812                                     TaskPrivatesMapFnInfo);
3813   if (CGM.getLangOpts().Optimize) {
3814     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3815     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3816     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3817   }
3818   CodeGenFunction CGF(CGM);
3819   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3820                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3821 
3822   // *privi = &.privates.privi;
3823   LValue Base = CGF.EmitLoadOfPointerLValue(
3824       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3825       TaskPrivatesArg.getType()->castAs<PointerType>());
3826   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3827   Counter = 0;
3828   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3829     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3830     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3831     LValue RefLVal =
3832         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3833     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3834         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3835     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3836     ++Counter;
3837   }
3838   CGF.FinishFunction();
3839   return TaskPrivatesMap;
3840 }
3841 
/// Emit initialization for private variables in task-based directives.
///
/// \param KmpTaskSharedsPtr Address of the shareds block to copy from; may be
///        Address::invalid() when nothing must be read from shareds.
/// \param TDBase LValue of the destination kmp_task_t_with_privates object.
/// \param ForDup True when emitting the body of the task_dup routine,
///        false when initializing a newly created task.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of the wrapper record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // View the shareds block through its concrete record type.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating (ForDup), only non-trivial constructor inits must be
    // re-run; trivial data was already copied.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the copy from the shared original.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Source lives in the original task's shareds block.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures are emitted through their capture fields.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/record firstprivate: bind the init helper to the shared
          // address and run the initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: run its default initializer, if any.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3963 
3964 /// Check if duplication function is required for taskloops.
3965 static bool checkInitIsRequired(CodeGenFunction &CGF,
3966                                 ArrayRef<PrivateDataTy> Privates) {
3967   bool InitRequired = false;
3968   for (const PrivateDataTy &Pair : Privates) {
3969     if (Pair.second.isLocalPrivate())
3970       continue;
3971     const VarDecl *VD = Pair.second.PrivateCopy;
3972     const Expr *Init = VD->getAnyInitializer();
3973     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3974                                     !CGF.isTrivialInitializer(Init));
3975     if (InitRequired)
3976       break;
3977   }
3978   return InitRequired;
3979 }
3980 
3981 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Arguments: destination task, source task, lastprivate flag.
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Load the shareds pointer from the *source* task; firstprivate copies
    // in the destination are initialized from it.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4060 
4061 /// Checks if destructor function is required to be generated.
4062 /// \return true if cleanups are required, false otherwise.
4063 static bool
4064 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4065                          ArrayRef<PrivateDataTy> Privates) {
4066   for (const PrivateDataTy &P : Privates) {
4067     if (P.second.isLocalPrivate())
4068       continue;
4069     QualType Ty = P.second.Original->getType().getNonReferenceType();
4070     if (Ty.isDestructedType())
4071       return true;
4072   }
4073   return false;
4074 }
4075 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// On construction, emits the loop headers for every iterator declared by the
/// expression: privatized iterator/counter temporaries, zero-initialized
/// counters, and the "counter < upper" guard with the branch into the loop
/// body. On destruction, emits the matching loop latches (counter increment,
/// branch back to the continuation block, exit block) in reverse iterator
/// order, so the caller places the per-iteration code between construction
/// and destruction of this scope.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continuation ("next iteration") and exit jump destinations;
  // filled by the constructor, consumed by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    // A null iterator expression makes this scope a no-op.
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate the upper bound once, before the loops are opened.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick a signed or unsigned comparison based on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring construction order.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4154 
4155 static std::pair<llvm::Value *, llvm::Value *>
4156 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4157   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4158   llvm::Value *Addr;
4159   if (OASE) {
4160     const Expr *Base = OASE->getBase();
4161     Addr = CGF.EmitScalarExpr(Base);
4162   } else {
4163     Addr = CGF.EmitLValue(E).getPointer(CGF);
4164   }
4165   llvm::Value *SizeVal;
4166   QualType Ty = E->getType();
4167   if (OASE) {
4168     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4169     for (const Expr *SE : OASE->getDimensions()) {
4170       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4171       Sz = CGF.EmitScalarConversion(
4172           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4173       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4174     }
4175   } else if (const auto *ASE =
4176                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4177     LValue UpAddrLVal =
4178         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4179     Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4180     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4181         UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4182     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4183     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4184     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4185   } else {
4186     SizeVal = CGF.getTypeSize(Ty);
4187   }
4188   return std::make_pair(Addr, SizeVal);
4189 }
4190 
/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds the
/// flags type used for its flags field.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  // Flags are an unsigned 32-bit integer.
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    // struct kmp_task_affinity_info_t {
    //   intptr_t base_addr;
    //   size_t   len;
    //   uint32_t flags;
    // };
    // (Field order must match the RTLAffinityInfoFieldsTy indices used by the
    // affinity-clause emission below.)
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
4205 
/// Emits the task-allocation sequence for a task-based directive: collects
/// and sorts the private copies, builds the task record types, emits the
/// proxy entry/duplication/destructor helper functions, calls the runtime
/// task-allocation entry point, and fills the resulting kmp_task_t
/// (shareds copy, detach event, affinity registration, priority).
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the captured initializer variable.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment (stable to keep declaration order among
  // equally-aligned privates) to minimize padding in the privates record.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). The taskloop variant of the
  // record is cached separately from the plain task/target variant.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the fourth argument of the outlined task
  // function; use its type for the cast (or for a null when no privates).
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // Request runtime destructor invocation if any private needs cleanup.
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag may be a runtime value (select) or a compile-time constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator-modified clauses contribute a runtime count (product of the
    // iterator upper bounds); plain clauses contribute a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized array: total = constant part + iterator-derived part.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Fully constant-sized array: a simple stack temporary suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // A runtime counter is needed: iterator-driven elements are appended
      // after the Pos constant elements already written.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // Cast the runtime-returned task pointer to the task-specific record type.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops may need a duplication function for per-chunk task copies.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4593 
namespace {
/// Dependence kind for RTL.
/// NOTE(review): these numeric values are presumably the flag encoding the
/// OpenMP runtime expects in kmp_depend_info (see openmp/runtime/src/kmp.h) —
/// confirm against the runtime before changing them.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4604 
4605 /// Translates internal dependency kind into the runtime kind.
4606 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4607   RTLDependenceKindTy DepKind;
4608   switch (K) {
4609   case OMPC_DEPEND_in:
4610     DepKind = DepIn;
4611     break;
4612   // Out and InOut dependencies must use the same code.
4613   case OMPC_DEPEND_out:
4614   case OMPC_DEPEND_inout:
4615     DepKind = DepInOut;
4616     break;
4617   case OMPC_DEPEND_mutexinoutset:
4618     DepKind = DepMutexInOutSet;
4619     break;
4620   case OMPC_DEPEND_source:
4621   case OMPC_DEPEND_sink:
4622   case OMPC_DEPEND_depobj:
4623   case OMPC_DEPEND_unknown:
4624     llvm_unreachable("Unknown task dependence type");
4625   }
4626   return DepKind;
4627 }
4628 
/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// \param KmpDependInfoTy [in,out] cached record type; built on first call.
/// \param FlagsTy [out] unsigned integer type, bool-sized, used for the
/// flags field.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    // struct kmp_depend_info {
    //   intptr_t base_addr;
    //   size_t   len;
    //   <flags>  flags;
    // };
    // (Field order must match the RTLDependInfoFieldsTy indices.)
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}
4643 
/// Returns the number of dependencies stored in a depobj variable and an
/// lvalue for the first element of its kmp_depend_info array.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable holds a void* to the dependency array; reinterpret
  // it as a pointer to kmp_depend_info elements.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // The element just before the first dependence (index -1) carries the
  // number of dependencies in its base_addr field.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4670 
/// Fills \p DependenciesArray with one kmp_depend_info record (base address,
/// length in bytes, translated dependence flags) per dependence expression in
/// \p Data, starting at position \p Pos.
/// \param Pos either a compile-time index (unsigned*) advanced in place, or
/// an lvalue holding a runtime index that is loaded and stored back around
/// each element (used when iterator modifiers make the count dynamic).
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // When the dependences use an iterator modifier, open the iterator loops
  // around the stores; a null expression makes this scope a no-op.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Constant index: address the element directly.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Runtime index: load the current position from memory.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position: bump the constant index or the in-memory counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4727 
/// Emits code that computes, for every depobj expression in \p Data, the
/// number of kmp_depend_info records stored in that depobj. The count is
/// read from the hidden header record located one element *before* the
/// address the depobj handle points to (its base_addr field holds the
/// element count — see emitDepobjDependClause). Returns one runtime value
/// per dependency expression.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // If an iterator modifier is present, the body below is emitted inside
    // the iterator-generated runtime loop nest.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the pointer stored in the depobj variable and view it as a
      // pointer to kmp_depend_info.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back one record to reach the hidden header element.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate the count in a temporary so it can be read back after the
      // iterator scope is closed.
      // NOTE(review): the zero-initializing store is emitted in the loop body;
      // if an iterator modifier could reach a depobj clause, the counter would
      // be reset on every runtime iteration. Presumably iterator modifiers are
      // not combined with depobj dependencies here — confirm against Sema.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Re-load the accumulated sizes outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4783 
/// Copies the kmp_depend_info records stored in each depobj of \p Data into
/// \p DependenciesArray at the runtime position held in \p PosLVal, then
/// advances the position by the number of records copied. The per-depobj
/// element count is read from the hidden header record at index -1 (see
/// emitDepobjDependClause).
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  // Size in bytes of a single kmp_depend_info record.
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // If an iterator modifier is present, the copy below is emitted inside
    // the iterator-generated runtime loop nest.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the pointer stored in the depobj variable and view it as a
      // pointer to kmp_depend_info.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      Address DepObjAddr = CGF.Builder.CreateGEP(
          Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4840 
/// Builds the flat kmp_depend_info array for a set of 'depend' clauses and
/// returns {number of elements (i32), pointer to the array as void*}.
/// Three categories of dependencies are laid out in order:
///   1. plain dependencies (count known at compile time),
///   2. dependencies with iterator modifiers (count computed at runtime),
///   3. depobj dependencies (records copied out of each depobj).
/// If only category 1 is present a constant-sized array is used; otherwise a
/// variably-sized local array is emitted. Returns {nullptr, invalid} when
/// there are no dependencies at all.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count: plain dependencies only (no depobj, no iterator).
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependecies and regular deps with the iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Runtime count = product of the iterator trip counts times the number
      // of dependency expressions on the clause.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total element count is only known at runtime: sum the three categories
    // and emit a variably-sized local array.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the
    // size expression of a VLA type.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // All counts known at compile time: use a constant-sized array.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Pass 1: plain dependencies, tracked with a compile-time position.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependecies with iterators.
  // Pass 2: switch to a runtime counter, seeded with the compile-time count.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  // Pass 3: append the records copied out of each depobj.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4964 
/// Allocates and fills the dependency array backing a 'depobj' object.
/// The array is heap-allocated via __kmpc_alloc with one extra leading
/// record whose base_addr field stores the element count (needed later by
/// depobj update/destroy and by emitDependClause to size the merged array).
/// Returns the address of the first *payload* record, i.e. one element past
/// the header — this is the pointer stored in the depobj variable.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime count = product of iterator trip counts; allocation size =
    // (count + 1 header record) * record size.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Compile-time count: size the array (count + 1 header record) directly.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the payload records starting at index 1 (index 0 is the header).
  // With an iterator modifier the position must be a runtime counter.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return the address of the first payload record as void*.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5047 
5048 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5049                                         SourceLocation Loc) {
5050   ASTContext &C = CGM.getContext();
5051   QualType FlagsTy;
5052   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5053   LValue Base = CGF.EmitLoadOfPointerLValue(
5054       DepobjLVal.getAddress(CGF),
5055       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5056   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5057   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5058       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5059   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5060       Addr.getElementType(), Addr.getPointer(),
5061       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5062   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5063                                                                CGF.VoidPtrTy);
5064   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5065   // Use default allocator.
5066   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5067   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5068 
5069   // _kmpc_free(gtid, addr, nullptr);
5070   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5071                                 CGM.getModule(), OMPRTL___kmpc_free),
5072                             Args);
5073 }
5074 
/// Implements 'depobj(x) update(kind)': emits a runtime loop that rewrites
/// the flags field of every kmp_depend_info record stored in the depobj to
/// the new dependency kind. The element count and the base of the record
/// array are obtained via getDepobjElements.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): the loop condition is checked after the first iteration,
  // so this assumes NumDeps >= 1 — presumably guaranteed because a depobj
  // always holds at least one record; confirm.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element pointer: entry value on the first pass,
  // the advanced pointer on back-edges.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5121 
/// Emits code for an OpenMP 'task' directive: allocates the task via
/// emitTaskInit, materializes the dependency array (if any), and enqueues
/// the task with __kmpc_omp_task / __kmpc_omp_task_with_deps. When an 'if'
/// clause evaluates false, the task is instead executed immediately inline
/// between __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0 (after
/// waiting on its dependencies with __kmpc_omp_wait_deps).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    // No no-alias dependency list is produced; pass count 0 and nullptr.
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'if' clause true (or absent): enqueue the task with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id = 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'if' clause false: wait on dependencies, then run the task body inline.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5239 
/// Emits code for an OpenMP 'taskloop' directive: allocates the task via
/// emitTaskInit, initializes the lb/ub/stride and reductions fields of the
/// task descriptor from the loop directive, and calls __kmpc_taskloop with
/// the grainsize/num_tasks scheduling encoded from the clause data.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // Unlike 'task', the 'if' clause is passed to the runtime as an int flag
  // rather than generating separate then/else code paths.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound field from the directive's lb variable.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the upper bound field from the directive's ub variable.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the stride field from the directive's stride variable.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // 'sched' argument encoding expected by __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // Data.Schedule: pointer is the grainsize/num_tasks expression; the int
      // bit selects num_tasks (true) vs grainsize (false).
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5325 
5326 /// Emit reduction operation for each element of array (required for
5327 /// array sections) LHS op = RHS.
5328 /// \param Type Type of array.
5329 /// \param LHSVar Variable on the left side of the reduction operation
5330 /// (references element of array in original variable).
5331 /// \param RHSVar Variable on the right side of the reduction operation
5332 /// (references element of array in original variable).
5333 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5334 /// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Guard against zero-length arrays: skip the loop entirely when the begin
  // pointer already equals the end pointer.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // Remember the predecessor block; the PHIs below need it as the incoming
  // edge that carries the initial begin pointers.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI for the source (RHS) cursor: begin pointer on entry, advanced pointer
  // on the back edge (added after the body is emitted).
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // PHI for the destination (LHS) cursor, wired the same way.
  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated reduction operation acts on single elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Complete the PHIs with the back-edge values. The incoming block is
  // queried from the builder because RedOpGen may have created new blocks.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5408 
5409 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5410 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5411 /// UDR combiner function.
5412 static void emitReductionCombiner(CodeGenFunction &CGF,
5413                                   const Expr *ReductionOp) {
5414   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5415     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5416       if (const auto *DRE =
5417               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5418         if (const auto *DRD =
5419                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5420           std::pair<llvm::Function *, llvm::Function *> Reduction =
5421               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5422           RValue Func = RValue::get(Reduction.first);
5423           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5424           CGF.EmitIgnoredExpr(ReductionOp);
5425           return;
5426         }
5427   CGF.EmitIgnoredExpr(ReductionOp);
5428 }
5429 
// Emits an internal function
//   void .omp.reduction.reduction_func(void *LHSArg, void *RHSArg)
// that applies each reduction operation element-wise, where LHSArg/RHSArg
// point to arrays of void* addresses of the individual reduction items
// (interleaved with VLA sizes for variably-modified items).
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the address stored in the corresponding
  // array slot. Idx tracks the array slot and may run ahead of I because a
  // variably-modified item occupies an extra slot holding its size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size was stored (as a pointer-sized integer) in the next slot;
      // bind it to the VLA's opaque size expression before emitting the type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Now emit the combiner for every reduction item; arrays get an
  // element-by-element loop, everything else a single combiner call.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5521 
5522 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5523                                                   const Expr *ReductionOp,
5524                                                   const Expr *PrivateRef,
5525                                                   const DeclRefExpr *LHS,
5526                                                   const DeclRefExpr *RHS) {
5527   if (PrivateRef->getType()->isArrayType()) {
5528     // Emit reduction for array section.
5529     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5530     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5531     EmitOMPAggregateReduction(
5532         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5533         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5534           emitReductionCombiner(CGF, ReductionOp);
5535         });
5536   } else {
5537     // Emit reduction for array subscript or single variable.
5538     emitReductionCombiner(CGF, ReductionOp);
5539   }
5540 }
5541 
// Emits the full lowering of an OpenMP 'reduction' clause: builds the list of
// reduction items, the reduce_func, and the __kmpc_reduce{_nowait} switch with
// its tree-reduce (case 1) and atomic (case 2) arms. With
// Options.SimpleReduction only the plain combiner sequence is emitted.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime involvement needed: emit one combiner per item and return.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  // Idx tracks the array slot; it runs ahead of I when a VLA item consumes an
  // extra slot for its size.
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Region body: run the plain (non-atomic) combiner for every item. The
  // CommonActionTy below appends the matching end-reduce runtime call.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose 'lhs = lhs <op> rhs' (or the conditional form used for
      // min/max) so it can be emitted as a simple atomic update.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path when the simple atomic form is not possible:
                // store the loaded value into a temporary that stands in for
                // the LHS variable, then evaluate the update expression.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5848 
5849 /// Generates unique name for artificial threadprivate variables.
5850 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5851 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5852                                       const Expr *Ref) {
5853   SmallString<256> Buffer;
5854   llvm::raw_svector_ostream Out(Buffer);
5855   const clang::DeclRefExpr *DE;
5856   const VarDecl *D = ::getBaseDecl(Ref, DE);
5857   if (!D)
5858     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5859   D = D->getCanonicalDecl();
5860   std::string Name = CGM.getOpenMPRuntime().getName(
5861       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5862   Out << Prefix << Name << "_"
5863       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5864   return std::string(Out.str());
5865 }
5866 
5867 /// Emits reduction initializer function:
5868 /// \code
5869 /// void @.red_init(void* %arg, void* %orig) {
5870 /// %0 = bitcast void* %arg to <type>*
5871 /// store <type> <init>, <type>* %0
5872 /// ret void
5873 /// }
5874 /// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified void*: the private copy and the
  // original item never alias.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %0 = load the private item's address from the first argument.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5930 
5931 /// Emits reduction combiner function:
5932 /// \code
5933 /// void @.red_comb(void* %arg0, void* %arg1) {
5934 /// %lhs = bitcast void* %arg0 to <type>*
5935 /// %rhs = bitcast void* %arg1 to <type>*
5936 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5937 /// store <type> %2, <type>* %lhs
5938 /// ret void
5939 /// }
5940 /// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // The combiner reads through the clause's LHS/RHS placeholder variables;
  // they are remapped to the function arguments below.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6008 
6009 /// Emits reduction finalizer function:
6010 /// \code
6011 /// void @.red_fini(void* %arg) {
6012 /// %0 = bitcast void* %arg to <type>*
6013 /// <destroy>(<type>* %0)
6014 /// ret void
6015 /// }
6016 /// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // If the N-th reduction item needs no cleanups (e.g. trivially destructible
  // type), no finalizer is emitted; the caller stores a null pointer instead.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // The finalizer takes a single void* argument: the private reduction item.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %0 = bitcast void* %arg to <type>*
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6057 
// Emits the task-reduction initialization: builds an array of
// kmp_taskred_input_t descriptors (one per reduction item, each with shared
// address, size, and init/fini/comb callbacks) and passes it to
// __kmpc_taskred_init or, for reductions with the task modifier, to
// __kmpc_taskred_modifier_init. Returns the value produced by that runtime
// call (used later as the reductions pointer), or nullptr if there is nothing
// to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to do without an insertion point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // The finalizer may be null for items without cleanups.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // A flags value of 1 requests delayed creation of the private copy (used
    // for VLAs/array sections, see the comment above).
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6186 
6187 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6188                                             SourceLocation Loc,
6189                                             bool IsWorksharingReduction) {
6190   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6191   // is_ws, int num, void *data);
6192   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6193   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6194                                                 CGM.IntTy, /*isSigned=*/true);
6195   llvm::Value *Args[] = {IdentTLoc, GTid,
6196                          llvm::ConstantInt::get(CGM.IntTy,
6197                                                 IsWorksharingReduction ? 1 : 0,
6198                                                 /*isSigned=*/true)};
6199   (void)CGF.EmitRuntimeCall(
6200       OMPBuilder.getOrCreateRuntimeFunction(
6201           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6202       Args);
6203 }
6204 
6205 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6206                                               SourceLocation Loc,
6207                                               ReductionCodeGen &RCG,
6208                                               unsigned N) {
6209   auto Sizes = RCG.getSizes(N);
6210   // Emit threadprivate global variable if the type is non-constant
6211   // (Sizes.second = nullptr).
6212   if (Sizes.second) {
6213     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6214                                                      /*isSigned=*/false);
6215     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6216         CGF, CGM.getContext().getSizeType(),
6217         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6218     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6219   }
6220 }
6221 
6222 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6223                                               SourceLocation Loc,
6224                                               llvm::Value *ReductionsPtr,
6225                                               LValue SharedLVal) {
6226   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6227   // *d);
6228   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6229                                                    CGM.IntTy,
6230                                                    /*isSigned=*/true),
6231                          ReductionsPtr,
6232                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6233                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6234   return Address(
6235       CGF.EmitRuntimeCall(
6236           OMPBuilder.getOrCreateRuntimeFunction(
6237               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6238           Args),
6239       SharedLVal.getAlignment());
6240 }
6241 
// Emits a taskwait: via the OpenMPIRBuilder when enabled and there are no
// dependences, otherwise via direct __kmpc_omp_wait_deps/__kmpc_omp_taskwait
// runtime calls.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    // Materialize the depend clause (if any) into an array of
    // kmp_depend_info_t and the element count.
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); // no noalias dependences
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      // Scope cleanups for the dependence array to this call.
      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // For untied task regions, emit the task switch point after the wait.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6292 
6293 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6294                                            OpenMPDirectiveKind InnerKind,
6295                                            const RegionCodeGenTy &CodeGen,
6296                                            bool HasCancel) {
6297   if (!CGF.HaveInsertPoint())
6298     return;
6299   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6300                                  InnerKind != OMPD_critical &&
6301                                      InnerKind != OMPD_master &&
6302                                      InnerKind != OMPD_masked);
6303   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6304 }
6305 
namespace {
/// Cancellation-region kinds, encoded as the kmp_int32 `cncl_kind` argument
/// passed to __kmpc_cancel / __kmpc_cancellationpoint below. The numeric
/// values are presumably fixed by the OpenMP runtime ABI — verify against the
/// runtime's kmp.h before changing them.
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation requested
  CancelParallel = 1,  // cancel parallel region
  CancelLoop = 2,      // cancel worksharing loop
  CancelSections = 3,  // cancel sections region
  CancelTaskgroup = 4  // cancel taskgroup region
};
} // anonymous namespace
6315 
6316 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6317   RTCancelKind CancelKind = CancelNoreq;
6318   if (CancelRegion == OMPD_parallel)
6319     CancelKind = CancelParallel;
6320   else if (CancelRegion == OMPD_for)
6321     CancelKind = CancelLoop;
6322   else if (CancelRegion == OMPD_sections)
6323     CancelKind = CancelSections;
6324   else {
6325     assert(CancelRegion == OMPD_taskgroup);
6326     CancelKind = CancelTaskgroup;
6327   }
6328   return CancelKind;
6329 }
6330 
// Emits a 'cancellation point' construct: asks the runtime whether a cancel is
// pending for the given region kind and, if so, branches out of the construct
// (through any active cleanups).
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero runtime result means cancellation was requested.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // Cancelled parallel regions pass through a cancellation barrier before
      // leaving the construct.
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6370 
// Emits a 'cancel' construct: calls __kmpc_cancel (optionally guarded by an
// 'if' clause condition) and, when the runtime accepts the cancel, branches
// out of the cancelled construct through any active cleanups.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The 'then' branch of the optional if clause: actually perform the
    // cancel and the exit branch.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero runtime result means the cancel request was activated.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // Cancelled parallel regions pass through a cancellation barrier before
      // leaving the construct.
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // With an if clause, the cancel is performed only when the condition is
      // true; the else branch is a no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6416 
6417 namespace {
6418 /// Cleanup action for uses_allocators support.
6419 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6420   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6421 
6422 public:
6423   OMPUsesAllocatorsActionTy(
6424       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6425       : Allocators(Allocators) {}
6426   void Enter(CodeGenFunction &CGF) override {
6427     if (!CGF.HaveInsertPoint())
6428       return;
6429     for (const auto &AllocatorData : Allocators) {
6430       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6431           CGF, AllocatorData.first, AllocatorData.second);
6432     }
6433   }
6434   void Exit(CodeGenFunction &CGF) override {
6435     if (!CGF.HaveInsertPoint())
6436       return;
6437     for (const auto &AllocatorData : Allocators) {
6438       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6439                                                         AllocatorData.first);
6440     }
6441   }
6442 };
6443 } // namespace
6444 
6445 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6446     const OMPExecutableDirective &D, StringRef ParentName,
6447     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6448     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6449   assert(!ParentName.empty() && "Invalid target region parent name!");
6450   HasEmittedTargetRegion = true;
6451   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6452   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6453     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6454       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6455       if (!D.AllocatorTraits)
6456         continue;
6457       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6458     }
6459   }
6460   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6461   CodeGen.setAction(UsesAllocatorAction);
6462   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6463                                    IsOffloadEntry, CodeGen);
6464 }
6465 
// Initializes a single allocator from a uses_allocators clause: emits a call
// to __kmpc_init_allocator with the traits array and stores the returned
// handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits is the constant element count of the traits array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Reinterpret the traits array address as void** and load it to obtain the
  // void* traits argument the runtime expects.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the allocator variable itself, then convert the runtime's void*
  // result to the variable's type and store it.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6500 
6501 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6502                                              const Expr *Allocator) {
6503   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6504   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6505   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6506   llvm::Value *AllocatorVal =
6507       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6508   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6509                                           CGF.getContext().VoidPtrTy,
6510                                           Allocator->getExprLoc());
6511   (void)CGF.EmitRuntimeCall(
6512       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6513                                             OMPRTL___kmpc_destroy_allocator),
6514       {ThreadId, AllocatorVal});
6515 }
6516 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured region into a function with the name built above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device side: the ID is the (bitcast) function address itself.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: the ID is a dedicated zero-initialized byte global.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6601 
6602 /// Checks if the expression is constant or does not have non-trivial function
6603 /// calls.
6604 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6605   // We can skip constant expressions.
6606   // We can skip expressions with trivial calls or simple expressions.
6607   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6608           !E->hasNonTrivialCall(Ctx)) &&
6609          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6610 }
6611 
// Descends through compound statements (ignoring captures/containers) looking
// for exactly one "meaningful" child statement. Returns that child, or
// nullptr when there are multiple meaningful children (or none).
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Trivial, side-effect-free expressions do not count as children.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable when every declaration in it is one of the
        // listed declaration kinds, or a variable that is global or unused.
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Recurse into the single child found, if it is itself a container.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6653 
6654 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6655     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6656     int32_t &DefaultVal) {
6657 
6658   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6659   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6660          "Expected target-based executable directive.");
6661   switch (DirectiveKind) {
6662   case OMPD_target: {
6663     const auto *CS = D.getInnermostCapturedStmt();
6664     const auto *Body =
6665         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6666     const Stmt *ChildStmt =
6667         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6668     if (const auto *NestedDir =
6669             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6670       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6671         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6672           const Expr *NumTeams =
6673               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6674           if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6675             if (auto Constant =
6676                     NumTeams->getIntegerConstantExpr(CGF.getContext()))
6677               DefaultVal = Constant->getExtValue();
6678           return NumTeams;
6679         }
6680         DefaultVal = 0;
6681         return nullptr;
6682       }
6683       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6684           isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6685         DefaultVal = 1;
6686         return nullptr;
6687       }
6688       DefaultVal = 1;
6689       return nullptr;
6690     }
6691     // A value of -1 is used to check if we need to emit no teams region
6692     DefaultVal = -1;
6693     return nullptr;
6694   }
6695   case OMPD_target_teams:
6696   case OMPD_target_teams_distribute:
6697   case OMPD_target_teams_distribute_simd:
6698   case OMPD_target_teams_distribute_parallel_for:
6699   case OMPD_target_teams_distribute_parallel_for_simd: {
6700     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6701       const Expr *NumTeams =
6702           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6703       if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6704         if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6705           DefaultVal = Constant->getExtValue();
6706       return NumTeams;
6707     }
6708     DefaultVal = 0;
6709     return nullptr;
6710   }
6711   case OMPD_target_parallel:
6712   case OMPD_target_parallel_for:
6713   case OMPD_target_parallel_for_simd:
6714   case OMPD_target_simd:
6715     DefaultVal = 1;
6716     return nullptr;
6717   case OMPD_parallel:
6718   case OMPD_for:
6719   case OMPD_parallel_for:
6720   case OMPD_parallel_master:
6721   case OMPD_parallel_sections:
6722   case OMPD_for_simd:
6723   case OMPD_parallel_for_simd:
6724   case OMPD_cancel:
6725   case OMPD_cancellation_point:
6726   case OMPD_ordered:
6727   case OMPD_threadprivate:
6728   case OMPD_allocate:
6729   case OMPD_task:
6730   case OMPD_simd:
6731   case OMPD_tile:
6732   case OMPD_unroll:
6733   case OMPD_sections:
6734   case OMPD_section:
6735   case OMPD_single:
6736   case OMPD_master:
6737   case OMPD_critical:
6738   case OMPD_taskyield:
6739   case OMPD_barrier:
6740   case OMPD_taskwait:
6741   case OMPD_taskgroup:
6742   case OMPD_atomic:
6743   case OMPD_flush:
6744   case OMPD_depobj:
6745   case OMPD_scan:
6746   case OMPD_teams:
6747   case OMPD_target_data:
6748   case OMPD_target_exit_data:
6749   case OMPD_target_enter_data:
6750   case OMPD_distribute:
6751   case OMPD_distribute_simd:
6752   case OMPD_distribute_parallel_for:
6753   case OMPD_distribute_parallel_for_simd:
6754   case OMPD_teams_distribute:
6755   case OMPD_teams_distribute_simd:
6756   case OMPD_teams_distribute_parallel_for:
6757   case OMPD_teams_distribute_parallel_for_simd:
6758   case OMPD_target_update:
6759   case OMPD_declare_simd:
6760   case OMPD_declare_variant:
6761   case OMPD_begin_declare_variant:
6762   case OMPD_end_declare_variant:
6763   case OMPD_declare_target:
6764   case OMPD_end_declare_target:
6765   case OMPD_declare_reduction:
6766   case OMPD_declare_mapper:
6767   case OMPD_taskloop:
6768   case OMPD_taskloop_simd:
6769   case OMPD_master_taskloop:
6770   case OMPD_master_taskloop_simd:
6771   case OMPD_parallel_master_taskloop:
6772   case OMPD_parallel_master_taskloop_simd:
6773   case OMPD_requires:
6774   case OMPD_metadirective:
6775   case OMPD_unknown:
6776     break;
6777   default:
6778     break;
6779   }
6780   llvm_unreachable("Unexpected directive kind.");
6781 }
6782 
6783 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6784     CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6785   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6786          "Clauses associated with the teams directive expected to be emitted "
6787          "only for the host!");
6788   CGBuilderTy &Bld = CGF.Builder;
6789   int32_t DefaultNT = -1;
6790   const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6791   if (NumTeams != nullptr) {
6792     OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6793 
6794     switch (DirectiveKind) {
6795     case OMPD_target: {
6796       const auto *CS = D.getInnermostCapturedStmt();
6797       CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6798       CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6799       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6800                                                   /*IgnoreResultAssign*/ true);
6801       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6802                              /*isSigned=*/true);
6803     }
6804     case OMPD_target_teams:
6805     case OMPD_target_teams_distribute:
6806     case OMPD_target_teams_distribute_simd:
6807     case OMPD_target_teams_distribute_parallel_for:
6808     case OMPD_target_teams_distribute_parallel_for_simd: {
6809       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6810       llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6811                                                   /*IgnoreResultAssign*/ true);
6812       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6813                              /*isSigned=*/true);
6814     }
6815     default:
6816       break;
6817     }
6818   } else if (DefaultNT == -1) {
6819     return nullptr;
6820   }
6821 
6822   return Bld.getInt32(DefaultNT);
6823 }
6824 
/// Compute the number of threads for an inner parallel region nested inside
/// the captured statement \p CS. \p DefaultThreadLimitVal (may be null) acts
/// as an upper bound on any num_threads value found. Returns null (or the
/// default) when no determination can be made from the nested construct.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the if clause that applies to 'parallel' (or is unqualified).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: serialized region, one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the clause's condition needs.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit any pre-init declarations the num_threads expression needs.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads by the default thread limit, if one was given.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd region uses a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6916 
6917 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6918     CodeGenFunction &CGF, const OMPExecutableDirective &D,
6919     int32_t &DefaultVal) {
6920   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6921   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6922          "Expected target-based executable directive.");
6923 
6924   switch (DirectiveKind) {
6925   case OMPD_target:
6926     // Teams have no clause thread_limit
6927     return nullptr;
6928   case OMPD_target_teams:
6929   case OMPD_target_teams_distribute:
6930     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6931       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6932       const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6933       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6934         if (auto Constant =
6935                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6936           DefaultVal = Constant->getExtValue();
6937       return ThreadLimit;
6938     }
6939     return nullptr;
6940   case OMPD_target_parallel:
6941   case OMPD_target_parallel_for:
6942   case OMPD_target_parallel_for_simd:
6943   case OMPD_target_teams_distribute_parallel_for:
6944   case OMPD_target_teams_distribute_parallel_for_simd: {
6945     Expr *ThreadLimit = nullptr;
6946     Expr *NumThreads = nullptr;
6947     if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6948       const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6949       ThreadLimit = ThreadLimitClause->getThreadLimit();
6950       if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6951         if (auto Constant =
6952                 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6953           DefaultVal = Constant->getExtValue();
6954     }
6955     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6956       const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6957       NumThreads = NumThreadsClause->getNumThreads();
6958       if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6959         if (auto Constant =
6960                 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6961           if (Constant->getExtValue() < DefaultVal) {
6962             DefaultVal = Constant->getExtValue();
6963             ThreadLimit = NumThreads;
6964           }
6965         }
6966       }
6967     }
6968     return ThreadLimit;
6969   }
6970   case OMPD_target_teams_distribute_simd:
6971   case OMPD_target_simd:
6972     DefaultVal = 1;
6973     return nullptr;
6974   case OMPD_parallel:
6975   case OMPD_for:
6976   case OMPD_parallel_for:
6977   case OMPD_parallel_master:
6978   case OMPD_parallel_sections:
6979   case OMPD_for_simd:
6980   case OMPD_parallel_for_simd:
6981   case OMPD_cancel:
6982   case OMPD_cancellation_point:
6983   case OMPD_ordered:
6984   case OMPD_threadprivate:
6985   case OMPD_allocate:
6986   case OMPD_task:
6987   case OMPD_simd:
6988   case OMPD_tile:
6989   case OMPD_unroll:
6990   case OMPD_sections:
6991   case OMPD_section:
6992   case OMPD_single:
6993   case OMPD_master:
6994   case OMPD_critical:
6995   case OMPD_taskyield:
6996   case OMPD_barrier:
6997   case OMPD_taskwait:
6998   case OMPD_taskgroup:
6999   case OMPD_atomic:
7000   case OMPD_flush:
7001   case OMPD_depobj:
7002   case OMPD_scan:
7003   case OMPD_teams:
7004   case OMPD_target_data:
7005   case OMPD_target_exit_data:
7006   case OMPD_target_enter_data:
7007   case OMPD_distribute:
7008   case OMPD_distribute_simd:
7009   case OMPD_distribute_parallel_for:
7010   case OMPD_distribute_parallel_for_simd:
7011   case OMPD_teams_distribute:
7012   case OMPD_teams_distribute_simd:
7013   case OMPD_teams_distribute_parallel_for:
7014   case OMPD_teams_distribute_parallel_for_simd:
7015   case OMPD_target_update:
7016   case OMPD_declare_simd:
7017   case OMPD_declare_variant:
7018   case OMPD_begin_declare_variant:
7019   case OMPD_end_declare_variant:
7020   case OMPD_declare_target:
7021   case OMPD_end_declare_target:
7022   case OMPD_declare_reduction:
7023   case OMPD_declare_mapper:
7024   case OMPD_taskloop:
7025   case OMPD_taskloop_simd:
7026   case OMPD_master_taskloop:
7027   case OMPD_master_taskloop_simd:
7028   case OMPD_parallel_master_taskloop:
7029   case OMPD_parallel_master_taskloop_simd:
7030   case OMPD_requires:
7031   case OMPD_unknown:
7032     break;
7033   default:
7034     break;
7035   }
7036   llvm_unreachable("Unsupported directive kind.");
7037 }
7038 
/// Emit an i32 number of threads for the given host-side target directive,
/// combining thread_limit/num_threads clauses with nested parallel, simd and
/// distribute constructs. A value of 0 is emitted when no limit is known.
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Look for a nested parallel region first; it determines the count.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A thread_limit clause on the nested directive bounds the count.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit any pre-init declarations the clause's expression needs.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested teams (non-distribute) directive, descend one more
      // level to find the construct that determines the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region uses a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // Look through a nested 'distribute' for an inner parallel region.
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Pick the if clause that applies to 'parallel' (or is unqualified).
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false condition: serialized region, one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Use min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7257 
7258 namespace {
7259 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7260 
7261 // Utility to handle information from clauses associated with a given
7262 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7263 // It provides a convenient interface to obtain the information and generate
7264 // code for that information.
7265 class MappableExprsHandler {
7266 public:
7267   /// Values for bit flags used to specify the mapping type for
7268   /// offloading.
7269   enum OpenMPOffloadMappingFlags : uint64_t {
7270     /// No flags
7271     OMP_MAP_NONE = 0x0,
7272     /// Allocate memory on the device and move data from host to device.
7273     OMP_MAP_TO = 0x01,
7274     /// Allocate memory on the device and move data from device to host.
7275     OMP_MAP_FROM = 0x02,
7276     /// Always perform the requested mapping action on the element, even
7277     /// if it was already mapped before.
7278     OMP_MAP_ALWAYS = 0x04,
7279     /// Delete the element from the device environment, ignoring the
7280     /// current reference count associated with the element.
7281     OMP_MAP_DELETE = 0x08,
7282     /// The element being mapped is a pointer-pointee pair; both the
7283     /// pointer and the pointee should be mapped.
7284     OMP_MAP_PTR_AND_OBJ = 0x10,
7285     /// This flags signals that the base address of an entry should be
7286     /// passed to the target kernel as an argument.
7287     OMP_MAP_TARGET_PARAM = 0x20,
7288     /// Signal that the runtime library has to return the device pointer
7289     /// in the current position for the data being mapped. Used when we have the
7290     /// use_device_ptr or use_device_addr clause.
7291     OMP_MAP_RETURN_PARAM = 0x40,
7292     /// This flag signals that the reference being passed is a pointer to
7293     /// private data.
7294     OMP_MAP_PRIVATE = 0x80,
7295     /// Pass the element to the device by value.
7296     OMP_MAP_LITERAL = 0x100,
7297     /// Implicit map
7298     OMP_MAP_IMPLICIT = 0x200,
7299     /// Close is a hint to the runtime to allocate memory close to
7300     /// the target device.
7301     OMP_MAP_CLOSE = 0x400,
7302     /// 0x800 is reserved for compatibility with XLC.
7303     /// Produce a runtime error if the data is not already allocated.
7304     OMP_MAP_PRESENT = 0x1000,
7305     // Increment and decrement a separate reference counter so that the data
7306     // cannot be unmapped within the associated region.  Thus, this flag is
7307     // intended to be used on 'target' and 'target data' directives because they
7308     // are inherently structured.  It is not intended to be used on 'target
7309     // enter data' and 'target exit data' directives because they are inherently
7310     // dynamic.
7311     // This is an OpenMP extension for the sake of OpenACC support.
7312     OMP_MAP_OMPX_HOLD = 0x2000,
7313     /// Signal that the runtime library should use args as an array of
7314     /// descriptor_dim pointers and use args_size as dims. Used when we have
7315     /// non-contiguous list items in target update directive
7316     OMP_MAP_NON_CONTIG = 0x100000000000,
7317     /// The 16 MSBs of the flags indicate whether the entry is member of some
7318     /// struct/class.
7319     OMP_MAP_MEMBER_OF = 0xffff000000000000,
7320     LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7321   };
7322 
7323   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7324   static unsigned getFlagMemberOffset() {
7325     unsigned Offset = 0;
7326     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7327          Remain = Remain >> 1)
7328       Offset++;
7329     return Offset;
7330   }
7331 
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    /// Return the declaration being mapped (may be null).
    const ValueDecl *getMapDecl() const { return MapDecl; }
    /// Return the original map-clause expression, or null if none was given.
    const Expr *getMapExpr() const { return MapExpr; }
  };
7348 
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereference yields the underlying base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    /// Return the declaration associated with this device pointer (may be
    /// null).
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    /// Associate a declaration with this device pointer.
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
7365 
7366   using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7367   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7368   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7369   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7370   using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7371   using MapDimArrayTy = SmallVector<uint64_t, 4>;
7372   using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7373 
  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    /// Per-entry data describing a non-contiguous mapping (dimensions plus
    /// per-dimension offsets, counts and strides).
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    /// Note that \a CurInfo is only read (not cleared), and the
    /// NonContigInfo.IsNonContiguous flag is deliberately not merged here —
    /// only the per-entry arrays are concatenated.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                 CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                    CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                   CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                    CurInfo.NonContigInfo.Strides.end());
    }
  };
7412 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Combined map info collected for the struct's members.
    MapCombinedInfoTy PreliminaryMapData;
    /// Field index and address of the lowest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest mapped element of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the struct itself.
    Address Base = Address::invalid();
    /// Lower-bound address of the mapped range (assumed; confirm at use
    /// sites).
    Address LB = Address::invalid();
    /// True if the mapped range comes from an array section.
    bool IsArraySection = false;
    /// True if the complete record has been mapped (no partial range needed).
    bool HasCompleteRecord = false;
  };
7428 
7429 private:
  /// Mapping information gathered for one expression component list: the
  /// components themselves plus the map type, modifiers, and flags taken from
  /// the clause they originate from.
  struct MapInfo {
    /// The expression components this mapping refers to.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type from the originating clause (to/from/tofrom/alloc/...).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers from the clause (always, close, present, ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// Motion modifiers from the clause (e.g. present).
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// True if the device pointer for this entry has to be returned (see
    /// DeferredDevicePtrEntryTy for the deferred struct-member case).
    bool ReturnDevicePointer = false;
    /// True if the mapping was generated implicitly rather than user-written.
    bool IsImplicit = false;
    /// User-defined mapper attached to the clause, if any.
    const ValueDecl *Mapper = nullptr;
    /// Expression this mapping was generated for, if any.
    const Expr *VarRef = nullptr;
    /// True when the entry was produced for a device address rather than a
    /// device pointer.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7456 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression the deferred entry was created for.
    const Expr *IE = nullptr;
    /// Declaration the use_device_ptr/use_device_addr clause refers to.
    const ValueDecl *VD = nullptr;
    /// True when the entry came from use_device_addr (vs use_device_ptr).
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7469 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between lambda declarations and the map clause that maps them.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7493 
7494   llvm::Value *getExprTypeSize(const Expr *E) const {
7495     QualType ExprTy = E->getType().getCanonicalType();
7496 
7497     // Calculate the size for array shaping expression.
7498     if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7499       llvm::Value *Size =
7500           CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7501       for (const Expr *SE : OAE->getDimensions()) {
7502         llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7503         Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7504                                       CGF.getContext().getSizeType(),
7505                                       SE->getExprLoc());
7506         Size = CGF.Builder.CreateNUWMul(Size, Sz);
7507       }
7508       return Size;
7509     }
7510 
7511     // Reference types are ignored for mapping purposes.
7512     if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7513       ExprTy = RefTy->getPointeeType().getCanonicalType();
7514 
7515     // Given that an array section is considered a built-in type, we need to
7516     // do the calculation based on the length of the section instead of relying
7517     // on CGF.getTypeSize(E->getType()).
7518     if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7519       QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7520                             OAE->getBase()->IgnoreParenImpCasts())
7521                             .getCanonicalType();
7522 
7523       // If there is no length associated with the expression and lower bound is
7524       // not specified too, that means we are using the whole length of the
7525       // base.
7526       if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7527           !OAE->getLowerBound())
7528         return CGF.getTypeSize(BaseTy);
7529 
7530       llvm::Value *ElemSize;
7531       if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7532         ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7533       } else {
7534         const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7535         assert(ATy && "Expecting array type if not a pointer type.");
7536         ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7537       }
7538 
7539       // If we don't have a length at this point, that is because we have an
7540       // array section with a single element.
7541       if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7542         return ElemSize;
7543 
7544       if (const Expr *LenExpr = OAE->getLength()) {
7545         llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7546         LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7547                                              CGF.getContext().getSizeType(),
7548                                              LenExpr->getExprLoc());
7549         return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7550       }
7551       assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7552              OAE->getLowerBound() && "expected array_section[lb:].");
7553       // Size = sizetype - lb * elemtype;
7554       llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7555       llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7556       LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7557                                        CGF.getContext().getSizeType(),
7558                                        OAE->getLowerBound()->getExprLoc());
7559       LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7560       llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7561       llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7562       LengthVal = CGF.Builder.CreateSelect(
7563           Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7564       return LengthVal;
7565     }
7566     return CGF.getTypeSize(ExprTy);
7567   }
7568 
7569   /// Return the corresponding bits for a given map clause modifier. Add
7570   /// a flag marking the map as a pointer if requested. Add a flag marking the
7571   /// map as the first one of a series of maps that relate to the same map
7572   /// expression.
7573   OpenMPOffloadMappingFlags getMapTypeBits(
7574       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7575       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7576       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7577     OpenMPOffloadMappingFlags Bits =
7578         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7579     switch (MapType) {
7580     case OMPC_MAP_alloc:
7581     case OMPC_MAP_release:
7582       // alloc and release is the default behavior in the runtime library,  i.e.
7583       // if we don't pass any bits alloc/release that is what the runtime is
7584       // going to do. Therefore, we don't need to signal anything for these two
7585       // type modifiers.
7586       break;
7587     case OMPC_MAP_to:
7588       Bits |= OMP_MAP_TO;
7589       break;
7590     case OMPC_MAP_from:
7591       Bits |= OMP_MAP_FROM;
7592       break;
7593     case OMPC_MAP_tofrom:
7594       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7595       break;
7596     case OMPC_MAP_delete:
7597       Bits |= OMP_MAP_DELETE;
7598       break;
7599     case OMPC_MAP_unknown:
7600       llvm_unreachable("Unexpected map type!");
7601     }
7602     if (AddPtrFlag)
7603       Bits |= OMP_MAP_PTR_AND_OBJ;
7604     if (AddIsTargetParamFlag)
7605       Bits |= OMP_MAP_TARGET_PARAM;
7606     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7607       Bits |= OMP_MAP_ALWAYS;
7608     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7609       Bits |= OMP_MAP_CLOSE;
7610     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7611         llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7612       Bits |= OMP_MAP_PRESENT;
7613     if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7614       Bits |= OMP_MAP_OMPX_HOLD;
7615     if (IsNonContiguous)
7616       Bits |= OMP_MAP_NON_CONTIG;
7617     return Bits;
7618   }
7619 
7620   /// Return true if the provided expression is a final array section. A
7621   /// final array section, is one whose length can't be proved to be one.
7622   bool isFinalArraySectionExpression(const Expr *E) const {
7623     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7624 
7625     // It is not an array section and therefore not a unity-size one.
7626     if (!OASE)
7627       return false;
7628 
7629     // An array section with no colon always refer to a single element.
7630     if (OASE->getColonLocFirst().isInvalid())
7631       return false;
7632 
7633     const Expr *Length = OASE->getLength();
7634 
7635     // If we don't have a length we have to check if the array has size 1
7636     // for this dimension. Also, we should always expect a length if the
7637     // base type is pointer.
7638     if (!Length) {
7639       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7640                              OASE->getBase()->IgnoreParenImpCasts())
7641                              .getCanonicalType();
7642       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7643         return ATy->getSize().getSExtValue() != 1;
7644       // If we don't have a constant dimension length, we have to consider
7645       // the current section as having any size, so it is not necessarily
7646       // unitary. If it happen to be unity size, that's user fault.
7647       return true;
7648     }
7649 
7650     // Check if the length evaluates to 1.
7651     Expr::EvalResult Result;
7652     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7653       return true; // Can have more that size 1.
7654 
7655     llvm::APSInt ConstLength = Result.Val.getInt();
7656     return ConstLength.getSExtValue() != 1;
7657   }
7658 
7659   /// Generate the base pointers, section pointers, sizes, map type bits, and
7660   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7661   /// map type, map or motion modifiers, and expression components.
7662   /// \a IsFirstComponent should be set to true if the provided set of
7663   /// components is the first associated with a capture.
7664   void generateInfoForComponentList(
7665       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7666       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7667       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7668       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7669       bool IsFirstComponentList, bool IsImplicit,
7670       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7671       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7672       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7673           OverlappedElements = llvm::None) const {
7674     // The following summarizes what has to be generated for each map and the
7675     // types below. The generated information is expressed in this order:
7676     // base pointer, section pointer, size, flags
7677     // (to add to the ones that come from the map type and modifier).
7678     //
7679     // double d;
7680     // int i[100];
7681     // float *p;
7682     //
7683     // struct S1 {
7684     //   int i;
7685     //   float f[50];
7686     // }
7687     // struct S2 {
7688     //   int i;
7689     //   float f[50];
7690     //   S1 s;
7691     //   double *p;
7692     //   struct S2 *ps;
7693     //   int &ref;
7694     // }
7695     // S2 s;
7696     // S2 *ps;
7697     //
7698     // map(d)
7699     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7700     //
7701     // map(i)
7702     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7703     //
7704     // map(i[1:23])
7705     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7706     //
7707     // map(p)
7708     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7709     //
7710     // map(p[1:24])
7711     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7712     // in unified shared memory mode or for local pointers
7713     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7714     //
7715     // map(s)
7716     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7717     //
7718     // map(s.i)
7719     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7720     //
7721     // map(s.s.f)
7722     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7723     //
7724     // map(s.p)
7725     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7726     //
7727     // map(to: s.p[:22])
7728     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7729     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7730     // &(s.p), &(s.p[0]), 22*sizeof(double),
7731     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7732     // (*) alloc space for struct members, only this is a target parameter
7733     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7734     //      optimizes this entry out, same in the examples below)
7735     // (***) map the pointee (map: to)
7736     //
7737     // map(to: s.ref)
7738     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7739     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7740     // (*) alloc space for struct members, only this is a target parameter
7741     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7742     //      optimizes this entry out, same in the examples below)
7743     // (***) map the pointee (map: to)
7744     //
7745     // map(s.ps)
7746     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7747     //
7748     // map(from: s.ps->s.i)
7749     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7750     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7751     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7752     //
7753     // map(to: s.ps->ps)
7754     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7755     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7756     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7757     //
7758     // map(s.ps->ps->ps)
7759     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7760     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7761     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7762     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7763     //
7764     // map(to: s.ps->ps->s.f[:22])
7765     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7766     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7767     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7768     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7769     //
7770     // map(ps)
7771     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7772     //
7773     // map(ps->i)
7774     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7775     //
7776     // map(ps->s.f)
7777     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7778     //
7779     // map(from: ps->p)
7780     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7781     //
7782     // map(to: ps->p[:22])
7783     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7784     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7785     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7786     //
7787     // map(ps->ps)
7788     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7789     //
7790     // map(from: ps->ps->s.i)
7791     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7792     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7793     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7794     //
7795     // map(from: ps->ps->ps)
7796     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7797     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7798     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7799     //
7800     // map(ps->ps->ps->ps)
7801     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7802     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7803     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7804     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7805     //
7806     // map(to: ps->ps->ps->s.f[:22])
7807     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7808     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7809     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7810     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7811     //
7812     // map(to: s.f[:22]) map(from: s.p[:33])
7813     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7814     //     sizeof(double*) (**), TARGET_PARAM
7815     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7816     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7817     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7818     // (*) allocate contiguous space needed to fit all mapped members even if
7819     //     we allocate space for members not mapped (in this example,
7820     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7821     //     them as well because they fall between &s.f[0] and &s.p)
7822     //
7823     // map(from: s.f[:22]) map(to: ps->p[:33])
7824     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7825     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7826     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7827     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7828     // (*) the struct this entry pertains to is the 2nd element in the list of
7829     //     arguments, hence MEMBER_OF(2)
7830     //
7831     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7832     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7833     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7834     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7835     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7836     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7837     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7838     // (*) the struct this entry pertains to is the 4th element in the list
7839     //     of arguments, hence MEMBER_OF(4)
7840 
7841     // Track if the map information being generated is the first for a capture.
7842     bool IsCaptureFirstInfo = IsFirstComponentList;
7843     // When the variable is on a declare target link or in a to clause with
7844     // unified memory, a reference is needed to hold the host/device address
7845     // of the variable.
7846     bool RequiresReference = false;
7847 
7848     // Scan the components from the base to the complete expression.
7849     auto CI = Components.rbegin();
7850     auto CE = Components.rend();
7851     auto I = CI;
7852 
7853     // Track if the map information being generated is the first for a list of
7854     // components.
7855     bool IsExpressionFirstInfo = true;
7856     bool FirstPointerInComplexData = false;
7857     Address BP = Address::invalid();
7858     const Expr *AssocExpr = I->getAssociatedExpression();
7859     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7860     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7861     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7862 
7863     if (isa<MemberExpr>(AssocExpr)) {
7864       // The base is the 'this' pointer. The content of the pointer is going
7865       // to be the base of the field being mapped.
7866       BP = CGF.LoadCXXThisAddress();
7867     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7868                (OASE &&
7869                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7870       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7871     } else if (OAShE &&
7872                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7873       BP = Address(
7874           CGF.EmitScalarExpr(OAShE->getBase()),
7875           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7876     } else {
7877       // The base is the reference to the variable.
7878       // BP = &Var.
7879       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7880       if (const auto *VD =
7881               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7882         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7883                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7884           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7885               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7886                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7887             RequiresReference = true;
7888             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7889           }
7890         }
7891       }
7892 
7893       // If the variable is a pointer and is being dereferenced (i.e. is not
7894       // the last component), the base has to be the pointer itself, not its
7895       // reference. References are ignored for mapping purposes.
7896       QualType Ty =
7897           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7898       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7899         // No need to generate individual map information for the pointer, it
7900         // can be associated with the combined storage if shared memory mode is
7901         // active or the base declaration is not global variable.
7902         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7903         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7904             !VD || VD->hasLocalStorage())
7905           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7906         else
7907           FirstPointerInComplexData = true;
7908         ++I;
7909       }
7910     }
7911 
7912     // Track whether a component of the list should be marked as MEMBER_OF some
7913     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7914     // in a component list should be marked as MEMBER_OF, all subsequent entries
7915     // do not belong to the base struct. E.g.
7916     // struct S2 s;
7917     // s.ps->ps->ps->f[:]
7918     //   (1) (2) (3) (4)
7919     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7920     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7921     // is the pointee of ps(2) which is not member of struct s, so it should not
7922     // be marked as such (it is still PTR_AND_OBJ).
7923     // The variable is initialized to false so that PTR_AND_OBJ entries which
7924     // are not struct members are not considered (e.g. array of pointers to
7925     // data).
7926     bool ShouldBeMemberOf = false;
7927 
7928     // Variable keeping track of whether or not we have encountered a component
7929     // in the component list which is a member expression. Useful when we have a
7930     // pointer or a final array section, in which case it is the previous
7931     // component in the list which tells us whether we have a member expression.
7932     // E.g. X.f[:]
7933     // While processing the final array section "[:]" it is "f" which tells us
7934     // whether we are dealing with a member of a declared struct.
7935     const MemberExpr *EncounteredME = nullptr;
7936 
7937     // Track for the total number of dimension. Start from one for the dummy
7938     // dimension.
7939     uint64_t DimSize = 1;
7940 
7941     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7942     bool IsPrevMemberReference = false;
7943 
7944     for (; I != CE; ++I) {
7945       // If the current component is member of a struct (parent struct) mark it.
7946       if (!EncounteredME) {
7947         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7948         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7949         // as MEMBER_OF the parent struct.
7950         if (EncounteredME) {
7951           ShouldBeMemberOf = true;
7952           // Do not emit as complex pointer if this is actually not array-like
7953           // expression.
7954           if (FirstPointerInComplexData) {
7955             QualType Ty = std::prev(I)
7956                               ->getAssociatedDeclaration()
7957                               ->getType()
7958                               .getNonReferenceType();
7959             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7960             FirstPointerInComplexData = false;
7961           }
7962         }
7963       }
7964 
7965       auto Next = std::next(I);
7966 
7967       // We need to generate the addresses and sizes if this is the last
7968       // component, if the component is a pointer or if it is an array section
7969       // whose length can't be proved to be one. If this is a pointer, it
7970       // becomes the base address for the following components.
7971 
7972       // A final array section, is one whose length can't be proved to be one.
7973       // If the map item is non-contiguous then we don't treat any array section
7974       // as final array section.
7975       bool IsFinalArraySection =
7976           !IsNonContiguous &&
7977           isFinalArraySectionExpression(I->getAssociatedExpression());
7978 
7979       // If we have a declaration for the mapping use that, otherwise use
7980       // the base declaration of the map clause.
7981       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7982                                      ? I->getAssociatedDeclaration()
7983                                      : BaseDecl;
7984       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7985                                                : MapExpr;
7986 
7987       // Get information on whether the element is a pointer. Have to do a
7988       // special treatment for array sections given that they are built-in
7989       // types.
7990       const auto *OASE =
7991           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7992       const auto *OAShE =
7993           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7994       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7995       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7996       bool IsPointer =
7997           OAShE ||
7998           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7999                        .getCanonicalType()
8000                        ->isAnyPointerType()) ||
8001           I->getAssociatedExpression()->getType()->isAnyPointerType();
8002       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8003                                MapDecl &&
8004                                MapDecl->getType()->isLValueReferenceType();
8005       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
8006 
8007       if (OASE)
8008         ++DimSize;
8009 
8010       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8011           IsFinalArraySection) {
8012         // If this is not the last component, we expect the pointer to be
8013         // associated with an array expression or member expression.
8014         assert((Next == CE ||
8015                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8016                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8017                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8018                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8019                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8020                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8021                "Unexpected expression");
8022 
8023         Address LB = Address::invalid();
8024         Address LowestElem = Address::invalid();
8025         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8026                                        const MemberExpr *E) {
8027           const Expr *BaseExpr = E->getBase();
8028           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
8029           // scalar.
8030           LValue BaseLV;
8031           if (E->isArrow()) {
8032             LValueBaseInfo BaseInfo;
8033             TBAAAccessInfo TBAAInfo;
8034             Address Addr =
8035                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8036             QualType PtrTy = BaseExpr->getType()->getPointeeType();
8037             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8038           } else {
8039             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8040           }
8041           return BaseLV;
8042         };
8043         if (OAShE) {
8044           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8045                                     CGF.getContext().getTypeAlignInChars(
8046                                         OAShE->getBase()->getType()));
8047         } else if (IsMemberReference) {
8048           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8049           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8050           LowestElem = CGF.EmitLValueForFieldInitialization(
8051                               BaseLVal, cast<FieldDecl>(MapDecl))
8052                            .getAddress(CGF);
8053           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8054                    .getAddress(CGF);
8055         } else {
8056           LowestElem = LB =
8057               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8058                   .getAddress(CGF);
8059         }
8060 
8061         // If this component is a pointer inside the base struct then we don't
8062         // need to create any entry for it - it will be combined with the object
8063         // it is pointing to into a single PTR_AND_OBJ entry.
8064         bool IsMemberPointerOrAddr =
8065             EncounteredME &&
8066             (((IsPointer || ForDeviceAddr) &&
8067               I->getAssociatedExpression() == EncounteredME) ||
8068              (IsPrevMemberReference && !IsPointer) ||
8069              (IsMemberReference && Next != CE &&
8070               !Next->getAssociatedExpression()->getType()->isPointerType()));
8071         if (!OverlappedElements.empty() && Next == CE) {
8072           // Handle base element with the info for overlapped elements.
8073           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8074           assert(!IsPointer &&
8075                  "Unexpected base element with the pointer type.");
8076           // Mark the whole struct as the struct that requires allocation on the
8077           // device.
8078           PartialStruct.LowestElem = {0, LowestElem};
8079           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8080               I->getAssociatedExpression()->getType());
8081           Address HB = CGF.Builder.CreateConstGEP(
8082               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8083                                                               CGF.VoidPtrTy),
8084               TypeSize.getQuantity() - 1);
8085           PartialStruct.HighestElem = {
8086               std::numeric_limits<decltype(
8087                   PartialStruct.HighestElem.first)>::max(),
8088               HB};
8089           PartialStruct.Base = BP;
8090           PartialStruct.LB = LB;
8091           assert(
8092               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8093               "Overlapped elements must be used only once for the variable.");
8094           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8095           // Emit data for non-overlapped data.
8096           OpenMPOffloadMappingFlags Flags =
8097               OMP_MAP_MEMBER_OF |
8098               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8099                              /*AddPtrFlag=*/false,
8100                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8101           llvm::Value *Size = nullptr;
8102           // Do bitcopy of all non-overlapped structure elements.
8103           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8104                    Component : OverlappedElements) {
8105             Address ComponentLB = Address::invalid();
8106             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8107                  Component) {
8108               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8109                 const auto *FD = dyn_cast<FieldDecl>(VD);
8110                 if (FD && FD->getType()->isLValueReferenceType()) {
8111                   const auto *ME =
8112                       cast<MemberExpr>(MC.getAssociatedExpression());
8113                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8114                   ComponentLB =
8115                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8116                           .getAddress(CGF);
8117                 } else {
8118                   ComponentLB =
8119                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8120                           .getAddress(CGF);
8121                 }
8122                 Size = CGF.Builder.CreatePtrDiff(
8123                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8124                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8125                 break;
8126               }
8127             }
8128             assert(Size && "Failed to determine structure size");
8129             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8130             CombinedInfo.BasePointers.push_back(BP.getPointer());
8131             CombinedInfo.Pointers.push_back(LB.getPointer());
8132             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8133                 Size, CGF.Int64Ty, /*isSigned=*/true));
8134             CombinedInfo.Types.push_back(Flags);
8135             CombinedInfo.Mappers.push_back(nullptr);
8136             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8137                                                                       : 1);
8138             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8139           }
8140           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8141           CombinedInfo.BasePointers.push_back(BP.getPointer());
8142           CombinedInfo.Pointers.push_back(LB.getPointer());
8143           Size = CGF.Builder.CreatePtrDiff(
8144               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8145               CGF.EmitCastToVoidPtr(LB.getPointer()));
8146           CombinedInfo.Sizes.push_back(
8147               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8148           CombinedInfo.Types.push_back(Flags);
8149           CombinedInfo.Mappers.push_back(nullptr);
8150           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8151                                                                     : 1);
8152           break;
8153         }
8154         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8155         if (!IsMemberPointerOrAddr ||
8156             (Next == CE && MapType != OMPC_MAP_unknown)) {
8157           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8158           CombinedInfo.BasePointers.push_back(BP.getPointer());
8159           CombinedInfo.Pointers.push_back(LB.getPointer());
8160           CombinedInfo.Sizes.push_back(
8161               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8162           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8163                                                                     : 1);
8164 
8165           // If Mapper is valid, the last component inherits the mapper.
8166           bool HasMapper = Mapper && Next == CE;
8167           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8168 
8169           // We need to add a pointer flag for each map that comes from the
8170           // same expression except for the first one. We also need to signal
8171           // this map is the first one that relates with the current capture
8172           // (there is a set of entries for each capture).
8173           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8174               MapType, MapModifiers, MotionModifiers, IsImplicit,
8175               !IsExpressionFirstInfo || RequiresReference ||
8176                   FirstPointerInComplexData || IsMemberReference,
8177               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8178 
8179           if (!IsExpressionFirstInfo || IsMemberReference) {
8180             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8181             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8182             if (IsPointer || (IsMemberReference && Next != CE))
8183               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8184                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8185 
8186             if (ShouldBeMemberOf) {
8187               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8188               // should be later updated with the correct value of MEMBER_OF.
8189               Flags |= OMP_MAP_MEMBER_OF;
8190               // From now on, all subsequent PTR_AND_OBJ entries should not be
8191               // marked as MEMBER_OF.
8192               ShouldBeMemberOf = false;
8193             }
8194           }
8195 
8196           CombinedInfo.Types.push_back(Flags);
8197         }
8198 
8199         // If we have encountered a member expression so far, keep track of the
8200         // mapped member. If the parent is "*this", then the value declaration
8201         // is nullptr.
8202         if (EncounteredME) {
8203           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8204           unsigned FieldIndex = FD->getFieldIndex();
8205 
8206           // Update info about the lowest and highest elements for this struct
8207           if (!PartialStruct.Base.isValid()) {
8208             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8209             if (IsFinalArraySection) {
8210               Address HB =
8211                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8212                       .getAddress(CGF);
8213               PartialStruct.HighestElem = {FieldIndex, HB};
8214             } else {
8215               PartialStruct.HighestElem = {FieldIndex, LowestElem};
8216             }
8217             PartialStruct.Base = BP;
8218             PartialStruct.LB = BP;
8219           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8220             PartialStruct.LowestElem = {FieldIndex, LowestElem};
8221           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8222             PartialStruct.HighestElem = {FieldIndex, LowestElem};
8223           }
8224         }
8225 
8226         // Need to emit combined struct for array sections.
8227         if (IsFinalArraySection || IsNonContiguous)
8228           PartialStruct.IsArraySection = true;
8229 
8230         // If we have a final array section, we are done with this expression.
8231         if (IsFinalArraySection)
8232           break;
8233 
8234         // The pointer becomes the base for the next element.
8235         if (Next != CE)
8236           BP = IsMemberReference ? LowestElem : LB;
8237 
8238         IsExpressionFirstInfo = false;
8239         IsCaptureFirstInfo = false;
8240         FirstPointerInComplexData = false;
8241         IsPrevMemberReference = IsMemberReference;
8242       } else if (FirstPointerInComplexData) {
8243         QualType Ty = Components.rbegin()
8244                           ->getAssociatedDeclaration()
8245                           ->getType()
8246                           .getNonReferenceType();
8247         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8248         FirstPointerInComplexData = false;
8249       }
8250     }
8251     // If ran into the whole component - allocate the space for the whole
8252     // record.
8253     if (!EncounteredME)
8254       PartialStruct.HasCompleteRecord = true;
8255 
8256     if (!IsNonContiguous)
8257       return;
8258 
8259     const ASTContext &Context = CGF.getContext();
8260 
8261     // For supporting stride in array section, we need to initialize the first
8262     // dimension size as 1, first offset as 0, and first count as 1
8263     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8264     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8265     MapValuesArrayTy CurStrides;
8266     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8267     uint64_t ElementTypeSize;
8268 
8269     // Collect Size information for each dimension and get the element size as
8270     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8271     // should be [10, 10] and the first stride is 4 btyes.
8272     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8273          Components) {
8274       const Expr *AssocExpr = Component.getAssociatedExpression();
8275       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8276 
8277       if (!OASE)
8278         continue;
8279 
8280       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8281       auto *CAT = Context.getAsConstantArrayType(Ty);
8282       auto *VAT = Context.getAsVariableArrayType(Ty);
8283 
8284       // We need all the dimension size except for the last dimension.
8285       assert((VAT || CAT || &Component == &*Components.begin()) &&
8286              "Should be either ConstantArray or VariableArray if not the "
8287              "first Component");
8288 
8289       // Get element size if CurStrides is empty.
8290       if (CurStrides.empty()) {
8291         const Type *ElementType = nullptr;
8292         if (CAT)
8293           ElementType = CAT->getElementType().getTypePtr();
8294         else if (VAT)
8295           ElementType = VAT->getElementType().getTypePtr();
8296         else
8297           assert(&Component == &*Components.begin() &&
8298                  "Only expect pointer (non CAT or VAT) when this is the "
8299                  "first Component");
8300         // If ElementType is null, then it means the base is a pointer
8301         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8302         // for next iteration.
8303         if (ElementType) {
8304           // For the case that having pointer as base, we need to remove one
8305           // level of indirection.
8306           if (&Component != &*Components.begin())
8307             ElementType = ElementType->getPointeeOrArrayElementType();
8308           ElementTypeSize =
8309               Context.getTypeSizeInChars(ElementType).getQuantity();
8310           CurStrides.push_back(
8311               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8312         }
8313       }
8314       // Get dimension value except for the last dimension since we don't need
8315       // it.
8316       if (DimSizes.size() < Components.size() - 1) {
8317         if (CAT)
8318           DimSizes.push_back(llvm::ConstantInt::get(
8319               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8320         else if (VAT)
8321           DimSizes.push_back(CGF.Builder.CreateIntCast(
8322               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8323               /*IsSigned=*/false));
8324       }
8325     }
8326 
8327     // Skip the dummy dimension since we have already have its information.
8328     auto DI = DimSizes.begin() + 1;
8329     // Product of dimension.
8330     llvm::Value *DimProd =
8331         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8332 
8333     // Collect info for non-contiguous. Notice that offset, count, and stride
8334     // are only meaningful for array-section, so we insert a null for anything
8335     // other than array-section.
8336     // Also, the size of offset, count, and stride are not the same as
8337     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8338     // count, and stride are the same as the number of non-contiguous
8339     // declaration in target update to/from clause.
8340     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8341          Components) {
8342       const Expr *AssocExpr = Component.getAssociatedExpression();
8343 
8344       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8345         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8346             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8347             /*isSigned=*/false);
8348         CurOffsets.push_back(Offset);
8349         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8350         CurStrides.push_back(CurStrides.back());
8351         continue;
8352       }
8353 
8354       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8355 
8356       if (!OASE)
8357         continue;
8358 
8359       // Offset
8360       const Expr *OffsetExpr = OASE->getLowerBound();
8361       llvm::Value *Offset = nullptr;
8362       if (!OffsetExpr) {
8363         // If offset is absent, then we just set it to zero.
8364         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8365       } else {
8366         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8367                                            CGF.Int64Ty,
8368                                            /*isSigned=*/false);
8369       }
8370       CurOffsets.push_back(Offset);
8371 
8372       // Count
8373       const Expr *CountExpr = OASE->getLength();
8374       llvm::Value *Count = nullptr;
8375       if (!CountExpr) {
8376         // In Clang, once a high dimension is an array section, we construct all
8377         // the lower dimension as array section, however, for case like
8378         // arr[0:2][2], Clang construct the inner dimension as an array section
8379         // but it actually is not in an array section form according to spec.
8380         if (!OASE->getColonLocFirst().isValid() &&
8381             !OASE->getColonLocSecond().isValid()) {
8382           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8383         } else {
8384           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8385           // When the length is absent it defaults to ⌈(size −
8386           // lower-bound)/stride⌉, where size is the size of the array
8387           // dimension.
8388           const Expr *StrideExpr = OASE->getStride();
8389           llvm::Value *Stride =
8390               StrideExpr
8391                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8392                                               CGF.Int64Ty, /*isSigned=*/false)
8393                   : nullptr;
8394           if (Stride)
8395             Count = CGF.Builder.CreateUDiv(
8396                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8397           else
8398             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8399         }
8400       } else {
8401         Count = CGF.EmitScalarExpr(CountExpr);
8402       }
8403       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8404       CurCounts.push_back(Count);
8405 
8406       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8407       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8408       //              Offset      Count     Stride
8409       //    D0          0           1         4    (int)    <- dummy dimension
8410       //    D1          0           2         8    (2 * (1) * 4)
8411       //    D2          1           2         20   (1 * (1 * 5) * 4)
8412       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8413       const Expr *StrideExpr = OASE->getStride();
8414       llvm::Value *Stride =
8415           StrideExpr
8416               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8417                                           CGF.Int64Ty, /*isSigned=*/false)
8418               : nullptr;
8419       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8420       if (Stride)
8421         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8422       else
8423         CurStrides.push_back(DimProd);
8424       if (DI != DimSizes.end())
8425         ++DI;
8426     }
8427 
8428     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8429     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8430     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8431   }
8432 
8433   /// Return the adjusted map modifiers if the declaration a capture refers to
8434   /// appears in a first-private clause. This is expected to be used only with
8435   /// directives that start with 'target'.
8436   MappableExprsHandler::OpenMPOffloadMappingFlags
8437   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8438     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8439 
8440     // A first private variable captured by reference will use only the
8441     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8442     // declaration is known as first-private in this handler.
8443     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8444       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8445         return MappableExprsHandler::OMP_MAP_TO |
8446                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8447       return MappableExprsHandler::OMP_MAP_PRIVATE |
8448              MappableExprsHandler::OMP_MAP_TO;
8449     }
8450     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8451     if (I != LambdasMap.end())
8452       // for map(to: lambda): using user specified map type.
8453       return getMapTypeBits(
8454           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8455           /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8456           /*AddPtrFlag=*/false,
8457           /*AddIsTargetParamFlag=*/false,
8458           /*isNonContiguous=*/false);
8459     return MappableExprsHandler::OMP_MAP_TO |
8460            MappableExprsHandler::OMP_MAP_FROM;
8461   }
8462 
8463   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8464     // Rotate by getFlagMemberOffset() bits.
8465     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8466                                                   << getFlagMemberOffset());
8467   }
8468 
8469   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8470                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8471     // If the entry is PTR_AND_OBJ but has not been marked with the special
8472     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8473     // marked as MEMBER_OF.
8474     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8475         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8476       return;
8477 
8478     // Reset the placeholder value to prepare the flag for the assignment of the
8479     // proper MEMBER_OF value.
8480     Flags &= ~OMP_MAP_MEMBER_OF;
8481     Flags |= MemberOfFlag;
8482   }
8483 
8484   void getPlainLayout(const CXXRecordDecl *RD,
8485                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8486                       bool AsBase) const {
8487     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8488 
8489     llvm::StructType *St =
8490         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8491 
8492     unsigned NumElements = St->getNumElements();
8493     llvm::SmallVector<
8494         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8495         RecordLayout(NumElements);
8496 
8497     // Fill bases.
8498     for (const auto &I : RD->bases()) {
8499       if (I.isVirtual())
8500         continue;
8501       const auto *Base = I.getType()->getAsCXXRecordDecl();
8502       // Ignore empty bases.
8503       if (Base->isEmpty() || CGF.getContext()
8504                                  .getASTRecordLayout(Base)
8505                                  .getNonVirtualSize()
8506                                  .isZero())
8507         continue;
8508 
8509       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8510       RecordLayout[FieldIndex] = Base;
8511     }
8512     // Fill in virtual bases.
8513     for (const auto &I : RD->vbases()) {
8514       const auto *Base = I.getType()->getAsCXXRecordDecl();
8515       // Ignore empty bases.
8516       if (Base->isEmpty())
8517         continue;
8518       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8519       if (RecordLayout[FieldIndex])
8520         continue;
8521       RecordLayout[FieldIndex] = Base;
8522     }
8523     // Fill in all the fields.
8524     assert(!RD->isUnion() && "Unexpected union.");
8525     for (const auto *Field : RD->fields()) {
8526       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8527       // will fill in later.)
8528       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8529         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8530         RecordLayout[FieldIndex] = Field;
8531       }
8532     }
8533     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8534              &Data : RecordLayout) {
8535       if (Data.isNull())
8536         continue;
8537       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8538         getPlainLayout(Base, Layout, /*AsBase=*/true);
8539       else
8540         Layout.push_back(Data.get<const FieldDecl *>());
8541     }
8542   }
8543 
8544   /// Generate all the base pointers, section pointers, sizes, map types, and
8545   /// mappers for the extracted mappable expressions (all included in \a
8546   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8547   /// pair of the relevant declaration and index where it occurs is appended to
8548   /// the device pointers info array.
8549   void generateAllInfoForClauses(
8550       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8551       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8552           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8553     // We have to process the component lists that relate with the same
8554     // declaration in a single chunk so that we can generate the map flags
8555     // correctly. Therefore, we organize all lists in a map.
8556     enum MapKind { Present, Allocs, Other, Total };
8557     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8558                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8559         Info;
8560 
8561     // Helper function to fill the information map for the different supported
8562     // clauses.
8563     auto &&InfoGen =
8564         [&Info, &SkipVarSet](
8565             const ValueDecl *D, MapKind Kind,
8566             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8567             OpenMPMapClauseKind MapType,
8568             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8569             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8570             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8571             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8572           if (SkipVarSet.contains(D))
8573             return;
8574           auto It = Info.find(D);
8575           if (It == Info.end())
8576             It = Info
8577                      .insert(std::make_pair(
8578                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8579                      .first;
8580           It->second[Kind].emplace_back(
8581               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8582               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8583         };
8584 
8585     for (const auto *Cl : Clauses) {
8586       const auto *C = dyn_cast<OMPMapClause>(Cl);
8587       if (!C)
8588         continue;
8589       MapKind Kind = Other;
8590       if (llvm::is_contained(C->getMapTypeModifiers(),
8591                              OMPC_MAP_MODIFIER_present))
8592         Kind = Present;
8593       else if (C->getMapType() == OMPC_MAP_alloc)
8594         Kind = Allocs;
8595       const auto *EI = C->getVarRefs().begin();
8596       for (const auto L : C->component_lists()) {
8597         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8598         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8599                 C->getMapTypeModifiers(), llvm::None,
8600                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8601                 E);
8602         ++EI;
8603       }
8604     }
8605     for (const auto *Cl : Clauses) {
8606       const auto *C = dyn_cast<OMPToClause>(Cl);
8607       if (!C)
8608         continue;
8609       MapKind Kind = Other;
8610       if (llvm::is_contained(C->getMotionModifiers(),
8611                              OMPC_MOTION_MODIFIER_present))
8612         Kind = Present;
8613       const auto *EI = C->getVarRefs().begin();
8614       for (const auto L : C->component_lists()) {
8615         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8616                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8617                 C->isImplicit(), std::get<2>(L), *EI);
8618         ++EI;
8619       }
8620     }
8621     for (const auto *Cl : Clauses) {
8622       const auto *C = dyn_cast<OMPFromClause>(Cl);
8623       if (!C)
8624         continue;
8625       MapKind Kind = Other;
8626       if (llvm::is_contained(C->getMotionModifiers(),
8627                              OMPC_MOTION_MODIFIER_present))
8628         Kind = Present;
8629       const auto *EI = C->getVarRefs().begin();
8630       for (const auto L : C->component_lists()) {
8631         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8632                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8633                 C->isImplicit(), std::get<2>(L), *EI);
8634         ++EI;
8635       }
8636     }
8637 
8638     // Look at the use_device_ptr clause information and mark the existing map
8639     // entries as such. If there is no map information for an entry in the
8640     // use_device_ptr list, we create one with map type 'alloc' and zero size
8641     // section. It is the user fault if that was not mapped before. If there is
8642     // no map information and the pointer is a struct member, then we defer the
8643     // emission of that entry until the whole struct has been processed.
8644     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8645                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8646         DeferredInfo;
8647     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8648 
8649     for (const auto *Cl : Clauses) {
8650       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8651       if (!C)
8652         continue;
8653       for (const auto L : C->component_lists()) {
8654         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8655             std::get<1>(L);
8656         assert(!Components.empty() &&
8657                "Not expecting empty list of components!");
8658         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8659         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8660         const Expr *IE = Components.back().getAssociatedExpression();
8661         // If the first component is a member expression, we have to look into
8662         // 'this', which maps to null in the map of map information. Otherwise
8663         // look directly for the information.
8664         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8665 
8666         // We potentially have map information for this declaration already.
8667         // Look for the first set of components that refer to it.
8668         if (It != Info.end()) {
8669           bool Found = false;
8670           for (auto &Data : It->second) {
8671             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8672               return MI.Components.back().getAssociatedDeclaration() == VD;
8673             });
8674             // If we found a map entry, signal that the pointer has to be
8675             // returned and move on to the next declaration. Exclude cases where
8676             // the base pointer is mapped as array subscript, array section or
8677             // array shaping. The base address is passed as a pointer to base in
8678             // this case and cannot be used as a base for use_device_ptr list
8679             // item.
8680             if (CI != Data.end()) {
8681               auto PrevCI = std::next(CI->Components.rbegin());
8682               const auto *VarD = dyn_cast<VarDecl>(VD);
8683               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8684                   isa<MemberExpr>(IE) ||
8685                   !VD->getType().getNonReferenceType()->isPointerType() ||
8686                   PrevCI == CI->Components.rend() ||
8687                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8688                   VarD->hasLocalStorage()) {
8689                 CI->ReturnDevicePointer = true;
8690                 Found = true;
8691                 break;
8692               }
8693             }
8694           }
8695           if (Found)
8696             continue;
8697         }
8698 
8699         // We didn't find any match in our map information - generate a zero
8700         // size array section - if the pointer is a struct member we defer this
8701         // action until the whole struct has been processed.
8702         if (isa<MemberExpr>(IE)) {
8703           // Insert the pointer into Info to be processed by
8704           // generateInfoForComponentList. Because it is a member pointer
8705           // without a pointee, no entry will be generated for it, therefore
8706           // we need to generate one after the whole struct has been processed.
8707           // Nonetheless, generateInfoForComponentList must be called to take
8708           // the pointer into account for the calculation of the range of the
8709           // partial struct.
8710           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8711                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8712                   nullptr);
8713           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8714         } else {
8715           llvm::Value *Ptr =
8716               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8717           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8718           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8719           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8720           UseDevicePtrCombinedInfo.Sizes.push_back(
8721               llvm::Constant::getNullValue(CGF.Int64Ty));
8722           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8723           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8724         }
8725       }
8726     }
8727 
8728     // Look at the use_device_addr clause information and mark the existing map
8729     // entries as such. If there is no map information for an entry in the
8730     // use_device_addr list, we create one with map type 'alloc' and zero size
8731     // section. It is the user fault if that was not mapped before. If there is
8732     // no map information and the pointer is a struct member, then we defer the
8733     // emission of that entry until the whole struct has been processed.
8734     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8735     for (const auto *Cl : Clauses) {
8736       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8737       if (!C)
8738         continue;
8739       for (const auto L : C->component_lists()) {
8740         assert(!std::get<1>(L).empty() &&
8741                "Not expecting empty list of components!");
8742         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8743         if (!Processed.insert(VD).second)
8744           continue;
8745         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8746         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8747         // If the first component is a member expression, we have to look into
8748         // 'this', which maps to null in the map of map information. Otherwise
8749         // look directly for the information.
8750         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8751 
8752         // We potentially have map information for this declaration already.
8753         // Look for the first set of components that refer to it.
8754         if (It != Info.end()) {
8755           bool Found = false;
8756           for (auto &Data : It->second) {
8757             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8758               return MI.Components.back().getAssociatedDeclaration() == VD;
8759             });
8760             // If we found a map entry, signal that the pointer has to be
8761             // returned and move on to the next declaration.
8762             if (CI != Data.end()) {
8763               CI->ReturnDevicePointer = true;
8764               Found = true;
8765               break;
8766             }
8767           }
8768           if (Found)
8769             continue;
8770         }
8771 
8772         // We didn't find any match in our map information - generate a zero
8773         // size array section - if the pointer is a struct member we defer this
8774         // action until the whole struct has been processed.
8775         if (isa<MemberExpr>(IE)) {
8776           // Insert the pointer into Info to be processed by
8777           // generateInfoForComponentList. Because it is a member pointer
8778           // without a pointee, no entry will be generated for it, therefore
8779           // we need to generate one after the whole struct has been processed.
8780           // Nonetheless, generateInfoForComponentList must be called to take
8781           // the pointer into account for the calculation of the range of the
8782           // partial struct.
8783           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8784                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8785                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8786           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8787         } else {
8788           llvm::Value *Ptr;
8789           if (IE->isGLValue())
8790             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8791           else
8792             Ptr = CGF.EmitScalarExpr(IE);
8793           CombinedInfo.Exprs.push_back(VD);
8794           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8795           CombinedInfo.Pointers.push_back(Ptr);
8796           CombinedInfo.Sizes.push_back(
8797               llvm::Constant::getNullValue(CGF.Int64Ty));
8798           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8799           CombinedInfo.Mappers.push_back(nullptr);
8800         }
8801       }
8802     }
8803 
8804     for (const auto &Data : Info) {
8805       StructRangeInfoTy PartialStruct;
8806       // Temporary generated information.
8807       MapCombinedInfoTy CurInfo;
8808       const Decl *D = Data.first;
8809       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8810       for (const auto &M : Data.second) {
8811         for (const MapInfo &L : M) {
8812           assert(!L.Components.empty() &&
8813                  "Not expecting declaration with no component lists.");
8814 
8815           // Remember the current base pointer index.
8816           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8817           CurInfo.NonContigInfo.IsNonContiguous =
8818               L.Components.back().isNonContiguous();
8819           generateInfoForComponentList(
8820               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8821               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8822               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8823 
8824           // If this entry relates with a device pointer, set the relevant
8825           // declaration and add the 'return pointer' flag.
8826           if (L.ReturnDevicePointer) {
8827             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8828                    "Unexpected number of mapped base pointers.");
8829 
8830             const ValueDecl *RelevantVD =
8831                 L.Components.back().getAssociatedDeclaration();
8832             assert(RelevantVD &&
8833                    "No relevant declaration related with device pointer??");
8834 
8835             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8836                 RelevantVD);
8837             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8838           }
8839         }
8840       }
8841 
8842       // Append any pending zero-length pointers which are struct members and
8843       // used with use_device_ptr or use_device_addr.
8844       auto CI = DeferredInfo.find(Data.first);
8845       if (CI != DeferredInfo.end()) {
8846         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8847           llvm::Value *BasePtr;
8848           llvm::Value *Ptr;
8849           if (L.ForDeviceAddr) {
8850             if (L.IE->isGLValue())
8851               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8852             else
8853               Ptr = this->CGF.EmitScalarExpr(L.IE);
8854             BasePtr = Ptr;
8855             // Entry is RETURN_PARAM. Also, set the placeholder value
8856             // MEMBER_OF=FFFF so that the entry is later updated with the
8857             // correct value of MEMBER_OF.
8858             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8859           } else {
8860             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8861             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8862                                              L.IE->getExprLoc());
8863             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8864             // placeholder value MEMBER_OF=FFFF so that the entry is later
8865             // updated with the correct value of MEMBER_OF.
8866             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8867                                     OMP_MAP_MEMBER_OF);
8868           }
8869           CurInfo.Exprs.push_back(L.VD);
8870           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8871           CurInfo.Pointers.push_back(Ptr);
8872           CurInfo.Sizes.push_back(
8873               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8874           CurInfo.Mappers.push_back(nullptr);
8875         }
8876       }
8877       // If there is an entry in PartialStruct it means we have a struct with
8878       // individual members mapped. Emit an extra combined entry.
8879       if (PartialStruct.Base.isValid()) {
8880         CurInfo.NonContigInfo.Dims.push_back(0);
8881         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8882       }
8883 
8884       // We need to append the results of this capture to what we already
8885       // have.
8886       CombinedInfo.append(CurInfo);
8887     }
8888     // Append data for use_device_ptr clauses.
8889     CombinedInfo.append(UseDevicePtrCombinedInfo);
8890   }
8891 
8892 public:
8893   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8894       : CurDir(&Dir), CGF(CGF) {
8895     // Extract firstprivate clause information.
8896     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8897       for (const auto *D : C->varlists())
8898         FirstPrivateDecls.try_emplace(
8899             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8900     // Extract implicit firstprivates from uses_allocators clauses.
8901     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8902       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8903         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8904         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8905           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8906                                         /*Implicit=*/true);
8907         else if (const auto *VD = dyn_cast<VarDecl>(
8908                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8909                          ->getDecl()))
8910           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8911       }
8912     }
8913     // Extract device pointer clause information.
8914     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8915       for (auto L : C->component_lists())
8916         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8917     // Extract map information.
8918     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8919       if (C->getMapType() != OMPC_MAP_to)
8920         continue;
8921       for (auto L : C->component_lists()) {
8922         const ValueDecl *VD = std::get<0>(L);
8923         const auto *RD = VD ? VD->getType()
8924                                   .getCanonicalType()
8925                                   .getNonReferenceType()
8926                                   ->getAsCXXRecordDecl()
8927                             : nullptr;
8928         if (RD && RD->isLambda())
8929           LambdasMap.try_emplace(std::get<0>(L), C);
8930       }
8931     }
8932   }
8933 
  /// Constructor for the declare mapper directive. Only the directive and the
  /// CodeGenFunction are recorded; no clause pre-processing is done here
  /// (firstprivate/is_device_ptr/lambda maps apply to executable directives).
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8937 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// The combined entry spans from the lowest to (one past) the highest mapped
  /// element of the struct; all current entries in \a CurTypes are then tagged
  /// as MEMBER_OF this new entry.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is not already MEMBER_OF-tagged and is not an array
    // section needs no combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // When the whole record is mapped, use LB for both bounds so the size
    // computed below covers exactly one element of LB's type (presumably the
    // complete record — confirm against StructRangeInfoTy's invariants).
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    // Cast both bounds to void* so the pointer difference is in bytes.
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement.  Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
9009 
9010   /// Generate all the base pointers, section pointers, sizes, map types, and
9011   /// mappers for the extracted mappable expressions (all included in \a
9012   /// CombinedInfo). Also, for each item that relates with a device pointer, a
9013   /// pair of the relevant declaration and index where it occurs is appended to
9014   /// the device pointers info array.
9015   void generateAllInfo(
9016       MapCombinedInfoTy &CombinedInfo,
9017       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9018           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9019     assert(CurDir.is<const OMPExecutableDirective *>() &&
9020            "Expect a executable directive");
9021     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9022     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9023   }
9024 
9025   /// Generate all the base pointers, section pointers, sizes, map types, and
9026   /// mappers for the extracted map clauses of user-defined mapper (all included
9027   /// in \a CombinedInfo).
9028   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9029     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9030            "Expect a declare mapper directive");
9031     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9032     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9033   }
9034 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \p VD is a lambda object, each by-reference capture (and a captured
  /// 'this', if any) gets its own PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT
  /// entry in \p CombinedInfo, and the mapping from capture-field address to
  /// lambda address is recorded in \p LambdaPointers so the MEMBER_OF index
  /// can be fixed up later (see adjustMemberOfForLambdaCaptures).
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda objects need this treatment.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // A captured 'this' is emitted as a pointer-sized PTR_AND_OBJ entry.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // Note: shadows the outer VD parameter (the lambda object).
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers are of interest.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointer captured by value: record the loaded pointer value with a
        // zero-size entry (the pointee mapping, if any, comes from elsewhere).
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
9101 
9102   /// Set correct indices for lambdas captures.
9103   void adjustMemberOfForLambdaCaptures(
9104       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9105       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9106       MapFlagsArrayTy &Types) const {
9107     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9108       // Set correct member_of idx for all implicit lambda captures.
9109       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9110                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9111         continue;
9112       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9113       assert(BasePtr && "Unable to find base lambda address.");
9114       int TgtIdx = -1;
9115       for (unsigned J = I; J > 0; --J) {
9116         unsigned Idx = J - 1;
9117         if (Pointers[Idx] != BasePtr)
9118           continue;
9119         TgtIdx = Idx;
9120         break;
9121       }
9122       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9123       // All other current entries will be MEMBER_OF the combined entry
9124       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9125       // 0xFFFF in the MEMBER_OF field).
9126       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9127       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9128     }
9129   }
9130 
9131   /// Generate the base pointers, section pointers, sizes, map types, and
9132   /// mappers associated to a given capture (all included in \a CombinedInfo).
9133   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9134                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9135                               StructRangeInfoTy &PartialStruct) const {
9136     assert(!Cap->capturesVariableArrayType() &&
9137            "Not expecting to generate map info for a variable array type!");
9138 
9139     // We need to know when we generating information for the first component
9140     const ValueDecl *VD = Cap->capturesThis()
9141                               ? nullptr
9142                               : Cap->getCapturedVar()->getCanonicalDecl();
9143 
9144     // for map(to: lambda): skip here, processing it in
9145     // generateDefaultMapInfo
9146     if (LambdasMap.count(VD))
9147       return;
9148 
9149     // If this declaration appears in a is_device_ptr clause we just have to
9150     // pass the pointer by value. If it is a reference to a declaration, we just
9151     // pass its value.
9152     if (DevPointersMap.count(VD)) {
9153       CombinedInfo.Exprs.push_back(VD);
9154       CombinedInfo.BasePointers.emplace_back(Arg, VD);
9155       CombinedInfo.Pointers.push_back(Arg);
9156       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9157           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9158           /*isSigned=*/true));
9159       CombinedInfo.Types.push_back(
9160           (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9161           OMP_MAP_TARGET_PARAM);
9162       CombinedInfo.Mappers.push_back(nullptr);
9163       return;
9164     }
9165 
9166     using MapData =
9167         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9168                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9169                    const ValueDecl *, const Expr *>;
9170     SmallVector<MapData, 4> DeclComponentLists;
9171     assert(CurDir.is<const OMPExecutableDirective *>() &&
9172            "Expect a executable directive");
9173     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9174     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9175       const auto *EI = C->getVarRefs().begin();
9176       for (const auto L : C->decl_component_lists(VD)) {
9177         const ValueDecl *VDecl, *Mapper;
9178         // The Expression is not correct if the mapping is implicit
9179         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9180         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9181         std::tie(VDecl, Components, Mapper) = L;
9182         assert(VDecl == VD && "We got information for the wrong declaration??");
9183         assert(!Components.empty() &&
9184                "Not expecting declaration with no component lists.");
9185         DeclComponentLists.emplace_back(Components, C->getMapType(),
9186                                         C->getMapTypeModifiers(),
9187                                         C->isImplicit(), Mapper, E);
9188         ++EI;
9189       }
9190     }
9191     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9192                                              const MapData &RHS) {
9193       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9194       OpenMPMapClauseKind MapType = std::get<1>(RHS);
9195       bool HasPresent =
9196           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9197       bool HasAllocs = MapType == OMPC_MAP_alloc;
9198       MapModifiers = std::get<2>(RHS);
9199       MapType = std::get<1>(LHS);
9200       bool HasPresentR =
9201           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9202       bool HasAllocsR = MapType == OMPC_MAP_alloc;
9203       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9204     });
9205 
9206     // Find overlapping elements (including the offset from the base element).
9207     llvm::SmallDenseMap<
9208         const MapData *,
9209         llvm::SmallVector<
9210             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9211         4>
9212         OverlappedData;
9213     size_t Count = 0;
9214     for (const MapData &L : DeclComponentLists) {
9215       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9216       OpenMPMapClauseKind MapType;
9217       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9218       bool IsImplicit;
9219       const ValueDecl *Mapper;
9220       const Expr *VarRef;
9221       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9222           L;
9223       ++Count;
9224       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9225         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9226         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9227                  VarRef) = L1;
9228         auto CI = Components.rbegin();
9229         auto CE = Components.rend();
9230         auto SI = Components1.rbegin();
9231         auto SE = Components1.rend();
9232         for (; CI != CE && SI != SE; ++CI, ++SI) {
9233           if (CI->getAssociatedExpression()->getStmtClass() !=
9234               SI->getAssociatedExpression()->getStmtClass())
9235             break;
9236           // Are we dealing with different variables/fields?
9237           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9238             break;
9239         }
9240         // Found overlapping if, at least for one component, reached the head
9241         // of the components list.
9242         if (CI == CE || SI == SE) {
9243           // Ignore it if it is the same component.
9244           if (CI == CE && SI == SE)
9245             continue;
9246           const auto It = (SI == SE) ? CI : SI;
9247           // If one component is a pointer and another one is a kind of
9248           // dereference of this pointer (array subscript, section, dereference,
9249           // etc.), it is not an overlapping.
9250           // Same, if one component is a base and another component is a
9251           // dereferenced pointer memberexpr with the same base.
9252           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9253               (std::prev(It)->getAssociatedDeclaration() &&
9254                std::prev(It)
9255                    ->getAssociatedDeclaration()
9256                    ->getType()
9257                    ->isPointerType()) ||
9258               (It->getAssociatedDeclaration() &&
9259                It->getAssociatedDeclaration()->getType()->isPointerType() &&
9260                std::next(It) != CE && std::next(It) != SE))
9261             continue;
9262           const MapData &BaseData = CI == CE ? L : L1;
9263           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9264               SI == SE ? Components : Components1;
9265           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9266           OverlappedElements.getSecond().push_back(SubData);
9267         }
9268       }
9269     }
9270     // Sort the overlapped elements for each item.
9271     llvm::SmallVector<const FieldDecl *, 4> Layout;
9272     if (!OverlappedData.empty()) {
9273       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9274       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9275       while (BaseType != OrigType) {
9276         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9277         OrigType = BaseType->getPointeeOrArrayElementType();
9278       }
9279 
9280       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9281         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9282       else {
9283         const auto *RD = BaseType->getAsRecordDecl();
9284         Layout.append(RD->field_begin(), RD->field_end());
9285       }
9286     }
9287     for (auto &Pair : OverlappedData) {
9288       llvm::stable_sort(
9289           Pair.getSecond(),
9290           [&Layout](
9291               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9292               OMPClauseMappableExprCommon::MappableExprComponentListRef
9293                   Second) {
9294             auto CI = First.rbegin();
9295             auto CE = First.rend();
9296             auto SI = Second.rbegin();
9297             auto SE = Second.rend();
9298             for (; CI != CE && SI != SE; ++CI, ++SI) {
9299               if (CI->getAssociatedExpression()->getStmtClass() !=
9300                   SI->getAssociatedExpression()->getStmtClass())
9301                 break;
9302               // Are we dealing with different variables/fields?
9303               if (CI->getAssociatedDeclaration() !=
9304                   SI->getAssociatedDeclaration())
9305                 break;
9306             }
9307 
9308             // Lists contain the same elements.
9309             if (CI == CE && SI == SE)
9310               return false;
9311 
9312             // List with less elements is less than list with more elements.
9313             if (CI == CE || SI == SE)
9314               return CI == CE;
9315 
9316             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9317             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9318             if (FD1->getParent() == FD2->getParent())
9319               return FD1->getFieldIndex() < FD2->getFieldIndex();
9320             const auto *It =
9321                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9322                   return FD == FD1 || FD == FD2;
9323                 });
9324             return *It == FD1;
9325           });
9326     }
9327 
9328     // Associated with a capture, because the mapping flags depend on it.
9329     // Go through all of the elements with the overlapped elements.
9330     bool IsFirstComponentList = true;
9331     for (const auto &Pair : OverlappedData) {
9332       const MapData &L = *Pair.getFirst();
9333       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9334       OpenMPMapClauseKind MapType;
9335       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9336       bool IsImplicit;
9337       const ValueDecl *Mapper;
9338       const Expr *VarRef;
9339       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9340           L;
9341       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9342           OverlappedComponents = Pair.getSecond();
9343       generateInfoForComponentList(
9344           MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9345           PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9346           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9347       IsFirstComponentList = false;
9348     }
9349     // Go through other elements without overlapped elements.
9350     for (const MapData &L : DeclComponentLists) {
9351       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9352       OpenMPMapClauseKind MapType;
9353       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9354       bool IsImplicit;
9355       const ValueDecl *Mapper;
9356       const Expr *VarRef;
9357       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9358           L;
9359       auto It = OverlappedData.find(&L);
9360       if (It == OverlappedData.end())
9361         generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9362                                      Components, CombinedInfo, PartialStruct,
9363                                      IsFirstComponentList, IsImplicit, Mapper,
9364                                      /*ForDeviceAddr=*/false, VD, VarRef);
9365       IsFirstComponentList = false;
9366     }
9367   }
9368 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  /// Appends exactly one entry to each of CombinedInfo's parallel arrays
  /// (Exprs, BasePointers, Pointers, Sizes, Types, Mappers); the entry is
  /// always flagged as a target parameter and, when applicable, as implicit.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Capturing `this`: map the pointee object with tofrom semantics and
      // size of the pointed-to record.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate clause on the variable overrides the implicitness
      // recorded for it earlier.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      // Captured by reference: the record field has reference type; map the
      // referenced element.
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer: load the pointer value through the reference
        // so the runtime receives the pointee address.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9441 };
9442 } // anonymous namespace
9443 
/// Emit per-dimension descriptor arrays (offset/count/stride) for
/// non-contiguous map entries and store their addresses into the offloading
/// pointers array, replacing the corresponding pointer slots.
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  // Field indices within descriptor_dim, in declaration order.
  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    // Allocate a local array of descriptor_dim with one element per
    // dimension of this entry.
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // The recorded Offsets/Counts/Strides are stored innermost-first, so
      // write them into the descriptor array in reverse.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    // L only advances for entries that actually received a descriptor.
    ++L;
  }
}
9511 
9512 // Try to extract the base declaration from a `this->x` expression if possible.
9513 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9514   if (!E)
9515     return nullptr;
9516 
9517   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9518     if (const MemberExpr *ME =
9519             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9520       return ME->getMemberDecl();
9521   return nullptr;
9522 }
9523 
9524 /// Emit a string constant containing the names of the values mapped to the
9525 /// offloading runtime library.
9526 llvm::Constant *
9527 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9528                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9529 
9530   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9531     return OMPBuilder.getOrCreateDefaultSrcLocStr();
9532 
9533   SourceLocation Loc;
9534   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9535     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9536       Loc = VD->getLocation();
9537     else
9538       Loc = MapExprs.getMapExpr()->getExprLoc();
9539   } else {
9540     Loc = MapExprs.getMapDecl()->getLocation();
9541   }
9542 
9543   std::string ExprName = "";
9544   if (MapExprs.getMapExpr()) {
9545     PrintingPolicy P(CGF.getContext().getLangOpts());
9546     llvm::raw_string_ostream OS(ExprName);
9547     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9548     OS.flush();
9549   } else {
9550     ExprName = MapExprs.getMapDecl()->getNameAsString();
9551   }
9552 
9553   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9554   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName.c_str(),
9555                                          PLoc.getLine(), PLoc.getColumn());
9556 }
9557 
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Populates Info with the base-pointers, pointers, sizes, map-types,
/// map-names and mappers arrays built from CombinedInfo, then fills the
/// runtime-populated arrays element by element. When \p IsNonContiguous is
/// set and non-contiguous offsets were recorded, also emits the
/// per-dimension descriptors.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers, pointers and mappers are always stack temporaries that
    // get filled below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // Sizes must be computed at runtime: emit a stack temporary to fill in
      // the per-pointer loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          // For non-contiguous entries the "size" slot carries the dimension
          // count instead of a byte size.
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // Build one source-location/name string per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Fill base pointers, pointers, (runtime) sizes and mappers, one slot per
    // mapped entry.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record the slot address for use_device_ptr/addr style captures.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        // Store each size, widened to i64, into the runtime sizes array.
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9732 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// When true, emit the end-of-region map-types array (if one was created)
  /// instead of the begin-of-region one.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  // Implicit conversion from bool is intentional so callers can pass the
  // flag directly.
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace
9741 
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers.  If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// All six arguments are written through the output references; when there
/// are no pointers, each is a typed null constant.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array to a pointer to its first element.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For an end call, prefer the dedicated end map-types array if one was
    // generated (present modifiers stripped).
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // Nothing mapped: pass typed null pointers for every argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9802 
/// Check for inner distribute directive.
///
/// Looks through the innermost captured statement of \p D for a nested
/// OpenMP directive:
///  - for OMPD_target: returns a directly nested distribute directive, or a
///    distribute directive nested one level inside a nested `teams`;
///  - for OMPD_target_teams: returns a directly nested distribute directive;
///  - for the other target forms listed: returns nullptr.
/// Any other outer directive kind is unexpected here and asserts.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        // `target` + nested `teams`: look one level deeper for a distribute.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    // All remaining directive kinds either already combine distribute or are
    // not valid outer directives for this query.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9912 
9913 /// Emit the user-defined mapper function. The code generation follows the
9914 /// pattern in the example below.
9915 /// \code
9916 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9917 ///                                           void *base, void *begin,
9918 ///                                           int64_t size, int64_t type,
9919 ///                                           void *name = nullptr) {
9920 ///   // Allocate space for an array section first or add a base/begin for
9921 ///   // pointer dereference.
9922 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9923 ///       !maptype.IsDelete)
9924 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9925 ///                                 size*sizeof(Ty), clearToFromMember(type));
9926 ///   // Map members.
9927 ///   for (unsigned i = 0; i < size; i++) {
9928 ///     // For each component specified by this mapper:
9929 ///     for (auto c : begin[i]->all_components) {
9930 ///       if (c.hasMapper())
9931 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9932 ///                       c.arg_type, c.arg_name);
9933 ///       else
9934 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9935 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9936 ///                                     c.arg_name);
9937 ///     }
9938 ///   }
9939 ///   // Delete the array section.
9940 ///   if (size > 1 && maptype.IsDelete)
9941 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9942 ///                                 size*sizeof(Ty), clearToFromMember(type));
9943 /// }
9944 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once; bail out if a function
  // for it has already been cached in UDMMap.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared in the 'declare mapper' directive; it is privatized
  // below so map clauses inside the mapper see the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the pseudocode in the comment above: (handle, base, begin, size, type,
  // name).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name encodes the mangled mapped type plus the mapper's own
  // name, so distinct mappers for the same type do not collide.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Drop 'optnone' so the mapper body can be optimized even in -O0 builds.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
      PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI over the current element pointer: PtrBegin on entry, PtrNext (added
  // below, after the body is emitted) on the back edge.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the pre-existing component count into the MEMBER_OF bit-field
  // position of the map type.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Only emit a mapping name when debug info is requested.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four possible adjusted map types. Note the tofrom case reaches
    // EndBB via the fall-through branch from ToElseBB with MemberMapType
    // unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted mapper and, when emitted while generating \p CGF,
  // record the association with that function.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10193 
10194 /// Emit the array initialization or deletion portion for user-defined mapper
10195 /// code generation. First, it evaluates whether an array section is mapped and
10196 /// whether the \a MapType instructs to delete this section. If \a IsInit is
10197 /// true, and \a MapType indicates to not delete this array, array
10198 /// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // Size > 1 elements means this is an array section.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    // Initialize when mapping an array section, or a pointer-with-object whose
    // base differs from begin; skipped when the delete bit is set (checked via
    // DeleteCond below).
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion applies only to an array section, and only when the delete bit
    // is set.
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10262 
10263 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10264     const OMPDeclareMapperDecl *D) {
10265   auto I = UDMMap.find(D);
10266   if (I != UDMMap.end())
10267     return I->second;
10268   emitUserDefinedMapper(D);
10269   return UDMMap.lookup(D);
10270 }
10271 
10272 void CGOpenMPRuntime::emitTargetNumIterationsCall(
10273     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10274     llvm::Value *DeviceID,
10275     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10276                                      const OMPLoopDirective &D)>
10277         SizeEmitter) {
10278   OpenMPDirectiveKind Kind = D.getDirectiveKind();
10279   const OMPExecutableDirective *TD = &D;
10280   // Get nested teams distribute kind directive, if any.
10281   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10282     TD = getNestedDistributeDirective(CGM.getContext(), D);
10283   if (!TD)
10284     return;
10285   const auto *LD = cast<OMPLoopDirective>(TD);
10286   auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10287                                                          PrePostActionTy &) {
10288     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10289       llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10290       llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10291       CGF.EmitRuntimeCall(
10292           OMPBuilder.getOrCreateRuntimeFunction(
10293               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10294           Args);
10295     }
10296   };
10297   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10298 }
10299 
10300 void CGOpenMPRuntime::emitTargetCall(
10301     CodeGenFunction &CGF, const OMPExecutableDirective &D,
10302     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10303     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10304     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10305                                      const OMPLoopDirective &D)>
10306         SizeEmitter) {
10307   if (!CGF.HaveInsertPoint())
10308     return;
10309 
10310   assert(OutlinedFn && "Invalid outlined function!");
10311 
10312   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10313                                  D.hasClausesOfKind<OMPNowaitClause>();
10314   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10315   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10316   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10317                                             PrePostActionTy &) {
10318     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10319   };
10320   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10321 
10322   CodeGenFunction::OMPTargetDataInfo InputInfo;
10323   llvm::Value *MapTypesArray = nullptr;
10324   llvm::Value *MapNamesArray = nullptr;
10325   // Fill up the pointer arrays and transfer execution to the device.
10326   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10327                     &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10328                     &CapturedVars,
10329                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10330     if (Device.getInt() == OMPC_DEVICE_ancestor) {
10331       // Reverse offloading is not supported, so just execute on the host.
10332       if (RequiresOuterTask) {
10333         CapturedVars.clear();
10334         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10335       }
10336       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10337       return;
10338     }
10339 
10340     // On top of the arrays that were filled up, the target offloading call
10341     // takes as arguments the device id as well as the host pointer. The host
10342     // pointer is used by the runtime library to identify the current target
10343     // region, so it only has to be unique and not necessarily point to
10344     // anything. It could be the pointer to the outlined function that
10345     // implements the target region, but we aren't using that so that the
10346     // compiler doesn't need to keep that, and could therefore inline the host
10347     // function if proven worthwhile during optimization.
10348 
10349     // From this point on, we need to have an ID of the target region defined.
10350     assert(OutlinedFnID && "Invalid outlined function ID!");
10351 
10352     // Emit device ID if any.
10353     llvm::Value *DeviceID;
10354     if (Device.getPointer()) {
10355       assert((Device.getInt() == OMPC_DEVICE_unknown ||
10356               Device.getInt() == OMPC_DEVICE_device_num) &&
10357              "Expected device_num modifier.");
10358       llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10359       DeviceID =
10360           CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10361     } else {
10362       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10363     }
10364 
10365     // Emit the number of elements in the offloading arrays.
10366     llvm::Value *PointerNum =
10367         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10368 
10369     // Return value of the runtime offloading call.
10370     llvm::Value *Return;
10371 
10372     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10373     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10374 
10375     // Source location for the ident struct
10376     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10377 
10378     // Emit tripcount for the target loop-based directive.
10379     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10380 
10381     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10382     // The target region is an outlined function launched by the runtime
10383     // via calls __tgt_target() or __tgt_target_teams().
10384     //
10385     // __tgt_target() launches a target region with one team and one thread,
10386     // executing a serial region.  This master thread may in turn launch
10387     // more threads within its team upon encountering a parallel region,
10388     // however, no additional teams can be launched on the device.
10389     //
10390     // __tgt_target_teams() launches a target region with one or more teams,
10391     // each with one or more threads.  This call is required for target
10392     // constructs such as:
10393     //  'target teams'
10394     //  'target' / 'teams'
10395     //  'target teams distribute parallel for'
10396     //  'target parallel'
10397     // and so on.
10398     //
10399     // Note that on the host and CPU targets, the runtime implementation of
10400     // these calls simply call the outlined function without forking threads.
10401     // The outlined functions themselves have runtime calls to
10402     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10403     // the compiler in emitTeamsCall() and emitParallelCall().
10404     //
10405     // In contrast, on the NVPTX target, the implementation of
10406     // __tgt_target_teams() launches a GPU kernel with the requested number
10407     // of teams and threads so no additional calls to the runtime are required.
10408     if (NumTeams) {
10409       // If we have NumTeams defined this means that we have an enclosed teams
10410       // region. Therefore we also expect to have NumThreads defined. These two
10411       // values should be defined in the presence of a teams directive,
10412       // regardless of having any clauses associated. If the user is using teams
10413       // but no clauses, these two values will be the default that should be
10414       // passed to the runtime library - a 32-bit integer with the value zero.
10415       assert(NumThreads && "Thread limit expression should be available along "
10416                            "with number of teams.");
10417       SmallVector<llvm::Value *> OffloadingArgs = {
10418           RTLoc,
10419           DeviceID,
10420           OutlinedFnID,
10421           PointerNum,
10422           InputInfo.BasePointersArray.getPointer(),
10423           InputInfo.PointersArray.getPointer(),
10424           InputInfo.SizesArray.getPointer(),
10425           MapTypesArray,
10426           MapNamesArray,
10427           InputInfo.MappersArray.getPointer(),
10428           NumTeams,
10429           NumThreads};
10430       if (HasNowait) {
10431         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10432         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10433         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10434         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10435         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10436         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10437       }
10438       Return = CGF.EmitRuntimeCall(
10439           OMPBuilder.getOrCreateRuntimeFunction(
10440               CGM.getModule(), HasNowait
10441                                    ? OMPRTL___tgt_target_teams_nowait_mapper
10442                                    : OMPRTL___tgt_target_teams_mapper),
10443           OffloadingArgs);
10444     } else {
10445       SmallVector<llvm::Value *> OffloadingArgs = {
10446           RTLoc,
10447           DeviceID,
10448           OutlinedFnID,
10449           PointerNum,
10450           InputInfo.BasePointersArray.getPointer(),
10451           InputInfo.PointersArray.getPointer(),
10452           InputInfo.SizesArray.getPointer(),
10453           MapTypesArray,
10454           MapNamesArray,
10455           InputInfo.MappersArray.getPointer()};
10456       if (HasNowait) {
10457         // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10458         // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10459         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10460         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10461         OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10462         OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10463       }
10464       Return = CGF.EmitRuntimeCall(
10465           OMPBuilder.getOrCreateRuntimeFunction(
10466               CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10467                                          : OMPRTL___tgt_target_mapper),
10468           OffloadingArgs);
10469     }
10470 
10471     // Check the error code and execute the host version if required.
10472     llvm::BasicBlock *OffloadFailedBlock =
10473         CGF.createBasicBlock("omp_offload.failed");
10474     llvm::BasicBlock *OffloadContBlock =
10475         CGF.createBasicBlock("omp_offload.cont");
10476     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10477     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10478 
10479     CGF.EmitBlock(OffloadFailedBlock);
10480     if (RequiresOuterTask) {
10481       CapturedVars.clear();
10482       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10483     }
10484     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10485     CGF.EmitBranch(OffloadContBlock);
10486 
10487     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10488   };
10489 
10490   // Notify that the host version must be executed.
10491   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10492                     RequiresOuterTask](CodeGenFunction &CGF,
10493                                        PrePostActionTy &) {
10494     if (RequiresOuterTask) {
10495       CapturedVars.clear();
10496       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10497     }
10498     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10499   };
10500 
10501   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10502                           &MapNamesArray, &CapturedVars, RequiresOuterTask,
10503                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10504     // Fill up the arrays with all the captured variables.
10505     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10506 
10507     // Get mappable expression information.
10508     MappableExprsHandler MEHandler(D, CGF);
10509     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10510     llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10511 
10512     auto RI = CS.getCapturedRecordDecl()->field_begin();
10513     auto *CV = CapturedVars.begin();
10514     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10515                                               CE = CS.capture_end();
10516          CI != CE; ++CI, ++RI, ++CV) {
10517       MappableExprsHandler::MapCombinedInfoTy CurInfo;
10518       MappableExprsHandler::StructRangeInfoTy PartialStruct;
10519 
10520       // VLA sizes are passed to the outlined region by copy and do not have map
10521       // information associated.
10522       if (CI->capturesVariableArrayType()) {
10523         CurInfo.Exprs.push_back(nullptr);
10524         CurInfo.BasePointers.push_back(*CV);
10525         CurInfo.Pointers.push_back(*CV);
10526         CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10527             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10528         // Copy to the device as an argument. No need to retrieve it.
10529         CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10530                                 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10531                                 MappableExprsHandler::OMP_MAP_IMPLICIT);
10532         CurInfo.Mappers.push_back(nullptr);
10533       } else {
10534         // If we have any information in the map clause, we use it, otherwise we
10535         // just do a default mapping.
10536         MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10537         if (!CI->capturesThis())
10538           MappedVarSet.insert(CI->getCapturedVar());
10539         else
10540           MappedVarSet.insert(nullptr);
10541         if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10542           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10543         // Generate correct mapping for variables captured by reference in
10544         // lambdas.
10545         if (CI->capturesVariable())
10546           MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10547                                                   CurInfo, LambdaPointers);
10548       }
10549       // We expect to have at least an element of information for this capture.
10550       assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10551              "Non-existing map pointer for capture!");
10552       assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10553              CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10554              CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10555              CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10556              "Inconsistent map information sizes!");
10557 
10558       // If there is an entry in PartialStruct it means we have a struct with
10559       // individual members mapped. Emit an extra combined entry.
10560       if (PartialStruct.Base.isValid()) {
10561         CombinedInfo.append(PartialStruct.PreliminaryMapData);
10562         MEHandler.emitCombinedEntry(
10563             CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10564             !PartialStruct.PreliminaryMapData.BasePointers.empty());
10565       }
10566 
10567       // We need to append the results of this capture to what we already have.
10568       CombinedInfo.append(CurInfo);
10569     }
10570     // Adjust MEMBER_OF flags for the lambdas captures.
10571     MEHandler.adjustMemberOfForLambdaCaptures(
10572         LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10573         CombinedInfo.Types);
10574     // Map any list items in a map clause that were not captures because they
10575     // weren't referenced within the construct.
10576     MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10577 
10578     TargetDataInfo Info;
10579     // Fill up the arrays and create the arguments.
10580     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10581     emitOffloadingArraysArgument(
10582         CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10583         Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10584         {/*ForEndTask=*/false});
10585 
10586     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10587     InputInfo.BasePointersArray =
10588         Address(Info.BasePointersArray, CGM.getPointerAlign());
10589     InputInfo.PointersArray =
10590         Address(Info.PointersArray, CGM.getPointerAlign());
10591     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10592     InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10593     MapTypesArray = Info.MapTypesArray;
10594     MapNamesArray = Info.MapNamesArray;
10595     if (RequiresOuterTask)
10596       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10597     else
10598       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10599   };
10600 
10601   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10602                              CodeGenFunction &CGF, PrePostActionTy &) {
10603     if (RequiresOuterTask) {
10604       CodeGenFunction::OMPTargetDataInfo InputInfo;
10605       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10606     } else {
10607       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10608     }
10609   };
10610 
10611   // If we have a target function ID it means that we need to support
10612   // offloading, otherwise, just execute on the host. We need to execute on host
10613   // regardless of the conditional in the if clause if, e.g., the user do not
10614   // specify target triples.
10615   if (OutlinedFnID) {
10616     if (IfCond) {
10617       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10618     } else {
10619       RegionCodeGenTy ThenRCG(TargetThenGen);
10620       ThenRCG(CGF);
10621     }
10622   } else {
10623     RegionCodeGenTy ElseRCG(TargetElseGen);
10624     ElseRCG(CGF);
10625   }
10626 }
10627 
/// Recursively scan the statement tree rooted at \p S for OpenMP target
/// execution directives and emit a device kernel for each one found.
/// \p ParentName is the mangled name of the enclosing host entity (function,
/// ctor or dtor); it participates in the unique offload-entry naming so host
/// and device agree on kernel names.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    // The (device-id, file-id, line) triple plus ParentName uniquely
    // identifies this target region across the host and device compilations.
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the dedicated device-function emitter for each target
    // execution directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the directives below is a target execution directive, so
    // reaching them here means the RequiresDeviceCodegen check above and
    // isOpenMPTargetExecutionDirective() disagree — a bug.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // For any other executable directive, recurse into the raw associated
  // statement (captured regions stripped); a directive with no associated
  // statement cannot contain a target region.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10778 
10779 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10780   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10781       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10782   if (!DevTy)
10783     return false;
10784   // Do not emit device_type(nohost) functions for the host.
10785   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10786     return true;
10787   // Do not emit device_type(host) functions for the device.
10788   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10789     return true;
10790   return false;
10791 }
10792 
10793 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10794   // If emitting code for the host, we do not process FD here. Instead we do
10795   // the normal code generation.
10796   if (!CGM.getLangOpts().OpenMPIsDevice) {
10797     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10798       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10799                                   CGM.getLangOpts().OpenMPIsDevice))
10800         return true;
10801     return false;
10802   }
10803 
10804   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10805   // Try to detect target regions in the function.
10806   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10807     StringRef Name = CGM.getMangledName(GD);
10808     scanForTargetRegionsFunctions(FD->getBody(), Name);
10809     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10810                                 CGM.getLangOpts().OpenMPIsDevice))
10811       return true;
10812   }
10813 
10814   // Do not to emit function if it is not marked as declare target.
10815   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10816          AlreadyEmittedTargetDecls.count(VD) == 0;
10817 }
10818 
/// Decides whether emission of the global variable \p GD should be suppressed
/// here (returns true) or handled by normal codegen (returns false). On the
/// device it also scans ctor/dtor bodies for target regions and defers
/// link/unified-memory declare-target variables.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Respect device_type(host)/device_type(nohost) restrictions.
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  // On the host there is nothing more to filter.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  // Non-declare-target variables, 'link' variables, and 'to' variables under
  // unified shared memory are deferred: they are materialized later by
  // emitDeferredTargetDecls() (via getAddrOfDeclareTargetVar) once the whole
  // TU has been processed.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
10856 
/// Registers the declare-target variable \p VD (emitted at \p Addr) with the
/// offload-entries table so host and device runtimes can associate the two
/// copies. Non-declare-target device globals are merely recorded; 'link' and
/// unified-memory 'to' variables are registered through their reference
/// pointer instead of the variable itself.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when no offloading is requested at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // Plain declare-target 'to': register the variable itself with its real
    // size and linkage. A declaration-only variable gets size zero.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Emit an internal constant "<var>_ref" holding the variable's
        // address and mark it compiler-used so the optimizer keeps both.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    // The registered entity is the pointer-sized reference variable, not the
    // data itself; on the device only the name is recorded.
    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
10938 
10939 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10940   if (isa<FunctionDecl>(GD.getDecl()) ||
10941       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10942     return emitTargetFunctions(GD);
10943 
10944   return emitTargetGlobalVariable(GD);
10945 }
10946 
10947 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10948   for (const VarDecl *VD : DeferredGlobalVariables) {
10949     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10950         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10951     if (!Res)
10952       continue;
10953     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10954         !HasRequiresUnifiedSharedMemory) {
10955       CGM.EmitGlobal(VD);
10956     } else {
10957       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10958               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10959                HasRequiresUnifiedSharedMemory)) &&
10960              "Expected link clause or to clause with unified memory.");
10961       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10962     }
10963   }
10964 }
10965 
/// Default (host) implementation: no lambda-capture adjustment is needed;
/// only sanity-check that \p D really is a target execution directive.
/// Device runtimes override this to rewrite lambda captures for the target.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10971 
10972 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10973   for (const OMPClause *Clause : D->clauselists()) {
10974     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10975       HasRequiresUnifiedSharedMemory = true;
10976     } else if (const auto *AC =
10977                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10978       switch (AC->getAtomicDefaultMemOrderKind()) {
10979       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10980         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10981         break;
10982       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10983         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10984         break;
10985       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10986         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10987         break;
10988       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10989         break;
10990       }
10991     }
10992   }
10993 }
10994 
/// Returns the default atomic ordering selected by a 'requires
/// atomic_default_mem_order' clause (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10998 
10999 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11000                                                        LangAS &AS) {
11001   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11002     return false;
11003   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11004   switch(A->getAllocatorType()) {
11005   case OMPAllocateDeclAttr::OMPNullMemAlloc:
11006   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11007   // Not supported, fallback to the default mem space.
11008   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11009   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11010   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11011   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11012   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11013   case OMPAllocateDeclAttr::OMPConstMemAlloc:
11014   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11015     AS = LangAS::Default;
11016     return true;
11017   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11018     llvm_unreachable("Expected predefined allocator for the variables with the "
11019                      "static storage.");
11020   }
11021   return false;
11022 }
11023 
/// Returns true when an 'omp requires unified_shared_memory' directive has
/// been seen in this module (see processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11027 
11028 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11029     CodeGenModule &CGM)
11030     : CGM(CGM) {
11031   if (CGM.getLangOpts().OpenMPIsDevice) {
11032     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11033     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11034   }
11035 }
11036 
/// RAII exit: restore the flag saved by the constructor (device compilations
/// only; the constructor did nothing on the host).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
11041 
11042 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11043   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11044     return true;
11045 
11046   const auto *D = cast<FunctionDecl>(GD.getDecl());
11047   // Do not to emit function if it is marked as declare target as it was already
11048   // emitted.
11049   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11050     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11051       if (auto *F = dyn_cast_or_null<llvm::Function>(
11052               CGM.GetGlobalValue(CGM.getMangledName(GD))))
11053         return !F->isDeclaration();
11054       return false;
11055     }
11056     return true;
11057   }
11058 
11059   return !AlreadyEmittedTargetDecls.insert(D).second;
11060 }
11061 
/// Builds the module-initializer function that forwards the 'omp requires'
/// flags to the runtime via __tgt_register_requires. Returns nullptr when no
/// registration is needed (device compilation, simd-only mode, no target
/// triples, or no target regions/entries in this TU).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    // Emit a nullary void function named "omp_offloading.requires_reg" whose
    // body is a single call into the offload runtime.
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
11103 
11104 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11105                                     const OMPExecutableDirective &D,
11106                                     SourceLocation Loc,
11107                                     llvm::Function *OutlinedFn,
11108                                     ArrayRef<llvm::Value *> CapturedVars) {
11109   if (!CGF.HaveInsertPoint())
11110     return;
11111 
11112   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11113   CodeGenFunction::RunCleanupsScope Scope(CGF);
11114 
11115   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11116   llvm::Value *Args[] = {
11117       RTLoc,
11118       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11119       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11120   llvm::SmallVector<llvm::Value *, 16> RealArgs;
11121   RealArgs.append(std::begin(Args), std::end(Args));
11122   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11123 
11124   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11125       CGM.getModule(), OMPRTL___kmpc_fork_teams);
11126   CGF.EmitRuntimeCall(RTLFn, RealArgs);
11127 }
11128 
11129 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11130                                          const Expr *NumTeams,
11131                                          const Expr *ThreadLimit,
11132                                          SourceLocation Loc) {
11133   if (!CGF.HaveInsertPoint())
11134     return;
11135 
11136   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11137 
11138   llvm::Value *NumTeamsVal =
11139       NumTeams
11140           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11141                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11142           : CGF.Builder.getInt32(0);
11143 
11144   llvm::Value *ThreadLimitVal =
11145       ThreadLimit
11146           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11147                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
11148           : CGF.Builder.getInt32(0);
11149 
11150   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11151   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11152                                      ThreadLimitVal};
11153   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11154                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11155                       PushNumTeamsArgs);
11156 }
11157 
11158 void CGOpenMPRuntime::emitTargetDataCalls(
11159     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11160     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11161   if (!CGF.HaveInsertPoint())
11162     return;
11163 
11164   // Action used to replace the default codegen action and turn privatization
11165   // off.
11166   PrePostActionTy NoPrivAction;
11167 
11168   // Generate the code for the opening of the data environment. Capture all the
11169   // arguments of the runtime call by reference because they are used in the
11170   // closing of the region.
11171   auto &&BeginThenGen = [this, &D, Device, &Info,
11172                          &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11173     // Fill up the arrays with all the mapped variables.
11174     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11175 
11176     // Get map clause information.
11177     MappableExprsHandler MEHandler(D, CGF);
11178     MEHandler.generateAllInfo(CombinedInfo);
11179 
11180     // Fill up the arrays and create the arguments.
11181     emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11182                          /*IsNonContiguous=*/true);
11183 
11184     llvm::Value *BasePointersArrayArg = nullptr;
11185     llvm::Value *PointersArrayArg = nullptr;
11186     llvm::Value *SizesArrayArg = nullptr;
11187     llvm::Value *MapTypesArrayArg = nullptr;
11188     llvm::Value *MapNamesArrayArg = nullptr;
11189     llvm::Value *MappersArrayArg = nullptr;
11190     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11191                                  SizesArrayArg, MapTypesArrayArg,
11192                                  MapNamesArrayArg, MappersArrayArg, Info);
11193 
11194     // Emit device ID if any.
11195     llvm::Value *DeviceID = nullptr;
11196     if (Device) {
11197       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11198                                            CGF.Int64Ty, /*isSigned=*/true);
11199     } else {
11200       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11201     }
11202 
11203     // Emit the number of elements in the offloading arrays.
11204     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11205     //
11206     // Source location for the ident struct
11207     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11208 
11209     llvm::Value *OffloadingArgs[] = {RTLoc,
11210                                      DeviceID,
11211                                      PointerNum,
11212                                      BasePointersArrayArg,
11213                                      PointersArrayArg,
11214                                      SizesArrayArg,
11215                                      MapTypesArrayArg,
11216                                      MapNamesArrayArg,
11217                                      MappersArrayArg};
11218     CGF.EmitRuntimeCall(
11219         OMPBuilder.getOrCreateRuntimeFunction(
11220             CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11221         OffloadingArgs);
11222 
11223     // If device pointer privatization is required, emit the body of the region
11224     // here. It will have to be duplicated: with and without privatization.
11225     if (!Info.CaptureDeviceAddrMap.empty())
11226       CodeGen(CGF);
11227   };
11228 
11229   // Generate code for the closing of the data region.
11230   auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11231                                                 PrePostActionTy &) {
11232     assert(Info.isValid() && "Invalid data environment closing arguments.");
11233 
11234     llvm::Value *BasePointersArrayArg = nullptr;
11235     llvm::Value *PointersArrayArg = nullptr;
11236     llvm::Value *SizesArrayArg = nullptr;
11237     llvm::Value *MapTypesArrayArg = nullptr;
11238     llvm::Value *MapNamesArrayArg = nullptr;
11239     llvm::Value *MappersArrayArg = nullptr;
11240     emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11241                                  SizesArrayArg, MapTypesArrayArg,
11242                                  MapNamesArrayArg, MappersArrayArg, Info,
11243                                  {/*ForEndCall=*/true});
11244 
11245     // Emit device ID if any.
11246     llvm::Value *DeviceID = nullptr;
11247     if (Device) {
11248       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11249                                            CGF.Int64Ty, /*isSigned=*/true);
11250     } else {
11251       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11252     }
11253 
11254     // Emit the number of elements in the offloading arrays.
11255     llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11256 
11257     // Source location for the ident struct
11258     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11259 
11260     llvm::Value *OffloadingArgs[] = {RTLoc,
11261                                      DeviceID,
11262                                      PointerNum,
11263                                      BasePointersArrayArg,
11264                                      PointersArrayArg,
11265                                      SizesArrayArg,
11266                                      MapTypesArrayArg,
11267                                      MapNamesArrayArg,
11268                                      MappersArrayArg};
11269     CGF.EmitRuntimeCall(
11270         OMPBuilder.getOrCreateRuntimeFunction(
11271             CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11272         OffloadingArgs);
11273   };
11274 
11275   // If we need device pointer privatization, we need to emit the body of the
11276   // region with no privatization in the 'else' branch of the conditional.
11277   // Otherwise, we don't have to do anything.
11278   auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11279                                                          PrePostActionTy &) {
11280     if (!Info.CaptureDeviceAddrMap.empty()) {
11281       CodeGen.setAction(NoPrivAction);
11282       CodeGen(CGF);
11283     }
11284   };
11285 
11286   // We don't have to do anything to close the region if the if clause evaluates
11287   // to false.
11288   auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11289 
11290   if (IfCond) {
11291     emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11292   } else {
11293     RegionCodeGenTy RCG(BeginThenGen);
11294     RCG(CGF);
11295   }
11296 
11297   // If we don't require privatization of device pointers, we emit the body in
11298   // between the runtime calls. This avoids duplicating the body code.
11299   if (Info.CaptureDeviceAddrMap.empty()) {
11300     CodeGen.setAction(NoPrivAction);
11301     CodeGen(CGF);
11302   }
11303 
11304   if (IfCond) {
11305     emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11306   } else {
11307     RegionCodeGenTy RCG(EndThenGen);
11308     RCG(CGF);
11309   }
11310 }
11311 
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  // Emit the runtime call for a standalone target data directive
  // ('target enter data', 'target exit data' or 'target update').
  // Unlike 'omp target data' these directives have no region body, so
  // codegen reduces to filling the offloading arrays and issuing a single
  // __tgt_target_data_* mapper call (possibly wrapped in a task when a
  // depend/nowait clause is present, or guarded by an 'if' clause).
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo/MapTypesArray/MapNamesArray are filled by TargetThenGen below
  // and read by ThenGen; they are captured by reference so the two lambdas
  // share them.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Argument list shared by all __tgt_target_data_* mapper entry points.
    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive. The _nowait variants are used when a 'nowait' clause is
    // present on the directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are impossible here (guarded by the
    // assertion at function entry); they are spelled out so that adding a
    // new OpenMP directive kind triggers a -Wswitch warning.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Build the offloading arrays from the map clauses, publish them through
  // InputInfo and the captured array pointers, then run ThenGen either
  // inlined or wrapped in an outer task (required for depend/nowait).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an 'if' clause the runtime call is emitted only on the true
  // branch; the false branch is a no-op for standalone data directives.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11491 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; Vector by default.
    ParamKindTy Kind = Vector;
    /// For Linear: the constant stride. For LinearWithVarStride: the
    /// position of the parameter that carries the stride.
    llvm::APSInt StrideOrArg;
    /// Alignment from an 'aligned' clause; only mangled when non-zero.
    llvm::APSInt Alignment;
  };
} // namespace
11502 
11503 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11504                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11505   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11506   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11507   // of that clause. The VLEN value must be power of 2.
11508   // In other case the notion of the function`s "characteristic data type" (CDT)
11509   // is used to compute the vector length.
11510   // CDT is defined in the following order:
11511   //   a) For non-void function, the CDT is the return type.
11512   //   b) If the function has any non-uniform, non-linear parameters, then the
11513   //   CDT is the type of the first such parameter.
11514   //   c) If the CDT determined by a) or b) above is struct, union, or class
11515   //   type which is pass-by-value (except for the type that maps to the
11516   //   built-in complex data type), the characteristic data type is int.
11517   //   d) If none of the above three cases is applicable, the CDT is int.
11518   // The VLEN is then determined based on the CDT and the size of vector
11519   // register of that ISA for which current vector version is generated. The
11520   // VLEN is computed using the formula below:
11521   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11522   // where vector register size specified in section 3.2.1 Registers and the
11523   // Stack Frame of original AMD64 ABI document.
11524   QualType RetType = FD->getReturnType();
11525   if (RetType.isNull())
11526     return 0;
11527   ASTContext &C = FD->getASTContext();
11528   QualType CDT;
11529   if (!RetType.isNull() && !RetType->isVoidType()) {
11530     CDT = RetType;
11531   } else {
11532     unsigned Offset = 0;
11533     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11534       if (ParamAttrs[Offset].Kind == Vector)
11535         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11536       ++Offset;
11537     }
11538     if (CDT.isNull()) {
11539       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11540         if (ParamAttrs[I + Offset].Kind == Vector) {
11541           CDT = FD->getParamDecl(I)->getType();
11542           break;
11543         }
11544       }
11545     }
11546   }
11547   if (CDT.isNull())
11548     CDT = C.IntTy;
11549   CDT = CDT->getCanonicalTypeUnqualified();
11550   if (CDT->isRecordType() || CDT->isUnionType())
11551     CDT = C.IntTy;
11552   return C.getTypeSize(CDT);
11553 }
11554 
11555 static void
11556 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11557                            const llvm::APSInt &VLENVal,
11558                            ArrayRef<ParamAttrTy> ParamAttrs,
11559                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11560   struct ISADataTy {
11561     char ISA;
11562     unsigned VecRegSize;
11563   };
11564   ISADataTy ISAData[] = {
11565       {
11566           'b', 128
11567       }, // SSE
11568       {
11569           'c', 256
11570       }, // AVX
11571       {
11572           'd', 256
11573       }, // AVX2
11574       {
11575           'e', 512
11576       }, // AVX512
11577   };
11578   llvm::SmallVector<char, 2> Masked;
11579   switch (State) {
11580   case OMPDeclareSimdDeclAttr::BS_Undefined:
11581     Masked.push_back('N');
11582     Masked.push_back('M');
11583     break;
11584   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11585     Masked.push_back('N');
11586     break;
11587   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11588     Masked.push_back('M');
11589     break;
11590   }
11591   for (char Mask : Masked) {
11592     for (const ISADataTy &Data : ISAData) {
11593       SmallString<256> Buffer;
11594       llvm::raw_svector_ostream Out(Buffer);
11595       Out << "_ZGV" << Data.ISA << Mask;
11596       if (!VLENVal) {
11597         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11598         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11599         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11600       } else {
11601         Out << VLENVal;
11602       }
11603       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11604         switch (ParamAttr.Kind){
11605         case LinearWithVarStride:
11606           Out << 's' << ParamAttr.StrideOrArg;
11607           break;
11608         case Linear:
11609           Out << 'l';
11610           if (ParamAttr.StrideOrArg != 1)
11611             Out << ParamAttr.StrideOrArg;
11612           break;
11613         case Uniform:
11614           Out << 'u';
11615           break;
11616         case Vector:
11617           Out << 'v';
11618           break;
11619         }
11620         if (!!ParamAttr.Alignment)
11621           Out << 'a' << ParamAttr.Alignment;
11622       }
11623       Out << '_' << Fn->getName();
11624       Fn->addFnAttr(Out.str());
11625     }
11626   }
11627 }
11628 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11634 
11635 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11636 ///
11637 /// TODO: Need to implement the behavior for reference marked with a
11638 /// var or no linear modifiers (1.b in the section). For this, we
11639 /// need to extend ParamKindTy to support the linear modifiers.
11640 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11641   QT = QT.getCanonicalType();
11642 
11643   if (QT->isVoidType())
11644     return false;
11645 
11646   if (Kind == ParamKindTy::Uniform)
11647     return false;
11648 
11649   if (Kind == ParamKindTy::Linear)
11650     return false;
11651 
11652   // TODO: Handle linear references with modifiers
11653 
11654   if (Kind == ParamKindTy::LinearWithVarStride)
11655     return false;
11656 
11657   return true;
11658 }
11659 
11660 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11661 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11662   QT = QT.getCanonicalType();
11663   unsigned Size = C.getTypeSize(QT);
11664 
11665   // Only scalars and complex within 16 bytes wide set PVB to true.
11666   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11667     return false;
11668 
11669   if (QT->isFloatingType())
11670     return true;
11671 
11672   if (QT->isIntegerType())
11673     return true;
11674 
11675   if (QT->isPointerType())
11676     return true;
11677 
11678   // TODO: Add support for complex types (section 3.1.2, item 2).
11679 
11680   return false;
11681 }
11682 
11683 /// Computes the lane size (LS) of a return type or of an input parameter,
11684 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11685 /// TODO: Add support for references, section 3.2.1, item 1.
11686 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11687   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11688     QualType PTy = QT.getCanonicalType()->getPointeeType();
11689     if (getAArch64PBV(PTy, C))
11690       return C.getTypeSize(PTy);
11691   }
11692   if (getAArch64PBV(QT, C))
11693     return C.getTypeSize(QT);
11694 
11695   return C.getTypeSize(C.getUIntPtrType());
11696 }
11697 
11698 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11699 // signature of the scalar function, as defined in 3.2.2 of the
11700 // AAVFABI.
11701 static std::tuple<unsigned, unsigned, bool>
11702 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11703   QualType RetType = FD->getReturnType().getCanonicalType();
11704 
11705   ASTContext &C = FD->getASTContext();
11706 
11707   bool OutputBecomesInput = false;
11708 
11709   llvm::SmallVector<unsigned, 8> Sizes;
11710   if (!RetType->isVoidType()) {
11711     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11712     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11713       OutputBecomesInput = true;
11714   }
11715   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11716     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11717     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11718   }
11719 
11720   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11721   // The LS of a function parameter / return value can only be a power
11722   // of 2, starting from 8 bits, up to 128.
11723   assert(llvm::all_of(Sizes,
11724                       [](unsigned Size) {
11725                         return Size == 8 || Size == 16 || Size == 32 ||
11726                                Size == 64 || Size == 128;
11727                       }) &&
11728          "Invalid size");
11729 
11730   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11731                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11732                          OutputBecomesInput);
11733 }
11734 
11735 /// Mangle the parameter part of the vector function name according to
11736 /// their OpenMP classification. The mangling function is defined in
11737 /// section 3.5 of the AAVFABI.
11738 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11739   SmallString<256> Buffer;
11740   llvm::raw_svector_ostream Out(Buffer);
11741   for (const auto &ParamAttr : ParamAttrs) {
11742     switch (ParamAttr.Kind) {
11743     case LinearWithVarStride:
11744       Out << "ls" << ParamAttr.StrideOrArg;
11745       break;
11746     case Linear:
11747       Out << 'l';
11748       // Don't print the step value if it is not present or if it is
11749       // equal to 1.
11750       if (ParamAttr.StrideOrArg != 1)
11751         Out << ParamAttr.StrideOrArg;
11752       break;
11753     case Uniform:
11754       Out << 'u';
11755       break;
11756     case Vector:
11757       Out << 'v';
11758       break;
11759     }
11760 
11761     if (!!ParamAttr.Alignment)
11762       Out << 'a' << ParamAttr.Alignment;
11763   }
11764 
11765   return std::string(Out.str());
11766 }
11767 
11768 // Function used to add the attribute. The parameter `VLEN` is
11769 // templated to allow the use of "x" when targeting scalable functions
11770 // for SVE.
11771 template <typename T>
11772 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11773                                  char ISA, StringRef ParSeq,
11774                                  StringRef MangledName, bool OutputBecomesInput,
11775                                  llvm::Function *Fn) {
11776   SmallString<256> Buffer;
11777   llvm::raw_svector_ostream Out(Buffer);
11778   Out << Prefix << ISA << LMask << VLEN;
11779   if (OutputBecomesInput)
11780     Out << "v";
11781   Out << ParSeq << "_" << MangledName;
11782   Fn->addFnAttr(Out.str());
11783 }
11784 
11785 // Helper function to generate the Advanced SIMD names depending on
11786 // the value of the NDS when simdlen is not present.
11787 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11788                                       StringRef Prefix, char ISA,
11789                                       StringRef ParSeq, StringRef MangledName,
11790                                       bool OutputBecomesInput,
11791                                       llvm::Function *Fn) {
11792   switch (NDS) {
11793   case 8:
11794     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11795                          OutputBecomesInput, Fn);
11796     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11797                          OutputBecomesInput, Fn);
11798     break;
11799   case 16:
11800     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11801                          OutputBecomesInput, Fn);
11802     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11803                          OutputBecomesInput, Fn);
11804     break;
11805   case 32:
11806     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11807                          OutputBecomesInput, Fn);
11808     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11809                          OutputBecomesInput, Fn);
11810     break;
11811   case 64:
11812   case 128:
11813     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11814                          OutputBecomesInput, Fn);
11815     break;
11816   default:
11817     llvm_unreachable("Scalar type is too wide.");
11818   }
11819 }
11820 
11821 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11822 static void emitAArch64DeclareSimdFunction(
11823     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11824     ArrayRef<ParamAttrTy> ParamAttrs,
11825     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11826     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11827 
11828   // Get basic data for building the vector signature.
11829   const auto Data = getNDSWDS(FD, ParamAttrs);
11830   const unsigned NDS = std::get<0>(Data);
11831   const unsigned WDS = std::get<1>(Data);
11832   const bool OutputBecomesInput = std::get<2>(Data);
11833 
11834   // Check the values provided via `simdlen` by the user.
11835   // 1. A `simdlen(1)` doesn't produce vector signatures,
11836   if (UserVLEN == 1) {
11837     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11838         DiagnosticsEngine::Warning,
11839         "The clause simdlen(1) has no effect when targeting aarch64.");
11840     CGM.getDiags().Report(SLoc, DiagID);
11841     return;
11842   }
11843 
11844   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11845   // Advanced SIMD output.
11846   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11847     unsigned DiagID = CGM.getDiags().getCustomDiagID(
11848         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11849                                     "power of 2 when targeting Advanced SIMD.");
11850     CGM.getDiags().Report(SLoc, DiagID);
11851     return;
11852   }
11853 
11854   // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11855   // limits.
11856   if (ISA == 's' && UserVLEN != 0) {
11857     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11858       unsigned DiagID = CGM.getDiags().getCustomDiagID(
11859           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11860                                       "lanes in the architectural constraints "
11861                                       "for SVE (min is 128-bit, max is "
11862                                       "2048-bit, by steps of 128-bit)");
11863       CGM.getDiags().Report(SLoc, DiagID) << WDS;
11864       return;
11865     }
11866   }
11867 
11868   // Sort out parameter sequence.
11869   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11870   StringRef Prefix = "_ZGV";
11871   // Generate simdlen from user input (if any).
11872   if (UserVLEN) {
11873     if (ISA == 's') {
11874       // SVE generates only a masked function.
11875       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11876                            OutputBecomesInput, Fn);
11877     } else {
11878       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11879       // Advanced SIMD generates one or two functions, depending on
11880       // the `[not]inbranch` clause.
11881       switch (State) {
11882       case OMPDeclareSimdDeclAttr::BS_Undefined:
11883         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11884                              OutputBecomesInput, Fn);
11885         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11886                              OutputBecomesInput, Fn);
11887         break;
11888       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11889         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11890                              OutputBecomesInput, Fn);
11891         break;
11892       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11893         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11894                              OutputBecomesInput, Fn);
11895         break;
11896       }
11897     }
11898   } else {
11899     // If no user simdlen is provided, follow the AAVFABI rules for
11900     // generating the vector length.
11901     if (ISA == 's') {
11902       // SVE, section 3.4.1, item 1.
11903       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11904                            OutputBecomesInput, Fn);
11905     } else {
11906       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11907       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11908       // two vector names depending on the use of the clause
11909       // `[not]inbranch`.
11910       switch (State) {
11911       case OMPDeclareSimdDeclAttr::BS_Undefined:
11912         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11913                                   OutputBecomesInput, Fn);
11914         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11915                                   OutputBecomesInput, Fn);
11916         break;
11917       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11918         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11919                                   OutputBecomesInput, Fn);
11920         break;
11921       case OMPDeclareSimdDeclAttr::BS_Inbranch:
11922         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11923                                   OutputBecomesInput, Fn);
11924         break;
11925       }
11926     }
11927   }
11928 }
11929 
11930 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11931                                               llvm::Function *Fn) {
11932   ASTContext &C = CGM.getContext();
11933   FD = FD->getMostRecentDecl();
11934   // Map params to their positions in function decl.
11935   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11936   if (isa<CXXMethodDecl>(FD))
11937     ParamPositions.try_emplace(FD, 0);
11938   unsigned ParamPos = ParamPositions.size();
11939   for (const ParmVarDecl *P : FD->parameters()) {
11940     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11941     ++ParamPos;
11942   }
11943   while (FD) {
11944     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11945       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11946       // Mark uniform parameters.
11947       for (const Expr *E : Attr->uniforms()) {
11948         E = E->IgnoreParenImpCasts();
11949         unsigned Pos;
11950         if (isa<CXXThisExpr>(E)) {
11951           Pos = ParamPositions[FD];
11952         } else {
11953           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11954                                 ->getCanonicalDecl();
11955           Pos = ParamPositions[PVD];
11956         }
11957         ParamAttrs[Pos].Kind = Uniform;
11958       }
11959       // Get alignment info.
11960       auto NI = Attr->alignments_begin();
11961       for (const Expr *E : Attr->aligneds()) {
11962         E = E->IgnoreParenImpCasts();
11963         unsigned Pos;
11964         QualType ParmTy;
11965         if (isa<CXXThisExpr>(E)) {
11966           Pos = ParamPositions[FD];
11967           ParmTy = E->getType();
11968         } else {
11969           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11970                                 ->getCanonicalDecl();
11971           Pos = ParamPositions[PVD];
11972           ParmTy = PVD->getType();
11973         }
11974         ParamAttrs[Pos].Alignment =
11975             (*NI)
11976                 ? (*NI)->EvaluateKnownConstInt(C)
11977                 : llvm::APSInt::getUnsigned(
11978                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11979                           .getQuantity());
11980         ++NI;
11981       }
11982       // Mark linear parameters.
11983       auto SI = Attr->steps_begin();
11984       auto MI = Attr->modifiers_begin();
11985       for (const Expr *E : Attr->linears()) {
11986         E = E->IgnoreParenImpCasts();
11987         unsigned Pos;
11988         // Rescaling factor needed to compute the linear parameter
11989         // value in the mangled name.
11990         unsigned PtrRescalingFactor = 1;
11991         if (isa<CXXThisExpr>(E)) {
11992           Pos = ParamPositions[FD];
11993         } else {
11994           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11995                                 ->getCanonicalDecl();
11996           Pos = ParamPositions[PVD];
11997           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11998             PtrRescalingFactor = CGM.getContext()
11999                                      .getTypeSizeInChars(P->getPointeeType())
12000                                      .getQuantity();
12001         }
12002         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12003         ParamAttr.Kind = Linear;
12004         // Assuming a stride of 1, for `linear` without modifiers.
12005         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12006         if (*SI) {
12007           Expr::EvalResult Result;
12008           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12009             if (const auto *DRE =
12010                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12011               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
12012                 ParamAttr.Kind = LinearWithVarStride;
12013                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12014                     ParamPositions[StridePVD->getCanonicalDecl()]);
12015               }
12016             }
12017           } else {
12018             ParamAttr.StrideOrArg = Result.Val.getInt();
12019           }
12020         }
12021         // If we are using a linear clause on a pointer, we need to
12022         // rescale the value of linear_step with the byte size of the
12023         // pointee type.
12024         if (Linear == ParamAttr.Kind)
12025           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12026         ++SI;
12027         ++MI;
12028       }
12029       llvm::APSInt VLENVal;
12030       SourceLocation ExprLoc;
12031       const Expr *VLENExpr = Attr->getSimdlen();
12032       if (VLENExpr) {
12033         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12034         ExprLoc = VLENExpr->getExprLoc();
12035       }
12036       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12037       if (CGM.getTriple().isX86()) {
12038         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12039       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12040         unsigned VLEN = VLENVal.getExtValue();
12041         StringRef MangledName = Fn->getName();
12042         if (CGM.getTarget().hasFeature("sve"))
12043           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12044                                          MangledName, 's', 128, Fn, ExprLoc);
12045         if (CGM.getTarget().hasFeature("neon"))
12046           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12047                                          MangledName, 'n', 128, Fn, ExprLoc);
12048       }
12049     }
12050     FD = FD->getPreviousDecl();
12051   }
12052 }
12053 
12054 namespace {
12055 /// Cleanup action for doacross support.
12056 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12057 public:
12058   static const int DoacrossFinArgs = 2;
12059 
12060 private:
12061   llvm::FunctionCallee RTLFn;
12062   llvm::Value *Args[DoacrossFinArgs];
12063 
12064 public:
12065   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12066                     ArrayRef<llvm::Value *> CallArgs)
12067       : RTLFn(RTLFn) {
12068     assert(CallArgs.size() == DoacrossFinArgs);
12069     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12070   }
12071   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12072     if (!CGF.HaveInsertPoint())
12073       return;
12074     CGF.EmitRuntimeCall(RTLFn, Args);
12075   }
12076 };
12077 } // namespace
12078 
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  // Emit doacross initialization for an ordered loop nest: build an array of
  // kmp_dim descriptors (one per entry in \p NumIterations), call
  // __kmpc_doacross_init, and push a cleanup that emits the matching
  // __kmpc_doacross_fini on scope exit.
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    // kmp_dim was already built on a previous call; reuse the cached type.
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the descriptor array; the 'lo' fields deliberately stay 0
  // because only 'up' and 'st' are written below.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // The iteration count may have any integer type; widen/convert to
    // kmp_int64 before storing.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register a cleanup so __kmpc_doacross_fini is emitted on both normal and
  // exceptional exits from the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12149 
12150 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12151                                           const OMPDependClause *C) {
12152   QualType Int64Ty =
12153       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12154   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12155   QualType ArrayTy = CGM.getContext().getConstantArrayType(
12156       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12157   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12158   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12159     const Expr *CounterVal = C->getLoopData(I);
12160     assert(CounterVal);
12161     llvm::Value *CntVal = CGF.EmitScalarConversion(
12162         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12163         CounterVal->getExprLoc());
12164     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12165                           /*Volatile=*/false, Int64Ty);
12166   }
12167   llvm::Value *Args[] = {
12168       emitUpdateLocation(CGF, C->getBeginLoc()),
12169       getThreadID(CGF, C->getBeginLoc()),
12170       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12171   llvm::FunctionCallee RTLFn;
12172   if (C->getDependencyKind() == OMPC_DEPEND_source) {
12173     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12174                                                   OMPRTL___kmpc_doacross_post);
12175   } else {
12176     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12177     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12178                                                   OMPRTL___kmpc_doacross_wait);
12179   }
12180   CGF.EmitRuntimeCall(RTLFn, Args);
12181 }
12182 
12183 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12184                                llvm::FunctionCallee Callee,
12185                                ArrayRef<llvm::Value *> Args) const {
12186   assert(Loc.isValid() && "Outlined function call location must be valid.");
12187   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12188 
12189   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12190     if (Fn->doesNotThrow()) {
12191       CGF.EmitNounwindRuntimeCall(Fn, Args);
12192       return;
12193     }
12194   }
12195   CGF.EmitRuntimeCall(Callee, Args);
12196 }
12197 
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  // Default implementation: forward to emitCall, which selects the nounwind
  // call form when the outlined function is known not to throw.
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12203 
12204 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12205   if (const auto *FD = dyn_cast<FunctionDecl>(D))
12206     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12207       HasEmittedDeclareTargetRegion = true;
12208 }
12209 
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  // Base-runtime behavior: \p TargetParam is ignored and the native
  // parameter's local address is returned directly. NOTE(review): device
  // runtimes presumably override this to translate between native and target
  // parameter representations — confirm against the subclasses.
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12215 
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  // Returns the address to use for the local variable \p VD, honoring
  // untied-task local-variable duplication and the OMPAllocateDecl attribute
  // (emits __kmpc_alloc/__kmpc_free for the latter). Returns an invalid
  // Address when the default local allocation should be used.
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If the current function is an untied task body, look up the addresses
  // recorded for this variable on the untied-locals stack.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA-like types: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Statically-sized type: compute the aligned size at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is a enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // void *__kmpc_alloc(kmp_int32 gtid, size_t size, omp_allocator_handle_t).
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    // Cast the void* result to a pointer to the variable's type.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, publish the allocated pointer through the recorded
    // task-local slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // void __kmpc_free(kmp_int32 gtid, void *ptr, omp_allocator_handle_t).
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is a enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12319 
12320 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12321                                              const VarDecl *VD) const {
12322   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12323   if (It == FunctionToUntiedTaskStackMap.end())
12324     return false;
12325   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12326 }
12327 
12328 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12329     CodeGenModule &CGM, const OMPLoopDirective &S)
12330     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12331   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12332   if (!NeedToPush)
12333     return;
12334   NontemporalDeclsSet &DS =
12335       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12336   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12337     for (const Stmt *Ref : C->private_refs()) {
12338       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12339       const ValueDecl *VD;
12340       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12341         VD = DRE->getDecl();
12342       } else {
12343         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12344         assert((ME->isImplicitCXXThis() ||
12345                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12346                "Expected member of current class.");
12347         VD = ME->getMemberDecl();
12348       }
12349       DS.insert(VD);
12350     }
12351   }
12352 }
12353 
12354 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12355   if (!NeedToPush)
12356     return;
12357   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12358 }
12359 
12360 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12361     CodeGenFunction &CGF,
12362     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12363                           std::pair<Address, Address>> &LocalVars)
12364     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12365   if (!NeedToPush)
12366     return;
12367   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12368       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12369   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12370 }
12371 
12372 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12373   if (!NeedToPush)
12374     return;
12375   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12376 }
12377 
12378 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12379   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12380 
12381   return llvm::any_of(
12382       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12383       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12384 }
12385 
12386 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12387     const OMPExecutableDirective &S,
12388     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12389     const {
12390   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12391   // Vars in target/task regions must be excluded completely.
12392   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12393       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12394     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12395     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12396     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12397     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12398       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12399         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12400     }
12401   }
12402   // Exclude vars in private clauses.
12403   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12404     for (const Expr *Ref : C->varlists()) {
12405       if (!Ref->getType()->isScalarType())
12406         continue;
12407       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12408       if (!DRE)
12409         continue;
12410       NeedToCheckForLPCs.insert(DRE->getDecl());
12411     }
12412   }
12413   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12414     for (const Expr *Ref : C->varlists()) {
12415       if (!Ref->getType()->isScalarType())
12416         continue;
12417       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12418       if (!DRE)
12419         continue;
12420       NeedToCheckForLPCs.insert(DRE->getDecl());
12421     }
12422   }
12423   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12424     for (const Expr *Ref : C->varlists()) {
12425       if (!Ref->getType()->isScalarType())
12426         continue;
12427       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12428       if (!DRE)
12429         continue;
12430       NeedToCheckForLPCs.insert(DRE->getDecl());
12431     }
12432   }
12433   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12434     for (const Expr *Ref : C->varlists()) {
12435       if (!Ref->getType()->isScalarType())
12436         continue;
12437       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12438       if (!DRE)
12439         continue;
12440       NeedToCheckForLPCs.insert(DRE->getDecl());
12441     }
12442   }
12443   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12444     for (const Expr *Ref : C->varlists()) {
12445       if (!Ref->getType()->isScalarType())
12446         continue;
12447       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12448       if (!DRE)
12449         continue;
12450       NeedToCheckForLPCs.insert(DRE->getDecl());
12451     }
12452   }
12453   for (const Decl *VD : NeedToCheckForLPCs) {
12454     for (const LastprivateConditionalData &Data :
12455          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12456       if (Data.DeclToUniqueName.count(VD) > 0) {
12457         if (!Data.Disabled)
12458           NeedToAddForLPCsAsDisabled.insert(VD);
12459         break;
12460       }
12461     }
12462   }
12463 }
12464 
// Push-mode constructor: activates lastprivate conditional tracking for the
// variables of \p S when the directive has at least one
// lastprivate(conditional:) clause and the OpenMP version is >= 5.0;
// otherwise records DoNotPush and has no effect.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional lastprivate decl to the unique global name used
    // for its "last value" storage.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  // Remember the loop iteration variable and owning function for later
  // comparisons/updates.
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12496 
12497 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12498     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12499     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12500   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12501   if (CGM.getLangOpts().OpenMP < 50)
12502     return;
12503   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12504   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12505   if (!NeedToAddForLPCsAsDisabled.empty()) {
12506     Action = ActionToDo::DisableLastprivateConditional;
12507     LastprivateConditionalData &Data =
12508         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12509     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12510       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12511     Data.Fn = CGF.CurFn;
12512     Data.Disabled = true;
12513   }
12514 }
12515 
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  // Named factory for the disable-mode constructor, which marks lastprivate
  // conditional analysis as disabled for the variables of \p S when needed.
  return LastprivateConditionalRAII(CGF, S);
}
12521 
12522 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12523   if (CGM.getLangOpts().OpenMP < 50)
12524     return;
12525   if (Action == ActionToDo::DisableLastprivateConditional) {
12526     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12527            "Expected list of disabled private vars.");
12528     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12529   }
12530   if (Action == ActionToDo::PushAsLastprivateConditional) {
12531     assert(
12532         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12533         "Expected list of lastprivate conditional vars.");
12534     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12535   }
12536 }
12537 
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  // Builds (or reuses) the per-function { value, fired-flag } record for the
  // lastprivate conditional variable \p VD, resets the fired flag to 0, and
  // returns the address of the value field to be used as the private copy.
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of \p VD in this function: build the implicit record type and
    // allocate a temporary for it, then cache everything.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Reuse the cached (type, fields, base lvalue) tuple.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Reset the fired flag: no store to the private copy has happened yet.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12572 
12573 namespace {
12574 /// Checks if the lastprivate conditional variable is referenced in LHS.
12575 class LastprivateConditionalRefChecker final
12576     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12577   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12578   const Expr *FoundE = nullptr;
12579   const Decl *FoundD = nullptr;
12580   StringRef UniqueDeclName;
12581   LValue IVLVal;
12582   llvm::Function *FoundFn = nullptr;
12583   SourceLocation Loc;
12584 
12585 public:
12586   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12587     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12588          llvm::reverse(LPM)) {
12589       auto It = D.DeclToUniqueName.find(E->getDecl());
12590       if (It == D.DeclToUniqueName.end())
12591         continue;
12592       if (D.Disabled)
12593         return false;
12594       FoundE = E;
12595       FoundD = E->getDecl()->getCanonicalDecl();
12596       UniqueDeclName = It->second;
12597       IVLVal = D.IVLVal;
12598       FoundFn = D.Fn;
12599       break;
12600     }
12601     return FoundE == E;
12602   }
12603   bool VisitMemberExpr(const MemberExpr *E) {
12604     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12605       return false;
12606     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12607          llvm::reverse(LPM)) {
12608       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12609       if (It == D.DeclToUniqueName.end())
12610         continue;
12611       if (D.Disabled)
12612         return false;
12613       FoundE = E;
12614       FoundD = E->getMemberDecl()->getCanonicalDecl();
12615       UniqueDeclName = It->second;
12616       IVLVal = D.IVLVal;
12617       FoundFn = D.Fn;
12618       break;
12619     }
12620     return FoundE == E;
12621   }
12622   bool VisitStmt(const Stmt *S) {
12623     for (const Stmt *Child : S->children()) {
12624       if (!Child)
12625         continue;
12626       if (const auto *E = dyn_cast<Expr>(Child))
12627         if (!E->isGLValue())
12628           continue;
12629       if (Visit(Child))
12630         return true;
12631     }
12632     return false;
12633   }
12634   explicit LastprivateConditionalRefChecker(
12635       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12636       : LPM(LPM) {}
12637   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12638   getFoundData() const {
12639     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12640   }
12641 };
12642 } // namespace
12643 
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Emits the conditional-lastprivate update: inside a critical region named
  // \p UniqueDeclName (skipped in simd-only mode), compares the global
  // "last seen iteration" against the current IV and, if not newer, copies
  // the private value \p LVal into the global "last value" variable.
  //
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12729 
// If the expression LHS writes a variable registered as a lastprivate
// conditional, record the update: when the store happens in an inner region
// (a function other than the one that registered the variable), set the
// Fired flag in the wrapper struct; otherwise emit the compare-and-copy that
// keeps the global "last" value up to date.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  // Lastprivate conditional is an OpenMP 5.0 feature; nothing to do if no
  // lastprivate conditional regions are active.
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  // Bail out if LHS does not reference any tracked lastprivate conditional.
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    // Reinterpret the private copy as the wrapper struct so the Fired field
    // can be addressed.
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Store 1 into the Fired flag with an atomic, volatile, unordered store.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
12772 
12773 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12774     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12775     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12776   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12777     return;
12778   auto Range = llvm::reverse(LastprivateConditionalStack);
12779   auto It = llvm::find_if(
12780       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12781   if (It == Range.end() || It->Fn != CGF.CurFn)
12782     return;
12783   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12784   assert(LPCI != LastprivateConditionalToTypes.end() &&
12785          "Lastprivates must be registered already.");
12786   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12787   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12788   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12789   for (const auto &Pair : It->DeclToUniqueName) {
12790     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12791     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12792       continue;
12793     auto I = LPCI->getSecond().find(Pair.first);
12794     assert(I != LPCI->getSecond().end() &&
12795            "Lastprivate must be rehistered already.");
12796     // bool Cmp = priv_a.Fired != 0;
12797     LValue BaseLVal = std::get<3>(I->getSecond());
12798     LValue FiredLVal =
12799         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12800     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12801     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12802     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12803     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12804     // if (Cmp) {
12805     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12806     CGF.EmitBlock(ThenBB);
12807     Address Addr = CGF.GetAddrOfLocalVar(VD);
12808     LValue LVal;
12809     if (VD->getType()->isReferenceType())
12810       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12811                                            AlignmentSource::Decl);
12812     else
12813       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12814                                 AlignmentSource::Decl);
12815     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12816                                      D.getBeginLoc());
12817     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12818     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12819     // }
12820   }
12821 }
12822 
12823 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12824     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12825     SourceLocation Loc) {
12826   if (CGF.getLangOpts().OpenMP < 50)
12827     return;
12828   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12829   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12830          "Unknown lastprivate conditional variable.");
12831   StringRef UniqueName = It->second;
12832   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12833   // The variable was not updated in the region - exit.
12834   if (!GV)
12835     return;
12836   LValue LPLVal = CGF.MakeAddrLValue(
12837       Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12838       PrivLVal.getType().getNonReferenceType());
12839   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12840   CGF.EmitStoreOfScalar(Res, PrivLVal);
12841 }
12842 
// SIMD-only mode never outlines parallel regions; reaching this is a bug.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12848 
// SIMD-only mode never outlines teams regions; reaching this is a bug.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12854 
// SIMD-only mode never outlines task bodies; reaching this is a bug.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12862 
// SIMD-only mode never emits parallel runtime calls; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12871 
// SIMD-only mode never emits critical regions; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12878 
// SIMD-only mode never emits master regions; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12884 
// SIMD-only mode never emits masked regions; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12891 
// SIMD-only mode never emits taskyield calls; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12896 
// SIMD-only mode never emits taskgroup regions; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12902 
// SIMD-only mode never emits single regions; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12910 
// SIMD-only mode never emits ordered regions; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12917 
// SIMD-only mode never emits barrier calls; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12925 
// SIMD-only mode never emits dynamic-dispatch loop init; reaching this is a
// bug.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12932 
// SIMD-only mode never emits static loop init; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12938 
// SIMD-only mode never emits distribute loop init; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12944 
// SIMD-only mode never emits ordered-iteration bookkeeping; reaching this is
// a bug.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12951 
// SIMD-only mode never emits static loop finish calls; reaching this is a
// bug.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12957 
// SIMD-only mode never emits dispatch_next calls; reaching this is a bug.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12965 
// SIMD-only mode never emits num_threads handling; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12971 
// SIMD-only mode never emits proc_bind handling; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12977 
// SIMD-only mode has no threadprivate support; reaching this is a bug.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12984 
// SIMD-only mode has no threadprivate support; reaching this is a bug.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12990 
// SIMD-only mode has no threadprivate support; reaching this is a bug.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12995 
// SIMD-only mode never emits flush calls; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13002 
// SIMD-only mode never emits task calls; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13011 
// SIMD-only mode never emits taskloop calls; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13018 
// In SIMD-only mode only simple (serial) reductions are expected; delegate
// the actual emission to the base-class implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
13027 
// SIMD-only mode never emits task reductions; reaching this is a bug.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13033 
// SIMD-only mode never emits task reductions; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13039 
// SIMD-only mode never emits task reductions; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13046 
// SIMD-only mode never emits task reductions; reaching this is a bug.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13053 
// SIMD-only mode never emits taskwait calls; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13059 
// SIMD-only mode never emits cancellation points; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13065 
// SIMD-only mode never emits cancel calls; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13071 
// SIMD-only mode never outlines target regions; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13078 
// SIMD-only mode never emits target calls; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13088 
// SIMD-only mode never scans for target functions; reaching this is a bug.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13092 
// SIMD-only mode never handles target globals; reaching this is a bug.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13096 
// In SIMD-only mode no global needs device-specific emission; report that
// the declaration was not handled here.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13100 
// SIMD-only mode never emits teams calls; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13108 
// SIMD-only mode never emits num_teams handling; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13115 
// SIMD-only mode never emits target data regions; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13121 
// SIMD-only mode never emits stand-alone target data directives; reaching
// this is a bug.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13127 
// SIMD-only mode never emits doacross loops; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13133 
// SIMD-only mode never emits doacross ordering; reaching this is a bug.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13138 
// SIMD-only mode has no target-parameter translation; reaching this is a bug.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13144 
// SIMD-only mode has no target-parameter translation; reaching this is a bug.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13151