xref: /netbsd-src/external/apache2/llvm/dist/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision 53b02e147d4ed531c0d2a5ca9b3e8026ba3e99b5)
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/APValue.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/StmtOpenMP.h"
23 #include "clang/AST/StmtVisitor.h"
24 #include "clang/Basic/BitmaskEnum.h"
25 #include "clang/Basic/FileManager.h"
26 #include "clang/Basic/OpenMPKinds.h"
27 #include "clang/Basic/SourceManager.h"
28 #include "clang/CodeGen/ConstantInitBuilder.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/SetOperations.h"
31 #include "llvm/ADT/StringExtras.h"
32 #include "llvm/Bitcode/BitcodeReader.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info for an outlined region backed by a captured statement.
  /// \param CS Captured statement the region is generated for.
  /// \param RegionKind Kind of OpenMP region.
  /// \param CodeGen Callback that emits the body of the region.
  /// \param Kind OpenMP directive that created the region.
  /// \param HasCancel true if the region may contain a 'cancel' directive.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement (used for inlined
  /// regions, which reuse the enclosing function's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for 'untied' tasks; no-op by default,
  /// overridden by task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: matches any OpenMP captured-stmt info.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
109 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param CS Captured statement of the 'parallel' region.
  /// \param ThreadIDVar Variable/parameter carrying the global thread id;
  ///        must not be null.
  /// \param CodeGen Callback emitting the region body.
  /// \param Kind OpenMP directive that created the region.
  /// \param HasCancel true if the region may contain a 'cancel' directive.
  /// \param HelperName Name reported for the outlined helper function.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: matches only parallel outlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};
142 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post-action implementing part switching for 'untied' tasks.
  /// At the task entry a switch on the part-id variable is emitted; every
  /// task-switching point stores the next part id, yields, and registers a
  /// new switch case so a re-invocation resumes right after the point where
  /// the task previously yielded.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Variable/parameter holding the current task part id.
    const VarDecl *PartIDVar;
    /// Code emitted at every task-switching point (before yielding).
    const RegionCodeGenTy UntiedCodeGen;
    /// Dispatch switch at task entry; gains one case per emitted part.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Default destination just returns from the task function.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 starts execution of the task body from the beginning.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        // Record the part id to resume with (next free case number),
        // run the user-provided switching code, then yield by returning.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        // New switch case resumes execution right after this point.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of parts emitted so far; only valid for untied tasks after
    /// Enter() has created the switch.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI: matches only task outlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
232 /// API for inlined captured statement code generation in OpenMP
233 /// constructs.
234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
235 public:
236   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
237                             const RegionCodeGenTy &CodeGen,
238                             OpenMPDirectiveKind Kind, bool HasCancel)
239       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
240         OldCSI(OldCSI),
241         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
242 
243   // Retrieve the value of the context parameter.
244   llvm::Value *getContextValue() const override {
245     if (OuterRegionInfo)
246       return OuterRegionInfo->getContextValue();
247     llvm_unreachable("No context value for inlined OpenMP region");
248   }
249 
250   void setContextValue(llvm::Value *V) override {
251     if (OuterRegionInfo) {
252       OuterRegionInfo->setContextValue(V);
253       return;
254     }
255     llvm_unreachable("No context value for inlined OpenMP region");
256   }
257 
258   /// Lookup the captured field decl for a variable.
259   const FieldDecl *lookup(const VarDecl *VD) const override {
260     if (OuterRegionInfo)
261       return OuterRegionInfo->lookup(VD);
262     // If there is no outer outlined region,no need to lookup in a list of
263     // captured variables, we can use the original one.
264     return nullptr;
265   }
266 
267   FieldDecl *getThisFieldDecl() const override {
268     if (OuterRegionInfo)
269       return OuterRegionInfo->getThisFieldDecl();
270     return nullptr;
271   }
272 
273   /// Get a variable or parameter for storing global thread id
274   /// inside OpenMP construct.
275   const VarDecl *getThreadIDVariable() const override {
276     if (OuterRegionInfo)
277       return OuterRegionInfo->getThreadIDVariable();
278     return nullptr;
279   }
280 
281   /// Get an LValue for the current ThreadID variable.
282   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
285     llvm_unreachable("No LValue for inlined OpenMP construct");
286   }
287 
288   /// Get the name of the capture helper.
289   StringRef getHelperName() const override {
290     if (auto *OuterRegionInfo = getOldCSI())
291       return OuterRegionInfo->getHelperName();
292     llvm_unreachable("No helper name for inlined OpenMP construct");
293   }
294 
295   void emitUntiedSwitch(CodeGenFunction &CGF) override {
296     if (OuterRegionInfo)
297       OuterRegionInfo->emitUntiedSwitch(CGF);
298   }
299 
300   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
301 
302   static bool classof(const CGCapturedStmtInfo *Info) {
303     return CGOpenMPRegionInfo::classof(Info) &&
304            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
305   }
306 
307   ~CGOpenMPInlinedRegionInfo() override = default;
308 
309 private:
310   /// CodeGen info about outer OpenMP region.
311   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
312   CGOpenMPRegionInfo *OuterRegionInfo;
313 };
314 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param CS Captured statement of the 'target' region.
  /// \param CodeGen Callback emitting the region body.
  /// \param HelperName Unique, client-provided name for the target region.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: matches only target regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};
343 
/// Placeholder region-body callback for expression captures: expression
/// regions never emit a body, so reaching this is a bug.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already usable directly; only globals
      // need to be redirected through the private scope.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    // Not captured by an enclosing region: use the original variable
    // (globals were already privatized in the constructor).
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  // Never matched by isa/cast: this info is used only transiently while
  // emitting a captured expression.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
406 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // State saved off CGF when NoInheritance is requested; restored by the
  // destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, lambda-capture and block state of the
  /// enclosing function is stashed away for the lifetime of this RAII so the
  /// inlined region does not see it.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      // Put back the lambda/block state saved by the constructor.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
449 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same bit value as
  /// OMP_IDENT_BARRIER_IMPL, matching kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
478 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device IDs understood by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
504 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
/// Field indices for GEPs into the ident_t structure above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
545 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  /// Modifier bits are OR'ed into one of the base schedule values above.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
577 
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Run the action's Exit hook when the cleanup is popped; does nothing if
  /// there is no valid insertion point (e.g. after a terminator).
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};
591 
592 } // anonymous namespace
593 
/// Invoke the stored region codegen callback inside its own cleanup scope.
/// If a pre/post action was attached, its Exit() is registered as a cleanup
/// (so it also runs on exceptional exits) before the callback is invoked.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No user action: pass a no-op action to the callback.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
604 
605 /// Check if the combiner is a call to UDR combiner and if it is so return the
606 /// UDR decl used for reduction.
607 static const OMPDeclareReductionDecl *
608 getReductionInit(const Expr *ReductionOp) {
609   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
610     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
611       if (const auto *DRE =
612               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
613         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
614           return DRD;
615   return nullptr;
616 }
617 
/// Emit initialization of a reduction private copy.
/// If \p DRD has an initializer clause, the UDR initializer is invoked with
/// \p Private bound as its first operand and \p Original as its second.
/// Otherwise \p Private is initialized from a zero-initialized constant
/// global of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // InitOp is a call whose two operands reference placeholder variables;
    // rebind those placeholders to the actual private/original addresses.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Map the opaque callee to the UDR initializer function.
    // NOTE(review): assumes .second of the pair is the initializer —
    // confirm against getUserDefinedReduction's declaration.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No UDR initializer: create a private constant global holding the
    // null value of Ty and copy/load it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied straight into the private storage; no rvalue
      // needs to be formed.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
673 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element via the
///        declare-reduction initializer \p DRD; otherwise evaluate \p Init.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null.
/// \param SrcAddr Address of the original array (only used with \p DRD).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): IR value name says 'dest' for the source bump; cosmetic.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
762 
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  // Emit the lvalue for the shared (original) variable of a reduction item by
  // delegating to the generic OpenMP shared-lvalue emission.
  return CGF.EmitOMPSharedLValue(E);
}
766 
767 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
768                                             const Expr *E) {
769   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
770     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
771   return LValue();
772 }
773 
774 void ReductionCodeGen::emitAggregateInitialization(
775     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
776     const OMPDeclareReductionDecl *DRD) {
777   // Emit VarDecl with copy init for arrays.
778   // Get the address of the original variable captured in current
779   // captured region.
780   const auto *PrivateVD =
781       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
782   bool EmitDeclareReductionInit =
783       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
784   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
785                        EmitDeclareReductionInit,
786                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
787                                                 : PrivateVD->getInit(),
788                        DRD, SharedLVal.getAddress(CGF));
789 }
790 
791 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
792                                    ArrayRef<const Expr *> Origs,
793                                    ArrayRef<const Expr *> Privates,
794                                    ArrayRef<const Expr *> ReductionOps) {
795   ClausesData.reserve(Shareds.size());
796   SharedAddresses.reserve(Shareds.size());
797   Sizes.reserve(Shareds.size());
798   BaseDecls.reserve(Shareds.size());
799   const auto *IOrig = Origs.begin();
800   const auto *IPriv = Privates.begin();
801   const auto *IRed = ReductionOps.begin();
802   for (const Expr *Ref : Shareds) {
803     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
804     std::advance(IOrig, 1);
805     std::advance(IPriv, 1);
806     std::advance(IRed, 1);
807   }
808 }
809 
810 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
811   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
812          "Number of generated lvalues must be exactly N.");
813   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
814   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
815   SharedAddresses.emplace_back(First, Second);
816   if (ClausesData[N].Shared == ClausesData[N].Ref) {
817     OrigAddresses.emplace_back(First, Second);
818   } else {
819     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
820     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
821     OrigAddresses.emplace_back(First, Second);
822   }
823 }
824 
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  // Computes and records the size (in chars, and in elements for VLA-like
  // items) of reduction item N, then emits the variably-modified private
  // type when one is involved.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: the byte size is known from the type; no element
    // count is required (second member of the pair stays null).
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count = (UB - LB) + 1; byte size follows by
    // multiplying with the element size.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: byte size comes from the type; element count by exact
    // division with the element size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so the
  // variably-modified private type can be emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
861 
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  // Re-emits the variably-modified private type of reduction item N using a
  // caller-supplied element count \p Size.  \p Size must be null for items
  // whose private type is not variably modified.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to Size so EmitVariablyModifiedType can
  // evaluate the array bounds.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
880 
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  // Emits the initializer for reduction item N's private copy.  Depending on
  // the item this is an aggregate (array) initialization, a user-defined
  // 'declare reduction' initializer, or the private VarDecl's own init.
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Reinterpret both addresses with the element types CodeGen expects for
  // the private and shared types respectively.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array reduction item: element-wise initialization.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a user-defined reduction initializer (or without an
    // initializer of its own).
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own non-trivial initializer when
    // DefaultInit did not already handle the initialization.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
914 
915 bool ReductionCodeGen::needCleanups(unsigned N) {
916   const auto *PrivateVD =
917       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
918   QualType PrivateType = PrivateVD->getType();
919   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
920   return DTorKind != QualType::DK_none;
921 }
922 
923 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
924                                     Address PrivateAddr) {
925   const auto *PrivateVD =
926       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
927   QualType PrivateType = PrivateVD->getType();
928   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
929   if (needCleanups(N)) {
930     PrivateAddr = CGF.Builder.CreateElementBitCast(
931         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
932     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
933   }
934 }
935 
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  // Follows pointer/reference indirections from BaseTy down towards ElTy,
  // loading through each level, and returns an lvalue for the resulting
  // address with its element type cast to ElTy's memory representation.
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      // Reference level: materialize an lvalue for it, then load the
      // referand.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
955 
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Rebuilds the pointer/reference indirection chain of BaseTy around Addr:
  // one temporary per indirection level between BaseTy and ElTy, each outer
  // temporary storing the address of the next inner one, with Addr stored at
  // the innermost level.  Returns the outermost temporary, or Addr itself
  // (cast to BaseLVType) when no indirection is involved.
  Address Tmp = Address::invalid();        // innermost temporary so far
  Address TopTmp = Address::invalid();     // temporary awaiting a stored link
  Address MostTopTmp = Address::invalid(); // outermost temporary (the result)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address at the innermost level and hand back the
    // outermost temporary.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
983 
984 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
985   const VarDecl *OrigVD = nullptr;
986   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
987     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
988     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
989       Base = TempOASE->getBase()->IgnoreParenImpCasts();
990     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
991       Base = TempASE->getBase()->IgnoreParenImpCasts();
992     DE = cast<DeclRefExpr>(Base);
993     OrigVD = cast<VarDecl>(DE->getDecl());
994   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
995     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
996     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
997       Base = TempASE->getBase()->IgnoreParenImpCasts();
998     DE = cast<DeclRefExpr>(Base);
999     OrigVD = cast<VarDecl>(DE->getDecl());
1000   }
1001   return OrigVD;
1002 }
1003 
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // For array-section/subscript reduction items the private copy corresponds
  // to the section start, while user code addresses the item through its base
  // variable.  Apply the (base - section start) element offset to the private
  // address and rebuild the base's indirection chain so the item can be
  // accessed through the original base expression.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    // Address of the base variable, dereferenced down to the shared type.
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset (in elements) of the base relative to the section start.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment is needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1029 
1030 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1031   const OMPDeclareReductionDecl *DRD =
1032       getReductionInit(ClausesData[N].ReductionOp);
1033   return DRD && DRD->getInitializer();
1034 }
1035 
1036 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1037   return CGF.EmitLoadOfPointerLValue(
1038       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1039       getThreadIDVariable()->getType()->castAs<PointerType>());
1040 }
1041 
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  // Emits the structured block of an OpenMP region inside a terminate scope,
  // so exceptions cannot escape the region.
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1056 
1057 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1058     CodeGenFunction &CGF) {
1059   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1060                             getThreadIDVariable()->getType(),
1061                             AlignmentSource::Decl);
1062 }
1063 
1064 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1065                                        QualType FieldTy) {
1066   auto *Field = FieldDecl::Create(
1067       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1068       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1069       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1070   Field->setAccess(AS_public);
1071   DC->addDecl(Field);
1072   return Field;
1073 }
1074 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // Critical-section names are modeled as an array of 8 i32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1085 
1086 void CGOpenMPRuntime::clear() {
1087   InternalVars.clear();
1088   // Clean non-target variable declarations possibly used only in debug info.
1089   for (const auto &Data : EmittedNonTargetVariables) {
1090     if (!Data.getValue().pointsToAliveValue())
1091       continue;
1092     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1093     if (!GV)
1094       continue;
1095     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1096       continue;
1097     GV->eraseFromParent();
1098   }
1099 }
1100 
1101 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1102   SmallString<128> Buffer;
1103   llvm::raw_svector_ostream OS(Buffer);
1104   StringRef Sep = FirstSeparator;
1105   for (StringRef Part : Parts) {
1106     OS << Sep << Part;
1107     Sep = Separator;
1108   }
1109   return std::string(OS.str());
1110 }
1111 
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // Emits the outlined combiner or initializer helper of a 'declare
  // reduction' construct with the signature:
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are small; force inlining under optimization.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    // For initializers, run the 'out' variable's own non-trivial initializer
    // before the (optional) initializer expression.
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1168 
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Emits (once per declaration) the combiner and, when present, the
  // initializer helpers for a 'declare reduction' and caches them in UDRMap.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only direct-call initializers pass the expression through; for other
    // initializer kinds the init comes from the priv variable's own
    // initializer inside emitCombinerOrInitializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Record the UDR against the current function for later per-function
    // bookkeeping.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1194 
1195 std::pair<llvm::Function *, llvm::Function *>
1196 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1197   auto I = UDRMap.find(D);
1198   if (I != UDRMap.end())
1199     return I->second;
1200   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1201   return UDRMap.lookup(D);
1202 }
1203 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback for the region on construction; the
  // matching pop happens in the destructor.  A null OMPBuilder makes the
  // whole object a no-op.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1248 
1249 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1250     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1251     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1252     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1253   assert(ThreadIDVar->getType()->isPointerType() &&
1254          "thread id variable must be of type kmp_int32 *");
1255   CodeGenFunction CGF(CGM, true);
1256   bool HasCancel = false;
1257   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1258     HasCancel = OPD->hasCancel();
1259   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1260     HasCancel = OPD->hasCancel();
1261   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1262     HasCancel = OPSD->hasCancel();
1263   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1264     HasCancel = OPFD->hasCancel();
1265   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1266     HasCancel = OPFD->hasCancel();
1267   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1268     HasCancel = OPFD->hasCancel();
1269   else if (const auto *OPFD =
1270                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1271     HasCancel = OPFD->hasCancel();
1272   else if (const auto *OPFD =
1273                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1274     HasCancel = OPFD->hasCancel();
1275 
1276   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1277   //       parallel region to make cancellation barriers work properly.
1278   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1279   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1280   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1281                                     HasCancel, OutlinedHelperName);
1282   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1283   return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1284 }
1285 
1286 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1287     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1288     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1289   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1290   return emitParallelOrTeamsOutlinedFunction(
1291       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1292 }
1293 
1294 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1295     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1296     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1297   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1298   return emitParallelOrTeamsOutlinedFunction(
1299       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1300 }
1301 
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Outlines the body of a task/taskloop directive.  For untied tasks the
  // generated callback re-enqueues the task via __kmpc_omp_task.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Select the captured statement for taskloop vs. plain task regions.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only some task-family directives may carry a 'cancel' construct.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Untied tasks report how many parts the body was split into.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1348 
1349 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1350                              const RecordDecl *RD, const CGRecordLayout &RL,
1351                              ArrayRef<llvm::Constant *> Data) {
1352   llvm::StructType *StructTy = RL.getLLVMType();
1353   unsigned PrevIdx = 0;
1354   ConstantInitBuilder CIBuilder(CGM);
1355   auto DI = Data.begin();
1356   for (const FieldDecl *FD : RD->fields()) {
1357     unsigned Idx = RL.getLLVMFieldNo(FD);
1358     // Fill the alignment.
1359     for (unsigned I = PrevIdx; I < Idx; ++I)
1360       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1361     PrevIdx = Idx + 1;
1362     Fields.add(*DI);
1363     ++DI;
1364   }
1365 }
1366 
1367 template <class... As>
1368 static llvm::GlobalVariable *
1369 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1370                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1371                    As &&... Args) {
1372   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1373   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1374   ConstantInitBuilder CIBuilder(CGM);
1375   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1376   buildStructValue(Fields, CGM, RD, RL, Data);
1377   return Fields.finishAndCreateGlobal(
1378       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1379       std::forward<As>(Args)...);
1380 }
1381 
1382 template <typename T>
1383 static void
1384 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1385                                          ArrayRef<llvm::Constant *> Data,
1386                                          T &Parent) {
1387   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1388   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1389   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1390   buildStructValue(Fields, CGM, RD, RL, Data);
1391   Fields.finishAndAddTo(Parent);
1392 }
1393 
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  // Creates a dummy no-op instruction (a bitcast of undef) used as a stable
  // insertion marker for the current function; it is removed later by
  // clearLocThreadIdInsertPt.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    // Place the marker at the builder's current insertion block.
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Otherwise put it right after the function's alloca insertion point.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1409 
1410 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1411   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1412   if (Elem.second.ServiceInsertPt) {
1413     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1414     Elem.second.ServiceInsertPt = nullptr;
1415     Ptr->eraseFromParent();
1416   }
1417 }
1418 
1419 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1420                                                   SourceLocation Loc,
1421                                                   SmallString<128> &Buffer) {
1422   llvm::raw_svector_ostream OS(Buffer);
1423   // Build debug location
1424   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1425   OS << ";" << PLoc.getFilename() << ";";
1426   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1427     OS << FD->getQualifiedNameAsString();
1428   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1429   return OS.str();
1430 }
1431 
1432 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1433                                                  SourceLocation Loc,
1434                                                  unsigned Flags) {
1435   llvm::Constant *SrcLocStr;
1436   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1437       Loc.isInvalid()) {
1438     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
1439   } else {
1440     std::string FunctionName = "";
1441     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1442       FunctionName = FD->getQualifiedNameAsString();
1443     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1444     const char *FileName = PLoc.getFilename();
1445     unsigned Line = PLoc.getLine();
1446     unsigned Column = PLoc.getColumn();
1447     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
1448                                                 Line, Column);
1449   }
1450   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1451   return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
1452                                      Reserved2Flags);
1453 }
1454 
/// Return the OpenMP thread id for the current function, caching it per
/// function where possible. Prefers an already-available thread-id argument
/// of an outlined region; otherwise emits a __kmpc_global_thread_num call at
/// the entry-block service insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // The parameter load is only used when it is known safe w.r.t. EH:
      // either no landing pad is required / C++ exceptions are off, or the
      // value (or its address) lives in the entry block or the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point so the result dominates all
  // later uses; the guard restores the builder's position afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1522 
1523 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1524   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1525   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1526     clearLocThreadIdInsertPt(CGF);
1527     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1528   }
1529   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1530     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1531       UDRMap.erase(D);
1532     FunctionUDRMap.erase(CGF.CurFn);
1533   }
1534   auto I = FunctionUDMMap.find(CGF.CurFn);
1535   if (I != FunctionUDMMap.end()) {
1536     for(const auto *D : I->second)
1537       UDMMap.erase(D);
1538     FunctionUDMMap.erase(I);
1539   }
1540   LastprivateConditionalToTypes.erase(CGF.CurFn);
1541   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1542 }
1543 
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  // ident_t* — the source-location descriptor type owned by the
  // OpenMPIRBuilder.
  return OMPBuilder.IdentPtr;
}
1547 
1548 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1549   if (!Kmpc_MicroTy) {
1550     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1551     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1552                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1553     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1554   }
1555   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1556 }
1557 
1558 llvm::FunctionCallee
1559 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1560   assert((IVSize == 32 || IVSize == 64) &&
1561          "IV size is not compatible with the omp runtime");
1562   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1563                                             : "__kmpc_for_static_init_4u")
1564                                 : (IVSigned ? "__kmpc_for_static_init_8"
1565                                             : "__kmpc_for_static_init_8u");
1566   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1567   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1568   llvm::Type *TypeParams[] = {
1569     getIdentTyPointerTy(),                     // loc
1570     CGM.Int32Ty,                               // tid
1571     CGM.Int32Ty,                               // schedtype
1572     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1573     PtrTy,                                     // p_lower
1574     PtrTy,                                     // p_upper
1575     PtrTy,                                     // p_stride
1576     ITy,                                       // incr
1577     ITy                                        // chunk
1578   };
1579   auto *FnTy =
1580       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1581   return CGM.CreateRuntimeFunction(FnTy, Name);
1582 }
1583 
1584 llvm::FunctionCallee
1585 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1586   assert((IVSize == 32 || IVSize == 64) &&
1587          "IV size is not compatible with the omp runtime");
1588   StringRef Name =
1589       IVSize == 32
1590           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1591           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1592   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1593   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1594                                CGM.Int32Ty,           // tid
1595                                CGM.Int32Ty,           // schedtype
1596                                ITy,                   // lower
1597                                ITy,                   // upper
1598                                ITy,                   // stride
1599                                ITy                    // chunk
1600   };
1601   auto *FnTy =
1602       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1603   return CGM.CreateRuntimeFunction(FnTy, Name);
1604 }
1605 
1606 llvm::FunctionCallee
1607 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1608   assert((IVSize == 32 || IVSize == 64) &&
1609          "IV size is not compatible with the omp runtime");
1610   StringRef Name =
1611       IVSize == 32
1612           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1613           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1614   llvm::Type *TypeParams[] = {
1615       getIdentTyPointerTy(), // loc
1616       CGM.Int32Ty,           // tid
1617   };
1618   auto *FnTy =
1619       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1620   return CGM.CreateRuntimeFunction(FnTy, Name);
1621 }
1622 
1623 llvm::FunctionCallee
1624 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1625   assert((IVSize == 32 || IVSize == 64) &&
1626          "IV size is not compatible with the omp runtime");
1627   StringRef Name =
1628       IVSize == 32
1629           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1630           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1631   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1632   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1633   llvm::Type *TypeParams[] = {
1634     getIdentTyPointerTy(),                     // loc
1635     CGM.Int32Ty,                               // tid
1636     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1637     PtrTy,                                     // p_lower
1638     PtrTy,                                     // p_upper
1639     PtrTy                                      // p_stride
1640   };
1641   auto *FnTy =
1642       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1643   return CGM.CreateRuntimeFunction(FnTy, Name);
1644 }
1645 
1646 /// Obtain information that uniquely identifies a target entry. This
1647 /// consists of the file and device IDs as well as line number associated with
1648 /// the relevant entry source location.
1649 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1650                                      unsigned &DeviceID, unsigned &FileID,
1651                                      unsigned &LineNum) {
1652   SourceManager &SM = C.getSourceManager();
1653 
1654   // The loc should be always valid and have a file ID (the user cannot use
1655   // #pragma directives in macros)
1656 
1657   assert(Loc.isValid() && "Source location is expected to be always valid.");
1658 
1659   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1660   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1661 
1662   llvm::sys::fs::UniqueID ID;
1663   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1664     PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1665     assert(PLoc.isValid() && "Source location is expected to be always valid.");
1666     if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1667       SM.getDiagnostics().Report(diag::err_cannot_open_file)
1668           << PLoc.getFilename() << EC.message();
1669   }
1670 
1671   DeviceID = ID.getDevice();
1672   FileID = ID.getFile();
1673   LineNum = PLoc.getLine();
1674 }
1675 
/// Return the address of the reference pointer emitted for a declare-target
/// variable, creating it on first use. Returns an invalid Address when no
/// reference pointer is required for \p VD.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // Under -fopenmp-simd no reference pointer is emitted.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // A pointer is emitted for 'link' variables and, when unified shared memory
  // is required, also for 'to' variables.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Mix in the file ID so internal-linkage variables from different
        // translation units get distinct pointer names.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the variable's address; on
      // the device it is left for the runtime/registration to fill in.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1714 
/// Return the "<mangled-name>.cache." internal global used by
/// __kmpc_threadprivate_cached for \p VD, creating it lazily.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  // Only reachable when native TLS is not used for threadprivates.
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}
1724 
1725 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1726                                                 const VarDecl *VD,
1727                                                 Address VDAddr,
1728                                                 SourceLocation Loc) {
1729   if (CGM.getLangOpts().OpenMPUseTLS &&
1730       CGM.getContext().getTargetInfo().isTLSSupported())
1731     return VDAddr;
1732 
1733   llvm::Type *VarTy = VDAddr.getElementType();
1734   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1735                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1736                                                        CGM.Int8PtrTy),
1737                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1738                          getOrCreateThreadPrivateCache(VD)};
1739   return Address(CGF.EmitRuntimeCall(
1740                      OMPBuilder.getOrCreateRuntimeFunction(
1741                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1742                      Args),
1743                  VDAddr.getAlignment());
1744 }
1745 
1746 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1747     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1748     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1749   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1750   // library.
1751   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1752   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1753                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1754                       OMPLoc);
1755   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1756   // to register constructor/destructor for variable.
1757   llvm::Value *Args[] = {
1758       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1759       Ctor, CopyCtor, Dtor};
1760   CGF.EmitRuntimeCall(
1761       OMPBuilder.getOrCreateRuntimeFunction(
1762           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1763       Args);
1764 }
1765 
/// Emit the ctor/dtor functions for a threadprivate variable and register
/// them with the runtime. When called without a CodeGenFunction (\p CGF is
/// null), emits and returns a standalone global initializer that performs the
/// registration; otherwise the registration is emitted into \p CGF and
/// nullptr is returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Nothing to do when native TLS is used for threadprivates.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit the registration only once per variable definition.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor takes the address of the thread's copy as a void* argument.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the same address it received.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor also takes the address of the thread's copy as a void*.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Missing ctor/dtor are passed to the runtime as typed null pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing CodeGenFunction: emit a dedicated global init function
      // that performs the runtime registration and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1885 
/// Emit and register offload ctor/dtor entries for a declare-target variable.
/// Returns true when the caller must not emit the host-side initializer
/// (i.e. when compiling for the device).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to register without device targets or device compilation.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // 'link' variables (and 'to' under unified shared memory) are accessed
  // through a reference pointer instead; no ctor/dtor entries here.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Register only once per variable definition.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive on the device even if nothing references it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host only a unique placeholder byte is needed to identify the
      // corresponding device entry.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side placeholder identifying the device dtor entry.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2000 
/// Return the address of an "artificial" (compiler-generated) threadprivate
/// variable named \p Name of type \p VarType, creating the backing global and
/// its runtime cache on first use.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // With native TLS support, mark the global thread_local and return it
  // directly instead of going through the runtime.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise fetch the per-thread copy via __kmpc_threadprivate_cached,
  // backed by a dedicated ".cache." global.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the runtime's void* result back to a typed pointer.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2031 
// Emit 'if (Cond) { ThenGen } else { ElseGen }', constant-folding the
// condition away entirely when possible so only the live arm is emitted.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  // Scope for any temporaries created while evaluating the condition.
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
2070 
// Emit a 'parallel' region: by default a __kmpc_fork_call of the outlined
// function; when an if-clause is present and evaluates false, a serialized
// execution bracketed by __kmpc_serialized_parallel /
// __kmpc_end_serialized_parallel instead.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: hand the outlined function and its captures to the runtime.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run the outlined function on the current thread.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones calles in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    //       handling there. Much cleaner code.
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an if-clause, select between the two paths at runtime (or at compile
  // time if the condition folds); otherwise always fork.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2140 
2141 // If we're inside an (outlined) parallel region, use the region info's
2142 // thread-ID variable (it is passed in a first argument of the outlined function
2143 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2144 // regular serial code region, get thread ID by calling kmp_int32
2145 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2146 // return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Inside an outlined OpenMP region the thread ID is already available as
  // the region's thread-ID variable; reuse its address directly.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Otherwise materialize the thread ID into a fresh i32 temporary so that
  // callers who need an address (not a value) have one.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}
2163 
2164 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
2165     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2166   SmallString<256> Buffer;
2167   llvm::raw_svector_ostream Out(Buffer);
2168   Out << Name;
2169   StringRef RuntimeName = Out.str();
2170   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2171   if (Elem.second) {
2172     assert(Elem.second->getType()->getPointerElementType() == Ty &&
2173            "OMP internal variable has different type than requested");
2174     return &*Elem.second;
2175   }
2176 
2177   return Elem.second = new llvm::GlobalVariable(
2178              CGM.getModule(), Ty, /*IsConstant*/ false,
2179              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2180              Elem.first(), /*InsertBefore=*/nullptr,
2181              llvm::GlobalValue::NotThreadLocal, AddressSpace);
2182 }
2183 
2184 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2185   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2186   std::string Name = getName({Prefix, "var"});
2187   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2188 }
2189 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Wraps a region body with a runtime "enter" call before it and an "exit"
/// call after it. In conditional mode the body is only emitted on the path
/// where the enter call returned non-zero, and the caller is responsible for
/// invoking Done() after the region to close the branch.
class CommonActionTy final : public PrePostActionTy {
  // Runtime entry invoked before the region (e.g. __kmpc_master).
  llvm::FunctionCallee EnterCallee;
  // NOTE: argument arrays are referenced, not copied; they must outlive this
  // action object.
  ArrayRef<llvm::Value *> EnterArgs;
  // Runtime entry invoked after the region (e.g. __kmpc_end_master).
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  // When true, the region is guarded by the enter call's return value.
  bool Conditional;
  // Continuation block created by Enter() in conditional mode; consumed by
  // Done().
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Branch into the region only when the runtime call returned non-zero.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // Must only be called when the action was constructed with
  // Conditional=true (ContBlock is set by Enter() in that mode).
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2228 
// Emit a 'critical' region guarded by the named region lock; an optional
// hint expression selects __kmpc_critical_with_hint over __kmpc_critical.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // The enter call takes the same arguments plus the optional hint; the end
  // call (Args) never takes the hint.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
2258 
// Emit a 'master' region: the body runs only where __kmpc_master returns
// non-zero, closed by __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional branch opened by the action's Enter().
  Action.Done(CGF);
}
2281 
// Emit a 'masked' region: the body runs only where __kmpc_masked returns
// non-zero. With no filter clause, thread 0 is used as the filter.
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  // The end call takes no filter argument.
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  // Close the conditional branch opened by the action's Enter().
  Action.Done(CGF);
}
2310 
// Emit a 'taskyield' point, either via the OpenMPIRBuilder or via a direct
// call to __kmpc_omp_taskyield.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  // A taskyield is a scheduling point: an untied task may resume on a
  // different part, so emit the untied-task switch if we're in such a region.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
2330 
// Emit a 'taskgroup' region bracketed by __kmpc_taskgroup and
// __kmpc_end_taskgroup (the latter waits for the group's tasks).
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
2350 
2351 /// Given an array of pointers to variables, project the address of a
2352 /// given variable.
2353 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2354                                       unsigned Index, const VarDecl *Var) {
2355   // Pull out the pointer to the variable.
2356   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2357   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2358 
2359   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2360   Addr = CGF.Builder.CreateElementBitCast(
2361       Addr, CGF.ConvertTypeForMem(Var->getType()));
2362   return Addr;
2363 }
2364 
// Build the helper 'void copy_func(void *LHSArg, void *RHSArg)' that the
// runtime invokes from __kmpc_copyprivate: both arguments are arrays of
// void* (one slot per copyprivate variable), and each destination element is
// assigned from the matching source element using the clause's assignment op.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Cast the opaque arguments back to pointer-to-array-of-void*:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Perform the copy with the type and assignment operator recorded for
    // this copyprivate variable (handles class types with operator=).
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2418 
// Emit a 'single' region: the body runs on the one thread where
// __kmpc_single returns non-zero. When copyprivate variables are present,
// that thread also sets did_it and every thread then calls
// __kmpc_copyprivate to broadcast the values.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the single-executing branch)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional branch opened by the action's Enter().
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2506 
// Emit an 'ordered' region. With the threads clause (IsThreads) the body is
// bracketed by __kmpc_ordered / __kmpc_end_ordered; otherwise (e.g. 'simd')
// the body is emitted inline with no runtime calls.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  // No threads clause: just emit the body.
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}
2530 
2531 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2532   unsigned Flags;
2533   if (Kind == OMPD_for)
2534     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2535   else if (Kind == OMPD_sections)
2536     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2537   else if (Kind == OMPD_single)
2538     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2539   else if (Kind == OMPD_barrier)
2540     Flags = OMP_IDENT_BARRIER_EXPL;
2541   else
2542     Flags = OMP_IDENT_BARRIER_IMPL;
2543   return Flags;
2544 }
2545 
2546 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2547     CodeGenFunction &CGF, const OMPLoopDirective &S,
2548     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2549   // Check if the loop directive is actually a doacross loop directive. In this
2550   // case choose static, 1 schedule.
2551   if (llvm::any_of(
2552           S.getClausesOfKind<OMPOrderedClause>(),
2553           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2554     ScheduleKind = OMPC_SCHEDULE_static;
2555     // Chunk size is 1 in this case.
2556     llvm::APInt ChunkSize(32, 1);
2557     ChunkExpr = IntegerLiteral::Create(
2558         CGF.getContext(), ChunkSize,
2559         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2560         SourceLocation());
2561   }
2562 }
2563 
// Emit a barrier, via the OpenMPIRBuilder when enabled, otherwise as a call
// to __kmpc_barrier — or __kmpc_cancel_barrier inside a cancellable region,
// optionally followed by a check that exits the construct on cancellation.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      // Cancellable region: the barrier's return value reports whether
      // cancellation was requested.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain, non-cancellable barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2613 
2614 /// Map the OpenMP loop schedule to the runtime enumeration.
2615 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2616                                           bool Chunked, bool Ordered) {
2617   switch (ScheduleKind) {
2618   case OMPC_SCHEDULE_static:
2619     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2620                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2621   case OMPC_SCHEDULE_dynamic:
2622     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2623   case OMPC_SCHEDULE_guided:
2624     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2625   case OMPC_SCHEDULE_runtime:
2626     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2627   case OMPC_SCHEDULE_auto:
2628     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2629   case OMPC_SCHEDULE_unknown:
2630     assert(!Chunked && "chunk was specified but schedule kind not known");
2631     return Ordered ? OMP_ord_static : OMP_sch_static;
2632   }
2633   llvm_unreachable("Unexpected runtime schedule");
2634 }
2635 
2636 /// Map the OpenMP distribute schedule to the runtime enumeration.
2637 static OpenMPSchedType
2638 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2639   // only static is allowed for dist_schedule
2640   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2641 }
2642 
2643 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2644                                          bool Chunked) const {
2645   OpenMPSchedType Schedule =
2646       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2647   return Schedule == OMP_sch_static;
2648 }
2649 
2650 bool CGOpenMPRuntime::isStaticNonchunked(
2651     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2652   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2653   return Schedule == OMP_dist_sch_static;
2654 }
2655 
2656 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2657                                       bool Chunked) const {
2658   OpenMPSchedType Schedule =
2659       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2660   return Schedule == OMP_sch_static_chunked;
2661 }
2662 
2663 bool CGOpenMPRuntime::isStaticChunked(
2664     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2665   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2666   return Schedule == OMP_dist_sch_static_chunked;
2667 }
2668 
2669 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2670   OpenMPSchedType Schedule =
2671       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2672   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2673   return Schedule != OMP_sch_static;
2674 }
2675 
2676 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2677                                   OpenMPScheduleClauseModifier M1,
2678                                   OpenMPScheduleClauseModifier M2) {
2679   int Modifier = 0;
2680   switch (M1) {
2681   case OMPC_SCHEDULE_MODIFIER_monotonic:
2682     Modifier = OMP_sch_modifier_monotonic;
2683     break;
2684   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2685     Modifier = OMP_sch_modifier_nonmonotonic;
2686     break;
2687   case OMPC_SCHEDULE_MODIFIER_simd:
2688     if (Schedule == OMP_sch_static_chunked)
2689       Schedule = OMP_sch_static_balanced_chunked;
2690     break;
2691   case OMPC_SCHEDULE_MODIFIER_last:
2692   case OMPC_SCHEDULE_MODIFIER_unknown:
2693     break;
2694   }
2695   switch (M2) {
2696   case OMPC_SCHEDULE_MODIFIER_monotonic:
2697     Modifier = OMP_sch_modifier_monotonic;
2698     break;
2699   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2700     Modifier = OMP_sch_modifier_nonmonotonic;
2701     break;
2702   case OMPC_SCHEDULE_MODIFIER_simd:
2703     if (Schedule == OMP_sch_static_chunked)
2704       Schedule = OMP_sch_static_balanced_chunked;
2705     break;
2706   case OMPC_SCHEDULE_MODIFIER_last:
2707   case OMPC_SCHEDULE_MODIFIER_unknown:
2708     break;
2709   }
2710   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2711   // If the static schedule kind is specified or if the ordered clause is
2712   // specified, and if the nonmonotonic modifier is not specified, the effect is
2713   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2714   // modifier is specified, the effect is as if the nonmonotonic modifier is
2715   // specified.
2716   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2717     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2718           Schedule == OMP_sch_static_balanced_chunked ||
2719           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2720           Schedule == OMP_dist_sch_static_chunked ||
2721           Schedule == OMP_dist_sch_static))
2722       Modifier = OMP_sch_modifier_nonmonotonic;
2723   }
2724   return Schedule | Modifier;
2725 }
2726 
2727 void CGOpenMPRuntime::emitForDispatchInit(
2728     CodeGenFunction &CGF, SourceLocation Loc,
2729     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2730     bool Ordered, const DispatchRTInput &DispatchValues) {
2731   if (!CGF.HaveInsertPoint())
2732     return;
2733   OpenMPSchedType Schedule = getRuntimeSchedule(
2734       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2735   assert(Ordered ||
2736          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2737           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2738           Schedule != OMP_sch_static_balanced_chunked));
2739   // Call __kmpc_dispatch_init(
2740   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2741   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2742   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2743 
2744   // If the Chunk was not specified in the clause - use default value 1.
2745   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2746                                             : CGF.Builder.getIntN(IVSize, 1);
2747   llvm::Value *Args[] = {
2748       emitUpdateLocation(CGF, Loc),
2749       getThreadID(CGF, Loc),
2750       CGF.Builder.getInt32(addMonoNonMonoModifier(
2751           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2752       DispatchValues.LB,                                     // Lower
2753       DispatchValues.UB,                                     // Upper
2754       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2755       Chunk                                                  // Chunk
2756   };
2757   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2758 }
2759 
2760 static void emitForStaticInitCall(
2761     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2762     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2763     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2764     const CGOpenMPRuntime::StaticRTInput &Values) {
2765   if (!CGF.HaveInsertPoint())
2766     return;
2767 
2768   assert(!Values.Ordered);
2769   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2770          Schedule == OMP_sch_static_balanced_chunked ||
2771          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2772          Schedule == OMP_dist_sch_static ||
2773          Schedule == OMP_dist_sch_static_chunked);
2774 
2775   // Call __kmpc_for_static_init(
2776   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2777   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2778   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2779   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2780   llvm::Value *Chunk = Values.Chunk;
2781   if (Chunk == nullptr) {
2782     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2783             Schedule == OMP_dist_sch_static) &&
2784            "expected static non-chunked schedule");
2785     // If the Chunk was not specified in the clause - use default value 1.
2786     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2787   } else {
2788     assert((Schedule == OMP_sch_static_chunked ||
2789             Schedule == OMP_sch_static_balanced_chunked ||
2790             Schedule == OMP_ord_static_chunked ||
2791             Schedule == OMP_dist_sch_static_chunked) &&
2792            "expected static chunked schedule");
2793   }
2794   llvm::Value *Args[] = {
2795       UpdateLocation,
2796       ThreadId,
2797       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2798                                                   M2)), // Schedule type
2799       Values.IL.getPointer(),                           // &isLastIter
2800       Values.LB.getPointer(),                           // &LB
2801       Values.UB.getPointer(),                           // &UB
2802       Values.ST.getPointer(),                           // &Stride
2803       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2804       Chunk                                             // Chunk
2805   };
2806   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2807 }
2808 
2809 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2810                                         SourceLocation Loc,
2811                                         OpenMPDirectiveKind DKind,
2812                                         const OpenMPScheduleTy &ScheduleKind,
2813                                         const StaticRTInput &Values) {
2814   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2815       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2816   assert(isOpenMPWorksharingDirective(DKind) &&
2817          "Expected loop-based or sections-based directive.");
2818   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2819                                              isOpenMPLoopDirective(DKind)
2820                                                  ? OMP_IDENT_WORK_LOOP
2821                                                  : OMP_IDENT_WORK_SECTIONS);
2822   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2823   llvm::FunctionCallee StaticInitFunction =
2824       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2825   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2826   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2827                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2828 }
2829 
2830 void CGOpenMPRuntime::emitDistributeStaticInit(
2831     CodeGenFunction &CGF, SourceLocation Loc,
2832     OpenMPDistScheduleClauseKind SchedKind,
2833     const CGOpenMPRuntime::StaticRTInput &Values) {
2834   OpenMPSchedType ScheduleNum =
2835       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2836   llvm::Value *UpdatedLocation =
2837       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2838   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2839   llvm::FunctionCallee StaticInitFunction =
2840       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2841   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2842                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2843                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2844 }
2845 
2846 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2847                                           SourceLocation Loc,
2848                                           OpenMPDirectiveKind DKind) {
2849   if (!CGF.HaveInsertPoint())
2850     return;
2851   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2852   llvm::Value *Args[] = {
2853       emitUpdateLocation(CGF, Loc,
2854                          isOpenMPDistributeDirective(DKind)
2855                              ? OMP_IDENT_WORK_DISTRIBUTE
2856                              : isOpenMPLoopDirective(DKind)
2857                                    ? OMP_IDENT_WORK_LOOP
2858                                    : OMP_IDENT_WORK_SECTIONS),
2859       getThreadID(CGF, Loc)};
2860   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2861   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2862                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2863                       Args);
2864 }
2865 
2866 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2867                                                  SourceLocation Loc,
2868                                                  unsigned IVSize,
2869                                                  bool IVSigned) {
2870   if (!CGF.HaveInsertPoint())
2871     return;
2872   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2873   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2874   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2875 }
2876 
2877 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2878                                           SourceLocation Loc, unsigned IVSize,
2879                                           bool IVSigned, Address IL,
2880                                           Address LB, Address UB,
2881                                           Address ST) {
2882   // Call __kmpc_dispatch_next(
2883   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2884   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2885   //          kmp_int[32|64] *p_stride);
2886   llvm::Value *Args[] = {
2887       emitUpdateLocation(CGF, Loc),
2888       getThreadID(CGF, Loc),
2889       IL.getPointer(), // &isLastIter
2890       LB.getPointer(), // &Lower
2891       UB.getPointer(), // &Upper
2892       ST.getPointer()  // &Stride
2893   };
2894   llvm::Value *Call =
2895       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2896   return CGF.EmitScalarConversion(
2897       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2898       CGF.getContext().BoolTy, Loc);
2899 }
2900 
2901 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2902                                            llvm::Value *NumThreads,
2903                                            SourceLocation Loc) {
2904   if (!CGF.HaveInsertPoint())
2905     return;
2906   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2907   llvm::Value *Args[] = {
2908       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2909       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2910   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2911                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2912                       Args);
2913 }
2914 
2915 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2916                                          ProcBindKind ProcBind,
2917                                          SourceLocation Loc) {
2918   if (!CGF.HaveInsertPoint())
2919     return;
2920   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2921   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2922   llvm::Value *Args[] = {
2923       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2924       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2925   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2926                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2927                       Args);
2928 }
2929 
2930 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2931                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2932   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2933     OMPBuilder.createFlush(CGF.Builder);
2934   } else {
2935     if (!CGF.HaveInsertPoint())
2936       return;
2937     // Build call void __kmpc_flush(ident_t *loc)
2938     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2939                             CGM.getModule(), OMPRTL___kmpc_flush),
2940                         emitUpdateLocation(CGF, Loc));
2941   }
2942 }
2943 
namespace {
/// Indexes of fields for type kmp_task_t.
/// These enumerators are used as field indexes into the kmp_task_t record,
/// so their order is part of the layout contract - do not reorder.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2969 
2970 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2971   return OffloadEntriesTargetRegion.empty() &&
2972          OffloadEntriesDeviceGlobalVar.empty();
2973 }
2974 
2975 /// Initialize target region entry.
2976 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2977     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2978                                     StringRef ParentName, unsigned LineNum,
2979                                     unsigned Order) {
2980   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2981                                              "only required for the device "
2982                                              "code generation.");
2983   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2984       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2985                                    OMPTargetRegionEntryTargetRegion);
2986   ++OffloadingEntriesNum;
2987 }
2988 
2989 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2990     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2991                                   StringRef ParentName, unsigned LineNum,
2992                                   llvm::Constant *Addr, llvm::Constant *ID,
2993                                   OMPTargetRegionEntryKind Flags) {
2994   // If we are emitting code for a target, the entry is already initialized,
2995   // only has to be registered.
2996   if (CGM.getLangOpts().OpenMPIsDevice) {
2997     // This could happen if the device compilation is invoked standalone.
2998     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
2999       return;
3000     auto &Entry =
3001         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3002     Entry.setAddress(Addr);
3003     Entry.setID(ID);
3004     Entry.setFlags(Flags);
3005   } else {
3006     if (Flags ==
3007             OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3008         hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3009                                  /*IgnoreAddressId*/ true))
3010       return;
3011     assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3012            "Target region entry already registered!");
3013     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3014     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3015     ++OffloadingEntriesNum;
3016   }
3017 }
3018 
3019 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3020     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3021     bool IgnoreAddressId) const {
3022   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3023   if (PerDevice == OffloadEntriesTargetRegion.end())
3024     return false;
3025   auto PerFile = PerDevice->second.find(FileID);
3026   if (PerFile == PerDevice->second.end())
3027     return false;
3028   auto PerParentName = PerFile->second.find(ParentName);
3029   if (PerParentName == PerFile->second.end())
3030     return false;
3031   auto PerLine = PerParentName->second.find(LineNum);
3032   if (PerLine == PerParentName->second.end())
3033     return false;
3034   // Fail if this entry is already registered.
3035   if (!IgnoreAddressId &&
3036       (PerLine->second.getAddress() || PerLine->second.getID()))
3037     return false;
3038   return true;
3039 }
3040 
3041 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3042     const OffloadTargetRegionEntryInfoActTy &Action) {
3043   // Scan all target region entries and perform the provided action.
3044   for (const auto &D : OffloadEntriesTargetRegion)
3045     for (const auto &F : D.second)
3046       for (const auto &P : F.second)
3047         for (const auto &L : P.second)
3048           Action(D.first, F.first, P.first(), L.first, L.second);
3049 }
3050 
3051 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3052     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3053                                        OMPTargetGlobalVarEntryKind Flags,
3054                                        unsigned Order) {
3055   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3056                                              "only required for the device "
3057                                              "code generation.");
3058   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3059   ++OffloadingEntriesNum;
3060 }
3061 
3062 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3063     registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3064                                      CharUnits VarSize,
3065                                      OMPTargetGlobalVarEntryKind Flags,
3066                                      llvm::GlobalValue::LinkageTypes Linkage) {
3067   if (CGM.getLangOpts().OpenMPIsDevice) {
3068     // This could happen if the device compilation is invoked standalone.
3069     if (!hasDeviceGlobalVarEntryInfo(VarName))
3070       return;
3071     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3072     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3073       if (Entry.getVarSize().isZero()) {
3074         Entry.setVarSize(VarSize);
3075         Entry.setLinkage(Linkage);
3076       }
3077       return;
3078     }
3079     Entry.setVarSize(VarSize);
3080     Entry.setLinkage(Linkage);
3081     Entry.setAddress(Addr);
3082   } else {
3083     if (hasDeviceGlobalVarEntryInfo(VarName)) {
3084       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3085       assert(Entry.isValid() && Entry.getFlags() == Flags &&
3086              "Entry not initialized!");
3087       if (Entry.getVarSize().isZero()) {
3088         Entry.setVarSize(VarSize);
3089         Entry.setLinkage(Linkage);
3090       }
3091       return;
3092     }
3093     OffloadEntriesDeviceGlobalVar.try_emplace(
3094         VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3095     ++OffloadingEntriesNum;
3096   }
3097 }
3098 
3099 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3100     actOnDeviceGlobalVarEntriesInfo(
3101         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3102   // Scan all target region entries and perform the provided action.
3103   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3104     Action(E.getKey(), E.getValue());
3105 }
3106 
3107 void CGOpenMPRuntime::createOffloadEntry(
3108     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3109     llvm::GlobalValue::LinkageTypes Linkage) {
3110   StringRef Name = Addr->getName();
3111   llvm::Module &M = CGM.getModule();
3112   llvm::LLVMContext &C = M.getContext();
3113 
3114   // Create constant string with the name.
3115   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3116 
3117   std::string StringName = getName({"omp_offloading", "entry_name"});
3118   auto *Str = new llvm::GlobalVariable(
3119       M, StrPtrInit->getType(), /*isConstant=*/true,
3120       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3121   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3122 
3123   llvm::Constant *Data[] = {
3124       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3125       llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3126       llvm::ConstantInt::get(CGM.SizeTy, Size),
3127       llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3128       llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3129   std::string EntryName = getName({"omp_offloading", "entry", ""});
3130   llvm::GlobalVariable *Entry = createGlobalStruct(
3131       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3132       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3133 
3134   // The entry has to be created in the section the linker expects it to be.
3135   Entry->setSection("omp_offloading_entries");
3136 }
3137 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their creation order; filled in by the emitter
  // callbacks below so diagnostics can be issued in a deterministic order.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a SourceLocation for diagnostics by matching the
        // device/file IDs back to a known file in the source manager.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now walk the entries in creation order, diagnosing incomplete ones and
  // emitting the actual __tgt_offload_entry globals for the rest.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3311 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only meaningful for the device pass of a host/device compilation.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Nothing to do without a host IR file to read from.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host bitcode in a private context; only the named metadata is
  // extracted from the resulting module.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read integer / string operands of the metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operand layout matches the
    // emitters in createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3380 
3381 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3382   if (!KmpRoutineEntryPtrTy) {
3383     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3384     ASTContext &C = CGM.getContext();
3385     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3386     FunctionProtoType::ExtProtoInfo EPI;
3387     KmpRoutineEntryPtrQTy = C.getPointerType(
3388         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3389     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3390   }
3391 }
3392 
3393 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3394   // Make sure the type of the entry is already created. This is the type we
3395   // have to create:
3396   // struct __tgt_offload_entry{
3397   //   void      *addr;       // Pointer to the offload entry info.
3398   //                          // (function or global)
3399   //   char      *name;       // Name of the function or global.
3400   //   size_t     size;       // Size of the entry info (0 if it a function).
3401   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3402   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3403   // };
3404   if (TgtOffloadEntryQTy.isNull()) {
3405     ASTContext &C = CGM.getContext();
3406     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3407     RD->startDefinition();
3408     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3409     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3410     addFieldToRecordDecl(C, RD, C.getSizeType());
3411     addFieldToRecordDecl(
3412         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3413     addFieldToRecordDecl(
3414         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3415     RD->completeDefinition();
3416     RD->addAttr(PackedAttr::CreateImplicit(C));
3417     TgtOffloadEntryQTy = C.getRecordType(RD);
3418   }
3419   return TgtOffloadEntryQTy;
3420 }
3421 
3422 namespace {
3423 struct PrivateHelpersTy {
3424   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3425                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3426       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3427         PrivateElemInit(PrivateElemInit) {}
3428   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3429   const Expr *OriginalRef = nullptr;
3430   const VarDecl *Original = nullptr;
3431   const VarDecl *PrivateCopy = nullptr;
3432   const VarDecl *PrivateElemInit = nullptr;
3433   bool isLocalPrivate() const {
3434     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3435   }
3436 };
3437 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3438 } // anonymous namespace
3439 
3440 static bool isAllocatableDecl(const VarDecl *VD) {
3441   const VarDecl *CVD = VD->getCanonicalDecl();
3442   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3443     return false;
3444   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3445   // Use the default allocation.
3446   return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
3447             AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
3448            !AA->getAllocator());
3449 }
3450 
3451 static RecordDecl *
3452 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3453   if (!Privates.empty()) {
3454     ASTContext &C = CGM.getContext();
3455     // Build struct .kmp_privates_t. {
3456     //         /*  private vars  */
3457     //       };
3458     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3459     RD->startDefinition();
3460     for (const auto &Pair : Privates) {
3461       const VarDecl *VD = Pair.second.Original;
3462       QualType Type = VD->getType().getNonReferenceType();
3463       // If the private variable is a local variable with lvalue ref type,
3464       // allocate the pointer instead of the pointee type.
3465       if (Pair.second.isLocalPrivate()) {
3466         if (VD->getType()->isLValueReferenceType())
3467           Type = C.getPointerType(Type);
3468         if (isAllocatableDecl(VD))
3469           Type = C.getPointerType(Type);
3470       }
3471       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3472       if (VD->hasAttrs()) {
3473         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3474              E(VD->getAttrs().end());
3475              I != E; ++I)
3476           FD->addAttr(*I);
3477       }
3478     }
3479     RD->completeDefinition();
3480     return RD;
3481   }
3482   return nullptr;
3483 }
3484 
3485 static RecordDecl *
3486 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3487                          QualType KmpInt32Ty,
3488                          QualType KmpRoutineEntryPointerQTy) {
3489   ASTContext &C = CGM.getContext();
3490   // Build struct kmp_task_t {
3491   //         void *              shareds;
3492   //         kmp_routine_entry_t routine;
3493   //         kmp_int32           part_id;
3494   //         kmp_cmplrdata_t data1;
3495   //         kmp_cmplrdata_t data2;
3496   // For taskloops additional fields:
3497   //         kmp_uint64          lb;
3498   //         kmp_uint64          ub;
3499   //         kmp_int64           st;
3500   //         kmp_int32           liter;
3501   //         void *              reductions;
3502   //       };
3503   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3504   UD->startDefinition();
3505   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3506   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3507   UD->completeDefinition();
3508   QualType KmpCmplrdataTy = C.getRecordType(UD);
3509   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3510   RD->startDefinition();
3511   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3512   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3513   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3514   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3515   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3516   if (isOpenMPTaskLoopDirective(Kind)) {
3517     QualType KmpUInt64Ty =
3518         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3519     QualType KmpInt64Ty =
3520         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3521     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3522     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3523     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3524     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3525     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3526   }
3527   RD->completeDefinition();
3528   return RD;
3529 }
3530 
3531 static RecordDecl *
3532 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3533                                      ArrayRef<PrivateDataTy> Privates) {
3534   ASTContext &C = CGM.getContext();
3535   // Build struct kmp_task_t_with_privates {
3536   //         kmp_task_t task_data;
3537   //         .kmp_privates_t. privates;
3538   //       };
3539   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3540   RD->startDefinition();
3541   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3542   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3543     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3544   RD->completeDefinition();
3545   return RD;
3546 }
3547 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Signature expected by the runtime:
  // kmp_int32 (*)(kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The kmp_task_t part is the first field of kmp_task_t_with_privates.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is passed by address (the outlined function may update it).
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  // Cast the generic void* shareds pointer to the concrete shareds type.
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record, when present, is the second field of the wrapper;
  // otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop entries additionally receive lb/ub/st/liter/reductions loaded
  // from the task descriptor.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3662 
/// Emit the task destructor thunk invoked by the runtime: it walks the fields
/// of the privates record inside kmp_task_t_with_privates and emits a destroy
/// for every field whose type requires destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Same signature as the task entry:
  // kmp_int32 (*)(kmp_int32 gtid, kmp_task_t_with_privates *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Skip the leading kmp_task_t field; the second field is the privates
  // record whose members may need destruction.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Only fields with a non-trivial destruction kind get a destroy cleanup.
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3711 
3712 /// Emit a privates mapping function for correct handling of private and
3713 /// firstprivate variables.
3714 /// \code
3715 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3716 /// **noalias priv1,...,  <tyn> **noalias privn) {
3717 ///   *priv1 = &.privates.priv1;
3718 ///   ...;
3719 ///   *privn = &.privates.privn;
3720 /// }
3721 /// \endcode
3722 static llvm::Value *
3723 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3724                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3725                                ArrayRef<PrivateDataTy> Privates) {
3726   ASTContext &C = CGM.getContext();
3727   FunctionArgList Args;
3728   ImplicitParamDecl TaskPrivatesArg(
3729       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3730       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3731       ImplicitParamDecl::Other);
3732   Args.push_back(&TaskPrivatesArg);
3733   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3734   unsigned Counter = 1;
3735   for (const Expr *E : Data.PrivateVars) {
3736     Args.push_back(ImplicitParamDecl::Create(
3737         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3738         C.getPointerType(C.getPointerType(E->getType()))
3739             .withConst()
3740             .withRestrict(),
3741         ImplicitParamDecl::Other));
3742     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3743     PrivateVarsPos[VD] = Counter;
3744     ++Counter;
3745   }
3746   for (const Expr *E : Data.FirstprivateVars) {
3747     Args.push_back(ImplicitParamDecl::Create(
3748         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3749         C.getPointerType(C.getPointerType(E->getType()))
3750             .withConst()
3751             .withRestrict(),
3752         ImplicitParamDecl::Other));
3753     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3754     PrivateVarsPos[VD] = Counter;
3755     ++Counter;
3756   }
3757   for (const Expr *E : Data.LastprivateVars) {
3758     Args.push_back(ImplicitParamDecl::Create(
3759         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3760         C.getPointerType(C.getPointerType(E->getType()))
3761             .withConst()
3762             .withRestrict(),
3763         ImplicitParamDecl::Other));
3764     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3765     PrivateVarsPos[VD] = Counter;
3766     ++Counter;
3767   }
3768   for (const VarDecl *VD : Data.PrivateLocals) {
3769     QualType Ty = VD->getType().getNonReferenceType();
3770     if (VD->getType()->isLValueReferenceType())
3771       Ty = C.getPointerType(Ty);
3772     if (isAllocatableDecl(VD))
3773       Ty = C.getPointerType(Ty);
3774     Args.push_back(ImplicitParamDecl::Create(
3775         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3776         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3777         ImplicitParamDecl::Other));
3778     PrivateVarsPos[VD] = Counter;
3779     ++Counter;
3780   }
3781   const auto &TaskPrivatesMapFnInfo =
3782       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3783   llvm::FunctionType *TaskPrivatesMapTy =
3784       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3785   std::string Name =
3786       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3787   auto *TaskPrivatesMap = llvm::Function::Create(
3788       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3789       &CGM.getModule());
3790   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3791                                     TaskPrivatesMapFnInfo);
3792   if (CGM.getLangOpts().Optimize) {
3793     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3794     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3795     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3796   }
3797   CodeGenFunction CGF(CGM);
3798   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3799                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3800 
3801   // *privi = &.privates.privi;
3802   LValue Base = CGF.EmitLoadOfPointerLValue(
3803       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3804       TaskPrivatesArg.getType()->castAs<PointerType>());
3805   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3806   Counter = 0;
3807   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3808     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3809     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3810     LValue RefLVal =
3811         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3812     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3813         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3814     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3815     ++Counter;
3816   }
3817   CGF.FinishFunction();
3818   return TaskPrivatesMap;
3819 }
3820 
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Pointer to the shareds area the initializers read
///        from (may be invalid when no copy from shareds is needed).
/// \param TDBase Base lvalue of the kmp_task_t_with_privates object.
/// \param ForDup True when emitting inside the task duplication function
///        (taskloops); initializers are then restricted to non-trivial
///        constructor calls.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates record fields in lockstep with the Privates list.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the duplication function (ForDup) only non-trivial constructor calls
    // need to be re-run; everything else was already handled.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // PrivateElemInit is set for firstprivates: the initializer reads from
      // the shared/original copy.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          // Artificial target data variable: take its local address directly.
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the value from the source task's shareds area.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures can be emitted directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: run the initializer with the element
          // privatized to the shared copy's address.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3942 
3943 /// Check if duplication function is required for taskloops.
3944 static bool checkInitIsRequired(CodeGenFunction &CGF,
3945                                 ArrayRef<PrivateDataTy> Privates) {
3946   bool InitRequired = false;
3947   for (const PrivateDataTy &Pair : Privates) {
3948     if (Pair.second.isLocalPrivate())
3949       continue;
3950     const VarDecl *VD = Pair.second.PrivateCopy;
3951     const Expr *Init = VD->getAnyInitializer();
3952     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3953                                     !CGF.isTrivialInitializer(Init));
3954     if (InitRequired)
3955       break;
3956   }
3957   return InitRequired;
3958 }
3959 
3960 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: void (*)(kmp_task_t *task_dst, kmp_task_t *task_src,
  //                     int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // For firstprivates, load the shareds pointer from the *source* task so
  // emitPrivatesInit can copy the captured values into the destination.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4039 
4040 /// Checks if destructor function is required to be generated.
4041 /// \return true if cleanups are required, false otherwise.
4042 static bool
4043 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4044                          ArrayRef<PrivateDataTy> Privates) {
4045   for (const PrivateDataTy &P : Privates) {
4046     if (P.second.isLocalPrivate())
4047       continue;
4048     QualType Ty = P.second.Original->getType().getNonReferenceType();
4049     if (Ty.isDestructedType())
4050       return true;
4051   }
4052   return false;
4053 }
4054 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor privatizes the iterator and counter variables
/// and emits the loop headers ("iter.cont" blocks with the bounds check and
/// "iter.body") for each iterator in \p E; the destructor emits the matching
/// counter increments, back-branches and "iter.exit" blocks in reverse order.
/// IR emitted between construction and destruction therefore executes once
/// per point of the iterator space. A null \p E makes the scope a no-op.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continuation ("iter.cont") and exit ("iter.exit")
  // destinations, indexed in the same order as the iterators of E.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate each upper bound once, before any loop block is emitted.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Choose signed/unsigned comparison based on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  // Closes the loop nest opened by the constructor, innermost loop first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4133 
4134 static std::pair<llvm::Value *, llvm::Value *>
4135 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4136   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4137   llvm::Value *Addr;
4138   if (OASE) {
4139     const Expr *Base = OASE->getBase();
4140     Addr = CGF.EmitScalarExpr(Base);
4141   } else {
4142     Addr = CGF.EmitLValue(E).getPointer(CGF);
4143   }
4144   llvm::Value *SizeVal;
4145   QualType Ty = E->getType();
4146   if (OASE) {
4147     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4148     for (const Expr *SE : OASE->getDimensions()) {
4149       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4150       Sz = CGF.EmitScalarConversion(
4151           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4152       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4153     }
4154   } else if (const auto *ASE =
4155                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4156     LValue UpAddrLVal =
4157         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4158     llvm::Value *UpAddr =
4159         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4160     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4161     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4162     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4163   } else {
4164     SizeVal = CGF.getTypeSize(Ty);
4165   }
4166   return std::make_pair(Addr, SizeVal);
4167 }
4168 
4169 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4170 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4171   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4172   if (KmpTaskAffinityInfoTy.isNull()) {
4173     RecordDecl *KmpAffinityInfoRD =
4174         C.buildImplicitRecord("kmp_task_affinity_info_t");
4175     KmpAffinityInfoRD->startDefinition();
4176     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4177     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4178     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4179     KmpAffinityInfoRD->completeDefinition();
4180     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4181   }
4182 }
4183 
/// Emits the task allocation and initialization sequence for the
/// task-creating directive \p D: aggregates private copies, builds (or
/// reuses) the kmp_task_t-based record types, creates the proxy task entry
/// and helper functions, allocates the task through
/// __kmpc_omp_task_alloc/__kmpc_omp_target_task_alloc, handles detach and
/// affinity clauses, copies shareds, initializes privates and fills in the
/// destructor/priority fields of the new task descriptor.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the variable used to emit the initial
  // value (PrivateElemInit).
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  // Lastprivates, like plain privates, have no element initializer.
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Implicit private locals; allocatable ones are kept behind a pointer,
  // hence the pointer alignment.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Descending alignment order; stable sort preserves source order among
  // privates of equal alignment.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // distinct (extended) record type, cached separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any). The privates-mapping
  // function is passed as the 4th argument of the task function, so its type
  // is taken from there.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag may be a compile-time constant (the Int part) or a runtime
  // condition (the Pointer part), in which case a select is emitted.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Clauses with iterator modifiers contribute a runtime-computed count
    // (NumOfElements); plain clauses contribute a static count
    // (NumAffinities).
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized: emit a VLA for the affinity array.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Statically sized: use a constant array temporary.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-guarded elements need a runtime position counter, seeded
      // past the statically-filled prefix.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that copy state between task instances additionally need a
    // task-duplication helper.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4573 
namespace {
/// Dependence kind for RTL.
/// NOTE(review): the numeric values appear to mirror the OpenMP runtime's
/// kmp_depend_info flag encoding (cf. the kmp.h link above for task flags) —
/// do not renumber without checking the runtime.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
/// Must match the field order built by getDependTypes() below.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4584 
4585 /// Translates internal dependency kind into the runtime kind.
4586 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4587   RTLDependenceKindTy DepKind;
4588   switch (K) {
4589   case OMPC_DEPEND_in:
4590     DepKind = DepIn;
4591     break;
4592   // Out and InOut dependencies must use the same code.
4593   case OMPC_DEPEND_out:
4594   case OMPC_DEPEND_inout:
4595     DepKind = DepInOut;
4596     break;
4597   case OMPC_DEPEND_mutexinoutset:
4598     DepKind = DepMutexInOutSet;
4599     break;
4600   case OMPC_DEPEND_source:
4601   case OMPC_DEPEND_sink:
4602   case OMPC_DEPEND_depobj:
4603   case OMPC_DEPEND_unknown:
4604     llvm_unreachable("Unknown task dependence type");
4605   }
4606   return DepKind;
4607 }
4608 
4609 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4610 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4611                            QualType &FlagsTy) {
4612   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4613   if (KmpDependInfoTy.isNull()) {
4614     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4615     KmpDependInfoRD->startDefinition();
4616     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4617     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4618     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4619     KmpDependInfoRD->completeDefinition();
4620     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4621   }
4622 }
4623 
/// Given the lvalue of a depobj variable (which holds a pointer into an array
/// of kmp_depend_info records), returns the number of dependence records and
/// an lvalue for the first record. The element count is read from the
/// 'base_addr' field of the record sitting immediately *before* the array
/// (deps[-1]) — presumably stored there by the depobj creation code; confirm
/// against the emitters if the layout changes.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the pointer stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  // Reinterpret it as a kmp_depend_info*.
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Address of the record preceding the array: deps[-1].
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4652 
/// Fills \p DependenciesArray with one kmp_depend_info record per expression
/// in \p Data.DepExprs, starting at position \p Pos. \p Pos is either a
/// compile-time index (unsigned*), advanced in place, or a runtime counter
/// lvalue (LValue*) used when the element count is not statically known
/// (e.g. when an iterator modifier multiplies the dependences).
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the dependences are guarded by an iterator, this RAII scope wraps the
  // stores below in the generated loop nest (no-op when there is no
  // iterator).
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Static position: address the slot with a constant GEP.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Dynamic position: load the current index from the counter lvalue.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position (statically or via the runtime counter) for the
    // next record.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4711 
/// Compute, per dependency expression of a depobj clause, the (runtime)
/// number of kmp_depend_info records stored in the referenced depobj.
///
/// A depobj's element count lives in the base_addr field of the array
/// element *preceding* the depobj pointer (see emitDepobjDependClause, which
/// reserves that extra leading element). Returns one size value per
/// expression in \p Data.DepExprs.
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // With an iterator modifier the code below runs inside runtime loops,
    // so each expression may contribute several depobj counts.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      // Load the depobj handle and view it as a kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // Step back one element to the hidden header that holds the count.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate into a temp. InitTempAlloca zero-initializes at the
      // alloca insertion point (once per function), so inside the iterator
      // runtime loop the load/add/store below sums the counts across
      // iterations rather than resetting each time.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Read the accumulated totals after the iterator loops have closed.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4769 
/// Copy the kmp_depend_info records of every depobj in \p Data into
/// \p DependenciesArray starting at the runtime position \p PosLVal, and
/// advance that position by the number of records copied.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Iterator modifiers emit runtime loops around the copies below.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      // Load the depobj handle and view it as a kmp_depend_info*.
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // The count is kept in the base_addr field of the element preceding
      // the depobj pointer (written by emitDepobjDependClause).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      // Byte size = sizeof(kmp_depend_info) * NumDeps.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += NumDeps (element count, not byte size).
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4830 
4831 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4832     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4833     SourceLocation Loc) {
4834   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4835         return D.DepExprs.empty();
4836       }))
4837     return std::make_pair(nullptr, Address::invalid());
4838   // Process list of dependencies.
4839   ASTContext &C = CGM.getContext();
4840   Address DependenciesArray = Address::invalid();
4841   llvm::Value *NumOfElements = nullptr;
4842   unsigned NumDependencies = std::accumulate(
4843       Dependencies.begin(), Dependencies.end(), 0,
4844       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4845         return D.DepKind == OMPC_DEPEND_depobj
4846                    ? V
4847                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4848       });
4849   QualType FlagsTy;
4850   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4851   bool HasDepobjDeps = false;
4852   bool HasRegularWithIterators = false;
4853   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4854   llvm::Value *NumOfRegularWithIterators =
4855       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4856   // Calculate number of depobj dependecies and regular deps with the iterators.
4857   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4858     if (D.DepKind == OMPC_DEPEND_depobj) {
4859       SmallVector<llvm::Value *, 4> Sizes =
4860           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4861       for (llvm::Value *Size : Sizes) {
4862         NumOfDepobjElements =
4863             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4864       }
4865       HasDepobjDeps = true;
4866       continue;
4867     }
4868     // Include number of iterations, if any.
4869     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4870       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4871         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4872         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4873         NumOfRegularWithIterators =
4874             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4875       }
4876       HasRegularWithIterators = true;
4877       continue;
4878     }
4879   }
4880 
4881   QualType KmpDependInfoArrayTy;
4882   if (HasDepobjDeps || HasRegularWithIterators) {
4883     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4884                                            /*isSigned=*/false);
4885     if (HasDepobjDeps) {
4886       NumOfElements =
4887           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4888     }
4889     if (HasRegularWithIterators) {
4890       NumOfElements =
4891           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4892     }
4893     OpaqueValueExpr OVE(Loc,
4894                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4895                         VK_RValue);
4896     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4897                                                   RValue::get(NumOfElements));
4898     KmpDependInfoArrayTy =
4899         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4900                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4901     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4902     // Properly emit variable-sized array.
4903     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4904                                          ImplicitParamDecl::Other);
4905     CGF.EmitVarDecl(*PD);
4906     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4907     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4908                                               /*isSigned=*/false);
4909   } else {
4910     KmpDependInfoArrayTy = C.getConstantArrayType(
4911         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4912         ArrayType::Normal, /*IndexTypeQuals=*/0);
4913     DependenciesArray =
4914         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4915     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4916     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4917                                            /*isSigned=*/false);
4918   }
4919   unsigned Pos = 0;
4920   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4921     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4922         Dependencies[I].IteratorExpr)
4923       continue;
4924     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4925                    DependenciesArray);
4926   }
4927   // Copy regular dependecies with iterators.
4928   LValue PosLVal = CGF.MakeAddrLValue(
4929       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4930   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4931   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4932     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4933         !Dependencies[I].IteratorExpr)
4934       continue;
4935     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4936                    DependenciesArray);
4937   }
4938   // Copy final depobj arrays without iterators.
4939   if (HasDepobjDeps) {
4940     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4941       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4942         continue;
4943       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4944                          DependenciesArray);
4945     }
4946   }
4947   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4948       DependenciesArray, CGF.VoidPtrTy);
4949   return std::make_pair(NumOfElements, DependenciesArray);
4950 }
4951 
/// Emit the heap-allocated dependence array backing an 'omp depobj'
/// construct. The array has one extra leading element whose base_addr field
/// stores the number of real entries (needed by 'depobj(x) update(in)' and by
/// consumers of the depobj), and the returned address points *past* that
/// header at the first real entry.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator modifier: the entry count is the runtime product of the
    // iterator sizes, so the allocation size must be computed dynamically.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the header element holding the count.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Statically known count: size the array (count + 1 header) directly.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  // Start filling at index 1 (index 0 is the header). With iterators the
  // position must be a runtime counter; otherwise a static index suffices.
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer to the first real entry, i.e. past the header.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5034 
5035 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5036                                         SourceLocation Loc) {
5037   ASTContext &C = CGM.getContext();
5038   QualType FlagsTy;
5039   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5040   LValue Base = CGF.EmitLoadOfPointerLValue(
5041       DepobjLVal.getAddress(CGF),
5042       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5043   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5044   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5045       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5046   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5047       Addr.getPointer(),
5048       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5049   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5050                                                                CGF.VoidPtrTy);
5051   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5052   // Use default allocator.
5053   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5054   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5055 
5056   // _kmpc_free(gtid, addr, nullptr);
5057   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5058                                 CGM.getModule(), OMPRTL___kmpc_free),
5059                             Args);
5060 }
5061 
/// Emit the 'update' clause of an 'omp depobj' construct: rewrite the flags
/// field of every kmp_depend_info record in the depobj's array to the
/// dependence kind \p NewDepKind. Emits a hand-built do-while loop over the
/// array elements.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // NumDeps is the runtime element count; Base addresses the first record.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): there is no up-front emptiness check, so the body runs at
  // least once — assumes a depobj always holds at least one record; confirm
  // against emitDepobjDependClause's callers.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI carries the current element pointer across loop iterations.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Loop back until the advanced pointer reaches the end.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5107 
/// Emit code for an OpenMP task construct: initialize the task descriptor,
/// emit the dependence array (if any), then either enqueue the task via
/// __kmpc_omp_task[_with_deps] or — when \p IfCond evaluates to false — run
/// it immediately in the encountering thread using the
/// __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0 protocol.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' branch: actually enqueue the task with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // For untied tasks, reset part_id so execution starts at the first part.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'else' branch (if(false) clause): execute the task immediately, still
  // honoring its dependences and notifying the runtime around the call.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // With an if clause, branch between the two variants; otherwise always
  // enqueue.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5225 
/// Emit code for a taskloop construct: initialize the task descriptor, store
/// the loop bounds/stride and reduction data into it, then hand the task to
/// the runtime via __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: an if clause becomes a runtime int; absent means "always 1".
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound field of the task descriptor from the loop's
  // lower-bound variable initializer.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the upper bound field.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // And for the stride field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No reductions: null out the field.
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // 'sched' argument encoding for __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // sched: Data.Schedule's int discriminates num_tasks vs grainsize.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // grainsize/num_tasks value, or 0 when no schedule was given.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // task_dup callback, or null when none was generated.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5311 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional pieces of an atomic update expression
/// that are forwarded unchanged to \p RedOpGen on every element (used by the
/// atomic reduction code path; all null for the plain combiner path).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array section is empty.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointer across
  // loop iterations; the back-edge incoming values are wired up below, after
  // the loop latch branch is emitted.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so that
  // RedOpGen combines exactly one element per iteration.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Back-edge values must come from the block that actually branches back,
  // which may differ from BodyBB if RedOpGen emitted control flow.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5391 
5392 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5393 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5394 /// UDR combiner function.
5395 static void emitReductionCombiner(CodeGenFunction &CGF,
5396                                   const Expr *ReductionOp) {
5397   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5398     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5399       if (const auto *DRE =
5400               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5401         if (const auto *DRD =
5402                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5403           std::pair<llvm::Function *, llvm::Function *> Reduction =
5404               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5405           RValue Func = RValue::get(Reduction.first);
5406           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5407           CGF.EmitIgnoredExpr(ReductionOp);
5408           return;
5409         }
5410   CGF.EmitIgnoredExpr(ReductionOp);
5411 }
5412 
/// Emits the outlined function `void reduction_func(void *lhs, void *rhs)`
/// passed to __kmpc_reduce{_nowait}. Both arguments are arrays of n void*
/// slots (plus extra slots holding VLA sizes); for each reduction item the
/// body performs lhs[i] = ReductionOp<i>(lhs[i], rhs[i]).
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS variable to the corresponding slot of the argument
  // arrays; Idx can run ahead of I because VLA items take an extra slot.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size was stashed in the next array slot by the caller; bind it to
      // the VLA's opaque size expression before emitting the type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5504 
5505 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5506                                                   const Expr *ReductionOp,
5507                                                   const Expr *PrivateRef,
5508                                                   const DeclRefExpr *LHS,
5509                                                   const DeclRefExpr *RHS) {
5510   if (PrivateRef->getType()->isArrayType()) {
5511     // Emit reduction for array section.
5512     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5513     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5514     EmitOMPAggregateReduction(
5515         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5516         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5517           emitReductionCombiner(CGF, ReductionOp);
5518         });
5519   } else {
5520     // Emit reduction for array subscript or single variable.
5521     emitReductionCombiner(CGF, ReductionOp);
5522   }
5523 }
5524 
/// Emits the final combination of per-thread reduction values: either a
/// straight sequence of combiners (SimpleReduction) or the full
/// __kmpc_reduce{_nowait} protocol with its tree-reduce (case 1) and atomic
/// (case 2) arms.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: just run every combiner in sequence.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // Variably-modified items occupy an extra slot that carries their size.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The CommonActionTy appends the matching __kmpc_end_reduce{_nowait} call
  // after the combiner sequence.
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Decompose "x = <update>" so the update can be tried as a simple
      // atomic read-modify-write instead of a critical section.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback generator when no atomic instruction fits: store
                // the loaded value into a temporary remapped as VD and
                // re-evaluate the update expression on it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5831 
5832 /// Generates unique name for artificial threadprivate variables.
5833 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5834 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5835                                       const Expr *Ref) {
5836   SmallString<256> Buffer;
5837   llvm::raw_svector_ostream Out(Buffer);
5838   const clang::DeclRefExpr *DE;
5839   const VarDecl *D = ::getBaseDecl(Ref, DE);
5840   if (!D)
5841     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5842   D = D->getCanonicalDecl();
5843   std::string Name = CGM.getOpenMPRuntime().getName(
5844       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5845   Out << Prefix << Name << "_"
5846       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5847   return std::string(Out.str());
5848 }
5849 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer). Otherwise a null pointer lvalue is passed, since
  // the original item is not referenced.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5918 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5996 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups, so the caller
/// can pass a null finalizer to the runtime.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6045 
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to emit without an insertion point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill in one kmp_taskred_input_t array element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    // The finalizer may be null for items that do not require cleanup.
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Non-constant size: signal delayed creation/initialization to the
      // runtime by storing 1 into the flags field.
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6174 
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
6192 
6193 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6194                                               SourceLocation Loc,
6195                                               ReductionCodeGen &RCG,
6196                                               unsigned N) {
6197   auto Sizes = RCG.getSizes(N);
6198   // Emit threadprivate global variable if the type is non-constant
6199   // (Sizes.second = nullptr).
6200   if (Sizes.second) {
6201     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6202                                                      /*isSigned=*/false);
6203     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6204         CGF, CGM.getContext().getSizeType(),
6205         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6206     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6207   }
6208 }
6209 
6210 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6211                                               SourceLocation Loc,
6212                                               llvm::Value *ReductionsPtr,
6213                                               LValue SharedLVal) {
6214   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6215   // *d);
6216   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6217                                                    CGM.IntTy,
6218                                                    /*isSigned=*/true),
6219                          ReductionsPtr,
6220                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6221                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6222   return Address(
6223       CGF.EmitRuntimeCall(
6224           OMPBuilder.getOrCreateRuntimeFunction(
6225               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6226           Args),
6227       SharedLVal.getAlignment());
6228 }
6229 
6230 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6231                                        SourceLocation Loc) {
6232   if (!CGF.HaveInsertPoint())
6233     return;
6234 
6235   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6236     OMPBuilder.createTaskwait(CGF.Builder);
6237   } else {
6238     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6239     // global_tid);
6240     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6241     // Ignore return result until untied tasks are supported.
6242     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6243                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6244                         Args);
6245   }
6246 
6247   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6248     Region->emitUntiedSwitch(CGF);
6249 }
6250 
6251 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6252                                            OpenMPDirectiveKind InnerKind,
6253                                            const RegionCodeGenTy &CodeGen,
6254                                            bool HasCancel) {
6255   if (!CGF.HaveInsertPoint())
6256     return;
6257   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6258                                  InnerKind != OMPD_critical &&
6259                                      InnerKind != OMPD_master &&
6260                                      InnerKind != OMPD_masked);
6261   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6262 }
6263 
namespace {
/// Cancellation kind encoded as the cncl_kind argument of the
/// __kmpc_cancel/__kmpc_cancellationpoint runtime calls; the numeric values
/// must stay in sync with the OpenMP runtime's cancellation kinds.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6273 
6274 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6275   RTCancelKind CancelKind = CancelNoreq;
6276   if (CancelRegion == OMPD_parallel)
6277     CancelKind = CancelParallel;
6278   else if (CancelRegion == OMPD_for)
6279     CancelKind = CancelLoop;
6280   else if (CancelRegion == OMPD_sections)
6281     CancelKind = CancelSections;
6282   else {
6283     assert(CancelRegion == OMPD_taskgroup);
6284     CancelKind = CancelTaskgroup;
6285   }
6286   return CancelKind;
6287 }
6288 
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero runtime result means cancellation was requested; branch to
      // the region's cancel destination, running cleanups on the way.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6325 
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The reference captures are safe: ThenGen is only invoked synchronously
    // below, either directly or via emitIfClause.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      // A non-zero runtime result means cancellation is active; branch to the
      // region's cancel destination, running cleanups on the way.
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // The cancel has an if clause: guard the runtime call on the condition.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6368 
namespace {
/// Cleanup action for uses_allocators support.
/// Initializes each clause-specified allocator (with its traits) on entry to
/// the target region and destroys it again on exit.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator expr, allocator traits expr) pairs; the caller only registers
  /// pairs whose traits expression is non-null.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    // Emit an allocator-initialization call for every registered pair.
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    // Emit the matching allocator-destruction calls.
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
6396 
6397 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6398     const OMPExecutableDirective &D, StringRef ParentName,
6399     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6400     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6401   assert(!ParentName.empty() && "Invalid target region parent name!");
6402   HasEmittedTargetRegion = true;
6403   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6404   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6405     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6406       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6407       if (!D.AllocatorTraits)
6408         continue;
6409       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6410     }
6411   }
6412   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6413   CodeGen.setAction(UsesAllocatorAction);
6414   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6415                                    IsOffloadEntry, CodeGen);
6416 }
6417 
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  // Emit:
  //   <allocator> = __kmpc_init_allocator(gtid, /*memspace=*/nullptr,
  //                                       <num traits>, <traits array addr>)
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the constant array bound of the traits variable.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void** and reload it as a void*
  // scalar to match the runtime call's parameter type.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the allocator's VarDecl first so it has storage to store into.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the returned void* handle to the declared allocator type.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6452 
6453 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6454                                              const Expr *Allocator) {
6455   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6456   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6457   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6458   llvm::Value *AllocatorVal =
6459       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6460   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6461                                           CGF.getContext().VoidPtrTy,
6462                                           Allocator->getExprLoc());
6463   (void)CGF.EmitRuntimeCall(
6464       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6465                                             OMPRTL___kmpc_destroy_allocator),
6466       {ThreadId, AllocatorVal});
6467 }
6468 
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  // The region body is the captured statement of the 'target' component of
  // the directive.
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement into a function with the computed name.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: the ID is a uniquely named, zero-initialized constant byte
    // with weak linkage.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6537 
6538 /// Checks if the expression is constant or does not have non-trivial function
6539 /// calls.
6540 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6541   // We can skip constant expressions.
6542   // We can skip expressions with trivial calls or simple expressions.
6543   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6544           !E->hasNonTrivialCall(Ctx)) &&
6545          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6546 }
6547 
6548 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6549                                                     const Stmt *Body) {
6550   const Stmt *Child = Body->IgnoreContainers();
6551   while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6552     Child = nullptr;
6553     for (const Stmt *S : C->body()) {
6554       if (const auto *E = dyn_cast<Expr>(S)) {
6555         if (isTrivial(Ctx, E))
6556           continue;
6557       }
6558       // Some of the statements can be ignored.
6559       if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6560           isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6561         continue;
6562       // Analyze declarations.
6563       if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6564         if (llvm::all_of(DS->decls(), [](const Decl *D) {
6565               if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6566                   isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6567                   isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6568                   isa<UsingDirectiveDecl>(D) ||
6569                   isa<OMPDeclareReductionDecl>(D) ||
6570                   isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6571                 return true;
6572               const auto *VD = dyn_cast<VarDecl>(D);
6573               if (!VD)
6574                 return false;
6575               return VD->hasGlobalStorage() || !VD->isUsed();
6576             }))
6577           continue;
6578       }
6579       // Found multiple children - cannot get the one child only.
6580       if (Child)
6581         return nullptr;
6582       Child = S;
6583     }
6584     if (Child)
6585       Child = Child->IgnoreContainers();
6586   }
6587   return Child;
6588 }
6589 
6590 /// Emit the number of teams for a target directive.  Inspect the num_teams
6591 /// clause associated with a teams construct combined or closely nested
6592 /// with the target directive.
6593 ///
6594 /// Emit a team of size one for directives such as 'target parallel' that
6595 /// have no associated teams construct.
6596 ///
6597 /// Otherwise, return nullptr.
6598 static llvm::Value *
6599 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6600                                const OMPExecutableDirective &D) {
6601   assert(!CGF.getLangOpts().OpenMPIsDevice &&
6602          "Clauses associated with the teams directive expected to be emitted "
6603          "only for the host!");
6604   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6605   assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6606          "Expected target-based executable directive.");
6607   CGBuilderTy &Bld = CGF.Builder;
6608   switch (DirectiveKind) {
6609   case OMPD_target: {
6610     const auto *CS = D.getInnermostCapturedStmt();
6611     const auto *Body =
6612         CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6613     const Stmt *ChildStmt =
6614         CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6615     if (const auto *NestedDir =
6616             dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6617       if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6618         if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6619           CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6620           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6621           const Expr *NumTeams =
6622               NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6623           llvm::Value *NumTeamsVal =
6624               CGF.EmitScalarExpr(NumTeams,
6625                                  /*IgnoreResultAssign*/ true);
6626           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6627                                    /*isSigned=*/true);
6628         }
6629         return Bld.getInt32(0);
6630       }
6631       if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6632           isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6633         return Bld.getInt32(1);
6634       return Bld.getInt32(0);
6635     }
6636     return nullptr;
6637   }
6638   case OMPD_target_teams:
6639   case OMPD_target_teams_distribute:
6640   case OMPD_target_teams_distribute_simd:
6641   case OMPD_target_teams_distribute_parallel_for:
6642   case OMPD_target_teams_distribute_parallel_for_simd: {
6643     if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6644       CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6645       const Expr *NumTeams =
6646           D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6647       llvm::Value *NumTeamsVal =
6648           CGF.EmitScalarExpr(NumTeams,
6649                              /*IgnoreResultAssign*/ true);
6650       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6651                                /*isSigned=*/true);
6652     }
6653     return Bld.getInt32(0);
6654   }
6655   case OMPD_target_parallel:
6656   case OMPD_target_parallel_for:
6657   case OMPD_target_parallel_for_simd:
6658   case OMPD_target_simd:
6659     return Bld.getInt32(1);
6660   case OMPD_parallel:
6661   case OMPD_for:
6662   case OMPD_parallel_for:
6663   case OMPD_parallel_master:
6664   case OMPD_parallel_sections:
6665   case OMPD_for_simd:
6666   case OMPD_parallel_for_simd:
6667   case OMPD_cancel:
6668   case OMPD_cancellation_point:
6669   case OMPD_ordered:
6670   case OMPD_threadprivate:
6671   case OMPD_allocate:
6672   case OMPD_task:
6673   case OMPD_simd:
6674   case OMPD_tile:
6675   case OMPD_sections:
6676   case OMPD_section:
6677   case OMPD_single:
6678   case OMPD_master:
6679   case OMPD_critical:
6680   case OMPD_taskyield:
6681   case OMPD_barrier:
6682   case OMPD_taskwait:
6683   case OMPD_taskgroup:
6684   case OMPD_atomic:
6685   case OMPD_flush:
6686   case OMPD_depobj:
6687   case OMPD_scan:
6688   case OMPD_teams:
6689   case OMPD_target_data:
6690   case OMPD_target_exit_data:
6691   case OMPD_target_enter_data:
6692   case OMPD_distribute:
6693   case OMPD_distribute_simd:
6694   case OMPD_distribute_parallel_for:
6695   case OMPD_distribute_parallel_for_simd:
6696   case OMPD_teams_distribute:
6697   case OMPD_teams_distribute_simd:
6698   case OMPD_teams_distribute_parallel_for:
6699   case OMPD_teams_distribute_parallel_for_simd:
6700   case OMPD_target_update:
6701   case OMPD_declare_simd:
6702   case OMPD_declare_variant:
6703   case OMPD_begin_declare_variant:
6704   case OMPD_end_declare_variant:
6705   case OMPD_declare_target:
6706   case OMPD_end_declare_target:
6707   case OMPD_declare_reduction:
6708   case OMPD_declare_mapper:
6709   case OMPD_taskloop:
6710   case OMPD_taskloop_simd:
6711   case OMPD_master_taskloop:
6712   case OMPD_master_taskloop_simd:
6713   case OMPD_parallel_master_taskloop:
6714   case OMPD_parallel_master_taskloop_simd:
6715   case OMPD_requires:
6716   case OMPD_unknown:
6717     break;
6718   default:
6719     break;
6720   }
6721   llvm_unreachable("Unexpected directive kind.");
6722 }
6723 
/// Inspect the (single) child directive of the captured statement \p CS of a
/// target region and compute an upper bound on the number of threads it
/// needs, clamped by \p DefaultThreadLimitVal when that is non-null.
///
/// Result:
///  * getInt32(1) when the region is known to run serialized (an if clause
///    that statically evaluates to false, or a simd-only child directive);
///  * an i32 runtime value combining num_threads, the default limit and a
///    dynamically evaluated if-clause condition;
///  * \p DefaultThreadLimitVal unchanged (possibly null) when the child is
///    neither a parallel nor a simd directive, so the caller can keep
///    searching deeper;
///  * getInt32(0) ("no specific limit") when there is no single child
///    directive and no default limit.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  // The only statement nested in the captured region, if there is exactly one
  // (ignoring wrapping compounds/captures).
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Find the if clause that applies to the parallel region (either an
        // unmodified 'if' or 'if(parallel:)').
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the parallel region is serialized
            // and needs exactly one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the clause's pre-init helper declarations first; decls
            // marked OMPCaptureNoInitAttr only get storage (no initializer).
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Same pre-init emission protocol as for the if clause above.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the default limit: min(DefaultThreadLimitVal, NumThreads),
        // using an unsigned compare + select.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the default limit, or 0
        // (meaning "no specific limit").
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd-only child executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  // No single child directive: keep the default limit if we have one.
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6815 
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
///
/// Host-side only: the device-side clause emission happens elsewhere (see the
/// assertion below). The returned value, when not null, is an i32; the value
/// 0 means "no specific limit, use the runtime default".
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': the thread count comes from whatever is nested inside
    // (teams/distribute/parallel/simd), discovered by walking single-child
    // directives.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested teams construct may carry a thread_limit clause; emit it
      // inside the captured-statement context so captured values resolve.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's pre-init declarations; OMPCaptureNoInitAttr decls
        // only get storage allocated (no initializer).
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Step through a teams (non-distribute) directive to its own single
      // child, continuing the search one level down.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A distribute (non-simd) child: look inside it for a parallel region.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // simd executes with a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // thread_limit is on the directive itself here, so no inner-expression
    // capture context is needed; a plain cleanups scope suffices.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested plain 'distribute' for a parallel region.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the if clause that applies to the parallel region (unmodified or
      // 'if(parallel:)').
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false: region is serialized, one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine: min(num_threads, thread_limit) via unsigned compare+select,
      // or just num_threads when there is no thread_limit.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // A dynamic if-clause condition selects between the computed limit and 1.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd-only target regions run with a single thread.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-execution directives and are
  // excluded by the assertion above; they fall through to llvm_unreachable.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7041 
7042 namespace {
7043 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7044 
7045 // Utility to handle information from clauses associated with a given
7046 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7047 // It provides a convenient interface to obtain the information and generate
7048 // code for that information.
7049 class MappableExprsHandler {
7050 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  ///
  /// NOTE(review): the bit positions appear to form a binary interface shared
  /// with the offloading runtime (the 0x800 slot below is explicitly reserved
  /// for XLC compatibility); confirm against the runtime's map-type flags
  /// before renumbering anything here.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map: the entry was generated by the compiler rather than
    /// written in an explicit map clause (see getMapTypeBits).
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7098 
7099   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7100   static unsigned getFlagMemberOffset() {
7101     unsigned Offset = 0;
7102     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7103          Remain = Remain >> 1)
7104       Offset++;
7105     return Offset;
7106   }
7107 
7108   /// Class that holds debugging information for a data mapping to be passed to
7109   /// the runtime library.
7110   class MappingExprInfo {
7111     /// The variable declaration used for the data mapping.
7112     const ValueDecl *MapDecl = nullptr;
7113     /// The original expression used in the map clause, or null if there is
7114     /// none.
7115     const Expr *MapExpr = nullptr;
7116 
7117   public:
7118     MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7119         : MapDecl(MapDecl), MapExpr(MapExpr) {}
7120 
7121     const ValueDecl *getMapDecl() const { return MapDecl; }
7122     const Expr *getMapExpr() const { return MapExpr; }
7123   };
7124 
7125   /// Class that associates information with a base pointer to be passed to the
7126   /// runtime library.
7127   class BasePointerInfo {
7128     /// The base pointer.
7129     llvm::Value *Ptr = nullptr;
7130     /// The base declaration that refers to this device pointer, or null if
7131     /// there is none.
7132     const ValueDecl *DevPtrDecl = nullptr;
7133 
7134   public:
7135     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7136         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7137     llvm::Value *operator*() const { return Ptr; }
7138     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7139     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7140   };
7141 
  /// Aliases for the parallel arrays of per-entry map information that are
  /// eventually passed to the runtime (one element per mapped component).
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7149 
7150   /// This structure contains combined information generated for mappable
7151   /// clauses, including base pointers, pointers, sizes, map types, user-defined
7152   /// mappers, and non-contiguous information.
7153   struct MapCombinedInfoTy {
7154     struct StructNonContiguousInfo {
7155       bool IsNonContiguous = false;
7156       MapDimArrayTy Dims;
7157       MapNonContiguousArrayTy Offsets;
7158       MapNonContiguousArrayTy Counts;
7159       MapNonContiguousArrayTy Strides;
7160     };
7161     MapExprsArrayTy Exprs;
7162     MapBaseValuesArrayTy BasePointers;
7163     MapValuesArrayTy Pointers;
7164     MapValuesArrayTy Sizes;
7165     MapFlagsArrayTy Types;
7166     MapMappersArrayTy Mappers;
7167     StructNonContiguousInfo NonContigInfo;
7168 
7169     /// Append arrays in \a CurInfo.
7170     void append(MapCombinedInfoTy &CurInfo) {
7171       Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7172       BasePointers.append(CurInfo.BasePointers.begin(),
7173                           CurInfo.BasePointers.end());
7174       Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7175       Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7176       Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7177       Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7178       NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7179                                  CurInfo.NonContigInfo.Dims.end());
7180       NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7181                                     CurInfo.NonContigInfo.Offsets.end());
7182       NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7183                                    CurInfo.NonContigInfo.Counts.end());
7184       NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7185                                     CurInfo.NonContigInfo.Strides.end());
7186     }
7187   };
7188 
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Map info gathered ahead of the main emission for this struct.
    // NOTE(review): exact consumption happens outside this view; confirm at
    // the call sites before relying on its contents.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped field: its index in the record and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped field: its index in the record and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the struct itself.
    Address Base = Address::invalid();
    /// Lower bound address used for the combined entry.
    Address LB = Address::invalid();
    /// True when the lowest element is an array section.
    bool IsArraySection = false;
    /// True when the whole record is mapped (not just a field range).
    bool HasCompleteRecord = false;
  };
7204 
7205 private:
  /// All the information gathered for one mappable-expression component list,
  /// together with the clause context (map type, modifiers, mapper) it was
  /// extracted from. (The earlier comment here — about how a device pointer
  /// has to be returned — described a different entity.)
  struct MapInfo {
    /// The component list itself (base expression down to the mapped member).
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Kind of the originating map clause (to/from/tofrom/alloc/...).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// Whether the runtime should return the device pointer for this entry
    /// (use_device_ptr/use_device_addr handling).
    bool ReturnDevicePointer = false;
    /// True when the mapping was generated implicitly by the compiler.
    bool IsImplicit = false;
    /// User-defined mapper attached to this entry, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original clause expression, kept for debug information.
    const Expr *VarRef = nullptr;
    /// True when the entry originates from use_device_addr (vs use_device_ptr).
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
7232 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression to use when the deferred entry is finally emitted.
    // NOTE(review): presumably the component's init expression; confirm at
    // the use site.
    const Expr *IE = nullptr;
    /// Declaration the use_device_ptr/use_device_addr clause was applied to.
    const ValueDecl *VD = nullptr;
    /// True when the entry came from use_device_addr rather than
    /// use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7245 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7266 
  /// Compute the size in bytes (as a runtime value of the platform size type)
  /// of the entity referenced by \p E, for use as a map-entry size.
  ///
  /// Beyond plain types this handles:
  ///  * array shaping expressions: pointee size times every dimension;
  ///  * reference types: the referenced type's size is used;
  ///  * array sections: the size is derived from the section bounds rather
  ///    than from the expression's own type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      // Multiply the pointee size by each dimension, converted to size_t.
      // NUW: dimension products are assumed not to wrap.
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        // NOTE(review): cast<> itself asserts on a type mismatch, so this
        // assert can never fire; it only documents the expectation.
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * element size, converted to size_t.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      // Remaining case: section of the form a[lb:] — size runs from lb to the
      // end of the base.
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp negative results to zero: select(total > lb*elem, diff, 0).
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7341 
7342   /// Return the corresponding bits for a given map clause modifier. Add
7343   /// a flag marking the map as a pointer if requested. Add a flag marking the
7344   /// map as the first one of a series of maps that relate to the same map
7345   /// expression.
7346   OpenMPOffloadMappingFlags getMapTypeBits(
7347       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7348       ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7349       bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7350     OpenMPOffloadMappingFlags Bits =
7351         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7352     switch (MapType) {
7353     case OMPC_MAP_alloc:
7354     case OMPC_MAP_release:
7355       // alloc and release is the default behavior in the runtime library,  i.e.
7356       // if we don't pass any bits alloc/release that is what the runtime is
7357       // going to do. Therefore, we don't need to signal anything for these two
7358       // type modifiers.
7359       break;
7360     case OMPC_MAP_to:
7361       Bits |= OMP_MAP_TO;
7362       break;
7363     case OMPC_MAP_from:
7364       Bits |= OMP_MAP_FROM;
7365       break;
7366     case OMPC_MAP_tofrom:
7367       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7368       break;
7369     case OMPC_MAP_delete:
7370       Bits |= OMP_MAP_DELETE;
7371       break;
7372     case OMPC_MAP_unknown:
7373       llvm_unreachable("Unexpected map type!");
7374     }
7375     if (AddPtrFlag)
7376       Bits |= OMP_MAP_PTR_AND_OBJ;
7377     if (AddIsTargetParamFlag)
7378       Bits |= OMP_MAP_TARGET_PARAM;
7379     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7380         != MapModifiers.end())
7381       Bits |= OMP_MAP_ALWAYS;
7382     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7383         != MapModifiers.end())
7384       Bits |= OMP_MAP_CLOSE;
7385     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) !=
7386             MapModifiers.end() ||
7387         llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) !=
7388             MotionModifiers.end())
7389       Bits |= OMP_MAP_PRESENT;
7390     if (IsNonContiguous)
7391       Bits |= OMP_MAP_NON_CONTIG;
7392     return Bits;
7393   }
7394 
7395   /// Return true if the provided expression is a final array section. A
7396   /// final array section, is one whose length can't be proved to be one.
7397   bool isFinalArraySectionExpression(const Expr *E) const {
7398     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7399 
7400     // It is not an array section and therefore not a unity-size one.
7401     if (!OASE)
7402       return false;
7403 
7404     // An array section with no colon always refer to a single element.
7405     if (OASE->getColonLocFirst().isInvalid())
7406       return false;
7407 
7408     const Expr *Length = OASE->getLength();
7409 
7410     // If we don't have a length we have to check if the array has size 1
7411     // for this dimension. Also, we should always expect a length if the
7412     // base type is pointer.
7413     if (!Length) {
7414       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7415                              OASE->getBase()->IgnoreParenImpCasts())
7416                              .getCanonicalType();
7417       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7418         return ATy->getSize().getSExtValue() != 1;
7419       // If we don't have a constant dimension length, we have to consider
7420       // the current section as having any size, so it is not necessarily
7421       // unitary. If it happen to be unity size, that's user fault.
7422       return true;
7423     }
7424 
7425     // Check if the length evaluates to 1.
7426     Expr::EvalResult Result;
7427     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7428       return true; // Can have more that size 1.
7429 
7430     llvm::APSInt ConstLength = Result.Val.getInt();
7431     return ConstLength.getSExtValue() != 1;
7432   }
7433 
7434   /// Generate the base pointers, section pointers, sizes, map type bits, and
7435   /// user-defined mappers (all included in \a CombinedInfo) for the provided
7436   /// map type, map or motion modifiers, and expression components.
7437   /// \a IsFirstComponent should be set to true if the provided set of
7438   /// components is the first associated with a capture.
7439   void generateInfoForComponentList(
7440       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7441       ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7442       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7443       MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7444       bool IsFirstComponentList, bool IsImplicit,
7445       const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7446       const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7447       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7448           OverlappedElements = llvm::None) const {
7449     // The following summarizes what has to be generated for each map and the
7450     // types below. The generated information is expressed in this order:
7451     // base pointer, section pointer, size, flags
7452     // (to add to the ones that come from the map type and modifier).
7453     //
7454     // double d;
7455     // int i[100];
7456     // float *p;
7457     //
7458     // struct S1 {
7459     //   int i;
7460     //   float f[50];
7461     // }
7462     // struct S2 {
7463     //   int i;
7464     //   float f[50];
7465     //   S1 s;
7466     //   double *p;
7467     //   struct S2 *ps;
7468     //   int &ref;
7469     // }
7470     // S2 s;
7471     // S2 *ps;
7472     //
7473     // map(d)
7474     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7475     //
7476     // map(i)
7477     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7478     //
7479     // map(i[1:23])
7480     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7481     //
7482     // map(p)
7483     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7484     //
7485     // map(p[1:24])
7486     // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7487     // in unified shared memory mode or for local pointers
7488     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7489     //
7490     // map(s)
7491     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7492     //
7493     // map(s.i)
7494     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7495     //
7496     // map(s.s.f)
7497     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7498     //
7499     // map(s.p)
7500     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7501     //
7502     // map(to: s.p[:22])
7503     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7504     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7505     // &(s.p), &(s.p[0]), 22*sizeof(double),
7506     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7507     // (*) alloc space for struct members, only this is a target parameter
7508     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7509     //      optimizes this entry out, same in the examples below)
7510     // (***) map the pointee (map: to)
7511     //
7512     // map(to: s.ref)
7513     // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7514     // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7515     // (*) alloc space for struct members, only this is a target parameter
7516     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7517     //      optimizes this entry out, same in the examples below)
7518     // (***) map the pointee (map: to)
7519     //
7520     // map(s.ps)
7521     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7522     //
7523     // map(from: s.ps->s.i)
7524     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7525     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7526     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7527     //
7528     // map(to: s.ps->ps)
7529     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7530     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7531     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7532     //
7533     // map(s.ps->ps->ps)
7534     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7535     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7536     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7537     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7538     //
7539     // map(to: s.ps->ps->s.f[:22])
7540     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7541     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7542     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7543     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7544     //
7545     // map(ps)
7546     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7547     //
7548     // map(ps->i)
7549     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7550     //
7551     // map(ps->s.f)
7552     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7553     //
7554     // map(from: ps->p)
7555     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7556     //
7557     // map(to: ps->p[:22])
7558     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7559     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7560     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7561     //
7562     // map(ps->ps)
7563     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7564     //
7565     // map(from: ps->ps->s.i)
7566     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7567     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7568     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7569     //
7570     // map(from: ps->ps->ps)
7571     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7572     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7573     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7574     //
7575     // map(ps->ps->ps->ps)
7576     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7577     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7578     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7579     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7580     //
7581     // map(to: ps->ps->ps->s.f[:22])
7582     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7583     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7584     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7585     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7586     //
7587     // map(to: s.f[:22]) map(from: s.p[:33])
7588     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7589     //     sizeof(double*) (**), TARGET_PARAM
7590     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7591     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7592     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7593     // (*) allocate contiguous space needed to fit all mapped members even if
7594     //     we allocate space for members not mapped (in this example,
7595     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7596     //     them as well because they fall between &s.f[0] and &s.p)
7597     //
7598     // map(from: s.f[:22]) map(to: ps->p[:33])
7599     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7600     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7601     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7602     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7603     // (*) the struct this entry pertains to is the 2nd element in the list of
7604     //     arguments, hence MEMBER_OF(2)
7605     //
7606     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7607     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7608     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7609     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7610     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7611     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7612     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7613     // (*) the struct this entry pertains to is the 4th element in the list
7614     //     of arguments, hence MEMBER_OF(4)
7615 
7616     // Track if the map information being generated is the first for a capture.
7617     bool IsCaptureFirstInfo = IsFirstComponentList;
7618     // When the variable is on a declare target link or in a to clause with
7619     // unified memory, a reference is needed to hold the host/device address
7620     // of the variable.
7621     bool RequiresReference = false;
7622 
7623     // Scan the components from the base to the complete expression.
7624     auto CI = Components.rbegin();
7625     auto CE = Components.rend();
7626     auto I = CI;
7627 
7628     // Track if the map information being generated is the first for a list of
7629     // components.
7630     bool IsExpressionFirstInfo = true;
7631     bool FirstPointerInComplexData = false;
7632     Address BP = Address::invalid();
7633     const Expr *AssocExpr = I->getAssociatedExpression();
7634     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7635     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7636     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7637 
7638     if (isa<MemberExpr>(AssocExpr)) {
7639       // The base is the 'this' pointer. The content of the pointer is going
7640       // to be the base of the field being mapped.
7641       BP = CGF.LoadCXXThisAddress();
7642     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7643                (OASE &&
7644                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7645       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7646     } else if (OAShE &&
7647                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7648       BP = Address(
7649           CGF.EmitScalarExpr(OAShE->getBase()),
7650           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7651     } else {
7652       // The base is the reference to the variable.
7653       // BP = &Var.
7654       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7655       if (const auto *VD =
7656               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7657         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7658                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7659           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7660               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7661                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7662             RequiresReference = true;
7663             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7664           }
7665         }
7666       }
7667 
7668       // If the variable is a pointer and is being dereferenced (i.e. is not
7669       // the last component), the base has to be the pointer itself, not its
7670       // reference. References are ignored for mapping purposes.
7671       QualType Ty =
7672           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7673       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7674         // No need to generate individual map information for the pointer, it
7675         // can be associated with the combined storage if shared memory mode is
7676         // active or the base declaration is not global variable.
7677         const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7678         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7679             !VD || VD->hasLocalStorage())
7680           BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7681         else
7682           FirstPointerInComplexData = true;
7683         ++I;
7684       }
7685     }
7686 
7687     // Track whether a component of the list should be marked as MEMBER_OF some
7688     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7689     // in a component list should be marked as MEMBER_OF, all subsequent entries
7690     // do not belong to the base struct. E.g.
7691     // struct S2 s;
7692     // s.ps->ps->ps->f[:]
7693     //   (1) (2) (3) (4)
7694     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7695     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7696     // is the pointee of ps(2) which is not member of struct s, so it should not
7697     // be marked as such (it is still PTR_AND_OBJ).
7698     // The variable is initialized to false so that PTR_AND_OBJ entries which
7699     // are not struct members are not considered (e.g. array of pointers to
7700     // data).
7701     bool ShouldBeMemberOf = false;
7702 
7703     // Variable keeping track of whether or not we have encountered a component
7704     // in the component list which is a member expression. Useful when we have a
7705     // pointer or a final array section, in which case it is the previous
7706     // component in the list which tells us whether we have a member expression.
7707     // E.g. X.f[:]
7708     // While processing the final array section "[:]" it is "f" which tells us
7709     // whether we are dealing with a member of a declared struct.
7710     const MemberExpr *EncounteredME = nullptr;
7711 
7712     // Track for the total number of dimension. Start from one for the dummy
7713     // dimension.
7714     uint64_t DimSize = 1;
7715 
7716     bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7717     bool IsPrevMemberReference = false;
7718 
7719     for (; I != CE; ++I) {
7720       // If the current component is member of a struct (parent struct) mark it.
7721       if (!EncounteredME) {
7722         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7723         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7724         // as MEMBER_OF the parent struct.
7725         if (EncounteredME) {
7726           ShouldBeMemberOf = true;
7727           // Do not emit as complex pointer if this is actually not array-like
7728           // expression.
7729           if (FirstPointerInComplexData) {
7730             QualType Ty = std::prev(I)
7731                               ->getAssociatedDeclaration()
7732                               ->getType()
7733                               .getNonReferenceType();
7734             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7735             FirstPointerInComplexData = false;
7736           }
7737         }
7738       }
7739 
7740       auto Next = std::next(I);
7741 
7742       // We need to generate the addresses and sizes if this is the last
7743       // component, if the component is a pointer or if it is an array section
7744       // whose length can't be proved to be one. If this is a pointer, it
7745       // becomes the base address for the following components.
7746 
7747       // A final array section, is one whose length can't be proved to be one.
7748       // If the map item is non-contiguous then we don't treat any array section
7749       // as final array section.
7750       bool IsFinalArraySection =
7751           !IsNonContiguous &&
7752           isFinalArraySectionExpression(I->getAssociatedExpression());
7753 
7754       // If we have a declaration for the mapping use that, otherwise use
7755       // the base declaration of the map clause.
7756       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7757                                      ? I->getAssociatedDeclaration()
7758                                      : BaseDecl;
7759       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7760                                                : MapExpr;
7761 
7762       // Get information on whether the element is a pointer. Have to do a
7763       // special treatment for array sections given that they are built-in
7764       // types.
7765       const auto *OASE =
7766           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7767       const auto *OAShE =
7768           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7769       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7770       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7771       bool IsPointer =
7772           OAShE ||
7773           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7774                        .getCanonicalType()
7775                        ->isAnyPointerType()) ||
7776           I->getAssociatedExpression()->getType()->isAnyPointerType();
7777       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7778                                MapDecl &&
7779                                MapDecl->getType()->isLValueReferenceType();
7780       bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7781 
7782       if (OASE)
7783         ++DimSize;
7784 
7785       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7786           IsFinalArraySection) {
7787         // If this is not the last component, we expect the pointer to be
7788         // associated with an array expression or member expression.
7789         assert((Next == CE ||
7790                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7791                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7792                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7793                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7794                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7795                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7796                "Unexpected expression");
7797 
7798         Address LB = Address::invalid();
7799         Address LowestElem = Address::invalid();
7800         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7801                                        const MemberExpr *E) {
7802           const Expr *BaseExpr = E->getBase();
7803           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7804           // scalar.
7805           LValue BaseLV;
7806           if (E->isArrow()) {
7807             LValueBaseInfo BaseInfo;
7808             TBAAAccessInfo TBAAInfo;
7809             Address Addr =
7810                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7811             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7812             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7813           } else {
7814             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7815           }
7816           return BaseLV;
7817         };
7818         if (OAShE) {
7819           LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7820                                     CGF.getContext().getTypeAlignInChars(
7821                                         OAShE->getBase()->getType()));
7822         } else if (IsMemberReference) {
7823           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7824           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7825           LowestElem = CGF.EmitLValueForFieldInitialization(
7826                               BaseLVal, cast<FieldDecl>(MapDecl))
7827                            .getAddress(CGF);
7828           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7829                    .getAddress(CGF);
7830         } else {
7831           LowestElem = LB =
7832               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7833                   .getAddress(CGF);
7834         }
7835 
7836         // If this component is a pointer inside the base struct then we don't
7837         // need to create any entry for it - it will be combined with the object
7838         // it is pointing to into a single PTR_AND_OBJ entry.
7839         bool IsMemberPointerOrAddr =
7840             EncounteredME &&
7841             (((IsPointer || ForDeviceAddr) &&
7842               I->getAssociatedExpression() == EncounteredME) ||
7843              (IsPrevMemberReference && !IsPointer) ||
7844              (IsMemberReference && Next != CE &&
7845               !Next->getAssociatedExpression()->getType()->isPointerType()));
7846         if (!OverlappedElements.empty() && Next == CE) {
7847           // Handle base element with the info for overlapped elements.
7848           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7849           assert(!IsPointer &&
7850                  "Unexpected base element with the pointer type.");
7851           // Mark the whole struct as the struct that requires allocation on the
7852           // device.
7853           PartialStruct.LowestElem = {0, LowestElem};
7854           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7855               I->getAssociatedExpression()->getType());
7856           Address HB = CGF.Builder.CreateConstGEP(
7857               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
7858                                                               CGF.VoidPtrTy),
7859               TypeSize.getQuantity() - 1);
7860           PartialStruct.HighestElem = {
7861               std::numeric_limits<decltype(
7862                   PartialStruct.HighestElem.first)>::max(),
7863               HB};
7864           PartialStruct.Base = BP;
7865           PartialStruct.LB = LB;
7866           assert(
7867               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7868               "Overlapped elements must be used only once for the variable.");
7869           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7870           // Emit data for non-overlapped data.
7871           OpenMPOffloadMappingFlags Flags =
7872               OMP_MAP_MEMBER_OF |
7873               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7874                              /*AddPtrFlag=*/false,
7875                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7876           llvm::Value *Size = nullptr;
7877           // Do bitcopy of all non-overlapped structure elements.
7878           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7879                    Component : OverlappedElements) {
7880             Address ComponentLB = Address::invalid();
7881             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7882                  Component) {
7883               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7884                 const auto *FD = dyn_cast<FieldDecl>(VD);
7885                 if (FD && FD->getType()->isLValueReferenceType()) {
7886                   const auto *ME =
7887                       cast<MemberExpr>(MC.getAssociatedExpression());
7888                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7889                   ComponentLB =
7890                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7891                           .getAddress(CGF);
7892                 } else {
7893                   ComponentLB =
7894                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7895                           .getAddress(CGF);
7896                 }
7897                 Size = CGF.Builder.CreatePtrDiff(
7898                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7899                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7900                 break;
7901               }
7902             }
7903             assert(Size && "Failed to determine structure size");
7904             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7905             CombinedInfo.BasePointers.push_back(BP.getPointer());
7906             CombinedInfo.Pointers.push_back(LB.getPointer());
7907             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7908                 Size, CGF.Int64Ty, /*isSigned=*/true));
7909             CombinedInfo.Types.push_back(Flags);
7910             CombinedInfo.Mappers.push_back(nullptr);
7911             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7912                                                                       : 1);
7913             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7914           }
7915           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7916           CombinedInfo.BasePointers.push_back(BP.getPointer());
7917           CombinedInfo.Pointers.push_back(LB.getPointer());
7918           Size = CGF.Builder.CreatePtrDiff(
7919               CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7920               CGF.EmitCastToVoidPtr(LB.getPointer()));
7921           CombinedInfo.Sizes.push_back(
7922               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7923           CombinedInfo.Types.push_back(Flags);
7924           CombinedInfo.Mappers.push_back(nullptr);
7925           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7926                                                                     : 1);
7927           break;
7928         }
7929         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7930         if (!IsMemberPointerOrAddr ||
7931             (Next == CE && MapType != OMPC_MAP_unknown)) {
7932           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7933           CombinedInfo.BasePointers.push_back(BP.getPointer());
7934           CombinedInfo.Pointers.push_back(LB.getPointer());
7935           CombinedInfo.Sizes.push_back(
7936               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7937           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7938                                                                     : 1);
7939 
7940           // If Mapper is valid, the last component inherits the mapper.
7941           bool HasMapper = Mapper && Next == CE;
7942           CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7943 
7944           // We need to add a pointer flag for each map that comes from the
7945           // same expression except for the first one. We also need to signal
7946           // this map is the first one that relates with the current capture
7947           // (there is a set of entries for each capture).
7948           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7949               MapType, MapModifiers, MotionModifiers, IsImplicit,
7950               !IsExpressionFirstInfo || RequiresReference ||
7951                   FirstPointerInComplexData || IsMemberReference,
7952               IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7953 
7954           if (!IsExpressionFirstInfo || IsMemberReference) {
7955             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7956             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7957             if (IsPointer || (IsMemberReference && Next != CE))
7958               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7959                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7960 
7961             if (ShouldBeMemberOf) {
7962               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7963               // should be later updated with the correct value of MEMBER_OF.
7964               Flags |= OMP_MAP_MEMBER_OF;
7965               // From now on, all subsequent PTR_AND_OBJ entries should not be
7966               // marked as MEMBER_OF.
7967               ShouldBeMemberOf = false;
7968             }
7969           }
7970 
7971           CombinedInfo.Types.push_back(Flags);
7972         }
7973 
7974         // If we have encountered a member expression so far, keep track of the
7975         // mapped member. If the parent is "*this", then the value declaration
7976         // is nullptr.
7977         if (EncounteredME) {
7978           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7979           unsigned FieldIndex = FD->getFieldIndex();
7980 
7981           // Update info about the lowest and highest elements for this struct
7982           if (!PartialStruct.Base.isValid()) {
7983             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7984             if (IsFinalArraySection) {
7985               Address HB =
7986                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7987                       .getAddress(CGF);
7988               PartialStruct.HighestElem = {FieldIndex, HB};
7989             } else {
7990               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7991             }
7992             PartialStruct.Base = BP;
7993             PartialStruct.LB = BP;
7994           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7995             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7996           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7997             PartialStruct.HighestElem = {FieldIndex, LowestElem};
7998           }
7999         }
8000 
8001         // Need to emit combined struct for array sections.
8002         if (IsFinalArraySection || IsNonContiguous)
8003           PartialStruct.IsArraySection = true;
8004 
8005         // If we have a final array section, we are done with this expression.
8006         if (IsFinalArraySection)
8007           break;
8008 
8009         // The pointer becomes the base for the next element.
8010         if (Next != CE)
8011           BP = IsMemberReference ? LowestElem : LB;
8012 
8013         IsExpressionFirstInfo = false;
8014         IsCaptureFirstInfo = false;
8015         FirstPointerInComplexData = false;
8016         IsPrevMemberReference = IsMemberReference;
8017       } else if (FirstPointerInComplexData) {
8018         QualType Ty = Components.rbegin()
8019                           ->getAssociatedDeclaration()
8020                           ->getType()
8021                           .getNonReferenceType();
8022         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8023         FirstPointerInComplexData = false;
8024       }
8025     }
8026     // If ran into the whole component - allocate the space for the whole
8027     // record.
8028     if (!EncounteredME)
8029       PartialStruct.HasCompleteRecord = true;
8030 
8031     if (!IsNonContiguous)
8032       return;
8033 
8034     const ASTContext &Context = CGF.getContext();
8035 
8036     // For supporting stride in array section, we need to initialize the first
8037     // dimension size as 1, first offset as 0, and first count as 1
8038     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8039     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8040     MapValuesArrayTy CurStrides;
8041     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8042     uint64_t ElementTypeSize;
8043 
8044     // Collect Size information for each dimension and get the element size as
8045     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8047     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8048          Components) {
8049       const Expr *AssocExpr = Component.getAssociatedExpression();
8050       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8051 
8052       if (!OASE)
8053         continue;
8054 
8055       QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8056       auto *CAT = Context.getAsConstantArrayType(Ty);
8057       auto *VAT = Context.getAsVariableArrayType(Ty);
8058 
8059       // We need all the dimension size except for the last dimension.
8060       assert((VAT || CAT || &Component == &*Components.begin()) &&
8061              "Should be either ConstantArray or VariableArray if not the "
8062              "first Component");
8063 
8064       // Get element size if CurStrides is empty.
8065       if (CurStrides.empty()) {
8066         const Type *ElementType = nullptr;
8067         if (CAT)
8068           ElementType = CAT->getElementType().getTypePtr();
8069         else if (VAT)
8070           ElementType = VAT->getElementType().getTypePtr();
8071         else
8072           assert(&Component == &*Components.begin() &&
8073                  "Only expect pointer (non CAT or VAT) when this is the "
8074                  "first Component");
8075         // If ElementType is null, then it means the base is a pointer
8076         // (neither CAT nor VAT) and we'll attempt to get ElementType again
8077         // for next iteration.
8078         if (ElementType) {
8079           // For the case that having pointer as base, we need to remove one
8080           // level of indirection.
8081           if (&Component != &*Components.begin())
8082             ElementType = ElementType->getPointeeOrArrayElementType();
8083           ElementTypeSize =
8084               Context.getTypeSizeInChars(ElementType).getQuantity();
8085           CurStrides.push_back(
8086               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8087         }
8088       }
8089       // Get dimension value except for the last dimension since we don't need
8090       // it.
8091       if (DimSizes.size() < Components.size() - 1) {
8092         if (CAT)
8093           DimSizes.push_back(llvm::ConstantInt::get(
8094               CGF.Int64Ty, CAT->getSize().getZExtValue()));
8095         else if (VAT)
8096           DimSizes.push_back(CGF.Builder.CreateIntCast(
8097               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8098               /*IsSigned=*/false));
8099       }
8100     }
8101 
    // Skip the dummy dimension since we already have its information.
8103     auto DI = DimSizes.begin() + 1;
8104     // Product of dimension.
8105     llvm::Value *DimProd =
8106         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8107 
8108     // Collect info for non-contiguous. Notice that offset, count, and stride
8109     // are only meaningful for array-section, so we insert a null for anything
8110     // other than array-section.
8111     // Also, the size of offset, count, and stride are not the same as
8112     // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8113     // count, and stride are the same as the number of non-contiguous
8114     // declaration in target update to/from clause.
8115     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8116          Components) {
8117       const Expr *AssocExpr = Component.getAssociatedExpression();
8118 
8119       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8120         llvm::Value *Offset = CGF.Builder.CreateIntCast(
8121             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8122             /*isSigned=*/false);
8123         CurOffsets.push_back(Offset);
8124         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8125         CurStrides.push_back(CurStrides.back());
8126         continue;
8127       }
8128 
8129       const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8130 
8131       if (!OASE)
8132         continue;
8133 
8134       // Offset
8135       const Expr *OffsetExpr = OASE->getLowerBound();
8136       llvm::Value *Offset = nullptr;
8137       if (!OffsetExpr) {
8138         // If offset is absent, then we just set it to zero.
8139         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8140       } else {
8141         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8142                                            CGF.Int64Ty,
8143                                            /*isSigned=*/false);
8144       }
8145       CurOffsets.push_back(Offset);
8146 
8147       // Count
8148       const Expr *CountExpr = OASE->getLength();
8149       llvm::Value *Count = nullptr;
8150       if (!CountExpr) {
8151         // In Clang, once a high dimension is an array section, we construct all
8152         // the lower dimension as array section, however, for case like
8153         // arr[0:2][2], Clang construct the inner dimension as an array section
8154         // but it actually is not in an array section form according to spec.
8155         if (!OASE->getColonLocFirst().isValid() &&
8156             !OASE->getColonLocSecond().isValid()) {
8157           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8158         } else {
8159           // OpenMP 5.0, 2.1.5 Array Sections, Description.
8160           // When the length is absent it defaults to ⌈(size −
8161           // lower-bound)/stride⌉, where size is the size of the array
8162           // dimension.
8163           const Expr *StrideExpr = OASE->getStride();
8164           llvm::Value *Stride =
8165               StrideExpr
8166                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8167                                               CGF.Int64Ty, /*isSigned=*/false)
8168                   : nullptr;
8169           if (Stride)
8170             Count = CGF.Builder.CreateUDiv(
8171                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8172           else
8173             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8174         }
8175       } else {
8176         Count = CGF.EmitScalarExpr(CountExpr);
8177       }
8178       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8179       CurCounts.push_back(Count);
8180 
8181       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8182       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8183       //              Offset      Count     Stride
8184       //    D0          0           1         4    (int)    <- dummy dimension
8185       //    D1          0           2         8    (2 * (1) * 4)
8186       //    D2          1           2         20   (1 * (1 * 5) * 4)
8187       //    D3          0           2         200  (2 * (1 * 5 * 4) * 4)
8188       const Expr *StrideExpr = OASE->getStride();
8189       llvm::Value *Stride =
8190           StrideExpr
8191               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8192                                           CGF.Int64Ty, /*isSigned=*/false)
8193               : nullptr;
8194       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8195       if (Stride)
8196         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8197       else
8198         CurStrides.push_back(DimProd);
8199       if (DI != DimSizes.end())
8200         ++DI;
8201     }
8202 
8203     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8204     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8205     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8206   }
8207 
8208   /// Return the adjusted map modifiers if the declaration a capture refers to
8209   /// appears in a first-private clause. This is expected to be used only with
8210   /// directives that start with 'target'.
8211   MappableExprsHandler::OpenMPOffloadMappingFlags
8212   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8213     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8214 
8215     // A first private variable captured by reference will use only the
8216     // 'private ptr' and 'map to' flag. Return the right flags if the captured
8217     // declaration is known as first-private in this handler.
8218     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8219       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8220           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8221         return MappableExprsHandler::OMP_MAP_ALWAYS |
8222                MappableExprsHandler::OMP_MAP_TO;
8223       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8224         return MappableExprsHandler::OMP_MAP_TO |
8225                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8226       return MappableExprsHandler::OMP_MAP_PRIVATE |
8227              MappableExprsHandler::OMP_MAP_TO;
8228     }
8229     return MappableExprsHandler::OMP_MAP_TO |
8230            MappableExprsHandler::OMP_MAP_FROM;
8231   }
8232 
8233   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8234     // Rotate by getFlagMemberOffset() bits.
8235     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8236                                                   << getFlagMemberOffset());
8237   }
8238 
8239   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8240                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8241     // If the entry is PTR_AND_OBJ but has not been marked with the special
8242     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8243     // marked as MEMBER_OF.
8244     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8245         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8246       return;
8247 
8248     // Reset the placeholder value to prepare the flag for the assignment of the
8249     // proper MEMBER_OF value.
8250     Flags &= ~OMP_MAP_MEMBER_OF;
8251     Flags |= MemberOfFlag;
8252   }
8253 
  /// Flatten the layout of record \p RD into \p Layout as an ordered list of
  /// field declarations, recursing into non-empty base classes so the result
  /// follows the LLVM struct element order. \p AsBase selects the
  /// base-subobject LLVM type instead of the complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // One slot per LLVM struct element; each slot ends up holding either a
    // base class, a field, or stays null (e.g. padding or bitfield storage).
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases (including those whose non-virtual size is zero).
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Skip slots that are already occupied; keep the first occupant.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Walk the slots in layout order, expanding base classes recursively and
    // appending plain fields directly.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
8313 
8314   /// Generate all the base pointers, section pointers, sizes, map types, and
8315   /// mappers for the extracted mappable expressions (all included in \a
8316   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8317   /// pair of the relevant declaration and index where it occurs is appended to
8318   /// the device pointers info array.
8319   void generateAllInfoForClauses(
8320       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8321       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8322           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8323     // We have to process the component lists that relate with the same
8324     // declaration in a single chunk so that we can generate the map flags
8325     // correctly. Therefore, we organize all lists in a map.
8326     enum MapKind { Present, Allocs, Other, Total };
8327     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8328                     SmallVector<SmallVector<MapInfo, 8>, 4>>
8329         Info;
8330 
8331     // Helper function to fill the information map for the different supported
8332     // clauses.
8333     auto &&InfoGen =
8334         [&Info, &SkipVarSet](
8335             const ValueDecl *D, MapKind Kind,
8336             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8337             OpenMPMapClauseKind MapType,
8338             ArrayRef<OpenMPMapModifierKind> MapModifiers,
8339             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8340             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8341             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8342           if (SkipVarSet.contains(D))
8343             return;
8344           auto It = Info.find(D);
8345           if (It == Info.end())
8346             It = Info
8347                      .insert(std::make_pair(
8348                          D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8349                      .first;
8350           It->second[Kind].emplace_back(
8351               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8352               IsImplicit, Mapper, VarRef, ForDeviceAddr);
8353         };
8354 
8355     for (const auto *Cl : Clauses) {
8356       const auto *C = dyn_cast<OMPMapClause>(Cl);
8357       if (!C)
8358         continue;
8359       MapKind Kind = Other;
8360       if (!C->getMapTypeModifiers().empty() &&
8361           llvm::any_of(C->getMapTypeModifiers(), [](OpenMPMapModifierKind K) {
8362             return K == OMPC_MAP_MODIFIER_present;
8363           }))
8364         Kind = Present;
8365       else if (C->getMapType() == OMPC_MAP_alloc)
8366         Kind = Allocs;
8367       const auto *EI = C->getVarRefs().begin();
8368       for (const auto L : C->component_lists()) {
8369         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8370         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8371                 C->getMapTypeModifiers(), llvm::None,
8372                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8373                 E);
8374         ++EI;
8375       }
8376     }
8377     for (const auto *Cl : Clauses) {
8378       const auto *C = dyn_cast<OMPToClause>(Cl);
8379       if (!C)
8380         continue;
8381       MapKind Kind = Other;
8382       if (!C->getMotionModifiers().empty() &&
8383           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8384             return K == OMPC_MOTION_MODIFIER_present;
8385           }))
8386         Kind = Present;
8387       const auto *EI = C->getVarRefs().begin();
8388       for (const auto L : C->component_lists()) {
8389         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8390                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8391                 C->isImplicit(), std::get<2>(L), *EI);
8392         ++EI;
8393       }
8394     }
8395     for (const auto *Cl : Clauses) {
8396       const auto *C = dyn_cast<OMPFromClause>(Cl);
8397       if (!C)
8398         continue;
8399       MapKind Kind = Other;
8400       if (!C->getMotionModifiers().empty() &&
8401           llvm::any_of(C->getMotionModifiers(), [](OpenMPMotionModifierKind K) {
8402             return K == OMPC_MOTION_MODIFIER_present;
8403           }))
8404         Kind = Present;
8405       const auto *EI = C->getVarRefs().begin();
8406       for (const auto L : C->component_lists()) {
8407         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8408                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8409                 C->isImplicit(), std::get<2>(L), *EI);
8410         ++EI;
8411       }
8412     }
8413 
8414     // Look at the use_device_ptr clause information and mark the existing map
8415     // entries as such. If there is no map information for an entry in the
8416     // use_device_ptr list, we create one with map type 'alloc' and zero size
8417     // section. It is the user fault if that was not mapped before. If there is
8418     // no map information and the pointer is a struct member, then we defer the
8419     // emission of that entry until the whole struct has been processed.
8420     llvm::MapVector<CanonicalDeclPtr<const Decl>,
8421                     SmallVector<DeferredDevicePtrEntryTy, 4>>
8422         DeferredInfo;
8423     MapCombinedInfoTy UseDevicePtrCombinedInfo;
8424 
8425     for (const auto *Cl : Clauses) {
8426       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8427       if (!C)
8428         continue;
8429       for (const auto L : C->component_lists()) {
8430         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8431             std::get<1>(L);
8432         assert(!Components.empty() &&
8433                "Not expecting empty list of components!");
8434         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8435         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8436         const Expr *IE = Components.back().getAssociatedExpression();
8437         // If the first component is a member expression, we have to look into
8438         // 'this', which maps to null in the map of map information. Otherwise
8439         // look directly for the information.
8440         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8441 
8442         // We potentially have map information for this declaration already.
8443         // Look for the first set of components that refer to it.
8444         if (It != Info.end()) {
8445           bool Found = false;
8446           for (auto &Data : It->second) {
8447             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8448               return MI.Components.back().getAssociatedDeclaration() == VD;
8449             });
8450             // If we found a map entry, signal that the pointer has to be
8451             // returned and move on to the next declaration. Exclude cases where
8452             // the base pointer is mapped as array subscript, array section or
8453             // array shaping. The base address is passed as a pointer to base in
8454             // this case and cannot be used as a base for use_device_ptr list
8455             // item.
8456             if (CI != Data.end()) {
8457               auto PrevCI = std::next(CI->Components.rbegin());
8458               const auto *VarD = dyn_cast<VarDecl>(VD);
8459               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8460                   isa<MemberExpr>(IE) ||
8461                   !VD->getType().getNonReferenceType()->isPointerType() ||
8462                   PrevCI == CI->Components.rend() ||
8463                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8464                   VarD->hasLocalStorage()) {
8465                 CI->ReturnDevicePointer = true;
8466                 Found = true;
8467                 break;
8468               }
8469             }
8470           }
8471           if (Found)
8472             continue;
8473         }
8474 
8475         // We didn't find any match in our map information - generate a zero
8476         // size array section - if the pointer is a struct member we defer this
8477         // action until the whole struct has been processed.
8478         if (isa<MemberExpr>(IE)) {
8479           // Insert the pointer into Info to be processed by
8480           // generateInfoForComponentList. Because it is a member pointer
8481           // without a pointee, no entry will be generated for it, therefore
8482           // we need to generate one after the whole struct has been processed.
8483           // Nonetheless, generateInfoForComponentList must be called to take
8484           // the pointer into account for the calculation of the range of the
8485           // partial struct.
8486           InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8487                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8488                   nullptr);
8489           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8490         } else {
8491           llvm::Value *Ptr =
8492               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8493           UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8494           UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8495           UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8496           UseDevicePtrCombinedInfo.Sizes.push_back(
8497               llvm::Constant::getNullValue(CGF.Int64Ty));
8498           UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8499           UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8500         }
8501       }
8502     }
8503 
8504     // Look at the use_device_addr clause information and mark the existing map
8505     // entries as such. If there is no map information for an entry in the
8506     // use_device_addr list, we create one with map type 'alloc' and zero size
8507     // section. It is the user fault if that was not mapped before. If there is
8508     // no map information and the pointer is a struct member, then we defer the
8509     // emission of that entry until the whole struct has been processed.
8510     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8511     for (const auto *Cl : Clauses) {
8512       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8513       if (!C)
8514         continue;
8515       for (const auto L : C->component_lists()) {
8516         assert(!std::get<1>(L).empty() &&
8517                "Not expecting empty list of components!");
8518         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8519         if (!Processed.insert(VD).second)
8520           continue;
8521         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8522         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8523         // If the first component is a member expression, we have to look into
8524         // 'this', which maps to null in the map of map information. Otherwise
8525         // look directly for the information.
8526         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8527 
8528         // We potentially have map information for this declaration already.
8529         // Look for the first set of components that refer to it.
8530         if (It != Info.end()) {
8531           bool Found = false;
8532           for (auto &Data : It->second) {
8533             auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8534               return MI.Components.back().getAssociatedDeclaration() == VD;
8535             });
8536             // If we found a map entry, signal that the pointer has to be
8537             // returned and move on to the next declaration.
8538             if (CI != Data.end()) {
8539               CI->ReturnDevicePointer = true;
8540               Found = true;
8541               break;
8542             }
8543           }
8544           if (Found)
8545             continue;
8546         }
8547 
8548         // We didn't find any match in our map information - generate a zero
8549         // size array section - if the pointer is a struct member we defer this
8550         // action until the whole struct has been processed.
8551         if (isa<MemberExpr>(IE)) {
8552           // Insert the pointer into Info to be processed by
8553           // generateInfoForComponentList. Because it is a member pointer
8554           // without a pointee, no entry will be generated for it, therefore
8555           // we need to generate one after the whole struct has been processed.
8556           // Nonetheless, generateInfoForComponentList must be called to take
8557           // the pointer into account for the calculation of the range of the
8558           // partial struct.
8559           InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8560                   llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8561                   nullptr, nullptr, /*ForDeviceAddr=*/true);
8562           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8563         } else {
8564           llvm::Value *Ptr;
8565           if (IE->isGLValue())
8566             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8567           else
8568             Ptr = CGF.EmitScalarExpr(IE);
8569           CombinedInfo.Exprs.push_back(VD);
8570           CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8571           CombinedInfo.Pointers.push_back(Ptr);
8572           CombinedInfo.Sizes.push_back(
8573               llvm::Constant::getNullValue(CGF.Int64Ty));
8574           CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8575           CombinedInfo.Mappers.push_back(nullptr);
8576         }
8577       }
8578     }
8579 
8580     for (const auto &Data : Info) {
8581       StructRangeInfoTy PartialStruct;
8582       // Temporary generated information.
8583       MapCombinedInfoTy CurInfo;
8584       const Decl *D = Data.first;
8585       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8586       for (const auto &M : Data.second) {
8587         for (const MapInfo &L : M) {
8588           assert(!L.Components.empty() &&
8589                  "Not expecting declaration with no component lists.");
8590 
8591           // Remember the current base pointer index.
8592           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8593           CurInfo.NonContigInfo.IsNonContiguous =
8594               L.Components.back().isNonContiguous();
8595           generateInfoForComponentList(
8596               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8597               CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8598               L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8599 
8600           // If this entry relates with a device pointer, set the relevant
8601           // declaration and add the 'return pointer' flag.
8602           if (L.ReturnDevicePointer) {
8603             assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8604                    "Unexpected number of mapped base pointers.");
8605 
8606             const ValueDecl *RelevantVD =
8607                 L.Components.back().getAssociatedDeclaration();
8608             assert(RelevantVD &&
8609                    "No relevant declaration related with device pointer??");
8610 
8611             CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8612                 RelevantVD);
8613             CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8614           }
8615         }
8616       }
8617 
8618       // Append any pending zero-length pointers which are struct members and
8619       // used with use_device_ptr or use_device_addr.
8620       auto CI = DeferredInfo.find(Data.first);
8621       if (CI != DeferredInfo.end()) {
8622         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8623           llvm::Value *BasePtr;
8624           llvm::Value *Ptr;
8625           if (L.ForDeviceAddr) {
8626             if (L.IE->isGLValue())
8627               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8628             else
8629               Ptr = this->CGF.EmitScalarExpr(L.IE);
8630             BasePtr = Ptr;
8631             // Entry is RETURN_PARAM. Also, set the placeholder value
8632             // MEMBER_OF=FFFF so that the entry is later updated with the
8633             // correct value of MEMBER_OF.
8634             CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8635           } else {
8636             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8637             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8638                                              L.IE->getExprLoc());
8639             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8640             // placeholder value MEMBER_OF=FFFF so that the entry is later
8641             // updated with the correct value of MEMBER_OF.
8642             CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8643                                     OMP_MAP_MEMBER_OF);
8644           }
8645           CurInfo.Exprs.push_back(L.VD);
8646           CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8647           CurInfo.Pointers.push_back(Ptr);
8648           CurInfo.Sizes.push_back(
8649               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8650           CurInfo.Mappers.push_back(nullptr);
8651         }
8652       }
8653       // If there is an entry in PartialStruct it means we have a struct with
8654       // individual members mapped. Emit an extra combined entry.
8655       if (PartialStruct.Base.isValid()) {
8656         CurInfo.NonContigInfo.Dims.push_back(0);
8657         emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8658       }
8659 
8660       // We need to append the results of this capture to what we already
8661       // have.
8662       CombinedInfo.append(CurInfo);
8663     }
8664     // Append data for use_device_ptr clauses.
8665     CombinedInfo.append(UseDevicePtrCombinedInfo);
8666   }
8667 
8668 public:
8669   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8670       : CurDir(&Dir), CGF(CGF) {
8671     // Extract firstprivate clause information.
8672     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8673       for (const auto *D : C->varlists())
8674         FirstPrivateDecls.try_emplace(
8675             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8676     // Extract implicit firstprivates from uses_allocators clauses.
8677     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8678       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8679         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8680         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8681           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8682                                         /*Implicit=*/true);
8683         else if (const auto *VD = dyn_cast<VarDecl>(
8684                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8685                          ->getDecl()))
8686           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8687       }
8688     }
8689     // Extract device pointer clause information.
8690     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8691       for (auto L : C->component_lists())
8692         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8693   }
8694 
  /// Constructor for the declare mapper directive. No clause pre-scanning is
  /// done here; only the current directive is recorded so later queries can
  /// distinguish the mapper case from the executable-directive case.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8698 
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// The combined entry spans from the lowest mapped element to one past the
  /// highest mapped element; all member entries in \p CurTypes are then
  /// rewritten to be MEMBER_OF this new entry.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is neither a struct member nor an array section
    // needs no combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      // Use the record base for both bounds, so the (HB + 1) - LB computation
      // below yields the size of the whole record.
      // NOTE(review): assumes PartialStruct.LB points at the full record type
      // so that the +1 GEP advances by the record size — confirm.
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8755 
8756   /// Generate all the base pointers, section pointers, sizes, map types, and
8757   /// mappers for the extracted mappable expressions (all included in \a
8758   /// CombinedInfo). Also, for each item that relates with a device pointer, a
8759   /// pair of the relevant declaration and index where it occurs is appended to
8760   /// the device pointers info array.
8761   void generateAllInfo(
8762       MapCombinedInfoTy &CombinedInfo,
8763       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8764           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8765     assert(CurDir.is<const OMPExecutableDirective *>() &&
8766            "Expect a executable directive");
8767     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8768     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8769   }
8770 
8771   /// Generate all the base pointers, section pointers, sizes, map types, and
8772   /// mappers for the extracted map clauses of user-defined mapper (all included
8773   /// in \a CombinedInfo).
8774   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8775     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8776            "Expect a declare mapper directive");
8777     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8778     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8779   }
8780 
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// For a captured variable whose type is a lambda class, emits one
  /// PTR_AND_OBJ|LITERAL|MEMBER_OF|IMPLICIT entry per by-reference (or
  /// pointer-typed) capture of the lambda, and records in \p LambdaPointers a
  /// mapping from each capture field's address to the lambda object's address
  /// (used later by adjustMemberOfForLambdaCaptures).
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Nothing to do unless the captured variable is a lambda object.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // A captured 'this' is mapped as a pointer-sized member of the lambda.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // Note: intentionally shadows the outer VD for the rest of the loop.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and pointer-typed by-copy captures need
      // an entry; plain by-copy captures travel with the lambda object.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its real size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointer captured by copy: map the pointer value with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
8847 
8848   /// Set correct indices for lambdas captures.
8849   void adjustMemberOfForLambdaCaptures(
8850       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8851       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8852       MapFlagsArrayTy &Types) const {
8853     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8854       // Set correct member_of idx for all implicit lambda captures.
8855       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8856                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8857         continue;
8858       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8859       assert(BasePtr && "Unable to find base lambda address.");
8860       int TgtIdx = -1;
8861       for (unsigned J = I; J > 0; --J) {
8862         unsigned Idx = J - 1;
8863         if (Pointers[Idx] != BasePtr)
8864           continue;
8865         TgtIdx = Idx;
8866         break;
8867       }
8868       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8869       // All other current entries will be MEMBER_OF the combined entry
8870       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8871       // 0xFFFF in the MEMBER_OF field).
8872       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8873       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8874     }
8875   }
8876 
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  ///
  /// Gathers every map-clause component list for the captured declaration,
  /// detects component lists that overlap (map the same struct through
  /// different members), sorts the overlaps by field layout, and finally emits
  /// map info — overlapped lists first — via generateInfoForComponentList.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    // VD is null when the capture is 'this'.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    // One record per component list: (components, map type, map modifiers,
    // is-implicit, mapper decl, variable reference expression).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Collect every component list of every map clause that names VD.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    // Stable-sort so that lists with the 'present' modifier (and 'alloc'
    // map type) are ordered ahead of those without.
    // NOTE(review): HasPresent is computed from LHS's modifiers while
    // HasAllocs is computed from RHS's map type (and vice versa for the *R
    // variants) — this cross-assignment looks inverted for the alloc case;
    // confirm against the intended ordering.
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent = !MapModifiers.empty() &&
                        llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
                          return K == clang::OMPC_MAP_MODIFIER_present;
                        });
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          !MapModifiers.empty() &&
          llvm::any_of(MapModifiers, [](OpenMPMapModifierKind K) {
            return K == clang::OMPC_MAP_MODIFIER_present;
          });
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    // Compare every pair of component lists once (L with every later L1).
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        // Walk both lists from the base (reverse order) while they agree on
        // expression kind and associated declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              std::prev(It)
                  ->getAssociatedExpression()
                  ->getType()
                  ->isPointerType())
            continue;
          // The shorter (fully-consumed) list is the base; the longer one is
          // recorded as an overlapped sub-list of it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointers/arrays down to the underlying record type of VD.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    // Order each base's overlapped lists by field declaration order.
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields of different (nested) records: whichever appears first
            // in the flattened layout is ordered first.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
9108 
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Exactly one entry is appended to \a CombinedInfo; it is always marked
  /// TARGET_PARAM, and IMPLICIT unless an explicit firstprivate clause named
  /// the variable.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': map the pointee object with tofrom semantics.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate: materialize a global copy once and map that
        // instead of the original variable.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(Addr);
        CombinedInfo.Pointers.push_back(Addr);
      } else {
        CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
        CombinedInfo.BasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: the entry's pointer is the loaded pointee
          // address, not the reference itself.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
        } else {
          CombinedInfo.Pointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9196 };
9197 } // anonymous namespace
9198 
9199 static void emitNonContiguousDescriptor(
9200     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9201     CGOpenMPRuntime::TargetDataInfo &Info) {
9202   CodeGenModule &CGM = CGF.CGM;
9203   MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9204       &NonContigInfo = CombinedInfo.NonContigInfo;
9205 
9206   // Build an array of struct descriptor_dim and then assign it to
9207   // offload_args.
9208   //
9209   // struct descriptor_dim {
9210   //  uint64_t offset;
9211   //  uint64_t count;
9212   //  uint64_t stride
9213   // };
9214   ASTContext &C = CGF.getContext();
9215   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9216   RecordDecl *RD;
9217   RD = C.buildImplicitRecord("descriptor_dim");
9218   RD->startDefinition();
9219   addFieldToRecordDecl(C, RD, Int64Ty);
9220   addFieldToRecordDecl(C, RD, Int64Ty);
9221   addFieldToRecordDecl(C, RD, Int64Ty);
9222   RD->completeDefinition();
9223   QualType DimTy = C.getRecordType(RD);
9224 
9225   enum { OffsetFD = 0, CountFD, StrideFD };
9226   // We need two index variable here since the size of "Dims" is the same as the
9227   // size of Components, however, the size of offset, count, and stride is equal
9228   // to the size of base declaration that is non-contiguous.
9229   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9230     // Skip emitting ir if dimension size is 1 since it cannot be
9231     // non-contiguous.
9232     if (NonContigInfo.Dims[I] == 1)
9233       continue;
9234     llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9235     QualType ArrayTy =
9236         C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9237     Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9238     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9239       unsigned RevIdx = EE - II - 1;
9240       LValue DimsLVal = CGF.MakeAddrLValue(
9241           CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9242       // Offset
9243       LValue OffsetLVal = CGF.EmitLValueForField(
9244           DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9245       CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9246       // Count
9247       LValue CountLVal = CGF.EmitLValueForField(
9248           DimsLVal, *std::next(RD->field_begin(), CountFD));
9249       CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9250       // Stride
9251       LValue StrideLVal = CGF.EmitLValueForField(
9252           DimsLVal, *std::next(RD->field_begin(), StrideFD));
9253       CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9254     }
9255     // args[I] = &dims
9256     Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9257         DimsAddr, CGM.Int8PtrTy);
9258     llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9259         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9260         Info.PointersArray, 0, I);
9261     Address PAddr(P, CGF.getPointerAlign());
9262     CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9263     ++L;
9264   }
9265 }
9266 
9267 /// Emit a string constant containing the names of the values mapped to the
9268 /// offloading runtime library.
9269 llvm::Constant *
9270 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9271                        MappableExprsHandler::MappingExprInfo &MapExprs) {
9272   llvm::Constant *SrcLocStr;
9273   if (!MapExprs.getMapDecl()) {
9274     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
9275   } else {
9276     std::string ExprName = "";
9277     if (MapExprs.getMapExpr()) {
9278       PrintingPolicy P(CGF.getContext().getLangOpts());
9279       llvm::raw_string_ostream OS(ExprName);
9280       MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9281       OS.flush();
9282     } else {
9283       ExprName = MapExprs.getMapDecl()->getNameAsString();
9284     }
9285 
9286     SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
9287     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9288     const char *FileName = PLoc.getFilename();
9289     unsigned Line = PLoc.getLine();
9290     unsigned Column = PLoc.getColumn();
9291     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
9292                                                 Line, Column);
9293   }
9294   return SrcLocStr;
9295 }
9296 
9297 /// Emit the arrays used to pass the captures and map information to the
9298 /// offloading runtime library. If there is no map or capture information,
9299 /// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : CombinedInfo.Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Stack arrays holding the base pointers, pointers and mapper functions,
    // one slot per mapped value; filled in the loop at the bottom.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      // At least one size is dynamic: allocate a stack array that the loop
      // below populates with runtime values.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
        if (IsNonContiguous &&
            (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
          // For non-contiguous entries the size slot carries the recorded
          // Dims value (the descriptor dimension count) rather than a byte
          // size.
          ConstSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
        } else {
          ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
        }
      }

      // Emit all-constant sizes once as a private unnamed-addr global
      // instead of a per-invocation stack array.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      // No debug info: pass a null names array to the runtime.
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      // Build one location/name string per mapped expression.
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      // Only emit a second maptypes array when stripping PRESENT actually
      // changed something.
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Populate each slot of the base-pointer, pointer, (possibly) size and
    // mapper arrays with the collected values.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // If device pointer info is requested, remember the slot address for
      // the declaration associated with this base pointer.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes are only stored dynamically here when at least one of them is
      // not a compile-time constant (otherwise the global above is used).
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  // Emit the extra dimension descriptors only when non-contiguous info is
  // actually present.
  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9471 
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// True when emitting the arguments for the call that ends a target data
  /// region; selects MapTypesArrayEnd over MapTypesArray when it is set.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  // Non-explicit so call sites can pass a bare bool.
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace
9480 
9481 /// Emit the arguments to be passed to the runtime library based on the
9482 /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
9483 /// ForEndCall, emit map types to be passed for the end of the region instead of
9484 /// the beginning.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each emitted array to a pointer to its first element, which is
    // what the runtime entry points take.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For a region-end call prefer the end-specific map types (built when a
    // 'present' modifier had to be stripped); otherwise use the begin array.
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // Nothing is mapped: pass appropriately-typed null pointers for every
    // array argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9541 
9542 /// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // Look through a single compound statement for the nested directive.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may have 'distribute' directly nested, or one level deeper
      // inside a nested 'teams' region.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // 'target teams' may have 'distribute' directly nested.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms cannot carry a nested 'distribute'.
      return nullptr;
    // All remaining kinds are never passed to this function.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  // No nested executable directive was found.
  return nullptr;
}
9649 
9650 /// Emit the user-defined mapper function. The code generation follows the
9651 /// pattern in the example below.
9652 /// \code
9653 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9654 ///                                           void *base, void *begin,
9655 ///                                           int64_t size, int64_t type,
9656 ///                                           void *name = nullptr) {
9657 ///   // Allocate space for an array section first or add a base/begin for
9658 ///   // pointer dereference.
9659 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9660 ///       !maptype.IsDelete)
9661 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9662 ///                                 size*sizeof(Ty), clearToFromMember(type));
9663 ///   // Map members.
9664 ///   for (unsigned i = 0; i < size; i++) {
9665 ///     // For each component specified by this mapper:
9666 ///     for (auto c : begin[i]->all_components) {
9667 ///       if (c.hasMapper())
9668 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9669 ///                       c.arg_type, c.arg_name);
9670 ///       else
9671 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9672 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9673 ///                                     c.arg_name);
9674 ///     }
9675 ///   }
9676 ///   // Delete the array section.
9677 ///   if (size > 1 && maptype.IsDelete)
9678 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9679 ///                                 size*sizeof(Ty), clearToFromMember(type));
9680 /// }
9681 /// \endcode
9682 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9683                                             CodeGenFunction *CGF) {
9684   if (UDMMap.count(D) > 0)
9685     return;
9686   ASTContext &C = CGM.getContext();
9687   QualType Ty = D->getType();
9688   QualType PtrTy = C.getPointerType(Ty).withRestrict();
9689   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9690   auto *MapperVarDecl =
9691       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9692   SourceLocation Loc = D->getLocation();
9693   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9694 
9695   // Prepare mapper function arguments and attributes.
9696   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9697                               C.VoidPtrTy, ImplicitParamDecl::Other);
9698   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9699                             ImplicitParamDecl::Other);
9700   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9701                              C.VoidPtrTy, ImplicitParamDecl::Other);
9702   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9703                             ImplicitParamDecl::Other);
9704   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9705                             ImplicitParamDecl::Other);
9706   ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9707                             ImplicitParamDecl::Other);
9708   FunctionArgList Args;
9709   Args.push_back(&HandleArg);
9710   Args.push_back(&BaseArg);
9711   Args.push_back(&BeginArg);
9712   Args.push_back(&SizeArg);
9713   Args.push_back(&TypeArg);
9714   Args.push_back(&NameArg);
9715   const CGFunctionInfo &FnInfo =
9716       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9717   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9718   SmallString<64> TyStr;
9719   llvm::raw_svector_ostream Out(TyStr);
9720   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9721   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9722   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9723                                     Name, &CGM.getModule());
9724   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9725   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9726   // Start the mapper function code generation.
9727   CodeGenFunction MapperCGF(CGM);
9728   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9729   // Compute the starting and end addresses of array elements.
9730   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9731       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9732       C.getPointerType(Int64Ty), Loc);
9733   // Prepare common arguments for array initiation and deletion.
9734   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9735       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9736       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9737   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9738       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9739       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9740   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9741       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9742       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9743   // Convert the size in bytes into the number of array elements.
9744   Size = MapperCGF.Builder.CreateExactUDiv(
9745       Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9746   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9747       BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9748   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9749   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9750       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9751       C.getPointerType(Int64Ty), Loc);
9752   llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9753       MapperCGF.GetAddrOfLocalVar(&NameArg),
9754       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9755 
9756   // Emit array initiation if this is an array section and \p MapType indicates
9757   // that memory allocation is required.
9758   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9759   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9760                              MapName, ElementSize, HeadBB, /*IsInit=*/true);
9761 
9762   // Emit a for loop to iterate through SizeArg of elements and map all of them.
9763 
9764   // Emit the loop header block.
9765   MapperCGF.EmitBlock(HeadBB);
9766   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9767   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9768   // Evaluate whether the initial condition is satisfied.
9769   llvm::Value *IsEmpty =
9770       MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9771   MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9772   llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9773 
9774   // Emit the loop body block.
9775   MapperCGF.EmitBlock(BodyBB);
9776   llvm::BasicBlock *LastBB = BodyBB;
9777   llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9778       PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9779   PtrPHI->addIncoming(PtrBegin, EntryBB);
9780   Address PtrCurrent =
9781       Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
9782                           .getAlignment()
9783                           .alignmentOfArrayElement(ElementSize));
9784   // Privatize the declared variable of mapper to be the current array element.
9785   CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9786   Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
9787   (void)Scope.Privatize();
9788 
9789   // Get map clause information. Fill up the arrays with all mapped variables.
9790   MappableExprsHandler::MapCombinedInfoTy Info;
9791   MappableExprsHandler MEHandler(*D, MapperCGF);
9792   MEHandler.generateAllInfoForMapper(Info);
9793 
9794   // Call the runtime API __tgt_mapper_num_components to get the number of
9795   // pre-existing components.
9796   llvm::Value *OffloadingArgs[] = {Handle};
9797   llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9798       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9799                                             OMPRTL___tgt_mapper_num_components),
9800       OffloadingArgs);
9801   llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9802       PreviousSize,
9803       MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9804 
9805   // Fill up the runtime mapper handle for all components.
9806   for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9807     llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9808         *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9809     llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9810         Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9811     llvm::Value *CurSizeArg = Info.Sizes[I];
9812     llvm::Value *CurNameArg =
9813         (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9814             ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9815             : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9816 
9817     // Extract the MEMBER_OF field from the map type.
9818     llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
9819     llvm::Value *MemberMapType =
9820         MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9821 
9822     // Combine the map type inherited from user-defined mapper with that
9823     // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9824     // bits of the \a MapType, which is the input argument of the mapper
9825     // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9826     // bits of MemberMapType.
9827     // [OpenMP 5.0], 1.2.6. map-type decay.
9828     //        | alloc |  to   | from  | tofrom | release | delete
9829     // ----------------------------------------------------------
9830     // alloc  | alloc | alloc | alloc | alloc  | release | delete
9831     // to     | alloc |  to   | alloc |   to   | release | delete
9832     // from   | alloc | alloc | from  |  from  | release | delete
9833     // tofrom | alloc |  to   | from  | tofrom | release | delete
9834     llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9835         MapType,
9836         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
9837                                    MappableExprsHandler::OMP_MAP_FROM));
9838     llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9839     llvm::BasicBlock *AllocElseBB =
9840         MapperCGF.createBasicBlock("omp.type.alloc.else");
9841     llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9842     llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9843     llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9844     llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9845     llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9846     MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9847     // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9848     MapperCGF.EmitBlock(AllocBB);
9849     llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9850         MemberMapType,
9851         MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9852                                      MappableExprsHandler::OMP_MAP_FROM)));
9853     MapperCGF.Builder.CreateBr(EndBB);
9854     MapperCGF.EmitBlock(AllocElseBB);
9855     llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9856         LeftToFrom,
9857         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9858     MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9859     // In case of to, clear OMP_MAP_FROM.
9860     MapperCGF.EmitBlock(ToBB);
9861     llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9862         MemberMapType,
9863         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9864     MapperCGF.Builder.CreateBr(EndBB);
9865     MapperCGF.EmitBlock(ToElseBB);
9866     llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9867         LeftToFrom,
9868         MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9869     MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9870     // In case of from, clear OMP_MAP_TO.
9871     MapperCGF.EmitBlock(FromBB);
9872     llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9873         MemberMapType,
9874         MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9875     // In case of tofrom, do nothing.
9876     MapperCGF.EmitBlock(EndBB);
9877     LastBB = EndBB;
9878     llvm::PHINode *CurMapType =
9879         MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9880     CurMapType->addIncoming(AllocMapType, AllocBB);
9881     CurMapType->addIncoming(ToMapType, ToBB);
9882     CurMapType->addIncoming(FromMapType, FromBB);
9883     CurMapType->addIncoming(MemberMapType, ToElseBB);
9884 
9885     llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
9886                                      CurSizeArg, CurMapType, CurNameArg};
9887     if (Info.Mappers[I]) {
9888       // Call the corresponding mapper function.
9889       llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9890           cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9891       assert(MapperFunc && "Expect a valid mapper function is available.");
9892       MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9893     } else {
9894       // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9895       // data structure.
9896       MapperCGF.EmitRuntimeCall(
9897           OMPBuilder.getOrCreateRuntimeFunction(
9898               CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9899           OffloadingArgs);
9900     }
9901   }
9902 
9903   // Update the pointer to point to the next element that needs to be mapped,
9904   // and check whether we have mapped all elements.
9905   llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9906       PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9907   PtrPHI->addIncoming(PtrNext, LastBB);
9908   llvm::Value *IsDone =
9909       MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9910   llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9911   MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9912 
9913   MapperCGF.EmitBlock(ExitBB);
9914   // Emit array deletion if this is an array section and \p MapType indicates
9915   // that deletion is required.
9916   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9917                              MapName, ElementSize, DoneBB, /*IsInit=*/false);
9918 
9919   // Emit the function exit block.
9920   MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9921   MapperCGF.FinishFunction();
9922   UDMMap.try_emplace(D, Fn);
9923   if (CGF) {
9924     auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9925     Decls.second.push_back(D);
9926   }
9927 }
9928 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  // Prefix distinguishes the init path from the delete path in the names of
  // the emitted basic blocks and values.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // Treat a section with more than one element as an array.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  // Extract the OMP_MAP_DELETE bit from the map type.
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    // Initialize when mapping an array, or a PTR_AND_OBJ entry whose base
    // differs from its begin pointer.
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialization must not run when the delete bit is set.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Deletion only runs when the delete bit is set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  // Branch to the init/delete body only when both conditions hold; otherwise
  // fall through to \p ExitBB.
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9997 
9998 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9999     const OMPDeclareMapperDecl *D) {
10000   auto I = UDMMap.find(D);
10001   if (I != UDMMap.end())
10002     return I->second;
10003   emitUserDefinedMapper(D);
10004   return UDMMap.lookup(D);
10005 }
10006 
/// Emit a call to __kmpc_push_target_tripcount_mapper that passes the number
/// of iterations of the (possibly nested) teams-distribute loop associated
/// with the target directive \p D to the runtime before the target launch.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any. If \p D itself is a
  // combined teams distribute directive, use it directly.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return; // No distribute loop found; nothing to communicate.
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    // SizeEmitter may return null when the trip count cannot be computed; in
    // that case no runtime call is emitted.
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}
10034 
/// Emit the offloading machinery for a target directive \p D: materialize the
/// captured variables, fill the offloading argument arrays, invoke the
/// __tgt_target* runtime entry point, and fall back to calling the host
/// version \p OutlinedFn when offloading fails or is not available.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // depend/nowait clauses require the target invocation to be wrapped in an
  // outer task, in which case the captured variables must be re-generated in
  // the task's context.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Materialize the values captured by the target region.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  // ThenGen is run after TargetThenGen (below) has populated InputInfo and the
  // map arrays, hence the by-reference captures.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-zero return value signals that offloading failed.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Build the map information for all captures and map clauses, emit the
  // offloading argument arrays, and then run ThenGen (directly, or wrapped in
  // a task when an outer task is required).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // The three iterators advance in lockstep: capture info, captured record
    // field, and the value generated for the capture.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the emitted arrays through InputInfo/MapTypesArray/MapNamesArray
    // so that ThenGen (which captures them by reference) can use them.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10344 
/// Recursively scan the statement tree rooted at \p S for target execution
/// directives and emit a device function (named relative to \p ParentName)
/// for each one found.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The (device-id, file-id, line) triple uniquely identifies this target
    // region's offload entry.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-codegen entry point for the specific target
    // directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives and
    // cannot reach here (RequiresDeviceCodegen filtered them out above).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target executable directives: only their raw associated statement can
  // contain nested target regions.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
10493 
10494 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10495   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10496       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10497   if (!DevTy)
10498     return false;
10499   // Do not emit device_type(nohost) functions for the host.
10500   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10501     return true;
10502   // Do not emit device_type(host) functions for the device.
10503   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10504     return true;
10505   return false;
10506 }
10507 
10508 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10509   // If emitting code for the host, we do not process FD here. Instead we do
10510   // the normal code generation.
10511   if (!CGM.getLangOpts().OpenMPIsDevice) {
10512     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10513       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10514                                   CGM.getLangOpts().OpenMPIsDevice))
10515         return true;
10516     return false;
10517   }
10518 
10519   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10520   // Try to detect target regions in the function.
10521   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10522     StringRef Name = CGM.getMangledName(GD);
10523     scanForTargetRegionsFunctions(FD->getBody(), Name);
10524     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10525                                 CGM.getLangOpts().OpenMPIsDevice))
10526       return true;
10527   }
10528 
10529   // Do not to emit function if it is not marked as declare target.
10530   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10531          AlreadyEmittedTargetDecls.count(VD) == 0;
10532 }
10533 
// Returns true if the variable must NOT be emitted through the normal
// codegen path (either it is excluded by device_type, or its emission is
// deferred until all 'requires' directives have been processed).
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Skip variables whose device_type excludes them from this side of the
  // compilation (host/nohost mismatch).
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  // On the host the variable is emitted through the regular codegen path.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  // 'link' variables, and 'to' variables under unified shared memory, are
  // deferred (see emitDeferredTargetDecls) because their treatment depends on
  // the final set of 'requires' clauses.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
10571 
10572 llvm::Constant *
10573 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
10574                                                 const VarDecl *VD) {
10575   assert(VD->getType().isConstant(CGM.getContext()) &&
10576          "Expected constant variable.");
10577   StringRef VarName;
10578   llvm::Constant *Addr;
10579   llvm::GlobalValue::LinkageTypes Linkage;
10580   QualType Ty = VD->getType();
10581   SmallString<128> Buffer;
10582   {
10583     unsigned DeviceID;
10584     unsigned FileID;
10585     unsigned Line;
10586     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
10587                              FileID, Line);
10588     llvm::raw_svector_ostream OS(Buffer);
10589     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
10590        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
10591     VarName = OS.str();
10592   }
10593   Linkage = llvm::GlobalValue::InternalLinkage;
10594   Addr =
10595       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
10596                                   getDefaultFirstprivateAddressSpace());
10597   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
10598   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
10599   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
10600   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10601       VarName, Addr, VarSize,
10602       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
10603   return Addr;
10604 }
10605 
10606 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10607                                                    llvm::Constant *Addr) {
10608   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10609       !CGM.getLangOpts().OpenMPIsDevice)
10610     return;
10611 
10612   // If we have host/nohost variables, they do not need to be registered.
10613   Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10614       OMPDeclareTargetDeclAttr::getDeviceType(VD);
10615   if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10616     return;
10617 
10618   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10619       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10620   if (!Res) {
10621     if (CGM.getLangOpts().OpenMPIsDevice) {
10622       // Register non-target variables being emitted in device code (debug info
10623       // may cause this).
10624       StringRef VarName = CGM.getMangledName(VD);
10625       EmittedNonTargetVariables.try_emplace(VarName, Addr);
10626     }
10627     return;
10628   }
10629   // Register declare target variables.
10630   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10631   StringRef VarName;
10632   CharUnits VarSize;
10633   llvm::GlobalValue::LinkageTypes Linkage;
10634 
10635   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10636       !HasRequiresUnifiedSharedMemory) {
10637     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10638     VarName = CGM.getMangledName(VD);
10639     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10640       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10641       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10642     } else {
10643       VarSize = CharUnits::Zero();
10644     }
10645     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10646     // Temp solution to prevent optimizations of the internal variables.
10647     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10648       // Do not create a "ref-variable" if the original is not also available
10649       // on the host.
10650       if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10651         return;
10652       std::string RefName = getName({VarName, "ref"});
10653       if (!CGM.GetGlobalValue(RefName)) {
10654         llvm::Constant *AddrRef =
10655             getOrCreateInternalVariable(Addr->getType(), RefName);
10656         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10657         GVAddrRef->setConstant(/*Val=*/true);
10658         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10659         GVAddrRef->setInitializer(Addr);
10660         CGM.addCompilerUsedGlobal(GVAddrRef);
10661       }
10662     }
10663   } else {
10664     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10665             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10666              HasRequiresUnifiedSharedMemory)) &&
10667            "Declare target attribute must link or to with unified memory.");
10668     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10669       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10670     else
10671       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10672 
10673     if (CGM.getLangOpts().OpenMPIsDevice) {
10674       VarName = Addr->getName();
10675       Addr = nullptr;
10676     } else {
10677       VarName = getAddrOfDeclareTargetVar(VD).getName();
10678       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10679     }
10680     VarSize = CGM.getPointerSize();
10681     Linkage = llvm::GlobalValue::WeakAnyLinkage;
10682   }
10683 
10684   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10685       VarName, Addr, VarSize, Flags, Linkage);
10686 }
10687 
10688 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10689   if (isa<FunctionDecl>(GD.getDecl()) ||
10690       isa<OMPDeclareReductionDecl>(GD.getDecl()))
10691     return emitTargetFunctions(GD);
10692 
10693   return emitTargetGlobalVariable(GD);
10694 }
10695 
10696 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10697   for (const VarDecl *VD : DeferredGlobalVariables) {
10698     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10699         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10700     if (!Res)
10701       continue;
10702     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10703         !HasRequiresUnifiedSharedMemory) {
10704       CGM.EmitGlobal(VD);
10705     } else {
10706       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10707               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10708                HasRequiresUnifiedSharedMemory)) &&
10709              "Expected link clause or to clause with unified memory.");
10710       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10711     }
10712   }
10713 }
10714 
// Base implementation performs no adjustment; it only asserts that the
// directive is target-based. Presumably overridden by device-specific
// runtimes that need to tweak lambda-captured data -- confirm in subclasses.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10720 
10721 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10722   for (const OMPClause *Clause : D->clauselists()) {
10723     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10724       HasRequiresUnifiedSharedMemory = true;
10725     } else if (const auto *AC =
10726                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10727       switch (AC->getAtomicDefaultMemOrderKind()) {
10728       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10729         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10730         break;
10731       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10732         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10733         break;
10734       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10735         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10736         break;
10737       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10738         break;
10739       }
10740     }
10741   }
10742 }
10743 
// Returns the default atomic ordering, as possibly overridden by an
// 'atomic_default_mem_order' clause (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10747 
// Returns true when \p VD carries an OpenMP 'allocate' attribute with a
// predefined allocator, setting \p AS to the address space the allocation
// should live in. In this base implementation every supported allocator maps
// to the default address space; GPU runtimes may refine this elsewhere.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    // User-defined allocators cannot apply to static-storage variables.
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}
10772 
// Returns true if a 'requires unified_shared_memory' clause was seen
// (recorded by processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10776 
10777 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10778     CodeGenModule &CGM)
10779     : CGM(CGM) {
10780   if (CGM.getLangOpts().OpenMPIsDevice) {
10781     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10782     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10783   }
10784 }
10785 
10786 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10787   if (CGM.getLangOpts().OpenMPIsDevice)
10788     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10789 }
10790 
10791 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10792   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10793     return true;
10794 
10795   const auto *D = cast<FunctionDecl>(GD.getDecl());
10796   // Do not to emit function if it is marked as declare target as it was already
10797   // emitted.
10798   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10799     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10800       if (auto *F = dyn_cast_or_null<llvm::Function>(
10801               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10802         return !F->isDeclaration();
10803       return false;
10804     }
10805     return true;
10806   }
10807 
10808   return !AlreadyEmittedTargetDecls.insert(D).second;
10809 }
10810 
// Emits a global-initializer-style function that forwards the 'requires'
// flags to the offload runtime via __tgt_register_requires. Returns nullptr
// when no registration is needed (no offload targets, simd-only mode, device
// compilation, or no target regions/entries in this TU).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    // void <prefix>omp_offloading_requires_reg().
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // Function body: __tgt_register_requires(Flags).
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10852 
10853 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10854                                     const OMPExecutableDirective &D,
10855                                     SourceLocation Loc,
10856                                     llvm::Function *OutlinedFn,
10857                                     ArrayRef<llvm::Value *> CapturedVars) {
10858   if (!CGF.HaveInsertPoint())
10859     return;
10860 
10861   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10862   CodeGenFunction::RunCleanupsScope Scope(CGF);
10863 
10864   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10865   llvm::Value *Args[] = {
10866       RTLoc,
10867       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10868       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10869   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10870   RealArgs.append(std::begin(Args), std::end(Args));
10871   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10872 
10873   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10874       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10875   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10876 }
10877 
10878 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10879                                          const Expr *NumTeams,
10880                                          const Expr *ThreadLimit,
10881                                          SourceLocation Loc) {
10882   if (!CGF.HaveInsertPoint())
10883     return;
10884 
10885   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10886 
10887   llvm::Value *NumTeamsVal =
10888       NumTeams
10889           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10890                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10891           : CGF.Builder.getInt32(0);
10892 
10893   llvm::Value *ThreadLimitVal =
10894       ThreadLimit
10895           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10896                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10897           : CGF.Builder.getInt32(0);
10898 
10899   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10900   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10901                                      ThreadLimitVal};
10902   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10903                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10904                       PushNumTeamsArgs);
10905 }
10906 
// Emits a 'target data' region: __tgt_target_data_begin_mapper, the region
// body, then __tgt_target_data_end_mapper, honoring the optional 'if' and
// 'device' clauses. When device pointers must be privatized the body is
// emitted twice (inside the then-branch with privatization and in the
// else-branch without); otherwise it is emitted once between the two calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
    //
    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    // Reuse the arrays built by BeginThenGen (stored in Info).
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
11060 
// Emits a standalone target data directive ('target enter data',
// 'target exit data', or 'target update') as a single call to the matching
// __tgt_target_data_* mapper function, honoring the optional 'if', 'device',
// 'nowait', and 'depend' clauses. With 'depend'/'nowait' the call is wrapped
// in a target task.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and the array values are filled in by TargetThenGen below and
  // read by ThenGen, hence the reference captures.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No 'device' clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are listed explicitly (instead of a
    // 'default') so that adding a new kind triggers a covered-switch warning
    // here; none of them can reach this function.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // 'depend' or 'nowait' requires wrapping the call in a target task.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});
    // Publish the arrays to ThenGen through the captured InputInfo.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // 'if' false: the directive is a no-op (empty else branch).
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11238 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    // Classification of the parameter; defaults to Vector.
    ParamKindTy Kind = Vector;
    // Per the name: the linear stride, or (presumably, for
    // LinearWithVarStride) the argument position supplying it -- confirm
    // against the mangling code that consumes this.
    llvm::APSInt StrideOrArg;
    // Alignment value; presumably from an 'aligned' clause -- confirm.
    llvm::APSInt Alignment;
  };
} // namespace
11249 
11250 static unsigned evaluateCDTSize(const FunctionDecl *FD,
11251                                 ArrayRef<ParamAttrTy> ParamAttrs) {
11252   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11253   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11254   // of that clause. The VLEN value must be power of 2.
11255   // In other case the notion of the function`s "characteristic data type" (CDT)
11256   // is used to compute the vector length.
11257   // CDT is defined in the following order:
11258   //   a) For non-void function, the CDT is the return type.
11259   //   b) If the function has any non-uniform, non-linear parameters, then the
11260   //   CDT is the type of the first such parameter.
11261   //   c) If the CDT determined by a) or b) above is struct, union, or class
11262   //   type which is pass-by-value (except for the type that maps to the
11263   //   built-in complex data type), the characteristic data type is int.
11264   //   d) If none of the above three cases is applicable, the CDT is int.
11265   // The VLEN is then determined based on the CDT and the size of vector
11266   // register of that ISA for which current vector version is generated. The
11267   // VLEN is computed using the formula below:
11268   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
11269   // where vector register size specified in section 3.2.1 Registers and the
11270   // Stack Frame of original AMD64 ABI document.
11271   QualType RetType = FD->getReturnType();
11272   if (RetType.isNull())
11273     return 0;
11274   ASTContext &C = FD->getASTContext();
11275   QualType CDT;
11276   if (!RetType.isNull() && !RetType->isVoidType()) {
11277     CDT = RetType;
11278   } else {
11279     unsigned Offset = 0;
11280     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11281       if (ParamAttrs[Offset].Kind == Vector)
11282         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11283       ++Offset;
11284     }
11285     if (CDT.isNull()) {
11286       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11287         if (ParamAttrs[I + Offset].Kind == Vector) {
11288           CDT = FD->getParamDecl(I)->getType();
11289           break;
11290         }
11291       }
11292     }
11293   }
11294   if (CDT.isNull())
11295     CDT = C.IntTy;
11296   CDT = CDT->getCanonicalTypeUnqualified();
11297   if (CDT->isRecordType() || CDT->isUnionType())
11298     CDT = C.IntTy;
11299   return C.getTypeSize(CDT);
11300 }
11301 
11302 static void
11303 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11304                            const llvm::APSInt &VLENVal,
11305                            ArrayRef<ParamAttrTy> ParamAttrs,
11306                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
11307   struct ISADataTy {
11308     char ISA;
11309     unsigned VecRegSize;
11310   };
11311   ISADataTy ISAData[] = {
11312       {
11313           'b', 128
11314       }, // SSE
11315       {
11316           'c', 256
11317       }, // AVX
11318       {
11319           'd', 256
11320       }, // AVX2
11321       {
11322           'e', 512
11323       }, // AVX512
11324   };
11325   llvm::SmallVector<char, 2> Masked;
11326   switch (State) {
11327   case OMPDeclareSimdDeclAttr::BS_Undefined:
11328     Masked.push_back('N');
11329     Masked.push_back('M');
11330     break;
11331   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11332     Masked.push_back('N');
11333     break;
11334   case OMPDeclareSimdDeclAttr::BS_Inbranch:
11335     Masked.push_back('M');
11336     break;
11337   }
11338   for (char Mask : Masked) {
11339     for (const ISADataTy &Data : ISAData) {
11340       SmallString<256> Buffer;
11341       llvm::raw_svector_ostream Out(Buffer);
11342       Out << "_ZGV" << Data.ISA << Mask;
11343       if (!VLENVal) {
11344         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11345         assert(NumElts && "Non-zero simdlen/cdtsize expected");
11346         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11347       } else {
11348         Out << VLENVal;
11349       }
11350       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11351         switch (ParamAttr.Kind){
11352         case LinearWithVarStride:
11353           Out << 's' << ParamAttr.StrideOrArg;
11354           break;
11355         case Linear:
11356           Out << 'l';
11357           if (ParamAttr.StrideOrArg != 1)
11358             Out << ParamAttr.StrideOrArg;
11359           break;
11360         case Uniform:
11361           Out << 'u';
11362           break;
11363         case Vector:
11364           Out << 'v';
11365           break;
11366         }
11367         if (!!ParamAttr.Alignment)
11368           Out << 'a' << ParamAttr.Alignment;
11369       }
11370       Out << '_' << Fn->getName();
11371       Fn->addFnAttr(Out.str());
11372     }
11373   }
11374 }
11375 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11381 
11382 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11383 ///
11384 /// TODO: Need to implement the behavior for reference marked with a
11385 /// var or no linear modifiers (1.b in the section). For this, we
11386 /// need to extend ParamKindTy to support the linear modifiers.
11387 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11388   QT = QT.getCanonicalType();
11389 
11390   if (QT->isVoidType())
11391     return false;
11392 
11393   if (Kind == ParamKindTy::Uniform)
11394     return false;
11395 
11396   if (Kind == ParamKindTy::Linear)
11397     return false;
11398 
11399   // TODO: Handle linear references with modifiers
11400 
11401   if (Kind == ParamKindTy::LinearWithVarStride)
11402     return false;
11403 
11404   return true;
11405 }
11406 
11407 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11408 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11409   QT = QT.getCanonicalType();
11410   unsigned Size = C.getTypeSize(QT);
11411 
11412   // Only scalars and complex within 16 bytes wide set PVB to true.
11413   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11414     return false;
11415 
11416   if (QT->isFloatingType())
11417     return true;
11418 
11419   if (QT->isIntegerType())
11420     return true;
11421 
11422   if (QT->isPointerType())
11423     return true;
11424 
11425   // TODO: Add support for complex types (section 3.1.2, item 2).
11426 
11427   return false;
11428 }
11429 
11430 /// Computes the lane size (LS) of a return type or of an input parameter,
11431 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11432 /// TODO: Add support for references, section 3.2.1, item 1.
11433 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11434   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11435     QualType PTy = QT.getCanonicalType()->getPointeeType();
11436     if (getAArch64PBV(PTy, C))
11437       return C.getTypeSize(PTy);
11438   }
11439   if (getAArch64PBV(QT, C))
11440     return C.getTypeSize(QT);
11441 
11442   return C.getTypeSize(C.getUIntPtrType());
11443 }
11444 
11445 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11446 // signature of the scalar function, as defined in 3.2.2 of the
11447 // AAVFABI.
11448 static std::tuple<unsigned, unsigned, bool>
11449 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11450   QualType RetType = FD->getReturnType().getCanonicalType();
11451 
11452   ASTContext &C = FD->getASTContext();
11453 
11454   bool OutputBecomesInput = false;
11455 
11456   llvm::SmallVector<unsigned, 8> Sizes;
11457   if (!RetType->isVoidType()) {
11458     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11459     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11460       OutputBecomesInput = true;
11461   }
11462   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11463     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11464     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11465   }
11466 
11467   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11468   // The LS of a function parameter / return value can only be a power
11469   // of 2, starting from 8 bits, up to 128.
11470   assert(std::all_of(Sizes.begin(), Sizes.end(),
11471                      [](unsigned Size) {
11472                        return Size == 8 || Size == 16 || Size == 32 ||
11473                               Size == 64 || Size == 128;
11474                      }) &&
11475          "Invalid size");
11476 
11477   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11478                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
11479                          OutputBecomesInput);
11480 }
11481 
11482 /// Mangle the parameter part of the vector function name according to
11483 /// their OpenMP classification. The mangling function is defined in
11484 /// section 3.5 of the AAVFABI.
11485 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11486   SmallString<256> Buffer;
11487   llvm::raw_svector_ostream Out(Buffer);
11488   for (const auto &ParamAttr : ParamAttrs) {
11489     switch (ParamAttr.Kind) {
11490     case LinearWithVarStride:
11491       Out << "ls" << ParamAttr.StrideOrArg;
11492       break;
11493     case Linear:
11494       Out << 'l';
11495       // Don't print the step value if it is not present or if it is
11496       // equal to 1.
11497       if (ParamAttr.StrideOrArg != 1)
11498         Out << ParamAttr.StrideOrArg;
11499       break;
11500     case Uniform:
11501       Out << 'u';
11502       break;
11503     case Vector:
11504       Out << 'v';
11505       break;
11506     }
11507 
11508     if (!!ParamAttr.Alignment)
11509       Out << 'a' << ParamAttr.Alignment;
11510   }
11511 
11512   return std::string(Out.str());
11513 }
11514 
11515 // Function used to add the attribute. The parameter `VLEN` is
11516 // templated to allow the use of "x" when targeting scalable functions
11517 // for SVE.
11518 template <typename T>
11519 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11520                                  char ISA, StringRef ParSeq,
11521                                  StringRef MangledName, bool OutputBecomesInput,
11522                                  llvm::Function *Fn) {
11523   SmallString<256> Buffer;
11524   llvm::raw_svector_ostream Out(Buffer);
11525   Out << Prefix << ISA << LMask << VLEN;
11526   if (OutputBecomesInput)
11527     Out << "v";
11528   Out << ParSeq << "_" << MangledName;
11529   Fn->addFnAttr(Out.str());
11530 }
11531 
11532 // Helper function to generate the Advanced SIMD names depending on
11533 // the value of the NDS when simdlen is not present.
11534 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11535                                       StringRef Prefix, char ISA,
11536                                       StringRef ParSeq, StringRef MangledName,
11537                                       bool OutputBecomesInput,
11538                                       llvm::Function *Fn) {
11539   switch (NDS) {
11540   case 8:
11541     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11542                          OutputBecomesInput, Fn);
11543     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11544                          OutputBecomesInput, Fn);
11545     break;
11546   case 16:
11547     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11548                          OutputBecomesInput, Fn);
11549     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11550                          OutputBecomesInput, Fn);
11551     break;
11552   case 32:
11553     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11554                          OutputBecomesInput, Fn);
11555     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11556                          OutputBecomesInput, Fn);
11557     break;
11558   case 64:
11559   case 128:
11560     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11561                          OutputBecomesInput, Fn);
11562     break;
11563   default:
11564     llvm_unreachable("Scalar type is too wide.");
11565   }
11566 }
11567 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// Validates any user-provided `simdlen` (\p UserVLEN) against the target
/// ISA ('n' = Advanced SIMD, 's' = SVE) and emits the mangled vector-variant
/// attributes on \p Fn; emits a warning and bails out on invalid input.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable length, mangled as "x".
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11676 
11677 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11678                                               llvm::Function *Fn) {
11679   ASTContext &C = CGM.getContext();
11680   FD = FD->getMostRecentDecl();
11681   // Map params to their positions in function decl.
11682   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11683   if (isa<CXXMethodDecl>(FD))
11684     ParamPositions.try_emplace(FD, 0);
11685   unsigned ParamPos = ParamPositions.size();
11686   for (const ParmVarDecl *P : FD->parameters()) {
11687     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11688     ++ParamPos;
11689   }
11690   while (FD) {
11691     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11692       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11693       // Mark uniform parameters.
11694       for (const Expr *E : Attr->uniforms()) {
11695         E = E->IgnoreParenImpCasts();
11696         unsigned Pos;
11697         if (isa<CXXThisExpr>(E)) {
11698           Pos = ParamPositions[FD];
11699         } else {
11700           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11701                                 ->getCanonicalDecl();
11702           Pos = ParamPositions[PVD];
11703         }
11704         ParamAttrs[Pos].Kind = Uniform;
11705       }
11706       // Get alignment info.
11707       auto NI = Attr->alignments_begin();
11708       for (const Expr *E : Attr->aligneds()) {
11709         E = E->IgnoreParenImpCasts();
11710         unsigned Pos;
11711         QualType ParmTy;
11712         if (isa<CXXThisExpr>(E)) {
11713           Pos = ParamPositions[FD];
11714           ParmTy = E->getType();
11715         } else {
11716           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11717                                 ->getCanonicalDecl();
11718           Pos = ParamPositions[PVD];
11719           ParmTy = PVD->getType();
11720         }
11721         ParamAttrs[Pos].Alignment =
11722             (*NI)
11723                 ? (*NI)->EvaluateKnownConstInt(C)
11724                 : llvm::APSInt::getUnsigned(
11725                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11726                           .getQuantity());
11727         ++NI;
11728       }
11729       // Mark linear parameters.
11730       auto SI = Attr->steps_begin();
11731       auto MI = Attr->modifiers_begin();
11732       for (const Expr *E : Attr->linears()) {
11733         E = E->IgnoreParenImpCasts();
11734         unsigned Pos;
11735         // Rescaling factor needed to compute the linear parameter
11736         // value in the mangled name.
11737         unsigned PtrRescalingFactor = 1;
11738         if (isa<CXXThisExpr>(E)) {
11739           Pos = ParamPositions[FD];
11740         } else {
11741           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11742                                 ->getCanonicalDecl();
11743           Pos = ParamPositions[PVD];
11744           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11745             PtrRescalingFactor = CGM.getContext()
11746                                      .getTypeSizeInChars(P->getPointeeType())
11747                                      .getQuantity();
11748         }
11749         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11750         ParamAttr.Kind = Linear;
11751         // Assuming a stride of 1, for `linear` without modifiers.
11752         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11753         if (*SI) {
11754           Expr::EvalResult Result;
11755           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11756             if (const auto *DRE =
11757                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11758               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
11759                 ParamAttr.Kind = LinearWithVarStride;
11760                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
11761                     ParamPositions[StridePVD->getCanonicalDecl()]);
11762               }
11763             }
11764           } else {
11765             ParamAttr.StrideOrArg = Result.Val.getInt();
11766           }
11767         }
11768         // If we are using a linear clause on a pointer, we need to
11769         // rescale the value of linear_step with the byte size of the
11770         // pointee type.
11771         if (Linear == ParamAttr.Kind)
11772           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11773         ++SI;
11774         ++MI;
11775       }
11776       llvm::APSInt VLENVal;
11777       SourceLocation ExprLoc;
11778       const Expr *VLENExpr = Attr->getSimdlen();
11779       if (VLENExpr) {
11780         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11781         ExprLoc = VLENExpr->getExprLoc();
11782       }
11783       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11784       if (CGM.getTriple().isX86()) {
11785         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11786       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11787         unsigned VLEN = VLENVal.getExtValue();
11788         StringRef MangledName = Fn->getName();
11789         if (CGM.getTarget().hasFeature("sve"))
11790           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11791                                          MangledName, 's', 128, Fn, ExprLoc);
11792         if (CGM.getTarget().hasFeature("neon"))
11793           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11794                                          MangledName, 'n', 128, Fn, ExprLoc);
11795       }
11796     }
11797     FD = FD->getPreviousDecl();
11798   }
11799 }
11800 
namespace {
/// Cleanup action for doacross support.
/// Emits the captured runtime finalization call (__kmpc_doacross_fini) when
/// the doacross region is exited, normally or via exception unwinding.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments the finalization runtime call takes.
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;            // Finalization runtime function.
  llvm::Value *Args[DoacrossFinArgs];    // Captured loc/gtid arguments.

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    // Copy the arguments by value: the ArrayRef's storage may not outlive
    // this cleanup object.
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to emit if the insertion point has already been cleared.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
11825 
// Emits the __kmpc_doacross_init call for an ordered(n) loop directive and
// registers an EH cleanup that emits the matching __kmpc_doacross_fini.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build (and cache in KmpDimTy) the kmp_dim record type.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize one kmp_dim per loop dimension (lower bound stays 0).
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register a cleanup so __kmpc_doacross_fini runs on every exit path,
  // including exceptional unwinding.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11896 
11897 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11898                                           const OMPDependClause *C) {
11899   QualType Int64Ty =
11900       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11901   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11902   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11903       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11904   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11905   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11906     const Expr *CounterVal = C->getLoopData(I);
11907     assert(CounterVal);
11908     llvm::Value *CntVal = CGF.EmitScalarConversion(
11909         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11910         CounterVal->getExprLoc());
11911     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11912                           /*Volatile=*/false, Int64Ty);
11913   }
11914   llvm::Value *Args[] = {
11915       emitUpdateLocation(CGF, C->getBeginLoc()),
11916       getThreadID(CGF, C->getBeginLoc()),
11917       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11918   llvm::FunctionCallee RTLFn;
11919   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11920     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11921                                                   OMPRTL___kmpc_doacross_post);
11922   } else {
11923     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11924     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11925                                                   OMPRTL___kmpc_doacross_wait);
11926   }
11927   CGF.EmitRuntimeCall(RTLFn, Args);
11928 }
11929 
11930 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11931                                llvm::FunctionCallee Callee,
11932                                ArrayRef<llvm::Value *> Args) const {
11933   assert(Loc.isValid() && "Outlined function call location must be valid.");
11934   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11935 
11936   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11937     if (Fn->doesNotThrow()) {
11938       CGF.EmitNounwindRuntimeCall(Fn, Args);
11939       return;
11940     }
11941   }
11942   CGF.EmitRuntimeCall(Callee, Args);
11943 }
11944 
// Default implementation: forwards directly to emitCall. Device runtimes may
// override this to adjust arguments for their calling conventions.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11950 
11951 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11952   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11953     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11954       HasEmittedDeclareTargetRegion = true;
11955 }
11956 
// Default (host) implementation: the native and target parameters coincide,
// so TargetParam is ignored and the native parameter's address is returned.
// Device runtimes override this when parameters are translated.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11962 
/// Returns the address to use for the local variable VD, handling two special
/// cases visible here: (1) variables recorded for the current function in
/// UntiedLocalVarsStack, and (2) variables with an OMPAllocateDeclAttr, which
/// are allocated with __kmpc_alloc and freed via a pushed __kmpc_free cleanup.
/// Returns Address::invalid() when no special handling applies.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Look up VD in the untied-task locals registered for the current function.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA-like types: size is only known at runtime.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant-sized type: round the size up to the alignment at
      // compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // void.addr = __kmpc_alloc(tid, size, allocator);
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Raw encoding of the source location; decoded again in Emit().
      unsigned LocEncoding;
      Address Addr;
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(tid, addr, allocator);
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is an enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the real (untied) address if present, otherwise the freshly
    // allocated one.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12065 
12066 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12067                                              const VarDecl *VD) const {
12068   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12069   if (It == FunctionToUntiedTaskStackMap.end())
12070     return false;
12071   return UntiedLocalVarsStack[It->second].count(VD) > 0;
12072 }
12073 
/// Pushes a set with the declarations from all nontemporal clauses of the
/// directive onto NontemporalDeclsStack; the matching destructor pops it.
/// Nothing is pushed when the directive has no nontemporal clauses.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.")
;
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        // Plain variable reference.
        VD = DRE->getDecl();
      } else {
        // Otherwise it must be a member of the current class ('this->x').
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
12099 
12100 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12101   if (!NeedToPush)
12102     return;
12103   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12104 }
12105 
12106 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12107     CodeGenFunction &CGF,
12108     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12109                           std::pair<Address, Address>> &LocalVars)
12110     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12111   if (!NeedToPush)
12112     return;
12113   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12114       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12115   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12116 }
12117 
12118 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12119   if (!NeedToPush)
12120     return;
12121   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12122 }
12123 
12124 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12125   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12126 
12127   return llvm::any_of(
12128       CGM.getOpenMPRuntime().NontemporalDeclsStack,
12129       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
12130 }
12131 
12132 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12133     const OMPExecutableDirective &S,
12134     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12135     const {
12136   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12137   // Vars in target/task regions must be excluded completely.
12138   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12139       isOpenMPTaskingDirective(S.getDirectiveKind())) {
12140     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12141     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12142     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12143     for (const CapturedStmt::Capture &Cap : CS->captures()) {
12144       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12145         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12146     }
12147   }
12148   // Exclude vars in private clauses.
12149   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12150     for (const Expr *Ref : C->varlists()) {
12151       if (!Ref->getType()->isScalarType())
12152         continue;
12153       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12154       if (!DRE)
12155         continue;
12156       NeedToCheckForLPCs.insert(DRE->getDecl());
12157     }
12158   }
12159   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12160     for (const Expr *Ref : C->varlists()) {
12161       if (!Ref->getType()->isScalarType())
12162         continue;
12163       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12164       if (!DRE)
12165         continue;
12166       NeedToCheckForLPCs.insert(DRE->getDecl());
12167     }
12168   }
12169   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12170     for (const Expr *Ref : C->varlists()) {
12171       if (!Ref->getType()->isScalarType())
12172         continue;
12173       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12174       if (!DRE)
12175         continue;
12176       NeedToCheckForLPCs.insert(DRE->getDecl());
12177     }
12178   }
12179   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12180     for (const Expr *Ref : C->varlists()) {
12181       if (!Ref->getType()->isScalarType())
12182         continue;
12183       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12184       if (!DRE)
12185         continue;
12186       NeedToCheckForLPCs.insert(DRE->getDecl());
12187     }
12188   }
12189   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12190     for (const Expr *Ref : C->varlists()) {
12191       if (!Ref->getType()->isScalarType())
12192         continue;
12193       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12194       if (!DRE)
12195         continue;
12196       NeedToCheckForLPCs.insert(DRE->getDecl());
12197     }
12198   }
12199   for (const Decl *VD : NeedToCheckForLPCs) {
12200     for (const LastprivateConditionalData &Data :
12201          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12202       if (Data.DeclToUniqueName.count(VD) > 0) {
12203         if (!Data.Disabled)
12204           NeedToAddForLPCsAsDisabled.insert(VD);
12205         break;
12206       }
12207     }
12208   }
12209 }
12210 
/// If the directive has at least one lastprivate(conditional) clause (OpenMP
/// >= 5.0), pushes a new entry onto LastprivateConditionalStack mapping each
/// conditional lastprivate variable to a unique global name; otherwise does
/// nothing. The destructor pops the entry.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push only if OpenMP >= 5.0 and some lastprivate clause has the
      // 'conditional' modifier.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional lastprivate decl to a unique "pl_cond" name used
    // for the backing global variables.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12242 
12243 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12244     CodeGenFunction &CGF, const OMPExecutableDirective &S)
12245     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12246   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12247   if (CGM.getLangOpts().OpenMP < 50)
12248     return;
12249   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12250   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12251   if (!NeedToAddForLPCsAsDisabled.empty()) {
12252     Action = ActionToDo::DisableLastprivateConditional;
12253     LastprivateConditionalData &Data =
12254         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12255     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12256       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12257     Data.Fn = CGF.CurFn;
12258     Data.Disabled = true;
12259   }
12260 }
12261 
/// Factory for the "disable" form of the RAII object: suppresses lastprivate
/// conditional handling for the decls privatized by directive \p S for the
/// lifetime of the returned object.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12267 
12268 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12269   if (CGM.getLangOpts().OpenMP < 50)
12270     return;
12271   if (Action == ActionToDo::DisableLastprivateConditional) {
12272     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12273            "Expected list of disabled private vars.");
12274     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12275   }
12276   if (Action == ActionToDo::PushAsLastprivateConditional) {
12277     assert(
12278         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12279         "Expected list of lastprivate conditional vars.");
12280     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12281   }
12282 }
12283 
/// Creates (or reuses) the per-function private copy for a lastprivate
/// conditional variable: an implicit record { value, Fired } where 'Fired'
/// (a char) records whether the value was updated in the region. Zeroes the
/// Fired flag and returns the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build the { value, Fired } record.
    // NOTE(review): "lasprivate" looks like a typo for "lastprivate", but the
    // name is emitted into IR, so renaming could break tests that match it —
    // confirm before changing.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Reuse the record created on a previous call.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12318 
12319 namespace {
12320 /// Checks if the lastprivate conditional variable is referenced in LHS.
12321 class LastprivateConditionalRefChecker final
12322     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12323   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12324   const Expr *FoundE = nullptr;
12325   const Decl *FoundD = nullptr;
12326   StringRef UniqueDeclName;
12327   LValue IVLVal;
12328   llvm::Function *FoundFn = nullptr;
12329   SourceLocation Loc;
12330 
12331 public:
12332   bool VisitDeclRefExpr(const DeclRefExpr *E) {
12333     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12334          llvm::reverse(LPM)) {
12335       auto It = D.DeclToUniqueName.find(E->getDecl());
12336       if (It == D.DeclToUniqueName.end())
12337         continue;
12338       if (D.Disabled)
12339         return false;
12340       FoundE = E;
12341       FoundD = E->getDecl()->getCanonicalDecl();
12342       UniqueDeclName = It->second;
12343       IVLVal = D.IVLVal;
12344       FoundFn = D.Fn;
12345       break;
12346     }
12347     return FoundE == E;
12348   }
12349   bool VisitMemberExpr(const MemberExpr *E) {
12350     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12351       return false;
12352     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12353          llvm::reverse(LPM)) {
12354       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12355       if (It == D.DeclToUniqueName.end())
12356         continue;
12357       if (D.Disabled)
12358         return false;
12359       FoundE = E;
12360       FoundD = E->getMemberDecl()->getCanonicalDecl();
12361       UniqueDeclName = It->second;
12362       IVLVal = D.IVLVal;
12363       FoundFn = D.Fn;
12364       break;
12365     }
12366     return FoundE == E;
12367   }
12368   bool VisitStmt(const Stmt *S) {
12369     for (const Stmt *Child : S->children()) {
12370       if (!Child)
12371         continue;
12372       if (const auto *E = dyn_cast<Expr>(Child))
12373         if (!E->isGLValue())
12374           continue;
12375       if (Visit(Child))
12376         return true;
12377     }
12378     return false;
12379   }
12380   explicit LastprivateConditionalRefChecker(
12381       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12382       : LPM(LPM) {}
12383   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12384   getFoundData() const {
12385     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12386   }
12387 };
12388 } // namespace
12389 
/// Emits the "maybe update the global last value" sequence for a lastprivate
/// conditional variable: inside a critical region (unless in simd-only mode),
/// if the global last-iteration counter is <= the current one, store the
/// current iteration number and the private value into internal globals named
/// after \p UniqueDeclName.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
12476 
/// If \p LHS references a tracked lastprivate conditional variable, emits the
/// corresponding update: either the full last-value update (variable belongs
/// to the current function) or an atomic store to the 'Fired' flag of the
/// outer function's private copy (inner parallel region).
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    // Reinterpret the private copy as the { value, Fired } record created by
    // emitLastprivateConditionalInit and set Fired.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
12519 
12520 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12521     CodeGenFunction &CGF, const OMPExecutableDirective &D,
12522     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12523   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12524     return;
12525   auto Range = llvm::reverse(LastprivateConditionalStack);
12526   auto It = llvm::find_if(
12527       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12528   if (It == Range.end() || It->Fn != CGF.CurFn)
12529     return;
12530   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12531   assert(LPCI != LastprivateConditionalToTypes.end() &&
12532          "Lastprivates must be registered already.");
12533   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12534   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12535   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12536   for (const auto &Pair : It->DeclToUniqueName) {
12537     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12538     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
12539       continue;
12540     auto I = LPCI->getSecond().find(Pair.first);
12541     assert(I != LPCI->getSecond().end() &&
12542            "Lastprivate must be rehistered already.");
12543     // bool Cmp = priv_a.Fired != 0;
12544     LValue BaseLVal = std::get<3>(I->getSecond());
12545     LValue FiredLVal =
12546         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12547     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12548     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12549     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12550     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12551     // if (Cmp) {
12552     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12553     CGF.EmitBlock(ThenBB);
12554     Address Addr = CGF.GetAddrOfLocalVar(VD);
12555     LValue LVal;
12556     if (VD->getType()->isReferenceType())
12557       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12558                                            AlignmentSource::Decl);
12559     else
12560       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12561                                 AlignmentSource::Decl);
12562     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12563                                      D.getBeginLoc());
12564     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12565     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12566     // }
12567   }
12568 }
12569 
12570 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12571     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12572     SourceLocation Loc) {
12573   if (CGF.getLangOpts().OpenMP < 50)
12574     return;
12575   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12576   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12577          "Unknown lastprivate conditional variable.");
12578   StringRef UniqueName = It->second;
12579   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12580   // The variable was not updated in the region - exit.
12581   if (!GV)
12582     return;
12583   LValue LPLVal = CGF.MakeAddrLValue(
12584       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12585   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12586   CGF.EmitStoreOfScalar(Res, PrivLVal);
12587 }
12588 
// SIMD-only mode stub: parallel outlining requires a full OpenMP runtime.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12594 
// SIMD-only mode stub: teams outlining requires a full OpenMP runtime.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12600 
// SIMD-only mode stub: task outlining requires a full OpenMP runtime.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12608 
// SIMD-only mode stub: no parallel regions can be emitted.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12616 
// SIMD-only mode stub: critical regions are not emitted in this mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12623 
// SIMD-only mode stub: master regions are not emitted in this mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12629 
// SIMD-only mode stub: masked regions are not emitted in this mode.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12636 
// SIMD-only mode stub: taskyield requires a full OpenMP runtime.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12641 
// SIMD-only mode stub: taskgroup regions require a full OpenMP runtime.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12647 
// SIMD-only mode stub: single regions are not emitted in this mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12655 
// 'ordered' regions (threads form) are not emitted in SIMD-only mode;
// unreachable stub.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12662 
// Explicit/implicit barriers need runtime calls; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12670 
// Dynamic/dispatch worksharing-loop init is runtime-backed; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12677 
// Static worksharing-loop init is runtime-backed; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12683 
// 'distribute' static init is runtime-backed; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12689 
// End-of-iteration bookkeeping for ordered loops is runtime-backed;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12696 
// Static worksharing-loop finalization is runtime-backed; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12702 
// Fetching the next chunk of a dispatched loop is runtime-backed;
// unreachable in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12710 
// 'num_threads' clause handling needs the runtime; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12716 
// 'proc_bind' clause handling needs the runtime; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12722 
// Threadprivate storage lookup needs the runtime; unreachable in SIMD-only
// mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12729 
// Threadprivate variable definitions need the runtime; unreachable in
// SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12735 
// Artificial threadprivate storage needs the runtime; unreachable in
// SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12740 
// 'flush' is not emitted in SIMD-only mode; unreachable stub.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12747 
// Task creation ('task' and friends) needs the runtime's tasking support;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12756 
// 'taskloop' needs the runtime's tasking support; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12763 
// Reductions are the one construct this SIMD-only runtime does lower: only
// the "simple" form (Options.SimpleReduction) is expected here, and the
// base-class implementation is reused to emit it.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  // Delegate to CGOpenMPRuntime, which handles the simple-reduction path.
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
12772 
// Task reductions need the runtime; unreachable in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12778 
// Task-reduction finalization needs the runtime; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12784 
// Task-reduction fixups need the runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12791 
// Task-reduction item lookup needs the runtime; unreachable in SIMD-only
// mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12798 
// 'taskwait' needs the runtime's tasking support; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12803 
// 'cancellation point' needs the runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12809 
// 'cancel' needs the runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12815 
// Target (offload) outlining is not done in SIMD-only mode; unreachable
// stub.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12822 
// Launching a 'target' region is not done in SIMD-only mode; unreachable
// stub.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12832 
// Device-side emission of target functions never happens in SIMD-only mode;
// unreachable stub.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12836 
// Device-side emission of target global variables never happens in
// SIMD-only mode; unreachable stub.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12840 
// Unlike the other target hooks this is NOT unreachable: returning false
// tells the caller this runtime did not handle GD, so the global is emitted
// through the normal (host) path.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
12844 
// 'teams' regions are not emitted in SIMD-only mode; unreachable stub.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12852 
// 'num_teams'/'thread_limit' clause handling needs the runtime; unreachable
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12859 
// 'target data' mapping is not emitted in SIMD-only mode; unreachable stub.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12865 
// Stand-alone target data directives ('target enter/exit data', 'target
// update') are not emitted in SIMD-only mode; unreachable stub.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12871 
// Doacross (ordered-depend) initialization needs the runtime; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12877 
// Doacross 'ordered depend' handling needs the runtime; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12882 
// Parameter translation is only needed for device codegen; unreachable in
// SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12888 
// Parameter address translation is only needed for device codegen;
// unreachable in SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12895