xref: /freebsd-src/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp (revision 5ffd83dbcc34f10e07f6d3e968ae6365869615f4)
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/Basic/FileManager.h"
25 #include "clang/Basic/OpenMPKinds.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/CodeGen/ConstantInitBuilder.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SetOperations.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Bitcode/BitcodeReader.h"
32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/IR/Value.h"
37 #include "llvm/Support/AtomicOrdering.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <cassert>
41 #include <numeric>
42 
43 using namespace clang;
44 using namespace CodeGen;
45 using namespace llvm::omp;
46 
47 namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info backed by the captured statement \p CS of the
  /// directive \p Kind.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info without a captured statement; used by inlined
  /// regions, which reuse the captures of their enclosing region.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks; no-op in the base class.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Returns the kind of this region (outlined, inlined, target, ...).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// Returns the OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Returns true if this region was flagged as containing a cancellation
  /// point (value supplied by the creator of the region).
  bool hasCancel() const { return HasCancel; }

  /// RTTI support: every captured-statement info created with kind CR_OpenMP
  /// is a CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region; see CGOpenMPRegionKind.
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the code of the region body.
  RegionCodeGenTy CodeGen;
  /// The OpenMP directive that produced this region.
  OpenMPDirectiveKind Kind;
  /// Whether the region was flagged as having a cancellation point.
  bool HasCancel;
};
109 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable or parameter carrying the global thread id;
  /// must be non-null (asserted).
  /// \param HelperName Name to report for the outlined capture helper.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined capture helper function.
  StringRef HelperName;
};
142 
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing part-based resumption for untied tasks:
  /// the task body is split into numbered parts and a part-id variable
  /// records which part to resume when the task function is re-entered.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True for untied tasks (note: the constructor receives 'Tied').
    bool Untied;
    /// Variable whose pointee holds the current part id of the task.
    const VarDecl *PartIDVar;
    /// Code emitted at every task scheduling point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id, built in Enter(); a new case is added at
    /// every scheduling point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Switch on the loaded part id; unmatched values go to the 'done'
        // block, which just exits the task function.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Part id 0 resumes at the very beginning of the task body.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit a task scheduling point: store the index of the next part into
    /// the part-id variable, run the scheduling-point code, exit the
    /// function, and register the resumption block as a new switch case.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Remember where to resume: the next case index of the switch.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // The block emitted here becomes the target of the new case, i.e.
        // where a re-invocation of the task picks up.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of parts the task body has been split into so far.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  /// \param Action Shared action implementing untied-task switching; must
  /// outlive this region info (held by reference).
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward untied-task switching to the shared action object.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
231 
/// API for inlined captured statement code generation in OpenMP
/// constructs. Queries are forwarded to the enclosing (outer) region info
/// when one exists; otherwise most queries are invalid or return null.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured-statement info of the enclosing region (may be
  /// null or a non-OpenMP info); kept so it can be restored afterwards.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// Note: deliberately queries the raw old CSI (not the OpenMP-only
  /// OuterRegionInfo member), so any enclosing captured-statement info can
  /// supply the name.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Returns the captured-statement info that was active before this region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
314 
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the
/// client has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Client-supplied, application-unique name for the
  /// target region's capture helper.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique, client-provided name of the target region helper.
  StringRef HelperName;
};
343 
/// Placeholder region-body callback for regions that only capture
/// expressions; it must never actually be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters need no remapping; only globals are privatized.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
406 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map of the enclosing function.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture field of the enclosing function.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info of the enclosing function.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Stash the lambda/block capture state and clear it on CGF so the
    // inlined region does not resolve variables through the enclosing
    // lambda or block.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    // Put the saved lambda/block capture state back.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
443 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
472 
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device id values understood by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
498 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
539 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule is static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
571 
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  /// Action whose Exit() hook runs when the cleanup fires; not owned.
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Run the exit hook, unless codegen has already lost its insert point.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};
585 
586 } // anonymous namespace
587 
/// Run the stored region-codegen callback inside its own cleanups scope.
/// When a pre/post action is attached, its Exit() is pushed as a
/// normal-and-EH cleanup (via CleanupTy) so it also runs when the region is
/// left through an exception or branch.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No action attached: pass a dummy no-op action to the callback.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
598 
599 /// Check if the combiner is a call to UDR combiner and if it is so return the
600 /// UDR decl used for reduction.
601 static const OMPDeclareReductionDecl *
602 getReductionInit(const Expr *ReductionOp) {
603   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
604     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
605       if (const auto *DRE =
606               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
607         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
608           return DRD;
609   return nullptr;
610 }
611 
/// Initialize \p Private from \p Original for a reduction of type \p Ty,
/// using the 'declare reduction' initializer when \p DRD has one, or a
/// null/default constant for \p Ty otherwise.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // The UDR has an explicit initializer: emit a call to the initializer
    // function, remapping the variable behind the call's first argument to
    // Private and the one behind the second argument to Original.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the opaque callee with the initializer function and emit
    // the remapped call expression.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: materialize a private constant global holding
    // the null value of Ty and copy it into Private.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the constant in the form matching Ty's evaluation kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
663 
/// Emit initialization of arrays of complex types.
/// Lowered as an element-by-element loop over the flattened array; each
/// element is initialized either with the declare-reduction initializer or
/// with a plain expression initializer.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, use the UDR initializer \p Init
/// (reading from \p SrcAddr) for every element.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely when the array has zero elements.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element pointers from one
  // iteration to the next.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope per-element cleanups to one iteration of the loop.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
752 
753 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
754   return CGF.EmitOMPSharedLValue(E);
755 }
756 
757 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
758                                             const Expr *E) {
759   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
760     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
761   return LValue();
762 }
763 
764 void ReductionCodeGen::emitAggregateInitialization(
765     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
766     const OMPDeclareReductionDecl *DRD) {
767   // Emit VarDecl with copy init for arrays.
768   // Get the address of the original variable captured in current
769   // captured region.
770   const auto *PrivateVD =
771       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
772   bool EmitDeclareReductionInit =
773       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
774   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
775                        EmitDeclareReductionInit,
776                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
777                                                 : PrivateVD->getInit(),
778                        DRD, SharedLVal.getAddress(CGF));
779 }
780 
781 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
782                                    ArrayRef<const Expr *> Origs,
783                                    ArrayRef<const Expr *> Privates,
784                                    ArrayRef<const Expr *> ReductionOps) {
785   ClausesData.reserve(Shareds.size());
786   SharedAddresses.reserve(Shareds.size());
787   Sizes.reserve(Shareds.size());
788   BaseDecls.reserve(Shareds.size());
789   const auto *IOrig = Origs.begin();
790   const auto *IPriv = Privates.begin();
791   const auto *IRed = ReductionOps.begin();
792   for (const Expr *Ref : Shareds) {
793     ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
794     std::advance(IOrig, 1);
795     std::advance(IPriv, 1);
796     std::advance(IRed, 1);
797   }
798 }
799 
800 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
801   assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
802          "Number of generated lvalues must be exactly N.");
803   LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
804   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
805   SharedAddresses.emplace_back(First, Second);
806   if (ClausesData[N].Shared == ClausesData[N].Ref) {
807     OrigAddresses.emplace_back(First, Second);
808   } else {
809     LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
810     LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
811     OrigAddresses.emplace_back(First, Second);
812   }
813 }
814 
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  // Computes and records the size of the N-th reduction item (in chars and,
  // for variably modified types, in elements), then emits the variably
  // modified private type with its VLA size expression bound to that count.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: byte size comes straight from the type; no runtime
    // element count is required.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Elements = (UB - LB) + 1; bytes = elements * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: derive the element count from the total byte size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count for the
  // duration of emitting the variably modified type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
851 
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  // Overload used when the element count was computed elsewhere (e.g. loaded
  // from a task reduction item): only maps the VLA size expression to Size
  // and emits the variably modified private type.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    // Non-VLA items never carry a runtime size.
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to Size while the type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
870 
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  // Emits the initializer for the N-th private reduction copy at PrivateAddr.
  // Selection cascade: aggregate init for array types, else the
  // declare-reduction initializer, else DefaultInit, else the private
  // variable's own non-trivial initializer.
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Recast both addresses to the memory representation of their types.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array reductions initialize element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit returning false means the caller did not handle the init;
    // fall back to the private variable's own initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
901 
902 bool ReductionCodeGen::needCleanups(unsigned N) {
903   const auto *PrivateVD =
904       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
905   QualType PrivateType = PrivateVD->getType();
906   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
907   return DTorKind != QualType::DK_none;
908 }
909 
910 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
911                                     Address PrivateAddr) {
912   const auto *PrivateVD =
913       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
914   QualType PrivateType = PrivateVD->getType();
915   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
916   if (needCleanups(N)) {
917     PrivateAddr = CGF.Builder.CreateElementBitCast(
918         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
919     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
920   }
921 }
922 
923 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
924                           LValue BaseLV) {
925   BaseTy = BaseTy.getNonReferenceType();
926   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
927          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
928     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
929       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
930     } else {
931       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
932       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
933     }
934     BaseTy = BaseTy->getPointeeType();
935   }
936   return CGF.MakeAddrLValue(
937       CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
938                                        CGF.ConvertTypeForMem(ElTy)),
939       BaseLV.getType(), BaseLV.getBaseInfo(),
940       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
941 }
942 
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // Rebuilds the pointer/reference indirection chain of BaseTy around Addr:
  // one temporary per indirection level, each outer temporary storing the
  // address of the next, with Addr stored into the innermost. Returns the
  // outermost temporary, or Addr itself when BaseTy has no indirections.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      // Link the previous level to the newly created temporary.
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      // First (outermost) temporary: this is the eventual result.
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary and hand back
    // the head of the chain.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
970 
971 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
972   const VarDecl *OrigVD = nullptr;
973   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
974     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
975     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
976       Base = TempOASE->getBase()->IgnoreParenImpCasts();
977     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
978       Base = TempASE->getBase()->IgnoreParenImpCasts();
979     DE = cast<DeclRefExpr>(Base);
980     OrigVD = cast<VarDecl>(DE->getDecl());
981   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
982     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
983     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
984       Base = TempASE->getBase()->IgnoreParenImpCasts();
985     DE = cast<DeclRefExpr>(Base);
986     OrigVD = cast<VarDecl>(DE->getDecl());
987   }
988   return OrigVD;
989 }
990 
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // For array-section/subscript reduction items the private copy covers only
  // the section, but region code addresses it through the base variable.
  // Offset the private pointer by (base - section start) so that indexing
  // relative to the base lands in the private storage.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Element distance from the section's first element back to the base.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    // Wrap the adjusted pointer in the same indirection structure as the
    // original base variable.
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1016 
1017 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1018   const OMPDeclareReductionDecl *DRD =
1019       getReductionInit(ClausesData[N].ReductionOp);
1020   return DRD && DRD->getInitializer();
1021 }
1022 
1023 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1024   return CGF.EmitLoadOfPointerLValue(
1025       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1026       getThreadIDVariable()->getType()->castAs<PointerType>());
1027 }
1028 
1029 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1030   if (!CGF.HaveInsertPoint())
1031     return;
1032   // 1.2.2 OpenMP Language Terminology
1033   // Structured block - An executable statement with a single entry at the
1034   // top and a single exit at the bottom.
1035   // The point of exit cannot be a branch out of the structured block.
1036   // longjmp() and throw() must not violate the entry/exit criteria.
1037   CGF.EHStack.pushTerminate();
1038   CodeGen(CGF);
1039   CGF.EHStack.popTerminate();
1040 }
1041 
1042 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1043     CodeGenFunction &CGF) {
1044   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1045                             getThreadIDVariable()->getType(),
1046                             AlignmentSource::Decl);
1047 }
1048 
1049 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1050                                        QualType FieldTy) {
1051   auto *Field = FieldDecl::Create(
1052       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1053       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1054       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1055   Field->setAccess(AS_public);
1056   DC->addDecl(Field);
1057   return Field;
1058 }
1059 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // Build the implicit 'ident_t' record (four i32 fields plus a void*
  // psource) used as the source-location argument of runtime calls. Field
  // order must match what getOrCreateDefaultLocation emits.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // kmp_critical_name is modeled as an array of 8 i32.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
1087 
1088 void CGOpenMPRuntime::clear() {
1089   InternalVars.clear();
1090   // Clean non-target variable declarations possibly used only in debug info.
1091   for (const auto &Data : EmittedNonTargetVariables) {
1092     if (!Data.getValue().pointsToAliveValue())
1093       continue;
1094     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1095     if (!GV)
1096       continue;
1097     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1098       continue;
1099     GV->eraseFromParent();
1100   }
1101 }
1102 
1103 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1104   SmallString<128> Buffer;
1105   llvm::raw_svector_ostream OS(Buffer);
1106   StringRef Sep = FirstSeparator;
1107   for (StringRef Part : Parts) {
1108     OS << Sep << Part;
1109     Sep = Separator;
1110   }
1111   return std::string(OS.str());
1112 }
1113 
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // Emits the outlined helper for an 'omp declare reduction' combiner or
  // initializer. In/Out are the declaration's omp_in/omp_out (or
  // omp_orig/omp_priv) variables; they are privatized to the dereferenced
  // restrict-pointer parameters before the expression is emitted.
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are small; force inlining when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers without an explicit init expression, run the Out
  // (omp_priv) variable's own non-trivial initializer first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1170 
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Emits (once per declaration) the combiner and, if present, the
  // initializer functions for a user-defined reduction and caches them in
  // UDRMap. CGF may be null when no enclosing function is being generated.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // For call-style initializers the expression is emitted inside the
    // helper; otherwise only omp_priv's own initializer runs there.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Remember which UDRs were emitted while generating CGF->CurFn so they
    // can be released together with the function.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1196 
1197 std::pair<llvm::Function *, llvm::Function *>
1198 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1199   auto I = UDRMap.find(D);
1200   if (I != UDRMap.end())
1201     return I->second;
1202   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1203   return UDRMap.lookup(D);
1204 }
1205 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // On construction, pushes a finalization callback onto the OpenMPIRBuilder
  // (if one was supplied) that routes cancellation out of a parallel region
  // through clang's cleanup machinery; the destructor pops it again.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // May be null; the push/pop events are skipped in that case.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1251 
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  // Outlines the captured statement CS of a parallel/teams directive into a
  // function whose thread-id argument is ThreadIDVar (a kmp_int32 *).
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether the region may execute a 'cancel' by probing every
  // directive kind that supports cancellation of a parallel region.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1288 
1289 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1290     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1291     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1292   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1293   return emitParallelOrTeamsOutlinedFunction(
1294       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1295 }
1296 
1297 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1298     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1299     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1300   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1301   return emitParallelOrTeamsOutlinedFunction(
1302       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1303 }
1304 
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Outlines a task/taskloop region. For untied tasks, UntiedCodeGen
  // re-enqueues the task descriptor via __kmpc_omp_task so execution can
  // resume at the next part.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  // Unlike parallel/teams regions, the task thread id is captured by value.
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Probe the task directive kinds that support cancellation.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only meaningful for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1351 
1352 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1353                              const RecordDecl *RD, const CGRecordLayout &RL,
1354                              ArrayRef<llvm::Constant *> Data) {
1355   llvm::StructType *StructTy = RL.getLLVMType();
1356   unsigned PrevIdx = 0;
1357   ConstantInitBuilder CIBuilder(CGM);
1358   auto DI = Data.begin();
1359   for (const FieldDecl *FD : RD->fields()) {
1360     unsigned Idx = RL.getLLVMFieldNo(FD);
1361     // Fill the alignment.
1362     for (unsigned I = PrevIdx; I < Idx; ++I)
1363       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1364     PrevIdx = Idx + 1;
1365     Fields.add(*DI);
1366     ++DI;
1367   }
1368 }
1369 
1370 template <class... As>
1371 static llvm::GlobalVariable *
1372 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1373                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1374                    As &&... Args) {
1375   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1376   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1377   ConstantInitBuilder CIBuilder(CGM);
1378   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1379   buildStructValue(Fields, CGM, RD, RL, Data);
1380   return Fields.finishAndCreateGlobal(
1381       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1382       std::forward<As>(Args)...);
1383 }
1384 
1385 template <typename T>
1386 static void
1387 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1388                                          ArrayRef<llvm::Constant *> Data,
1389                                          T &Parent) {
1390   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1391   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1392   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1393   buildStructValue(Fields, CGM, RD, RL, Data);
1394   Fields.finishAndAddTo(Parent);
1395 }
1396 
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  // Returns (creating and caching on first use) the default global ident_t
  // object for the given flags.
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order matches the ident_t record built in the constructor:
    // reserved_1, flags, reserved_2, reserved_3, psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    // The location is identified only by its contents; allow merging with
    // identical globals.
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1429 
1430 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1431                                              bool AtCurrentPoint) {
1432   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1433   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1434 
1435   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1436   if (AtCurrentPoint) {
1437     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1438         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1439   } else {
1440     Elem.second.ServiceInsertPt =
1441         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1442     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1443   }
1444 }
1445 
1446 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1447   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1448   if (Elem.second.ServiceInsertPt) {
1449     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1450     Elem.second.ServiceInsertPt = nullptr;
1451     Ptr->eraseFromParent();
1452   }
1453 }
1454 
// Returns an ident_t* value describing source location \p Loc for passing to
// OpenMP runtime entry points. Without debug info (or an invalid location) a
// shared default global is returned; otherwise a per-function stack copy of
// the default ident_t is kept and its psource field is rewritten with a
// ";file;function;line;column;;" string for each requested location.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Copy the default ident_t into the stack slot once, at the function's
    // service insertion point, so every use in the function sees it
    // initialized.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Location strings are cached per raw source-location encoding so each
  // distinct location is materialized at most once per module.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1515 
// Returns the OpenMP global thread id for the current function. Prefers, in
// order: a value already cached for this function, the thread-id argument of
// an enclosing outlined region (when safe w.r.t. exception handling), and
// finally an emitted call to __kmpc_global_thread_num whose result is cached
// at the function's service insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load from the thread-id variable when C++ exceptions cannot
      // interleave (no landing pad required / exceptions disabled), or when
      // the load happens in the entry block or in the same block where the
      // variable's storage was materialized.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1573 
1574 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1575   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1576   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1577     clearLocThreadIdInsertPt(CGF);
1578     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1579   }
1580   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1581     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1582       UDRMap.erase(D);
1583     FunctionUDRMap.erase(CGF.CurFn);
1584   }
1585   auto I = FunctionUDMMap.find(CGF.CurFn);
1586   if (I != FunctionUDMMap.end()) {
1587     for(const auto *D : I->second)
1588       UDMMap.erase(D);
1589     FunctionUDMMap.erase(I);
1590   }
1591   LastprivateConditionalToTypes.erase(CGF.CurFn);
1592 }
1593 
1594 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1595   return IdentTy->getPointerTo();
1596 }
1597 
1598 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1599   if (!Kmpc_MicroTy) {
1600     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1601     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1602                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1603     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1604   }
1605   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1606 }
1607 
1608 llvm::FunctionCallee
1609 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
1610   assert((IVSize == 32 || IVSize == 64) &&
1611          "IV size is not compatible with the omp runtime");
1612   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1613                                             : "__kmpc_for_static_init_4u")
1614                                 : (IVSigned ? "__kmpc_for_static_init_8"
1615                                             : "__kmpc_for_static_init_8u");
1616   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1617   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1618   llvm::Type *TypeParams[] = {
1619     getIdentTyPointerTy(),                     // loc
1620     CGM.Int32Ty,                               // tid
1621     CGM.Int32Ty,                               // schedtype
1622     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1623     PtrTy,                                     // p_lower
1624     PtrTy,                                     // p_upper
1625     PtrTy,                                     // p_stride
1626     ITy,                                       // incr
1627     ITy                                        // chunk
1628   };
1629   auto *FnTy =
1630       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1631   return CGM.CreateRuntimeFunction(FnTy, Name);
1632 }
1633 
1634 llvm::FunctionCallee
1635 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1636   assert((IVSize == 32 || IVSize == 64) &&
1637          "IV size is not compatible with the omp runtime");
1638   StringRef Name =
1639       IVSize == 32
1640           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1641           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1642   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1643   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1644                                CGM.Int32Ty,           // tid
1645                                CGM.Int32Ty,           // schedtype
1646                                ITy,                   // lower
1647                                ITy,                   // upper
1648                                ITy,                   // stride
1649                                ITy                    // chunk
1650   };
1651   auto *FnTy =
1652       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1653   return CGM.CreateRuntimeFunction(FnTy, Name);
1654 }
1655 
1656 llvm::FunctionCallee
1657 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1658   assert((IVSize == 32 || IVSize == 64) &&
1659          "IV size is not compatible with the omp runtime");
1660   StringRef Name =
1661       IVSize == 32
1662           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1663           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1664   llvm::Type *TypeParams[] = {
1665       getIdentTyPointerTy(), // loc
1666       CGM.Int32Ty,           // tid
1667   };
1668   auto *FnTy =
1669       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1670   return CGM.CreateRuntimeFunction(FnTy, Name);
1671 }
1672 
1673 llvm::FunctionCallee
1674 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1675   assert((IVSize == 32 || IVSize == 64) &&
1676          "IV size is not compatible with the omp runtime");
1677   StringRef Name =
1678       IVSize == 32
1679           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1680           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1681   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1682   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1683   llvm::Type *TypeParams[] = {
1684     getIdentTyPointerTy(),                     // loc
1685     CGM.Int32Ty,                               // tid
1686     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1687     PtrTy,                                     // p_lower
1688     PtrTy,                                     // p_upper
1689     PtrTy                                      // p_stride
1690   };
1691   auto *FnTy =
1692       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1693   return CGM.CreateRuntimeFunction(FnTy, Name);
1694 }
1695 
1696 /// Obtain information that uniquely identifies a target entry. This
1697 /// consists of the file and device IDs as well as line number associated with
1698 /// the relevant entry source location.
1699 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1700                                      unsigned &DeviceID, unsigned &FileID,
1701                                      unsigned &LineNum) {
1702   SourceManager &SM = C.getSourceManager();
1703 
1704   // The loc should be always valid and have a file ID (the user cannot use
1705   // #pragma directives in macros)
1706 
1707   assert(Loc.isValid() && "Source location is expected to be always valid.");
1708 
1709   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1710   assert(PLoc.isValid() && "Source location is expected to be always valid.");
1711 
1712   llvm::sys::fs::UniqueID ID;
1713   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1714     SM.getDiagnostics().Report(diag::err_cannot_open_file)
1715         << PLoc.getFilename() << EC.message();
1716 
1717   DeviceID = ID.getDevice();
1718   FileID = ID.getFile();
1719   LineNum = PLoc.getLine();
1720 }
1721 
// For a `declare target link` variable (or a `to` variable under unified
// shared memory), returns the address of the generated "_decl_tgt_ref_ptr"
// indirection pointer through which the variable must be accessed; returns
// an invalid Address otherwise (including in OpenMP-simd-only mode).
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      // Internal-linkage variables get a file-id suffix so pointers for
      // same-named variables in different TUs do not collide.
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the variable's address; on
      // the device it is filled in by the runtime.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1760 
1761 llvm::Constant *
1762 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1763   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1764          !CGM.getContext().getTargetInfo().isTLSSupported());
1765   // Lookup the entry, lazily creating it if necessary.
1766   std::string Suffix = getName({"cache", ""});
1767   return getOrCreateInternalVariable(
1768       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1769 }
1770 
1771 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1772                                                 const VarDecl *VD,
1773                                                 Address VDAddr,
1774                                                 SourceLocation Loc) {
1775   if (CGM.getLangOpts().OpenMPUseTLS &&
1776       CGM.getContext().getTargetInfo().isTLSSupported())
1777     return VDAddr;
1778 
1779   llvm::Type *VarTy = VDAddr.getElementType();
1780   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1781                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1782                                                        CGM.Int8PtrTy),
1783                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1784                          getOrCreateThreadPrivateCache(VD)};
1785   return Address(CGF.EmitRuntimeCall(
1786                      OMPBuilder.getOrCreateRuntimeFunction(
1787                          CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1788                      Args),
1789                  VDAddr.getAlignment());
1790 }
1791 
1792 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1793     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1794     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1795   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1796   // library.
1797   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1798   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1799                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1800                       OMPLoc);
1801   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1802   // to register constructor/destructor for variable.
1803   llvm::Value *Args[] = {
1804       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1805       Ctor, CopyCtor, Dtor};
1806   CGF.EmitRuntimeCall(
1807       OMPBuilder.getOrCreateRuntimeFunction(
1808           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1809       Args);
1810 }
1811 
// Emits, for a threadprivate variable definition, the per-copy constructor
// and destructor helper functions (when needed) and registers them with the
// runtime. When called outside a function (CGF == nullptr) the registration
// is wrapped in a new global-init function which is returned so it can be
// added to the module's initializers; otherwise registration code is emitted
// into *CGF and nullptr is returned. No-op when native TLS handles the
// variable or when this definition was already processed.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Process each threadprivate definition at most once per module.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Signature: void *ctor(void *copy) — takes and returns the address of
      // the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the incoming pointer, as the runtime contract requires.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Signature: void dtor(void *copy).
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in its own init function
      // and return it to the caller to be scheduled among global initializers.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1931 
// Emits the ctor/dtor offload entries for a `declare target to` variable
// definition: on the device, real __omp_offloading_..._ctor/_dtor functions
// are generated; on the host, placeholder globals are emitted so the entry
// tables on host and device stay in sync. Returns OpenMPIsDevice, i.e. true
// when the caller should skip emitting the host definition.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when no device compilation is involved at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // Link variables (and to-variables under unified shared memory) are handled
  // via the _decl_tgt_ref_ptr mechanism instead.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit entries only once per definition.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though nothing in the module references it;
      // it is invoked through the offload entry table.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a dummy global stands in for the device ctor so the entry
      // tables line up.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive; it is only reachable via the entry table.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: dummy placeholder global matching the device dtor entry.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2046 
// Returns a per-thread address for a compiler-generated ("artificial")
// threadprivate value named \p Name of type \p VarType. With native TLS the
// backing internal global is simply marked thread_local; otherwise a
// __kmpc_threadprivate_cached call (with its own cache global) produces the
// thread-local copy, cast back to the variable's type.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // Fast path: let the target's TLS machinery provide per-thread storage.
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  // Args: default location, thread id, master copy, size, and the per-name
  // "<Name>.artificial..cache." global used by the runtime.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2077 
2078 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2079                                    const RegionCodeGenTy &ThenGen,
2080                                    const RegionCodeGenTy &ElseGen) {
2081   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2082 
2083   // If the condition constant folds and can be elided, try to avoid emitting
2084   // the condition and the dead arm of the if/else.
2085   bool CondConstant;
2086   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2087     if (CondConstant)
2088       ThenGen(CGF);
2089     else
2090       ElseGen(CGF);
2091     return;
2092   }
2093 
2094   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2095   // emit the conditional branch.
2096   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2097   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2098   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2099   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2100 
2101   // Emit the 'then' code.
2102   CGF.EmitBlock(ThenBlock);
2103   ThenGen(CGF);
2104   CGF.EmitBranch(ContBlock);
2105   // Emit the 'else' code if present.
2106   // There is no need to emit line number for unconditional branch.
2107   (void)ApplyDebugLocation::CreateEmpty(CGF);
2108   CGF.EmitBlock(ElseBlock);
2109   ElseGen(CGF);
2110   // There is no need to emit line number for unconditional branch.
2111   (void)ApplyDebugLocation::CreateEmpty(CGF);
2112   CGF.EmitBranch(ContBlock);
2113   // Emit the continuation block for code after the if.
2114   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2115 }
2116 
// Emit the runtime sequence for '#pragma omp parallel'. When the parallel
// region is active, the outlined function is forked via __kmpc_fork_call;
// when an 'if' clause evaluates to false, the region is serialized: the
// outlined function is called directly on the current thread between
// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Code for the active (forked) case.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Code for the serialized (if-clause false) case.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an 'if' clause select between the two at runtime (or fold it);
  // without one, always fork.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2178 
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Inside an outlined OpenMP region the gtid is already available as a
  // function argument; reuse its address directly.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Serial code: materialize the thread ID into a fresh i32 temporary so we
  // can hand out an address.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}
2201 
// Get (or lazily create) a module-internal global variable with the given
// type and name. Results are cached in InternalVars keyed by the rendered
// name, so repeated requests for the same name return the same global; the
// cached global's type must match the requested one.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Render the Twine once; the map key must be a stable string.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    // Cache hit: the previously created global is reused.
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // Cache miss: create a zero-initialized common-linkage global and remember
  // it for subsequent lookups.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}
2221 
2222 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2223   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2224   std::string Name = getName({Prefix, "var"});
2225   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2226 }
2227 
namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Brackets a region with an "enter" runtime call and an "exit" runtime
/// call. With Conditional=true the enter call's return value guards the
/// region: a zero result skips the body entirely (used for master/single).
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;   // e.g. __kmpc_master / __kmpc_single
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;    // matching __kmpc_end_* entry point
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr; // set by Enter() when Conditional
 
public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // if (EnterRes != 0) { <region> } — fall through to ContBlock otherwise.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // NOTE(review): only valid after a Conditional Enter() — callers that pass
  // Conditional=true (master/single) must invoke Done() to close the guard.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2266 
// Emit '#pragma omp critical': the region body is bracketed by
// __kmpc_critical (or __kmpc_critical_with_hint when a hint clause is
// present) and __kmpc_end_critical, all keyed on the per-name lock object.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint is only passed to the enter call; the exit call keeps the
    // three-argument form.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
2296 
// Emit '#pragma omp master': the body runs only when __kmpc_master returns
// non-zero (i.e. on the master thread), followed by __kmpc_end_master.
// The guard is conditional, so Action.Done() closes the if-diamond.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional guard opened by Action.Enter().
  Action.Done(CGF);
}
2319 
// Emit '#pragma omp taskyield'. Uses the OMPIRBuilder when enabled;
// otherwise calls the __kmpc_omp_taskyield runtime entry directly.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  // A taskyield is a scheduling point; untied tasks may resume on another
  // part, so emit the untied-task switch when inside an OpenMP region.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
2339 
// Emit '#pragma omp taskgroup': the body is bracketed unconditionally by
// __kmpc_taskgroup and __kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
2359 
2360 /// Given an array of pointers to variables, project the address of a
2361 /// given variable.
2362 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2363                                       unsigned Index, const VarDecl *Var) {
2364   // Pull out the pointer to the variable.
2365   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2366   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2367 
2368   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2369   Addr = CGF.Builder.CreateElementBitCast(
2370       Addr, CGF.ConvertTypeForMem(Var->getType()));
2371   return Addr;
2372 }
2373 
// Build the helper function passed to __kmpc_copyprivate. It has the shape
//   void copy_func(void *LHSArg, void *RHSArg)
// where both arguments point to arrays of void* (one slot per copyprivate
// variable), and performs the per-variable assignment ops to copy values
// from the single-executing thread to the others.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Use the original variable's type for the copy; both sides were declared
    // with the same type.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2427 
// Emit '#pragma omp single'. The body is guarded by __kmpc_single /
// __kmpc_end_single; when copyprivate clauses are present, a did_it flag
// records whether this thread executed the region and the listed variables
// are broadcast to the other threads via __kmpc_copyprivate.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the guarded region, so only the executing
    // thread sets the flag)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional guard opened by Action.Enter().
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs binds to the helper's DestExprs parameter and
    // DstExprs to SrcExprs — confirm against emitCopyprivateCopyFunction's
    // signature before "fixing" the apparent swap.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2515 
2516 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2517                                         const RegionCodeGenTy &OrderedOpGen,
2518                                         SourceLocation Loc, bool IsThreads) {
2519   if (!CGF.HaveInsertPoint())
2520     return;
2521   // __kmpc_ordered(ident_t *, gtid);
2522   // OrderedOpGen();
2523   // __kmpc_end_ordered(ident_t *, gtid);
2524   // Prepare arguments and build a call to __kmpc_ordered
2525   if (IsThreads) {
2526     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2527     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2528                               CGM.getModule(), OMPRTL___kmpc_ordered),
2529                           Args,
2530                           OMPBuilder.getOrCreateRuntimeFunction(
2531                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2532                           Args);
2533     OrderedOpGen.setAction(Action);
2534     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2535     return;
2536   }
2537   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2538 }
2539 
2540 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2541   unsigned Flags;
2542   if (Kind == OMPD_for)
2543     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2544   else if (Kind == OMPD_sections)
2545     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2546   else if (Kind == OMPD_single)
2547     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2548   else if (Kind == OMPD_barrier)
2549     Flags = OMP_IDENT_BARRIER_EXPL;
2550   else
2551     Flags = OMP_IDENT_BARRIER_IMPL;
2552   return Flags;
2553 }
2554 
// Pick the default schedule/chunk for a loop directive. Only overrides the
// defaults when the loop is a doacross loop (an ordered clause with a loop
// count), in which case schedule(static, 1) is required.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    // Synthesize an 'unsigned 1' literal to act as the chunk expression.
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
2572 
// Emit a barrier at the current insert point. Delegates to the OMPIRBuilder
// when enabled. Inside a cancellable region (and unless ForceSimpleCall),
// uses __kmpc_cancel_barrier and, when EmitChecks, branches to the region's
// cancellation exit if the barrier reports a cancellation; otherwise emits
// a plain __kmpc_barrier.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Non-cancellable path: a plain barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2622 
2623 /// Map the OpenMP loop schedule to the runtime enumeration.
2624 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2625                                           bool Chunked, bool Ordered) {
2626   switch (ScheduleKind) {
2627   case OMPC_SCHEDULE_static:
2628     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2629                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2630   case OMPC_SCHEDULE_dynamic:
2631     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2632   case OMPC_SCHEDULE_guided:
2633     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2634   case OMPC_SCHEDULE_runtime:
2635     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2636   case OMPC_SCHEDULE_auto:
2637     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2638   case OMPC_SCHEDULE_unknown:
2639     assert(!Chunked && "chunk was specified but schedule kind not known");
2640     return Ordered ? OMP_ord_static : OMP_sch_static;
2641   }
2642   llvm_unreachable("Unexpected runtime schedule");
2643 }
2644 
2645 /// Map the OpenMP distribute schedule to the runtime enumeration.
2646 static OpenMPSchedType
2647 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2648   // only static is allowed for dist_schedule
2649   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2650 }
2651 
2652 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2653                                          bool Chunked) const {
2654   OpenMPSchedType Schedule =
2655       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2656   return Schedule == OMP_sch_static;
2657 }
2658 
2659 bool CGOpenMPRuntime::isStaticNonchunked(
2660     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2661   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2662   return Schedule == OMP_dist_sch_static;
2663 }
2664 
2665 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2666                                       bool Chunked) const {
2667   OpenMPSchedType Schedule =
2668       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2669   return Schedule == OMP_sch_static_chunked;
2670 }
2671 
2672 bool CGOpenMPRuntime::isStaticChunked(
2673     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2674   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2675   return Schedule == OMP_dist_sch_static_chunked;
2676 }
2677 
2678 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2679   OpenMPSchedType Schedule =
2680       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2681   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2682   return Schedule != OMP_sch_static;
2683 }
2684 
2685 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2686                                   OpenMPScheduleClauseModifier M1,
2687                                   OpenMPScheduleClauseModifier M2) {
2688   int Modifier = 0;
2689   switch (M1) {
2690   case OMPC_SCHEDULE_MODIFIER_monotonic:
2691     Modifier = OMP_sch_modifier_monotonic;
2692     break;
2693   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2694     Modifier = OMP_sch_modifier_nonmonotonic;
2695     break;
2696   case OMPC_SCHEDULE_MODIFIER_simd:
2697     if (Schedule == OMP_sch_static_chunked)
2698       Schedule = OMP_sch_static_balanced_chunked;
2699     break;
2700   case OMPC_SCHEDULE_MODIFIER_last:
2701   case OMPC_SCHEDULE_MODIFIER_unknown:
2702     break;
2703   }
2704   switch (M2) {
2705   case OMPC_SCHEDULE_MODIFIER_monotonic:
2706     Modifier = OMP_sch_modifier_monotonic;
2707     break;
2708   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2709     Modifier = OMP_sch_modifier_nonmonotonic;
2710     break;
2711   case OMPC_SCHEDULE_MODIFIER_simd:
2712     if (Schedule == OMP_sch_static_chunked)
2713       Schedule = OMP_sch_static_balanced_chunked;
2714     break;
2715   case OMPC_SCHEDULE_MODIFIER_last:
2716   case OMPC_SCHEDULE_MODIFIER_unknown:
2717     break;
2718   }
2719   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2720   // If the static schedule kind is specified or if the ordered clause is
2721   // specified, and if the nonmonotonic modifier is not specified, the effect is
2722   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2723   // modifier is specified, the effect is as if the nonmonotonic modifier is
2724   // specified.
2725   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2726     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2727           Schedule == OMP_sch_static_balanced_chunked ||
2728           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2729           Schedule == OMP_dist_sch_static_chunked ||
2730           Schedule == OMP_dist_sch_static))
2731       Modifier = OMP_sch_modifier_nonmonotonic;
2732   }
2733   return Schedule | Modifier;
2734 }
2735 
2736 void CGOpenMPRuntime::emitForDispatchInit(
2737     CodeGenFunction &CGF, SourceLocation Loc,
2738     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2739     bool Ordered, const DispatchRTInput &DispatchValues) {
2740   if (!CGF.HaveInsertPoint())
2741     return;
2742   OpenMPSchedType Schedule = getRuntimeSchedule(
2743       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2744   assert(Ordered ||
2745          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2746           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2747           Schedule != OMP_sch_static_balanced_chunked));
2748   // Call __kmpc_dispatch_init(
2749   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2750   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2751   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2752 
2753   // If the Chunk was not specified in the clause - use default value 1.
2754   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2755                                             : CGF.Builder.getIntN(IVSize, 1);
2756   llvm::Value *Args[] = {
2757       emitUpdateLocation(CGF, Loc),
2758       getThreadID(CGF, Loc),
2759       CGF.Builder.getInt32(addMonoNonMonoModifier(
2760           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2761       DispatchValues.LB,                                     // Lower
2762       DispatchValues.UB,                                     // Upper
2763       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2764       Chunk                                                  // Chunk
2765   };
2766   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2767 }
2768 
2769 static void emitForStaticInitCall(
2770     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2771     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2772     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2773     const CGOpenMPRuntime::StaticRTInput &Values) {
2774   if (!CGF.HaveInsertPoint())
2775     return;
2776 
2777   assert(!Values.Ordered);
2778   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2779          Schedule == OMP_sch_static_balanced_chunked ||
2780          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2781          Schedule == OMP_dist_sch_static ||
2782          Schedule == OMP_dist_sch_static_chunked);
2783 
2784   // Call __kmpc_for_static_init(
2785   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2786   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2787   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2788   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2789   llvm::Value *Chunk = Values.Chunk;
2790   if (Chunk == nullptr) {
2791     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2792             Schedule == OMP_dist_sch_static) &&
2793            "expected static non-chunked schedule");
2794     // If the Chunk was not specified in the clause - use default value 1.
2795     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2796   } else {
2797     assert((Schedule == OMP_sch_static_chunked ||
2798             Schedule == OMP_sch_static_balanced_chunked ||
2799             Schedule == OMP_ord_static_chunked ||
2800             Schedule == OMP_dist_sch_static_chunked) &&
2801            "expected static chunked schedule");
2802   }
2803   llvm::Value *Args[] = {
2804       UpdateLocation,
2805       ThreadId,
2806       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2807                                                   M2)), // Schedule type
2808       Values.IL.getPointer(),                           // &isLastIter
2809       Values.LB.getPointer(),                           // &LB
2810       Values.UB.getPointer(),                           // &UB
2811       Values.ST.getPointer(),                           // &Stride
2812       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2813       Chunk                                             // Chunk
2814   };
2815   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2816 }
2817 
2818 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2819                                         SourceLocation Loc,
2820                                         OpenMPDirectiveKind DKind,
2821                                         const OpenMPScheduleTy &ScheduleKind,
2822                                         const StaticRTInput &Values) {
2823   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2824       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2825   assert(isOpenMPWorksharingDirective(DKind) &&
2826          "Expected loop-based or sections-based directive.");
2827   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2828                                              isOpenMPLoopDirective(DKind)
2829                                                  ? OMP_IDENT_WORK_LOOP
2830                                                  : OMP_IDENT_WORK_SECTIONS);
2831   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2832   llvm::FunctionCallee StaticInitFunction =
2833       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2834   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2835   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2836                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2837 }
2838 
2839 void CGOpenMPRuntime::emitDistributeStaticInit(
2840     CodeGenFunction &CGF, SourceLocation Loc,
2841     OpenMPDistScheduleClauseKind SchedKind,
2842     const CGOpenMPRuntime::StaticRTInput &Values) {
2843   OpenMPSchedType ScheduleNum =
2844       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2845   llvm::Value *UpdatedLocation =
2846       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2847   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2848   llvm::FunctionCallee StaticInitFunction =
2849       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
2850   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2851                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2852                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2853 }
2854 
2855 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2856                                           SourceLocation Loc,
2857                                           OpenMPDirectiveKind DKind) {
2858   if (!CGF.HaveInsertPoint())
2859     return;
2860   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2861   llvm::Value *Args[] = {
2862       emitUpdateLocation(CGF, Loc,
2863                          isOpenMPDistributeDirective(DKind)
2864                              ? OMP_IDENT_WORK_DISTRIBUTE
2865                              : isOpenMPLoopDirective(DKind)
2866                                    ? OMP_IDENT_WORK_LOOP
2867                                    : OMP_IDENT_WORK_SECTIONS),
2868       getThreadID(CGF, Loc)};
2869   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2870   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2871                           CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2872                       Args);
2873 }
2874 
2875 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2876                                                  SourceLocation Loc,
2877                                                  unsigned IVSize,
2878                                                  bool IVSigned) {
2879   if (!CGF.HaveInsertPoint())
2880     return;
2881   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2882   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2883   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2884 }
2885 
2886 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2887                                           SourceLocation Loc, unsigned IVSize,
2888                                           bool IVSigned, Address IL,
2889                                           Address LB, Address UB,
2890                                           Address ST) {
2891   // Call __kmpc_dispatch_next(
2892   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2893   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2894   //          kmp_int[32|64] *p_stride);
2895   llvm::Value *Args[] = {
2896       emitUpdateLocation(CGF, Loc),
2897       getThreadID(CGF, Loc),
2898       IL.getPointer(), // &isLastIter
2899       LB.getPointer(), // &Lower
2900       UB.getPointer(), // &Upper
2901       ST.getPointer()  // &Stride
2902   };
2903   llvm::Value *Call =
2904       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2905   return CGF.EmitScalarConversion(
2906       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2907       CGF.getContext().BoolTy, Loc);
2908 }
2909 
2910 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2911                                            llvm::Value *NumThreads,
2912                                            SourceLocation Loc) {
2913   if (!CGF.HaveInsertPoint())
2914     return;
2915   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2916   llvm::Value *Args[] = {
2917       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2918       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2919   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2920                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2921                       Args);
2922 }
2923 
2924 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2925                                          ProcBindKind ProcBind,
2926                                          SourceLocation Loc) {
2927   if (!CGF.HaveInsertPoint())
2928     return;
2929   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2930   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2931   llvm::Value *Args[] = {
2932       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2933       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2934   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2935                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2936                       Args);
2937 }
2938 
2939 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2940                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2941   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2942     OMPBuilder.CreateFlush(CGF.Builder);
2943   } else {
2944     if (!CGF.HaveInsertPoint())
2945       return;
2946     // Build call void __kmpc_flush(ident_t *loc)
2947     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2948                             CGM.getModule(), OMPRTL___kmpc_flush),
2949                         emitUpdateLocation(CGF, Loc));
2950   }
2951 }
2952 
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE: these enumerators are used as field indexes into the task descriptor
/// record, so their order must match the field order of the kmp_task_t type
/// built elsewhere in this file — do not reorder.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2978 
2979 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2980   return OffloadEntriesTargetRegion.empty() &&
2981          OffloadEntriesDeviceGlobalVar.empty();
2982 }
2983 
2984 /// Initialize target region entry.
2985 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2986     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2987                                     StringRef ParentName, unsigned LineNum,
2988                                     unsigned Order) {
2989   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2990                                              "only required for the device "
2991                                              "code generation.");
2992   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2993       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2994                                    OMPTargetRegionEntryTargetRegion);
2995   ++OffloadingEntriesNum;
2996 }
2997 
2998 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2999     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3000                                   StringRef ParentName, unsigned LineNum,
3001                                   llvm::Constant *Addr, llvm::Constant *ID,
3002                                   OMPTargetRegionEntryKind Flags) {
3003   // If we are emitting code for a target, the entry is already initialized,
3004   // only has to be registered.
3005   if (CGM.getLangOpts().OpenMPIsDevice) {
3006     if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3007       unsigned DiagID = CGM.getDiags().getCustomDiagID(
3008           DiagnosticsEngine::Error,
3009           "Unable to find target region on line '%0' in the device code.");
3010       CGM.getDiags().Report(DiagID) << LineNum;
3011       return;
3012     }
3013     auto &Entry =
3014         OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3015     assert(Entry.isValid() && "Entry not initialized!");
3016     Entry.setAddress(Addr);
3017     Entry.setID(ID);
3018     Entry.setFlags(Flags);
3019   } else {
3020     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3021     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3022     ++OffloadingEntriesNum;
3023   }
3024 }
3025 
3026 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3027     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3028     unsigned LineNum) const {
3029   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3030   if (PerDevice == OffloadEntriesTargetRegion.end())
3031     return false;
3032   auto PerFile = PerDevice->second.find(FileID);
3033   if (PerFile == PerDevice->second.end())
3034     return false;
3035   auto PerParentName = PerFile->second.find(ParentName);
3036   if (PerParentName == PerFile->second.end())
3037     return false;
3038   auto PerLine = PerParentName->second.find(LineNum);
3039   if (PerLine == PerParentName->second.end())
3040     return false;
3041   // Fail if this entry is already registered.
3042   if (PerLine->second.getAddress() || PerLine->second.getID())
3043     return false;
3044   return true;
3045 }
3046 
3047 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3048     const OffloadTargetRegionEntryInfoActTy &Action) {
3049   // Scan all target region entries and perform the provided action.
3050   for (const auto &D : OffloadEntriesTargetRegion)
3051     for (const auto &F : D.second)
3052       for (const auto &P : F.second)
3053         for (const auto &L : P.second)
3054           Action(D.first, F.first, P.first(), L.first, L.second);
3055 }
3056 
3057 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3058     initializeDeviceGlobalVarEntryInfo(StringRef Name,
3059                                        OMPTargetGlobalVarEntryKind Flags,
3060                                        unsigned Order) {
3061   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3062                                              "only required for the device "
3063                                              "code generation.");
3064   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3065   ++OffloadingEntriesNum;
3066 }
3067 
/// Register a device global variable entry. On the device the entry was
/// pre-initialized from the host IR metadata and is completed here; on the
/// host a new entry is created, unless one already exists, in which case only
/// a previously unknown size/linkage is filled in.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // The entry was already registered; only fill in a size/linkage that
      // was unknown (zero) the first time around.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      // Same de-duplication as on the device: update only a previously
      // unknown size/linkage.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3107 
3108 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3109     actOnDeviceGlobalVarEntriesInfo(
3110         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3111   // Scan all target region entries and perform the provided action.
3112   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3113     Action(E.getKey(), E.getValue());
3114 }
3115 
3116 void CGOpenMPRuntime::createOffloadEntry(
3117     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3118     llvm::GlobalValue::LinkageTypes Linkage) {
3119   StringRef Name = Addr->getName();
3120   llvm::Module &M = CGM.getModule();
3121   llvm::LLVMContext &C = M.getContext();
3122 
3123   // Create constant string with the name.
3124   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3125 
3126   std::string StringName = getName({"omp_offloading", "entry_name"});
3127   auto *Str = new llvm::GlobalVariable(
3128       M, StrPtrInit->getType(), /*isConstant=*/true,
3129       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3130   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3131 
3132   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3133                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3134                             llvm::ConstantInt::get(CGM.SizeTy, Size),
3135                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3136                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3137   std::string EntryName = getName({"omp_offloading", "entry", ""});
3138   llvm::GlobalVariable *Entry = createGlobalStruct(
3139       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3140       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3141 
3142   // The entry has to be created in the section the linker expects it to be.
3143   Entry->setSection("omp_offloading_entries");
3144 }
3145 
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are emitted in creation order. Each slot holds the entry itself,
  // a best-effort source location for diagnostics, and the parent
  // function/variable name.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by matching the
        // device/file unique IDs against the files known to the source
        // manager.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the actual __tgt_offload_entry structures in creation order,
  // diagnosing entries that were never completed with an address/ID.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // With unified shared memory on the device, 'to' entries are skipped.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // 'link' entries are only emitted on the host.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
3319 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata is
  // read from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode one operand of the metadata node as an integer or a
    // string.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operand layout mirrors what
    // createOffloadEntriesAndInfoMetadata() produced.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3388 
3389 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3390   if (!KmpRoutineEntryPtrTy) {
3391     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3392     ASTContext &C = CGM.getContext();
3393     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3394     FunctionProtoType::ExtProtoInfo EPI;
3395     KmpRoutineEntryPtrQTy = C.getPointerType(
3396         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3397     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3398   }
3399 }
3400 
3401 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3402   // Make sure the type of the entry is already created. This is the type we
3403   // have to create:
3404   // struct __tgt_offload_entry{
3405   //   void      *addr;       // Pointer to the offload entry info.
3406   //                          // (function or global)
3407   //   char      *name;       // Name of the function or global.
3408   //   size_t     size;       // Size of the entry info (0 if it a function).
3409   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
3410   //   int32_t    reserved;   // Reserved, to use by the runtime library.
3411   // };
3412   if (TgtOffloadEntryQTy.isNull()) {
3413     ASTContext &C = CGM.getContext();
3414     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3415     RD->startDefinition();
3416     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3417     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3418     addFieldToRecordDecl(C, RD, C.getSizeType());
3419     addFieldToRecordDecl(
3420         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3421     addFieldToRecordDecl(
3422         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3423     RD->completeDefinition();
3424     RD->addAttr(PackedAttr::CreateImplicit(C));
3425     TgtOffloadEntryQTy = C.getRecordType(RD);
3426   }
3427   return TgtOffloadEntryQTy;
3428 }
3429 
3430 namespace {
3431 struct PrivateHelpersTy {
3432   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3433                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3434       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3435         PrivateElemInit(PrivateElemInit) {}
3436   const Expr *OriginalRef = nullptr;
3437   const VarDecl *Original = nullptr;
3438   const VarDecl *PrivateCopy = nullptr;
3439   const VarDecl *PrivateElemInit = nullptr;
3440 };
3441 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3442 } // anonymous namespace
3443 
3444 static RecordDecl *
3445 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3446   if (!Privates.empty()) {
3447     ASTContext &C = CGM.getContext();
3448     // Build struct .kmp_privates_t. {
3449     //         /*  private vars  */
3450     //       };
3451     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3452     RD->startDefinition();
3453     for (const auto &Pair : Privates) {
3454       const VarDecl *VD = Pair.second.Original;
3455       QualType Type = VD->getType().getNonReferenceType();
3456       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3457       if (VD->hasAttrs()) {
3458         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3459              E(VD->getAttrs().end());
3460              I != E; ++I)
3461           FD->addAttr(*I);
3462       }
3463     }
3464     RD->completeDefinition();
3465     return RD;
3466   }
3467   return nullptr;
3468 }
3469 
3470 static RecordDecl *
3471 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3472                          QualType KmpInt32Ty,
3473                          QualType KmpRoutineEntryPointerQTy) {
3474   ASTContext &C = CGM.getContext();
3475   // Build struct kmp_task_t {
3476   //         void *              shareds;
3477   //         kmp_routine_entry_t routine;
3478   //         kmp_int32           part_id;
3479   //         kmp_cmplrdata_t data1;
3480   //         kmp_cmplrdata_t data2;
3481   // For taskloops additional fields:
3482   //         kmp_uint64          lb;
3483   //         kmp_uint64          ub;
3484   //         kmp_int64           st;
3485   //         kmp_int32           liter;
3486   //         void *              reductions;
3487   //       };
3488   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3489   UD->startDefinition();
3490   addFieldToRecordDecl(C, UD, KmpInt32Ty);
3491   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3492   UD->completeDefinition();
3493   QualType KmpCmplrdataTy = C.getRecordType(UD);
3494   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3495   RD->startDefinition();
3496   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3497   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3498   addFieldToRecordDecl(C, RD, KmpInt32Ty);
3499   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3500   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3501   if (isOpenMPTaskLoopDirective(Kind)) {
3502     QualType KmpUInt64Ty =
3503         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3504     QualType KmpInt64Ty =
3505         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3506     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3507     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3508     addFieldToRecordDecl(C, RD, KmpInt64Ty);
3509     addFieldToRecordDecl(C, RD, KmpInt32Ty);
3510     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3511   }
3512   RD->completeDefinition();
3513   return RD;
3514 }
3515 
3516 static RecordDecl *
3517 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3518                                      ArrayRef<PrivateDataTy> Privates) {
3519   ASTContext &C = CGM.getContext();
3520   // Build struct kmp_task_t_with_privates {
3521   //         kmp_task_t task_data;
3522   //         .kmp_privates_t. privates;
3523   //       };
3524   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3525   RD->startDefinition();
3526   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3527   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3528     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3529   RD->completeDefinition();
3530   return RD;
3531 }
3532 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // The proxy takes (kmp_int32 gtid, kmp_task_t_with_privates *__restrict tt)
  // and returns kmp_int32.
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the 'tt' argument to address the task descriptor fields.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // 'Base' is the embedded kmp_task_t (first field of the wrapper record).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address (the pointer itself, not a loaded value).
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load tt->shareds and cast it to the pointer type the outlined task
  // function expects.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Pass the address of the privates record (second field of the wrapper)
  // as 'void *', or a null pointer when no privates record was generated.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments shared by tasks and taskloops.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions, each
    // loaded from the corresponding kmp_task_t field.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3647 
3648 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3649                                             SourceLocation Loc,
3650                                             QualType KmpInt32Ty,
3651                                             QualType KmpTaskTWithPrivatesPtrQTy,
3652                                             QualType KmpTaskTWithPrivatesQTy) {
3653   ASTContext &C = CGM.getContext();
3654   FunctionArgList Args;
3655   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3656                             ImplicitParamDecl::Other);
3657   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3658                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3659                                 ImplicitParamDecl::Other);
3660   Args.push_back(&GtidArg);
3661   Args.push_back(&TaskTypeArg);
3662   const auto &DestructorFnInfo =
3663       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3664   llvm::FunctionType *DestructorFnTy =
3665       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3666   std::string Name =
3667       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3668   auto *DestructorFn =
3669       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3670                              Name, &CGM.getModule());
3671   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3672                                     DestructorFnInfo);
3673   DestructorFn->setDoesNotRecurse();
3674   CodeGenFunction CGF(CGM);
3675   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3676                     Args, Loc, Loc);
3677 
3678   LValue Base = CGF.EmitLoadOfPointerLValue(
3679       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3680       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3681   const auto *KmpTaskTWithPrivatesQTyRD =
3682       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3683   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3684   Base = CGF.EmitLValueForField(Base, *FI);
3685   for (const auto *Field :
3686        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3687     if (QualType::DestructionKind DtorKind =
3688             Field->getType().isDestructedType()) {
3689       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3690       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3691     }
3692   }
3693   CGF.FinishFunction();
3694   return DestructorFn;
3695 }
3696 
3697 /// Emit a privates mapping function for correct handling of private and
3698 /// firstprivate variables.
3699 /// \code
3700 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3701 /// **noalias priv1,...,  <tyn> **noalias privn) {
3702 ///   *priv1 = &.privates.priv1;
3703 ///   ...;
3704 ///   *privn = &.privates.privn;
3705 /// }
3706 /// \endcode
3707 static llvm::Value *
3708 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3709                                ArrayRef<const Expr *> PrivateVars,
3710                                ArrayRef<const Expr *> FirstprivateVars,
3711                                ArrayRef<const Expr *> LastprivateVars,
3712                                QualType PrivatesQTy,
3713                                ArrayRef<PrivateDataTy> Privates) {
3714   ASTContext &C = CGM.getContext();
3715   FunctionArgList Args;
3716   ImplicitParamDecl TaskPrivatesArg(
3717       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3718       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3719       ImplicitParamDecl::Other);
3720   Args.push_back(&TaskPrivatesArg);
3721   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3722   unsigned Counter = 1;
3723   for (const Expr *E : PrivateVars) {
3724     Args.push_back(ImplicitParamDecl::Create(
3725         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3726         C.getPointerType(C.getPointerType(E->getType()))
3727             .withConst()
3728             .withRestrict(),
3729         ImplicitParamDecl::Other));
3730     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3731     PrivateVarsPos[VD] = Counter;
3732     ++Counter;
3733   }
3734   for (const Expr *E : FirstprivateVars) {
3735     Args.push_back(ImplicitParamDecl::Create(
3736         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3737         C.getPointerType(C.getPointerType(E->getType()))
3738             .withConst()
3739             .withRestrict(),
3740         ImplicitParamDecl::Other));
3741     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3742     PrivateVarsPos[VD] = Counter;
3743     ++Counter;
3744   }
3745   for (const Expr *E : LastprivateVars) {
3746     Args.push_back(ImplicitParamDecl::Create(
3747         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3748         C.getPointerType(C.getPointerType(E->getType()))
3749             .withConst()
3750             .withRestrict(),
3751         ImplicitParamDecl::Other));
3752     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3753     PrivateVarsPos[VD] = Counter;
3754     ++Counter;
3755   }
3756   const auto &TaskPrivatesMapFnInfo =
3757       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3758   llvm::FunctionType *TaskPrivatesMapTy =
3759       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3760   std::string Name =
3761       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3762   auto *TaskPrivatesMap = llvm::Function::Create(
3763       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3764       &CGM.getModule());
3765   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3766                                     TaskPrivatesMapFnInfo);
3767   if (CGM.getLangOpts().Optimize) {
3768     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3769     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3770     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3771   }
3772   CodeGenFunction CGF(CGM);
3773   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3774                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3775 
3776   // *privi = &.privates.privi;
3777   LValue Base = CGF.EmitLoadOfPointerLValue(
3778       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3779       TaskPrivatesArg.getType()->castAs<PointerType>());
3780   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3781   Counter = 0;
3782   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3783     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3784     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3785     LValue RefLVal =
3786         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3787     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3788         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3789     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3790     ++Counter;
3791   }
3792   CGF.FinishFunction();
3793   return TaskPrivatesMap;
3794 }
3795 
/// Emit initialization for private variables in task-based directives.
///
/// Walks \p Privates in lock-step with the fields of the privates record
/// (the second field of the task descriptor \p TDBase) and emits the
/// initializer of each private copy that has one. Firstprivate copies are
/// initialized from the captured shareds addressed by \p KmpTaskSharedsPtr.
/// When \p ForDup is true this is emitted for the task_dup routine, where
/// only non-trivial C++ construction is repeated.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Re-point FI at the first field *inside* the privates record; it is
  // advanced in lock-step with the Privates loop below.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In task_dup (ForDup) only non-trivial C++ construction is re-emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source value from the shareds of the source task,
          // using the original variable's alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: privatize the init element to the
          // shared value and emit the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private variable: just emit its default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3912 
3913 /// Check if duplication function is required for taskloops.
3914 static bool checkInitIsRequired(CodeGenFunction &CGF,
3915                                 ArrayRef<PrivateDataTy> Privates) {
3916   bool InitRequired = false;
3917   for (const PrivateDataTy &Pair : Privates) {
3918     const VarDecl *VD = Pair.second.PrivateCopy;
3919     const Expr *Init = VD->getAnyInitializer();
3920     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3921                                     !CGF.isTrivialInitializer(Init));
3922     if (InitRequired)
3923       break;
3924   }
3925   return InitRequired;
3926 }
3927 
3928 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: void(task_dst, task_src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Dereference the destination task argument.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivate copies are initialized from the *source* task's shareds
    // pointer, loaded here for emitPrivatesInit below.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4007 
4008 /// Checks if destructor function is required to be generated.
4009 /// \return true if cleanups are required, false otherwise.
4010 static bool
4011 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4012   bool NeedsCleanup = false;
4013   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4014   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4015   for (const FieldDecl *FD : PrivateRD->fields()) {
4016     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4017     if (NeedsCleanup)
4018       break;
4019   }
4020   return NeedsCleanup;
4021 }
4022 
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper built on OMPPrivateScope: the constructor privatizes the
/// iterator and counter variables and emits the loop headers (counter
/// zero-init, bound check, branch into the body) for every iterator in the
/// expression; the destructor emits the matching latches (counter update,
/// back-edge to the header) and exit blocks in reverse order. Code emitted
/// between construction and destruction therefore executes once per point of
/// the combined iteration space. A null expression makes the scope a no-op.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator loop-header ("cont") and loop-exit jump destinations,
  // consumed by the destructor in reverse order.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate all upper bounds up front, before any loop is opened, so
      // their side effects happen exactly once.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Open one nested loop per iterator, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signed vs. unsigned comparison is chosen from the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring the constructor.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4101 
4102 static std::pair<llvm::Value *, llvm::Value *>
4103 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4104   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4105   llvm::Value *Addr;
4106   if (OASE) {
4107     const Expr *Base = OASE->getBase();
4108     Addr = CGF.EmitScalarExpr(Base);
4109   } else {
4110     Addr = CGF.EmitLValue(E).getPointer(CGF);
4111   }
4112   llvm::Value *SizeVal;
4113   QualType Ty = E->getType();
4114   if (OASE) {
4115     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4116     for (const Expr *SE : OASE->getDimensions()) {
4117       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4118       Sz = CGF.EmitScalarConversion(
4119           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4120       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4121     }
4122   } else if (const auto *ASE =
4123                  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4124     LValue UpAddrLVal =
4125         CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4126     llvm::Value *UpAddr =
4127         CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
4128     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4129     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4130     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4131   } else {
4132     SizeVal = CGF.getTypeSize(Ty);
4133   }
4134   return std::make_pair(Addr, SizeVal);
4135 }
4136 
/// Builds the implicit kmp_task_affinity_info_t record type, if it is not
/// built yet. (The previous comment said "kmp_depend_info"; that was a
/// copy-paste from getDependTypes -- this routine builds the affinity record.)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  // Type of the "flags" field: unsigned 32-bit integer.
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    // struct kmp_task_affinity_info_t {
    //   intptr_t base_addr; size_t len; uint32_t flags;
    // };
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
4151 
/// Allocates and initializes a kmp_task_t object for the task-based directive
/// \p D: aggregates the private/firstprivate/lastprivate copies, builds the
/// specialized kmp_task_t_with_privates record, allocates the task via
/// __kmpc_omp_task_alloc (or __kmpc_omp_target_task_alloc for nowait target
/// directives), copies the shareds, emits private initialization, and wires
/// up the detach event, affinity registration, destructor thunk and priority.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the original variable as the element
  // initializer.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Stable sort in decreasing alignment order (first member of the pair is
  // the declared alignment).
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // larger record than plain task/target directives, cached separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the task function; its
  // LLVM type is reused for the cast/null below.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // Request the destructors thunk only if some private needs destruction.
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final(...) condition may only be known at run time; in that case the
  // FinalFlag is selected dynamically, otherwise it is folded into a constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator-modified clauses contribute a runtime count (product of the
    // iterator upper bounds); plain clauses contribute a static count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime count: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Static count: a constant-sized array temporary suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Iterator-modified clauses need a runtime position counter, seeded with
    // the number of statically-filled elements.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops may need a task-duplication thunk for lastprivates or
    // privates with required initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4534 
namespace {
/// Dependence kind for RTL.
/// NOTE(review): these values are presumably mirrored from the OpenMP
/// runtime's dependence flag encoding (kmp.h) -- confirm before changing.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record, in declaration order.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4545 
4546 /// Translates internal dependency kind into the runtime kind.
4547 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4548   RTLDependenceKindTy DepKind;
4549   switch (K) {
4550   case OMPC_DEPEND_in:
4551     DepKind = DepIn;
4552     break;
4553   // Out and InOut dependencies must use the same code.
4554   case OMPC_DEPEND_out:
4555   case OMPC_DEPEND_inout:
4556     DepKind = DepInOut;
4557     break;
4558   case OMPC_DEPEND_mutexinoutset:
4559     DepKind = DepMutexInOutSet;
4560     break;
4561   case OMPC_DEPEND_source:
4562   case OMPC_DEPEND_sink:
4563   case OMPC_DEPEND_depobj:
4564   case OMPC_DEPEND_unknown:
4565     llvm_unreachable("Unknown task dependence type");
4566   }
4567   return DepKind;
4568 }
4569 
4570 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4571 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4572                            QualType &FlagsTy) {
4573   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4574   if (KmpDependInfoTy.isNull()) {
4575     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4576     KmpDependInfoRD->startDefinition();
4577     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4578     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4579     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4580     KmpDependInfoRD->completeDefinition();
4581     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4582   }
4583 }
4584 
/// Extracts the dependency array referenced by a depobj variable.
/// \p DepobjLVal is the lvalue of the depobj, which stores a void* pointing
/// at the first kmp_depend_info element; the element immediately preceding
/// it (index -1) carries the number of dependencies in its base_addr field.
/// \returns the number of dependencies and an lvalue for the first element.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the stored void* and reinterpret it as kmp_depend_info*.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the slot that holds the dependency count.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4613 
/// Emits initialization of kmp_depend_info array elements for the
/// dependencies described by \p Data into \p DependenciesArray.
/// \p Pos is either a compile-time element index (unsigned*) when the number
/// of dependencies is statically known, or the lvalue of a runtime counter
/// when the depend clause uses an iterator modifier; in both cases it is
/// advanced past the emitted elements. When an iterator is present, the body
/// runs once per point of the iteration space (see
/// OMPIteratorGeneratorScope).
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    // Address the destination element either by constant index or by the
    // runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
                  DependenciesArray.getAlignment()),
          KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance whichever kind of position counter is in use.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4672 
/// Emits the number of dependency records stored in each depobj object of a
/// 'depend(depobj: ...)' clause. For every expression in \p Data.DepExprs the
/// returned vector contains one integer value holding the element count that
/// the depobj allocation keeps in the record at index -1 (see
/// emitDepobjDependClause, which writes that count on creation).
static SmallVector<llvm::Value *, 4>
emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                        const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  {
    // The depobj expressions may reference iterator variables, so the loads
    // must happen inside the generated iterator loops. Accumulate into
    // temporaries here and read the totals back after the scope ends.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The element count lives in the kmp_depend_info record that precedes
      // the first dependency entry, i.e. at index -1.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // The temp is zero-initialized at the alloca point, so if this body is
      // emitted inside iterator loops the additions accumulate across all
      // iterations.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Read the accumulated totals back, outside of the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4730 
/// Copies the dependency records stored in the depobj objects of a
/// 'depend(depobj: ...)' clause into \p DependenciesArray, starting at the
/// running element index kept in \p PosLVal. \p PosLVal is advanced past the
/// copied entries so subsequent copies append after them.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Emit inside the iterator scope so depobj expressions may reference
    // iterator variables.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      // The count is stored in the base_addr field of the record at index -1.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data: NumDeps whole kmp_depend_info records.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4791 
4792 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4793     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4794     SourceLocation Loc) {
4795   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4796         return D.DepExprs.empty();
4797       }))
4798     return std::make_pair(nullptr, Address::invalid());
4799   // Process list of dependencies.
4800   ASTContext &C = CGM.getContext();
4801   Address DependenciesArray = Address::invalid();
4802   llvm::Value *NumOfElements = nullptr;
4803   unsigned NumDependencies = std::accumulate(
4804       Dependencies.begin(), Dependencies.end(), 0,
4805       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4806         return D.DepKind == OMPC_DEPEND_depobj
4807                    ? V
4808                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4809       });
4810   QualType FlagsTy;
4811   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4812   bool HasDepobjDeps = false;
4813   bool HasRegularWithIterators = false;
4814   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4815   llvm::Value *NumOfRegularWithIterators =
4816       llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4817   // Calculate number of depobj dependecies and regular deps with the iterators.
4818   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4819     if (D.DepKind == OMPC_DEPEND_depobj) {
4820       SmallVector<llvm::Value *, 4> Sizes =
4821           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4822       for (llvm::Value *Size : Sizes) {
4823         NumOfDepobjElements =
4824             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4825       }
4826       HasDepobjDeps = true;
4827       continue;
4828     }
4829     // Include number of iterations, if any.
4830     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4831       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4832         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4833         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4834         NumOfRegularWithIterators =
4835             CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
4836       }
4837       HasRegularWithIterators = true;
4838       continue;
4839     }
4840   }
4841 
4842   QualType KmpDependInfoArrayTy;
4843   if (HasDepobjDeps || HasRegularWithIterators) {
4844     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4845                                            /*isSigned=*/false);
4846     if (HasDepobjDeps) {
4847       NumOfElements =
4848           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4849     }
4850     if (HasRegularWithIterators) {
4851       NumOfElements =
4852           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4853     }
4854     OpaqueValueExpr OVE(Loc,
4855                         C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4856                         VK_RValue);
4857     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
4858                                                   RValue::get(NumOfElements));
4859     KmpDependInfoArrayTy =
4860         C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
4861                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4862     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4863     // Properly emit variable-sized array.
4864     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4865                                          ImplicitParamDecl::Other);
4866     CGF.EmitVarDecl(*PD);
4867     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4868     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4869                                               /*isSigned=*/false);
4870   } else {
4871     KmpDependInfoArrayTy = C.getConstantArrayType(
4872         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4873         ArrayType::Normal, /*IndexTypeQuals=*/0);
4874     DependenciesArray =
4875         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4876     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4877     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4878                                            /*isSigned=*/false);
4879   }
4880   unsigned Pos = 0;
4881   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4882     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4883         Dependencies[I].IteratorExpr)
4884       continue;
4885     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4886                    DependenciesArray);
4887   }
4888   // Copy regular dependecies with iterators.
4889   LValue PosLVal = CGF.MakeAddrLValue(
4890       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4891   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4892   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4893     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4894         !Dependencies[I].IteratorExpr)
4895       continue;
4896     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4897                    DependenciesArray);
4898   }
4899   // Copy final depobj arrays without iterators.
4900   if (HasDepobjDeps) {
4901     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4902       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4903         continue;
4904       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4905                          DependenciesArray);
4906     }
4907   }
4908   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4909       DependenciesArray, CGF.VoidPtrTy);
4910   return std::make_pair(NumOfElements, DependenciesArray);
4911 }
4912 
/// Emits the allocation and initialization of an omp_depend_t object for the
/// 'depobj' construct. The array is allocated via __kmpc_alloc with one extra
/// leading kmp_depend_info record that stores the element count (needed by
/// 'depobj(x) update(...)' and 'depobj(x) destroy'); the returned address
/// points past that header record.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // With an iterator modifier the count is the runtime product of all
    // iterator spaces. (A depend clause on 'depobj' carries a single locator,
    // so no multiplication by the number of expressions is needed.)
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // No iterators: the count is known at compile time.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // The payload starts at index 1 (past the header record); with iterators a
  // runtime counter tracks the position, otherwise a compile-time index does.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
4995 
4996 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4997                                         SourceLocation Loc) {
4998   ASTContext &C = CGM.getContext();
4999   QualType FlagsTy;
5000   getDependTypes(C, KmpDependInfoTy, FlagsTy);
5001   LValue Base = CGF.EmitLoadOfPointerLValue(
5002       DepobjLVal.getAddress(CGF),
5003       C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5004   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5005   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5006       Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5007   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5008       Addr.getPointer(),
5009       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5010   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5011                                                                CGF.VoidPtrTy);
5012   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5013   // Use default allocator.
5014   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5015   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5016 
5017   // _kmpc_free(gtid, addr, nullptr);
5018   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5019                                 CGM.getModule(), OMPRTL___kmpc_free),
5020                             Args);
5021 }
5022 
/// Implements 'depobj(x) update(NewDepKind)': walks every kmp_depend_info
/// record stored in the depobj and overwrites its flags field with the
/// runtime encoding of \p NewDepKind. Emitted as a manual pointer-walking
/// loop with a PHI over the current element.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  // Recover the element count and the first record of the depobj array.
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): the body executes before the exit test, so this assumes the
  // depobj holds at least one record.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5068 
/// Emits code for the 'task' directive: allocates and initializes the
/// kmp_task_t object, emits its dependency list (if any), and enqueues or
/// (under a false 'if' clause) immediately executes the task.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate the task object and its outlined entry point.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'if' clause true (or absent): enqueue via __kmpc_omp_task[_with_deps].
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start at part id 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // 'if' clause false: wait for dependences, then run the task body inline
  // between __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5186 
/// Emits code for the 'taskloop' directive: allocates the kmp_task_t object,
/// initializes its lower/upper bound, stride and reductions fields, and
/// invokes __kmpc_taskloop with the 'if', nogroup, schedule and task-dup
/// arguments.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lower bound field from the loop's LB variable.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the task's upper bound field from the loop's UB variable.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the task's stride field from the loop's stride variable.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Schedule kind encoding for the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5272 
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded to \p RedOpGen
/// (used by atomic reduction codegen).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Guard against a zero-length array: skip the body entirely when empty.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source (RHS) and destination (LHS) elements.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so RedOpGen's
  // expressions operate element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5352 
5353 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5354 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5355 /// UDR combiner function.
5356 static void emitReductionCombiner(CodeGenFunction &CGF,
5357                                   const Expr *ReductionOp) {
5358   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5359     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5360       if (const auto *DRE =
5361               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5362         if (const auto *DRD =
5363                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5364           std::pair<llvm::Function *, llvm::Function *> Reduction =
5365               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5366           RValue Func = RValue::get(Reduction.first);
5367           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5368           CGF.EmitIgnoredExpr(ReductionOp);
5369           return;
5370         }
5371   CGF.EmitIgnoredExpr(ReductionOp);
5372 }
5373 
/// Emits the reduction function:
/// \code
/// void .omp.reduction.reduction_func(void *LHSArg, void *RHSArg) {
///   *(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
///   ...
/// }
/// \endcode
/// \param ArgsType Pointer-to-array type used to reinterpret the two void*
/// arguments as arrays of item addresses.
/// \param Privates Private copies of the reduction items; used to detect
/// array and variably-modified (VLA) items.
/// \param LHSExprs/RHSExprs References to the lhs/rhs variables the combiner
/// expressions in \p ReductionOps operate on.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap every lhs/rhs reduction variable to the corresponding element of
  // the argument arrays before emitting the combiners.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type. For a VLA item the next array slot
      // carries the element count encoded as a pointer-sized integer.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit the combiner for every reduction item; array items get an
  // element-by-element loop.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5465 
5466 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5467                                                   const Expr *ReductionOp,
5468                                                   const Expr *PrivateRef,
5469                                                   const DeclRefExpr *LHS,
5470                                                   const DeclRefExpr *RHS) {
5471   if (PrivateRef->getType()->isArrayType()) {
5472     // Emit reduction for array section.
5473     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5474     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5475     EmitOMPAggregateReduction(
5476         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5477         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5478           emitReductionCombiner(CGF, ReductionOp);
5479         });
5480   } else {
5481     // Emit reduction for array subscript or single variable.
5482     emitReductionCombiner(CGF, ReductionOp);
5483   }
5484 }
5485 
/// Emits the finalization code for reduction clauses: builds the RedList of
/// item addresses, calls __kmpc_reduce{_nowait} and dispatches on its result
/// (case 1: tree reduction + __kmpc_end_reduce{_nowait}; case 2: atomic
/// reduction). With Options.SimpleReduction only the plain combiners are
/// emitted.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // VLA items take one extra slot that carries the runtime array size.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size. Encoded as a pointer so it fits the void* array.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // The CommonActionTy below appends the end_reduce call after the combiners.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // Placeholder opcode; replaced below when the update RHS is recognized
      // as a binary operator.
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      // When the combiner was recognized as 'x = <x binop e>' emit a simple
      // atomic update; otherwise fall back to a critical region.
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path: materialize the loaded value in a temporary
                // remapped over VD and re-emit the update expression.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5792 
5793 /// Generates unique name for artificial threadprivate variables.
5794 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5795 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5796                                       const Expr *Ref) {
5797   SmallString<256> Buffer;
5798   llvm::raw_svector_ostream Out(Buffer);
5799   const clang::DeclRefExpr *DE;
5800   const VarDecl *D = ::getBaseDecl(Ref, DE);
5801   if (!D)
5802     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5803   D = D->getCanonicalDecl();
5804   std::string Name = CGM.getOpenMPRuntime().getName(
5805       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5806   Out << Prefix << Name << "_"
5807       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5808   return std::string(Out.str());
5809 }
5810 
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction item generator; provides sizes, initializer and
/// aggregate-type emission for the item.
/// \param N Index of the reduction item being handled.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer); otherwise pass a null pointer.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5879 
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param RCG Reduction item generator; provides sizes and aggregate-type
/// emission for the item.
/// \param N Index of the reduction item being handled.
/// \param ReductionOp Combiner expression to emit.
/// \param LHS/RHS References to the lhs/rhs variables used inside
/// \p ReductionOp; remapped here onto the function arguments.
/// \param PrivateRef Private copy reference, used to detect array items.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5957 
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \param RCG Reduction item generator; provides sizes, cleanup emission and
/// aggregate-type emission for the item.
/// \param N Index of the reduction item being handled.
/// \return The finalizer function, or nullptr when the item needs no cleanup.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is emitted for items without cleanups (e.g. trivially
  // destructible data).
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6006 
// Emits the runtime setup for task reductions: builds an on-stack array of
// kmp_taskred_input_t descriptors (one per reduction item, each carrying the
// shared/original addresses, the item size, and pointers to compiler-emitted
// init/fini/combiner helpers) and hands it to either
// __kmpc_taskred_modifier_init (reduction with a task modifier) or
// __kmpc_taskred_init. Returns the opaque taskgroup pointer produced by the
// runtime, or nullptr when there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to do without an insertion point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill in one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    // The finalizer may be null when the item needs no cleanup; the runtime
    // slot then gets a null pointer.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    // Flag value 1 requests delayed creation of the item (see comment above);
    // otherwise the flags field is zero-initialized.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6135 
6136 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6137                                             SourceLocation Loc,
6138                                             bool IsWorksharingReduction) {
6139   // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6140   // is_ws, int num, void *data);
6141   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6142   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6143                                                 CGM.IntTy, /*isSigned=*/true);
6144   llvm::Value *Args[] = {IdentTLoc, GTid,
6145                          llvm::ConstantInt::get(CGM.IntTy,
6146                                                 IsWorksharingReduction ? 1 : 0,
6147                                                 /*isSigned=*/true)};
6148   (void)CGF.EmitRuntimeCall(
6149       OMPBuilder.getOrCreateRuntimeFunction(
6150           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6151       Args);
6152 }
6153 
6154 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6155                                               SourceLocation Loc,
6156                                               ReductionCodeGen &RCG,
6157                                               unsigned N) {
6158   auto Sizes = RCG.getSizes(N);
6159   // Emit threadprivate global variable if the type is non-constant
6160   // (Sizes.second = nullptr).
6161   if (Sizes.second) {
6162     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6163                                                      /*isSigned=*/false);
6164     Address SizeAddr = getAddrOfArtificialThreadPrivate(
6165         CGF, CGM.getContext().getSizeType(),
6166         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6167     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6168   }
6169 }
6170 
6171 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6172                                               SourceLocation Loc,
6173                                               llvm::Value *ReductionsPtr,
6174                                               LValue SharedLVal) {
6175   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6176   // *d);
6177   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6178                                                    CGM.IntTy,
6179                                                    /*isSigned=*/true),
6180                          ReductionsPtr,
6181                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6182                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6183   return Address(
6184       CGF.EmitRuntimeCall(
6185           OMPBuilder.getOrCreateRuntimeFunction(
6186               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6187           Args),
6188       SharedLVal.getAlignment());
6189 }
6190 
6191 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6192                                        SourceLocation Loc) {
6193   if (!CGF.HaveInsertPoint())
6194     return;
6195 
6196   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
6197     OMPBuilder.CreateTaskwait(CGF.Builder);
6198   } else {
6199     // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6200     // global_tid);
6201     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6202     // Ignore return result until untied tasks are supported.
6203     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6204                             CGM.getModule(), OMPRTL___kmpc_omp_taskwait),
6205                         Args);
6206   }
6207 
6208   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6209     Region->emitUntiedSwitch(CGF);
6210 }
6211 
6212 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6213                                            OpenMPDirectiveKind InnerKind,
6214                                            const RegionCodeGenTy &CodeGen,
6215                                            bool HasCancel) {
6216   if (!CGF.HaveInsertPoint())
6217     return;
6218   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6219   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6220 }
6221 
namespace {
/// Cancellation kinds passed verbatim as the kmp_int32 cncl_kind argument of
/// the __kmpc_cancel / __kmpc_cancellationpoint runtime calls (see
/// emitCancelCall / emitCancellationPointCall below), so the numeric values
/// must stay in sync with the OpenMP runtime.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6231 
6232 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6233   RTCancelKind CancelKind = CancelNoreq;
6234   if (CancelRegion == OMPD_parallel)
6235     CancelKind = CancelParallel;
6236   else if (CancelRegion == OMPD_for)
6237     CancelKind = CancelLoop;
6238   else if (CancelRegion == OMPD_sections)
6239     CancelKind = CancelSections;
6240   else {
6241     assert(CancelRegion == OMPD_taskgroup);
6242     CancelKind = CancelTaskgroup;
6243   }
6244   return CancelKind;
6245 }
6246 
// Emits a '#pragma omp cancellation point' check: calls
// __kmpc_cancellationpoint and, when it reports an active cancellation,
// branches out of the enclosing construct through any pending cleanups.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      // A nonzero result means cancellation is active for this region.
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      // Branch through cleanups to the cancel destination of the region that
      // owns the cancellation.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6283 
// Emits a '#pragma omp cancel' construct: calls __kmpc_cancel (optionally
// guarded by the directive's 'if' clause) and, when the runtime reports an
// active cancellation, branches out of the enclosing construct through any
// pending cleanups.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The actual cancel emission; reused for both the guarded ('if' clause)
    // and unguarded cases below.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      // A nonzero result means cancellation is active for this region.
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'if' clause present: the cancel call runs only when the condition is
      // true; the else branch is a no-op.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6326 
6327 namespace {
6328 /// Cleanup action for uses_allocators support.
6329 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6330   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6331 
6332 public:
6333   OMPUsesAllocatorsActionTy(
6334       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6335       : Allocators(Allocators) {}
6336   void Enter(CodeGenFunction &CGF) override {
6337     if (!CGF.HaveInsertPoint())
6338       return;
6339     for (const auto &AllocatorData : Allocators) {
6340       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6341           CGF, AllocatorData.first, AllocatorData.second);
6342     }
6343   }
6344   void Exit(CodeGenFunction &CGF) override {
6345     if (!CGF.HaveInsertPoint())
6346       return;
6347     for (const auto &AllocatorData : Allocators) {
6348       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6349                                                         AllocatorData.first);
6350     }
6351   }
6352 };
6353 } // namespace
6354 
6355 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6356     const OMPExecutableDirective &D, StringRef ParentName,
6357     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6358     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6359   assert(!ParentName.empty() && "Invalid target region parent name!");
6360   HasEmittedTargetRegion = true;
6361   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6362   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6363     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6364       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6365       if (!D.AllocatorTraits)
6366         continue;
6367       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6368     }
6369   }
6370   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6371   CodeGen.setAction(UsesAllocatorAction);
6372   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6373                                    IsOffloadEntry, CodeGen);
6374 }
6375 
// Initializes one allocator from a uses_allocators clause: emits a call to
// __kmpc_init_allocator(gtid, memspace, ntraits, traits) and stores the
// returned handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The trait count is the constant array bound of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as void ** for the runtime call,
  // preserving the original base/TBAA info.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Emit the allocator variable itself, then store the handle into it after
  // converting from void * to the variable's declared type.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6410 
6411 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6412                                              const Expr *Allocator) {
6413   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6414   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6415   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6416   llvm::Value *AllocatorVal =
6417       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6418   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6419                                           CGF.getContext().VoidPtrTy,
6420                                           Allocator->getExprLoc());
6421   (void)CGF.EmitRuntimeCall(
6422       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6423                                             OMPRTL___kmpc_destroy_allocator),
6424       {ThreadId, AllocatorVal});
6425 }
6426 
// Outlines a target region into a uniquely named entry function and, when it
// is an offload entry, creates its region ID and registers it with the
// offload entries manager.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement into the entry function using a fresh
  // CodeGenFunction with the target-region info installed.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host the ID is a uniquely named constant byte whose address
    // serves as the region identifier.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6493 
6494 /// Checks if the expression is constant or does not have non-trivial function
6495 /// calls.
6496 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6497   // We can skip constant expressions.
6498   // We can skip expressions with trivial calls or simple expressions.
6499   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6500           !E->hasNonTrivialCall(Ctx)) &&
6501          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6502 }
6503 
// Looks through compound statements (and other containers) to find the single
// statement of \p Body that matters for codegen, skipping trivial
// expressions, asm/null statements, a few OpenMP directives, and declarations
// of trivial or constexpr variables. Returns nullptr when more than one
// significant child remains.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep peeling nested compound statements as long as they reduce to one
  // significant child.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable only if every declaration in it is: various
        // non-variable declarations, or variables that are constexpr, or of
        // trivial/reference type with a trivial (or absent) initializer.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6548 
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look for a single teams/parallel/simd directive nested
    // directly inside the captured region.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the num_teams clause expression in the context of the
          // enclosing captured statement.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Teams without num_teams: 0 lets the runtime choose.
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // Could not identify a unique nested directive.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the num_teams clause, if any, sits on the
    // directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct involved: exactly one team.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6681 
/// Compute an upper bound on the number of threads required by the region
/// captured in \p CS, combining nested clauses with \p DefaultThreadLimitVal
/// (an already-computed thread_limit value, or null if none is known).
///
/// If the single child of the captured statement is a parallel directive,
/// its if/num_threads clauses are folded together with the default limit.
/// A simd-only child needs exactly one thread. Otherwise the default limit
/// is returned as-is — note that this may be null, which callers treat as
/// "no bound computed".
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the if clause that applies to 'parallel': either an
        // unmodified clause or one with the 'parallel' name modifier.
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the region runs with one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the pre-init declarations the condition depends on.
            // Declarations carrying OMPCaptureNoInit only get storage and
            // cleanups here; their initialization happens elsewhere.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Same pre-init emission dance as for the if clause above.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads by the thread limit: unsigned min of the two.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads: fall back to the limit, or 0 (no explicit bound).
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A simd-only nested region executes on a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    // NOTE: may return null when no limit was supplied.
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6773 
6774 /// Emit the number of threads for a target directive.  Inspect the
6775 /// thread_limit clause associated with a teams construct combined or closely
6776 /// nested with the target directive.
6777 ///
6778 /// Emit the num_threads clause for directives such as 'target parallel' that
6779 /// have no associated teams construct.
6780 ///
6781 /// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': inspect the nested directive(s) for clauses that
    // determine the thread count.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A thread_limit clause on the nested directive caps the count. Its
      // expression may need pre-init declarations emitted first.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // Descend through a non-distribute teams directive to find the
      // directive that actually controls the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A non-simd distribute: recurse into its captured statement.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // simd-only regions need exactly one thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    // 0 means no explicit bound was computed.
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested plain 'distribute' as well.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically false: a single thread suffices.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Combine thread_limit and num_threads: unsigned min of the two.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd-only target regions run a single thread.
    return Bld.getInt32(1);
  // All remaining kinds are either not target constructs or cannot reach
  // this function (checked by the assert on entry).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
6998 
6999 namespace {
7000 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7001 
7002 // Utility to handle information from clauses associated with a given
7003 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7004 // It provides a convenient interface to obtain the information and generate
7005 // code for that information.
7006 class MappableExprsHandler {
7007 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  /// NOTE(review): these values appear to form part of the interface with the
  /// offloading runtime (they are passed as map-type bits) — keep them in
  /// sync with the runtime's definitions.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class. Use getFlagMemberOffset() to locate this bit-field.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7048 
7049   /// Get the offset of the OMP_MAP_MEMBER_OF field.
7050   static unsigned getFlagMemberOffset() {
7051     unsigned Offset = 0;
7052     for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7053          Remain = Remain >> 1)
7054       Offset++;
7055     return Offset;
7056   }
7057 
7058   /// Class that associates information with a base pointer to be passed to the
7059   /// runtime library.
7060   class BasePointerInfo {
7061     /// The base pointer.
7062     llvm::Value *Ptr = nullptr;
7063     /// The base declaration that refers to this device pointer, or null if
7064     /// there is none.
7065     const ValueDecl *DevPtrDecl = nullptr;
7066 
7067   public:
7068     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7069         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7070     llvm::Value *operator*() const { return Ptr; }
7071     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7072     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7073   };
7074 
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Field index and address of the lowest mapped member of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Field index and address of the highest mapped member of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Address of the whole struct being (partially) mapped.
    Address Base = Address::invalid();
  };
7090 
private:
  /// Information gathered for one mappable-expression component list: the
  /// components themselves plus the map type/modifiers and flags that control
  /// how the corresponding entry is emitted.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // Whether the runtime should return a device pointer for this entry.
    bool ReturnDevicePointer = false;
    // Whether the map was compiler-generated rather than user-written.
    bool IsImplicit = false;
    // Presumably set for use_device_addr (vs. use_device_ptr) entries — see
    // DeferredDevicePtrEntryTy; confirm against the callers that build this.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
        bool IsImplicit, bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          ForDeviceAddr(ForDeviceAddr) {}
  };
7111 
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // The member expression whose entry was deferred.
    const Expr *IE = nullptr;
    // The declaration named by the use_device_* clause.
    const ValueDecl *VD = nullptr;
    // True for use_device_addr, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
7124 
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;
7145 
  /// Emit IR computing the size in bytes of the storage designated by \p E,
  /// handling array shaping expressions and array sections specially (a
  /// section's size is derived from its length/bounds, not from its type).
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression: element size times
    // the product of all the dimension extents.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      // Determine the element size from the pointee or array element type.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Lower bound only, open upper end: size = sizeof(base) - lb * elemsize,
      // clamped to zero if lb points past the end.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
7220 
7221   /// Return the corresponding bits for a given map clause modifier. Add
7222   /// a flag marking the map as a pointer if requested. Add a flag marking the
7223   /// map as the first one of a series of maps that relate to the same map
7224   /// expression.
7225   OpenMPOffloadMappingFlags getMapTypeBits(
7226       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7227       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7228     OpenMPOffloadMappingFlags Bits =
7229         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7230     switch (MapType) {
7231     case OMPC_MAP_alloc:
7232     case OMPC_MAP_release:
7233       // alloc and release is the default behavior in the runtime library,  i.e.
7234       // if we don't pass any bits alloc/release that is what the runtime is
7235       // going to do. Therefore, we don't need to signal anything for these two
7236       // type modifiers.
7237       break;
7238     case OMPC_MAP_to:
7239       Bits |= OMP_MAP_TO;
7240       break;
7241     case OMPC_MAP_from:
7242       Bits |= OMP_MAP_FROM;
7243       break;
7244     case OMPC_MAP_tofrom:
7245       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7246       break;
7247     case OMPC_MAP_delete:
7248       Bits |= OMP_MAP_DELETE;
7249       break;
7250     case OMPC_MAP_unknown:
7251       llvm_unreachable("Unexpected map type!");
7252     }
7253     if (AddPtrFlag)
7254       Bits |= OMP_MAP_PTR_AND_OBJ;
7255     if (AddIsTargetParamFlag)
7256       Bits |= OMP_MAP_TARGET_PARAM;
7257     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7258         != MapModifiers.end())
7259       Bits |= OMP_MAP_ALWAYS;
7260     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7261         != MapModifiers.end())
7262       Bits |= OMP_MAP_CLOSE;
7263     return Bits;
7264   }
7265 
7266   /// Return true if the provided expression is a final array section. A
7267   /// final array section, is one whose length can't be proved to be one.
7268   bool isFinalArraySectionExpression(const Expr *E) const {
7269     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7270 
7271     // It is not an array section and therefore not a unity-size one.
7272     if (!OASE)
7273       return false;
7274 
7275     // An array section with no colon always refer to a single element.
7276     if (OASE->getColonLocFirst().isInvalid())
7277       return false;
7278 
7279     const Expr *Length = OASE->getLength();
7280 
7281     // If we don't have a length we have to check if the array has size 1
7282     // for this dimension. Also, we should always expect a length if the
7283     // base type is pointer.
7284     if (!Length) {
7285       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7286                              OASE->getBase()->IgnoreParenImpCasts())
7287                              .getCanonicalType();
7288       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7289         return ATy->getSize().getSExtValue() != 1;
7290       // If we don't have a constant dimension length, we have to consider
7291       // the current section as having any size, so it is not necessarily
7292       // unitary. If it happen to be unity size, that's user fault.
7293       return true;
7294     }
7295 
7296     // Check if the length evaluates to 1.
7297     Expr::EvalResult Result;
7298     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7299       return true; // Can have more that size 1.
7300 
7301     llvm::APSInt ConstLength = Result.Val.getInt();
7302     return ConstLength.getSExtValue() != 1;
7303   }
7304 
7305   /// Generate the base pointers, section pointers, sizes and map type
7306   /// bits for the provided map type, map modifier, and expression components.
7307   /// \a IsFirstComponent should be set to true if the provided set of
7308   /// components is the first associated with a capture.
7309   void generateInfoForComponentList(
7310       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7311       OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7312       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7313       MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7314       StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7315       bool IsImplicit, bool ForDeviceAddr = false,
7316       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7317           OverlappedElements = llvm::None) const {
7318     // The following summarizes what has to be generated for each map and the
7319     // types below. The generated information is expressed in this order:
7320     // base pointer, section pointer, size, flags
7321     // (to add to the ones that come from the map type and modifier).
7322     //
7323     // double d;
7324     // int i[100];
7325     // float *p;
7326     //
7327     // struct S1 {
7328     //   int i;
7329     //   float f[50];
7330     // }
7331     // struct S2 {
7332     //   int i;
7333     //   float f[50];
7334     //   S1 s;
7335     //   double *p;
7336     //   struct S2 *ps;
7337     // }
7338     // S2 s;
7339     // S2 *ps;
7340     //
7341     // map(d)
7342     // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7343     //
7344     // map(i)
7345     // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7346     //
7347     // map(i[1:23])
7348     // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7349     //
7350     // map(p)
7351     // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7352     //
7353     // map(p[1:24])
7354     // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7355     //
7356     // map(s)
7357     // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7358     //
7359     // map(s.i)
7360     // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7361     //
7362     // map(s.s.f)
7363     // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7364     //
7365     // map(s.p)
7366     // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7367     //
7368     // map(to: s.p[:22])
7369     // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7370     // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7371     // &(s.p), &(s.p[0]), 22*sizeof(double),
7372     //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7373     // (*) alloc space for struct members, only this is a target parameter
7374     // (**) map the pointer (nothing to be mapped in this example) (the compiler
7375     //      optimizes this entry out, same in the examples below)
7376     // (***) map the pointee (map: to)
7377     //
7378     // map(s.ps)
7379     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7380     //
7381     // map(from: s.ps->s.i)
7382     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7383     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7384     // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
7385     //
7386     // map(to: s.ps->ps)
7387     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7388     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7389     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
7390     //
7391     // map(s.ps->ps->ps)
7392     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7393     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7394     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7395     // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7396     //
7397     // map(to: s.ps->ps->s.f[:22])
7398     // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7399     // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7400     // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7401     // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7402     //
7403     // map(ps)
7404     // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7405     //
7406     // map(ps->i)
7407     // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7408     //
7409     // map(ps->s.f)
7410     // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7411     //
7412     // map(from: ps->p)
7413     // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7414     //
7415     // map(to: ps->p[:22])
7416     // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7417     // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7418     // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7419     //
7420     // map(ps->ps)
7421     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7422     //
7423     // map(from: ps->ps->s.i)
7424     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7425     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7426     // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7427     //
7428     // map(from: ps->ps->ps)
7429     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7430     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7431     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7432     //
7433     // map(ps->ps->ps->ps)
7434     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7435     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7436     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7437     // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7438     //
7439     // map(to: ps->ps->ps->s.f[:22])
7440     // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7441     // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7442     // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7443     // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7444     //
7445     // map(to: s.f[:22]) map(from: s.p[:33])
7446     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7447     //     sizeof(double*) (**), TARGET_PARAM
7448     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7449     // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7450     // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7451     // (*) allocate contiguous space needed to fit all mapped members even if
7452     //     we allocate space for members not mapped (in this example,
7453     //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
7454     //     them as well because they fall between &s.f[0] and &s.p)
7455     //
7456     // map(from: s.f[:22]) map(to: ps->p[:33])
7457     // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7458     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7459     // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7460     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7461     // (*) the struct this entry pertains to is the 2nd element in the list of
7462     //     arguments, hence MEMBER_OF(2)
7463     //
7464     // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7465     // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7466     // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7467     // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7468     // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7469     // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7470     // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7471     // (*) the struct this entry pertains to is the 4th element in the list
7472     //     of arguments, hence MEMBER_OF(4)
7473 
7474     // Track if the map information being generated is the first for a capture.
7475     bool IsCaptureFirstInfo = IsFirstComponentList;
7476     // When the variable is on a declare target link or in a to clause with
7477     // unified memory, a reference is needed to hold the host/device address
7478     // of the variable.
7479     bool RequiresReference = false;
7480 
7481     // Scan the components from the base to the complete expression.
7482     auto CI = Components.rbegin();
7483     auto CE = Components.rend();
7484     auto I = CI;
7485 
7486     // Track if the map information being generated is the first for a list of
7487     // components.
7488     bool IsExpressionFirstInfo = true;
7489     Address BP = Address::invalid();
7490     const Expr *AssocExpr = I->getAssociatedExpression();
7491     const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7492     const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7493     const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7494 
7495     if (isa<MemberExpr>(AssocExpr)) {
7496       // The base is the 'this' pointer. The content of the pointer is going
7497       // to be the base of the field being mapped.
7498       BP = CGF.LoadCXXThisAddress();
7499     } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7500                (OASE &&
7501                 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7502       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7503     } else if (OAShE &&
7504                isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7505       BP = Address(
7506           CGF.EmitScalarExpr(OAShE->getBase()),
7507           CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7508     } else {
7509       // The base is the reference to the variable.
7510       // BP = &Var.
7511       BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7512       if (const auto *VD =
7513               dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7514         if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7515                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7516           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7517               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7518                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7519             RequiresReference = true;
7520             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7521           }
7522         }
7523       }
7524 
7525       // If the variable is a pointer and is being dereferenced (i.e. is not
7526       // the last component), the base has to be the pointer itself, not its
7527       // reference. References are ignored for mapping purposes.
7528       QualType Ty =
7529           I->getAssociatedDeclaration()->getType().getNonReferenceType();
7530       if (Ty->isAnyPointerType() && std::next(I) != CE) {
7531         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7532 
7533         // We do not need to generate individual map information for the
7534         // pointer, it can be associated with the combined storage.
7535         ++I;
7536       }
7537     }
7538 
7539     // Track whether a component of the list should be marked as MEMBER_OF some
7540     // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7541     // in a component list should be marked as MEMBER_OF, all subsequent entries
7542     // do not belong to the base struct. E.g.
7543     // struct S2 s;
7544     // s.ps->ps->ps->f[:]
7545     //   (1) (2) (3) (4)
7546     // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7547     // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7548     // is the pointee of ps(2) which is not member of struct s, so it should not
7549     // be marked as such (it is still PTR_AND_OBJ).
7550     // The variable is initialized to false so that PTR_AND_OBJ entries which
7551     // are not struct members are not considered (e.g. array of pointers to
7552     // data).
7553     bool ShouldBeMemberOf = false;
7554 
7555     // Variable keeping track of whether or not we have encountered a component
7556     // in the component list which is a member expression. Useful when we have a
7557     // pointer or a final array section, in which case it is the previous
7558     // component in the list which tells us whether we have a member expression.
7559     // E.g. X.f[:]
7560     // While processing the final array section "[:]" it is "f" which tells us
7561     // whether we are dealing with a member of a declared struct.
7562     const MemberExpr *EncounteredME = nullptr;
7563 
7564     for (; I != CE; ++I) {
7565       // If the current component is member of a struct (parent struct) mark it.
7566       if (!EncounteredME) {
7567         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7568         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7569         // as MEMBER_OF the parent struct.
7570         if (EncounteredME)
7571           ShouldBeMemberOf = true;
7572       }
7573 
7574       auto Next = std::next(I);
7575 
7576       // We need to generate the addresses and sizes if this is the last
7577       // component, if the component is a pointer or if it is an array section
7578       // whose length can't be proved to be one. If this is a pointer, it
7579       // becomes the base address for the following components.
7580 
7581       // A final array section, is one whose length can't be proved to be one.
7582       bool IsFinalArraySection =
7583           isFinalArraySectionExpression(I->getAssociatedExpression());
7584 
7585       // Get information on whether the element is a pointer. Have to do a
7586       // special treatment for array sections given that they are built-in
7587       // types.
7588       const auto *OASE =
7589           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7590       const auto *OAShE =
7591           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7592       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7593       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7594       bool IsPointer =
7595           OAShE ||
7596           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7597                        .getCanonicalType()
7598                        ->isAnyPointerType()) ||
7599           I->getAssociatedExpression()->getType()->isAnyPointerType();
7600       bool IsNonDerefPointer = IsPointer && !UO && !BO;
7601 
7602       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
7603         // If this is not the last component, we expect the pointer to be
7604         // associated with an array expression or member expression.
7605         assert((Next == CE ||
7606                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7607                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7608                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7609                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7610                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7611                "Unexpected expression");
7612 
7613         Address LB = Address::invalid();
7614         if (OAShE) {
7615           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
7616                        CGF.getContext().getTypeAlignInChars(
7617                            OAShE->getBase()->getType()));
7618         } else {
7619           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7620                    .getAddress(CGF);
7621         }
7622 
7623         // If this component is a pointer inside the base struct then we don't
7624         // need to create any entry for it - it will be combined with the object
7625         // it is pointing to into a single PTR_AND_OBJ entry.
7626         bool IsMemberPointerOrAddr =
7627             (IsPointer || ForDeviceAddr) && EncounteredME &&
7628             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7629              EncounteredME);
7630         if (!OverlappedElements.empty()) {
7631           // Handle base element with the info for overlapped elements.
7632           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7633           assert(Next == CE &&
7634                  "Expected last element for the overlapped elements.");
7635           assert(!IsPointer &&
7636                  "Unexpected base element with the pointer type.");
7637           // Mark the whole struct as the struct that requires allocation on the
7638           // device.
7639           PartialStruct.LowestElem = {0, LB};
7640           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7641               I->getAssociatedExpression()->getType());
7642           Address HB = CGF.Builder.CreateConstGEP(
7643               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7644                                                               CGF.VoidPtrTy),
7645               TypeSize.getQuantity() - 1);
7646           PartialStruct.HighestElem = {
7647               std::numeric_limits<decltype(
7648                   PartialStruct.HighestElem.first)>::max(),
7649               HB};
7650           PartialStruct.Base = BP;
7651           // Emit data for non-overlapped data.
7652           OpenMPOffloadMappingFlags Flags =
7653               OMP_MAP_MEMBER_OF |
7654               getMapTypeBits(MapType, MapModifiers, IsImplicit,
7655                              /*AddPtrFlag=*/false,
7656                              /*AddIsTargetParamFlag=*/false);
7657           LB = BP;
7658           llvm::Value *Size = nullptr;
7659           // Do bitcopy of all non-overlapped structure elements.
7660           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7661                    Component : OverlappedElements) {
7662             Address ComponentLB = Address::invalid();
7663             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7664                  Component) {
7665               if (MC.getAssociatedDeclaration()) {
7666                 ComponentLB =
7667                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7668                         .getAddress(CGF);
7669                 Size = CGF.Builder.CreatePtrDiff(
7670                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7671                     CGF.EmitCastToVoidPtr(LB.getPointer()));
7672                 break;
7673               }
7674             }
7675             BasePointers.push_back(BP.getPointer());
7676             Pointers.push_back(LB.getPointer());
7677             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7678                                                       /*isSigned=*/true));
7679             Types.push_back(Flags);
7680             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7681           }
7682           BasePointers.push_back(BP.getPointer());
7683           Pointers.push_back(LB.getPointer());
7684           Size = CGF.Builder.CreatePtrDiff(
7685               CGF.EmitCastToVoidPtr(
7686                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7687               CGF.EmitCastToVoidPtr(LB.getPointer()));
7688           Sizes.push_back(
7689               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7690           Types.push_back(Flags);
7691           break;
7692         }
7693         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7694         if (!IsMemberPointerOrAddr) {
7695           BasePointers.push_back(BP.getPointer());
7696           Pointers.push_back(LB.getPointer());
7697           Sizes.push_back(
7698               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7699 
7700           // We need to add a pointer flag for each map that comes from the
7701           // same expression except for the first one. We also need to signal
7702           // this map is the first one that relates with the current capture
7703           // (there is a set of entries for each capture).
7704           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7705               MapType, MapModifiers, IsImplicit,
7706               !IsExpressionFirstInfo || RequiresReference,
7707               IsCaptureFirstInfo && !RequiresReference);
7708 
7709           if (!IsExpressionFirstInfo) {
7710             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7711             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7712             if (IsPointer)
7713               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7714                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
7715 
7716             if (ShouldBeMemberOf) {
7717               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7718               // should be later updated with the correct value of MEMBER_OF.
7719               Flags |= OMP_MAP_MEMBER_OF;
7720               // From now on, all subsequent PTR_AND_OBJ entries should not be
7721               // marked as MEMBER_OF.
7722               ShouldBeMemberOf = false;
7723             }
7724           }
7725 
7726           Types.push_back(Flags);
7727         }
7728 
7729         // If we have encountered a member expression so far, keep track of the
7730         // mapped member. If the parent is "*this", then the value declaration
7731         // is nullptr.
7732         if (EncounteredME) {
7733           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7734           unsigned FieldIndex = FD->getFieldIndex();
7735 
7736           // Update info about the lowest and highest elements for this struct
7737           if (!PartialStruct.Base.isValid()) {
7738             PartialStruct.LowestElem = {FieldIndex, LB};
7739             if (IsFinalArraySection) {
7740               Address HB =
7741                   CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7742                       .getAddress(CGF);
7743               PartialStruct.HighestElem = {FieldIndex, HB};
7744             } else {
7745               PartialStruct.HighestElem = {FieldIndex, LB};
7746             }
7747             PartialStruct.Base = BP;
7748           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7749             PartialStruct.LowestElem = {FieldIndex, LB};
7750           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7751             PartialStruct.HighestElem = {FieldIndex, LB};
7752           }
7753         }
7754 
7755         // If we have a final array section, we are done with this expression.
7756         if (IsFinalArraySection)
7757           break;
7758 
7759         // The pointer becomes the base for the next element.
7760         if (Next != CE)
7761           BP = LB;
7762 
7763         IsExpressionFirstInfo = false;
7764         IsCaptureFirstInfo = false;
7765       }
7766     }
7767   }
7768 
7769   /// Return the adjusted map modifiers if the declaration a capture refers to
7770   /// appears in a first-private clause. This is expected to be used only with
7771   /// directives that start with 'target'.
7772   MappableExprsHandler::OpenMPOffloadMappingFlags
7773   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7774     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7775 
7776     // A first private variable captured by reference will use only the
7777     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7778     // declaration is known as first-private in this handler.
7779     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7780       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7781           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7782         return MappableExprsHandler::OMP_MAP_ALWAYS |
7783                MappableExprsHandler::OMP_MAP_TO;
7784       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7785         return MappableExprsHandler::OMP_MAP_TO |
7786                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7787       return MappableExprsHandler::OMP_MAP_PRIVATE |
7788              MappableExprsHandler::OMP_MAP_TO;
7789     }
7790     return MappableExprsHandler::OMP_MAP_TO |
7791            MappableExprsHandler::OMP_MAP_FROM;
7792   }
7793 
7794   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7795     // Rotate by getFlagMemberOffset() bits.
7796     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7797                                                   << getFlagMemberOffset());
7798   }
7799 
7800   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7801                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7802     // If the entry is PTR_AND_OBJ but has not been marked with the special
7803     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7804     // marked as MEMBER_OF.
7805     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7806         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7807       return;
7808 
7809     // Reset the placeholder value to prepare the flag for the assignment of the
7810     // proper MEMBER_OF value.
7811     Flags &= ~OMP_MAP_MEMBER_OF;
7812     Flags |= MemberOfFlag;
7813   }
7814 
  /// Flatten the layout of record \p RD (bases first, then fields) into
  /// \p Layout, recursing into non-virtual and virtual base classes so the
  /// result lists every FieldDecl in LLVM-struct field order. \p AsBase
  /// selects the base-subobject LLVM type (no tail padding) instead of the
  /// complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    // Scratch table indexed by LLVM field number; each slot holds either a
    // base class or a field occupying that position (or null for padding).
    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases: they have no LLVM field of their own.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases. A slot already taken by a non-virtual base is
    // left as-is.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the flattened layout: recurse into bases (as base subobjects) and
    // append fields in field-number order, skipping padding slots.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
7874 
7875 public:
  /// Constructor for an executable directive. Pre-extracts the clause
  /// information the handler needs later: first-private declarations
  /// (explicit and implicit) and is_device_ptr component lists.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information; remember whether each entry
    // came from an implicit clause.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses: either the
    // allocator-traits variable (when present) or the allocator variable
    // itself (when it is a plain variable reference).
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information, keyed by declaration.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }
7901 
  /// Constructor for the declare mapper directive. Only records the directive
  /// and the CodeGen function; unlike the executable-directive constructor,
  /// no clause information is pre-extracted here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
7905 
7906   /// Generate code for the combined entry if we have a partially mapped struct
7907   /// and take care of the mapping flags of the arguments corresponding to
7908   /// individual struct members.
7909   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7910                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7911                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7912                          const StructRangeInfoTy &PartialStruct) const {
7913     // Base is the base of the struct
7914     BasePointers.push_back(PartialStruct.Base.getPointer());
7915     // Pointer is the address of the lowest element
7916     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7917     Pointers.push_back(LB);
7918     // Size is (addr of {highest+1} element) - (addr of lowest element)
7919     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7920     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7921     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7922     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7923     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7924     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7925                                                   /*isSigned=*/false);
7926     Sizes.push_back(Size);
7927     // Map type is always TARGET_PARAM
7928     Types.push_back(OMP_MAP_TARGET_PARAM);
7929     // Remove TARGET_PARAM flag from the first element
7930     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7931 
7932     // All other current entries will be MEMBER_OF the combined entry
7933     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7934     // 0xFFFF in the MEMBER_OF field).
7935     OpenMPOffloadMappingFlags MemberOfFlag =
7936         getMemberOfFlag(BasePointers.size() - 1);
7937     for (auto &M : CurTypes)
7938       setCorrectMemberOfFlag(M, MemberOfFlag);
7939   }
7940 
7941   /// Generate all the base pointers, section pointers, sizes and map
7942   /// types for the extracted mappable expressions. Also, for each item that
7943   /// relates with a device pointer, a pair of the relevant declaration and
7944   /// index where it occurs is appended to the device pointers info array.
7945   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7946                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7947                        MapFlagsArrayTy &Types) const {
7948     // We have to process the component lists that relate with the same
7949     // declaration in a single chunk so that we can generate the map flags
7950     // correctly. Therefore, we organize all lists in a map.
7951     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7952 
7953     // Helper function to fill the information map for the different supported
7954     // clauses.
7955     auto &&InfoGen =
7956         [&Info](const ValueDecl *D,
7957                 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7958                 OpenMPMapClauseKind MapType,
7959                 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7960                 bool ReturnDevicePointer, bool IsImplicit,
7961                 bool ForDeviceAddr = false) {
7962           const ValueDecl *VD =
7963               D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7964           Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7965                                 IsImplicit, ForDeviceAddr);
7966         };
7967 
7968     assert(CurDir.is<const OMPExecutableDirective *>() &&
7969            "Expect a executable directive");
7970     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7971     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7972       for (const auto L : C->component_lists()) {
7973         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7974             /*ReturnDevicePointer=*/false, C->isImplicit());
7975       }
7976     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7977       for (const auto L : C->component_lists()) {
7978         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7979             /*ReturnDevicePointer=*/false, C->isImplicit());
7980       }
7981     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7982       for (const auto L : C->component_lists()) {
7983         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7984             /*ReturnDevicePointer=*/false, C->isImplicit());
7985       }
7986 
7987     // Look at the use_device_ptr clause information and mark the existing map
7988     // entries as such. If there is no map information for an entry in the
7989     // use_device_ptr list, we create one with map type 'alloc' and zero size
7990     // section. It is the user fault if that was not mapped before. If there is
7991     // no map information and the pointer is a struct member, then we defer the
7992     // emission of that entry until the whole struct has been processed.
7993     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7994         DeferredInfo;
7995 
7996     for (const auto *C :
7997          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
7998       for (const auto L : C->component_lists()) {
7999         assert(!L.second.empty() && "Not expecting empty list of components!");
8000         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8001         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8002         const Expr *IE = L.second.back().getAssociatedExpression();
8003         // If the first component is a member expression, we have to look into
8004         // 'this', which maps to null in the map of map information. Otherwise
8005         // look directly for the information.
8006         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8007 
8008         // We potentially have map information for this declaration already.
8009         // Look for the first set of components that refer to it.
8010         if (It != Info.end()) {
8011           auto CI = std::find_if(
8012               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
8013                 return MI.Components.back().getAssociatedDeclaration() == VD;
8014               });
8015           // If we found a map entry, signal that the pointer has to be returned
8016           // and move on to the next declaration.
8017           if (CI != It->second.end()) {
8018             CI->ReturnDevicePointer = true;
8019             continue;
8020           }
8021         }
8022 
8023         // We didn't find any match in our map information - generate a zero
8024         // size array section - if the pointer is a struct member we defer this
8025         // action until the whole struct has been processed.
8026         if (isa<MemberExpr>(IE)) {
8027           // Insert the pointer into Info to be processed by
8028           // generateInfoForComponentList. Because it is a member pointer
8029           // without a pointee, no entry will be generated for it, therefore
8030           // we need to generate one after the whole struct has been processed.
8031           // Nonetheless, generateInfoForComponentList must be called to take
8032           // the pointer into account for the calculation of the range of the
8033           // partial struct.
8034           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8035                   /*ReturnDevicePointer=*/false, C->isImplicit());
8036           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8037         } else {
8038           llvm::Value *Ptr =
8039               CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8040           BasePointers.emplace_back(Ptr, VD);
8041           Pointers.push_back(Ptr);
8042           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8043           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8044         }
8045       }
8046     }
8047 
8048     // Look at the use_device_addr clause information and mark the existing map
8049     // entries as such. If there is no map information for an entry in the
8050     // use_device_addr list, we create one with map type 'alloc' and zero size
8051     // section. It is the user fault if that was not mapped before. If there is
8052     // no map information and the pointer is a struct member, then we defer the
8053     // emission of that entry until the whole struct has been processed.
8054     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8055     for (const auto *C :
8056          CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
8057       for (const auto L : C->component_lists()) {
8058         assert(!L.second.empty() && "Not expecting empty list of components!");
8059         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8060         if (!Processed.insert(VD).second)
8061           continue;
8062         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8063         const Expr *IE = L.second.back().getAssociatedExpression();
8064         // If the first component is a member expression, we have to look into
8065         // 'this', which maps to null in the map of map information. Otherwise
8066         // look directly for the information.
8067         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8068 
8069         // We potentially have map information for this declaration already.
8070         // Look for the first set of components that refer to it.
8071         if (It != Info.end()) {
8072           auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
8073             return MI.Components.back().getAssociatedDeclaration() == VD;
8074           });
8075           // If we found a map entry, signal that the pointer has to be returned
8076           // and move on to the next declaration.
8077           if (CI != It->second.end()) {
8078             CI->ReturnDevicePointer = true;
8079             continue;
8080           }
8081         }
8082 
8083         // We didn't find any match in our map information - generate a zero
8084         // size array section - if the pointer is a struct member we defer this
8085         // action until the whole struct has been processed.
8086         if (isa<MemberExpr>(IE)) {
8087           // Insert the pointer into Info to be processed by
8088           // generateInfoForComponentList. Because it is a member pointer
8089           // without a pointee, no entry will be generated for it, therefore
8090           // we need to generate one after the whole struct has been processed.
8091           // Nonetheless, generateInfoForComponentList must be called to take
8092           // the pointer into account for the calculation of the range of the
8093           // partial struct.
8094           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
8095                   /*ReturnDevicePointer=*/false, C->isImplicit(),
8096                   /*ForDeviceAddr=*/true);
8097           DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8098         } else {
8099           llvm::Value *Ptr;
8100           if (IE->isGLValue())
8101             Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8102           else
8103             Ptr = CGF.EmitScalarExpr(IE);
8104           BasePointers.emplace_back(Ptr, VD);
8105           Pointers.push_back(Ptr);
8106           Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8107           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8108         }
8109       }
8110     }
8111 
8112     for (const auto &M : Info) {
8113       // We need to know when we generate information for the first component
8114       // associated with a capture, because the mapping flags depend on it.
8115       bool IsFirstComponentList = true;
8116 
8117       // Temporary versions of arrays
8118       MapBaseValuesArrayTy CurBasePointers;
8119       MapValuesArrayTy CurPointers;
8120       MapValuesArrayTy CurSizes;
8121       MapFlagsArrayTy CurTypes;
8122       StructRangeInfoTy PartialStruct;
8123 
8124       for (const MapInfo &L : M.second) {
8125         assert(!L.Components.empty() &&
8126                "Not expecting declaration with no component lists.");
8127 
8128         // Remember the current base pointer index.
8129         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8130         generateInfoForComponentList(
8131             L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8132             CurPointers, CurSizes, CurTypes, PartialStruct,
8133             IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
8134 
8135         // If this entry relates with a device pointer, set the relevant
8136         // declaration and add the 'return pointer' flag.
8137         if (L.ReturnDevicePointer) {
8138           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8139                  "Unexpected number of mapped base pointers.");
8140 
8141           const ValueDecl *RelevantVD =
8142               L.Components.back().getAssociatedDeclaration();
8143           assert(RelevantVD &&
8144                  "No relevant declaration related with device pointer??");
8145 
8146           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8147           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8148         }
8149         IsFirstComponentList = false;
8150       }
8151 
8152       // Append any pending zero-length pointers which are struct members and
8153       // used with use_device_ptr or use_device_addr.
8154       auto CI = DeferredInfo.find(M.first);
8155       if (CI != DeferredInfo.end()) {
8156         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8157           llvm::Value *BasePtr;
8158           llvm::Value *Ptr;
8159           if (L.ForDeviceAddr) {
8160             if (L.IE->isGLValue())
8161               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8162             else
8163               Ptr = this->CGF.EmitScalarExpr(L.IE);
8164             BasePtr = Ptr;
8165             // Entry is RETURN_PARAM. Also, set the placeholder value
8166             // MEMBER_OF=FFFF so that the entry is later updated with the
8167             // correct value of MEMBER_OF.
8168             CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8169           } else {
8170             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8171             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8172                                              L.IE->getExprLoc());
8173             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8174             // value MEMBER_OF=FFFF so that the entry is later updated with the
8175             // correct value of MEMBER_OF.
8176             CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8177                                OMP_MAP_MEMBER_OF);
8178           }
8179           CurBasePointers.emplace_back(BasePtr, L.VD);
8180           CurPointers.push_back(Ptr);
8181           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8182         }
8183       }
8184 
8185       // If there is an entry in PartialStruct it means we have a struct with
8186       // individual members mapped. Emit an extra combined entry.
8187       if (PartialStruct.Base.isValid())
8188         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8189                           PartialStruct);
8190 
8191       // We need to append the results of this capture to what we already have.
8192       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8193       Pointers.append(CurPointers.begin(), CurPointers.end());
8194       Sizes.append(CurSizes.begin(), CurSizes.end());
8195       Types.append(CurTypes.begin(), CurTypes.end());
8196     }
8197   }
8198 
8199   /// Generate all the base pointers, section pointers, sizes and map types for
8200   /// the extracted map clauses of user-defined mapper.
8201   void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8202                                 MapValuesArrayTy &Pointers,
8203                                 MapValuesArrayTy &Sizes,
8204                                 MapFlagsArrayTy &Types) const {
8205     assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8206            "Expect a declare mapper directive");
8207     const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8208     // We have to process the component lists that relate with the same
8209     // declaration in a single chunk so that we can generate the map flags
8210     // correctly. Therefore, we organize all lists in a map.
8211     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8212 
8213     // Helper function to fill the information map for the different supported
8214     // clauses.
8215     auto &&InfoGen = [&Info](
8216         const ValueDecl *D,
8217         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8218         OpenMPMapClauseKind MapType,
8219         ArrayRef<OpenMPMapModifierKind> MapModifiers,
8220         bool ReturnDevicePointer, bool IsImplicit) {
8221       const ValueDecl *VD =
8222           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8223       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8224                             IsImplicit);
8225     };
8226 
8227     for (const auto *C : CurMapperDir->clauselists()) {
8228       const auto *MC = cast<OMPMapClause>(C);
8229       for (const auto L : MC->component_lists()) {
8230         InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8231                 /*ReturnDevicePointer=*/false, MC->isImplicit());
8232       }
8233     }
8234 
8235     for (const auto &M : Info) {
8236       // We need to know when we generate information for the first component
8237       // associated with a capture, because the mapping flags depend on it.
8238       bool IsFirstComponentList = true;
8239 
8240       // Temporary versions of arrays
8241       MapBaseValuesArrayTy CurBasePointers;
8242       MapValuesArrayTy CurPointers;
8243       MapValuesArrayTy CurSizes;
8244       MapFlagsArrayTy CurTypes;
8245       StructRangeInfoTy PartialStruct;
8246 
8247       for (const MapInfo &L : M.second) {
8248         assert(!L.Components.empty() &&
8249                "Not expecting declaration with no component lists.");
8250         generateInfoForComponentList(
8251             L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8252             CurPointers, CurSizes, CurTypes, PartialStruct,
8253             IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
8254         IsFirstComponentList = false;
8255       }
8256 
8257       // If there is an entry in PartialStruct it means we have a struct with
8258       // individual members mapped. Emit an extra combined entry.
8259       if (PartialStruct.Base.isValid())
8260         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8261                           PartialStruct);
8262 
8263       // We need to append the results of this capture to what we already have.
8264       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8265       Pointers.append(CurPointers.begin(), CurPointers.end());
8266       Sizes.append(CurSizes.begin(), CurSizes.end());
8267       Types.append(CurTypes.begin(), CurTypes.end());
8268     }
8269   }
8270 
8271   /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda captures need this treatment; bail out for anything that is
    // not a lambda class.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    // Collect the lambda's capture fields (and the 'this' capture field, if
    // present) so each capture can be mapped through its field lvalue.
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the captured 'this' pointer: base is the field inside the lambda
      // object, pointer is the loaded 'this' value. LambdaPointers remembers
      // which lambda object the entry belongs to so that
      // adjustMemberOfForLambdaCaptures can patch MEMBER_OF later.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(ThisLVal.getPointer(CGF));
      Pointers.push_back(ThisLValVal.getPointer(CGF));
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need a map entry.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced storage with its real size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarLValVal.getPointer(CGF));
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy captured pointer: map the pointer value itself with a zero
        // size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }
8334 
8335   /// Set correct indices for lambdas captures.
8336   void adjustMemberOfForLambdaCaptures(
8337       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8338       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8339       MapFlagsArrayTy &Types) const {
8340     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8341       // Set correct member_of idx for all implicit lambda captures.
8342       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8343                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8344         continue;
8345       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8346       assert(BasePtr && "Unable to find base lambda address.");
8347       int TgtIdx = -1;
8348       for (unsigned J = I; J > 0; --J) {
8349         unsigned Idx = J - 1;
8350         if (Pointers[Idx] != BasePtr)
8351           continue;
8352         TgtIdx = Idx;
8353         break;
8354       }
8355       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8356       // All other current entries will be MEMBER_OF the combined entry
8357       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8358       // 0xFFFF in the MEMBER_OF field).
8359       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8360       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8361     }
8362   }
8363 
8364   /// Generate the base pointers, section pointers, sizes and map types
8365   /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // 'this' captures are keyed by a null declaration; everything else by the
    // captured variable's canonical declaration.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Gather every map-clause component list that refers to this capture,
    // together with its map type, modifiers, and implicitness.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // For each component list, collect the other lists that extend it (i.e.
    // share its whole prefix when compared from the base outwards).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Only compare against lists after L; each unordered pair is visited
      // once.
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        // Walk both lists from the base element (reverse order) while the
        // components still refer to the same expression kind and declaration.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The exhausted list is the base (shorter prefix); the other list
          // is recorded as one of its overlapped sub-components.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Build the record's field layout so overlapped member lists can be
    // ordered by field position even across base classes.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Compare component lists from the base element outwards, the
            // same way overlaps were detected above.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by field position: same parent record compares
            // by field index, different parents by which field appears first
            // in the plain layout computed above.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(
          MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
          Types, PartialStruct, IsFirstComponentList, IsImplicit,
          /*ForDeviceAddr=*/false, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8541 
8542   /// Generate the base pointers, section pointers, sizes and map types
8543   /// associated with the declare target link variables.
8544   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8545                                         MapValuesArrayTy &Pointers,
8546                                         MapValuesArrayTy &Sizes,
8547                                         MapFlagsArrayTy &Types) const {
8548     assert(CurDir.is<const OMPExecutableDirective *>() &&
8549            "Expect a executable directive");
8550     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8551     // Map other list items in the map clause which are not captured variables
8552     // but "declare target link" global variables.
8553     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8554       for (const auto L : C->component_lists()) {
8555         if (!L.first)
8556           continue;
8557         const auto *VD = dyn_cast<VarDecl>(L.first);
8558         if (!VD)
8559           continue;
8560         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8561             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8562         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8563             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8564           continue;
8565         StructRangeInfoTy PartialStruct;
8566         generateInfoForComponentList(
8567             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8568             Pointers, Sizes, Types, PartialStruct,
8569             /*IsFirstComponentList=*/true, C->isImplicit());
8570         assert(!PartialStruct.Base.isValid() &&
8571                "No partial structs for declare target link expected.");
8572       }
8573     }
8574   }
8575 
8576   /// Generate the default map information for a given capture \a CI,
8577   /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // Treat the capture as implicit unless FirstPrivateDecls records
    // otherwise (it stores the implicit/explicit flag per declaration).
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': base and pointer are the captured value; the size is that of
      // the pointee class, mapped 'tofrom'.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      // Capture by reference: map the pointee with its natural size.
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate variable: register a global copy and map
        // from that copy rather than from the local capture.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: the pointer component is the value loaded
          // through the captured reference, not the reference itself.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8659 };
8660 } // anonymous namespace
8661 
8662 /// Emit the arrays used to pass the captures and map information to the
8663 /// offloading runtime library. If there is no map or capture information,
8664 /// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    // Type of the base-pointer and pointer arrays: void *[NumberOfPtrs].
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers and pointers are always computed at runtime, so they live
    // in stack temporaries filled by the loop at the bottom of this function.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      // The address is insignificant, which lets the linker/optimizer merge
      // identical constant arrays.
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the runtime-evaluated arrays element by element.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      // Store the I-th base pointer into .offload_baseptrs[I]; the slot is
      // cast to the pointer's own type before the store.
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where this declaration's device pointer was stored so that it
      // can be retrieved later (e.g. for use_device_ptr handling).
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      // Store the I-th pointer into .offload_ptrs[I].
      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes only need per-element stores when at least one of them is not
      // a compile-time constant (otherwise the constant global above is used).
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
8780 
8781 /// Emit the arguments to be passed to the runtime library based on the
8782 /// arrays of pointers, sizes and map types.
8783 static void emitOffloadingArraysArgument(
8784     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8785     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8786     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8787   CodeGenModule &CGM = CGF.CGM;
8788   if (Info.NumberOfPtrs) {
8789     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8790         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8791         Info.BasePointersArray,
8792         /*Idx0=*/0, /*Idx1=*/0);
8793     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8794         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8795         Info.PointersArray,
8796         /*Idx0=*/0,
8797         /*Idx1=*/0);
8798     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8799         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8800         /*Idx0=*/0, /*Idx1=*/0);
8801     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8802         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8803         Info.MapTypesArray,
8804         /*Idx0=*/0,
8805         /*Idx1=*/0);
8806   } else {
8807     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8808     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8809     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8810     MapTypesArrayArg =
8811         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8812   }
8813 }
8814 
8815 /// Check for inner distribute directive.
8816 static const OMPExecutableDirective *
8817 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8818   const auto *CS = D.getInnermostCapturedStmt();
8819   const auto *Body =
8820       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8821   const Stmt *ChildStmt =
8822       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8823 
8824   if (const auto *NestedDir =
8825           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8826     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8827     switch (D.getDirectiveKind()) {
8828     case OMPD_target:
8829       if (isOpenMPDistributeDirective(DKind))
8830         return NestedDir;
8831       if (DKind == OMPD_teams) {
8832         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8833             /*IgnoreCaptured=*/true);
8834         if (!Body)
8835           return nullptr;
8836         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8837         if (const auto *NND =
8838                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8839           DKind = NND->getDirectiveKind();
8840           if (isOpenMPDistributeDirective(DKind))
8841             return NND;
8842         }
8843       }
8844       return nullptr;
8845     case OMPD_target_teams:
8846       if (isOpenMPDistributeDirective(DKind))
8847         return NestedDir;
8848       return nullptr;
8849     case OMPD_target_parallel:
8850     case OMPD_target_simd:
8851     case OMPD_target_parallel_for:
8852     case OMPD_target_parallel_for_simd:
8853       return nullptr;
8854     case OMPD_target_teams_distribute:
8855     case OMPD_target_teams_distribute_simd:
8856     case OMPD_target_teams_distribute_parallel_for:
8857     case OMPD_target_teams_distribute_parallel_for_simd:
8858     case OMPD_parallel:
8859     case OMPD_for:
8860     case OMPD_parallel_for:
8861     case OMPD_parallel_master:
8862     case OMPD_parallel_sections:
8863     case OMPD_for_simd:
8864     case OMPD_parallel_for_simd:
8865     case OMPD_cancel:
8866     case OMPD_cancellation_point:
8867     case OMPD_ordered:
8868     case OMPD_threadprivate:
8869     case OMPD_allocate:
8870     case OMPD_task:
8871     case OMPD_simd:
8872     case OMPD_sections:
8873     case OMPD_section:
8874     case OMPD_single:
8875     case OMPD_master:
8876     case OMPD_critical:
8877     case OMPD_taskyield:
8878     case OMPD_barrier:
8879     case OMPD_taskwait:
8880     case OMPD_taskgroup:
8881     case OMPD_atomic:
8882     case OMPD_flush:
8883     case OMPD_depobj:
8884     case OMPD_scan:
8885     case OMPD_teams:
8886     case OMPD_target_data:
8887     case OMPD_target_exit_data:
8888     case OMPD_target_enter_data:
8889     case OMPD_distribute:
8890     case OMPD_distribute_simd:
8891     case OMPD_distribute_parallel_for:
8892     case OMPD_distribute_parallel_for_simd:
8893     case OMPD_teams_distribute:
8894     case OMPD_teams_distribute_simd:
8895     case OMPD_teams_distribute_parallel_for:
8896     case OMPD_teams_distribute_parallel_for_simd:
8897     case OMPD_target_update:
8898     case OMPD_declare_simd:
8899     case OMPD_declare_variant:
8900     case OMPD_begin_declare_variant:
8901     case OMPD_end_declare_variant:
8902     case OMPD_declare_target:
8903     case OMPD_end_declare_target:
8904     case OMPD_declare_reduction:
8905     case OMPD_declare_mapper:
8906     case OMPD_taskloop:
8907     case OMPD_taskloop_simd:
8908     case OMPD_master_taskloop:
8909     case OMPD_master_taskloop_simd:
8910     case OMPD_parallel_master_taskloop:
8911     case OMPD_parallel_master_taskloop_simd:
8912     case OMPD_requires:
8913     case OMPD_unknown:
8914     default:
8915       llvm_unreachable("Unexpected directive.");
8916     }
8917   }
8918 
8919   return nullptr;
8920 }
8921 
8922 /// Emit the user-defined mapper function. The code generation follows the
8923 /// pattern in the example below.
8924 /// \code
8925 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8926 ///                                           void *base, void *begin,
8927 ///                                           int64_t size, int64_t type) {
8928 ///   // Allocate space for an array section first.
8929 ///   if (size > 1 && !maptype.IsDelete)
8930 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8931 ///                                 size*sizeof(Ty), clearToFrom(type));
8932 ///   // Map members.
8933 ///   for (unsigned i = 0; i < size; i++) {
8934 ///     // For each component specified by this mapper:
8935 ///     for (auto c : all_components) {
8936 ///       if (c.hasMapper())
8937 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8938 ///                       c.arg_type);
8939 ///       else
8940 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8941 ///                                     c.arg_begin, c.arg_size, c.arg_type);
8942 ///     }
8943 ///   }
8944 ///   // Delete the array section.
8945 ///   if (size > 1 && maptype.IsDelete)
8946 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8947 ///                                 size*sizeof(Ty), clearToFrom(type));
8948 /// }
8949 /// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper function is emitted at most once per declaration.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // Restrict-qualified pointer to the mapped type, used to walk the elements.
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature is
  // (void *rt_mapper_handle, void *base, void *begin, int64_t size,
  // int64_t type) as sketched in the \code example above.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function ".omp_mapper.<mangled type>.<mapper id>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Strip optnone (if set) so the mapper body is eligible for optimization.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through \p Size elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block. PtrPHI tracks the element being mapped; its
  // back-edge incoming value is added after the body is emitted.
  MapperCGF.EmitBlock(BodyBB);
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count into the MEMBER_OF bit-field position so that it can be
  // added directly to the map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    // Note: the '4' is only a reserved-operand hint; exactly two incoming
    // values are added here.
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing (MemberMapType flows in from ToElseBB).
    MapperCGF.EmitBlock(EndBB);
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the mapper so later references reuse it; if called from within a
  // function, also record it against that function.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
9202 
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  // NOTE(review): the \code sketch above emitUserDefinedMapper says
  // "size > 1", but the guard emitted here is size >= 1 — confirm intent.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section. The init path proceeds
  // only when the delete bit is clear; the delete path only when it is set.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
9258 
9259 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9260     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9261     llvm::Value *DeviceID,
9262     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9263                                      const OMPLoopDirective &D)>
9264         SizeEmitter) {
9265   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9266   const OMPExecutableDirective *TD = &D;
9267   // Get nested teams distribute kind directive, if any.
9268   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9269     TD = getNestedDistributeDirective(CGM.getContext(), D);
9270   if (!TD)
9271     return;
9272   const auto *LD = cast<OMPLoopDirective>(TD);
9273   auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9274                                                      PrePostActionTy &) {
9275     if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9276       llvm::Value *Args[] = {DeviceID, NumIterations};
9277       CGF.EmitRuntimeCall(
9278           OMPBuilder.getOrCreateRuntimeFunction(
9279               CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
9280           Args);
9281     }
9282   };
9283   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9284 }
9285 
/// Emit the launch sequence for a target directive \p D: capture the kernel
/// arguments, fill the offloading argument arrays, call the
/// __tgt_target[_teams][_nowait] runtime entry, and fall back to calling the
/// host version of \p OutlinedFn when offloading fails, is conditionally
/// disabled via \p IfCond, or no \p OutlinedFnID is available.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A depend clause requires wrapping the launch in an outer task (see
  // EmitOMPTargetTaskBasedDirective uses below).
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // Shared state: filled in by TargetThenGen below and consumed by ThenGen.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait
                                         : OMPRTL___tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(),
              HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Re-capture the variables: the host fallback runs in a fresh context.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the offloading arrays for all captures/map clauses, stores them
  // into InputInfo/MapTypesArray, and then runs ThenGen (directly or wrapped
  // in an outer task when a depend clause is present).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9579 
/// Recursively scan \p S for target execution directives and emit the device
/// function for each one found, using \p ParentName as part of the unique
/// kernel name. Non-target OpenMP directives, lambdas and all other
/// statements are traversed looking for nested target regions.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The unique entry is identified by the device/file/line of the
    // directive's location together with ParentName.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter for the specific combined
    // target directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // The remaining kinds are not target execution directives, so they are
    // unreachable here (RequiresDeviceCodegen guaranteed a target directive).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target OpenMP directives: scan only their associated statement.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9728 
/// Returns true when the function \p GD must not be emitted by the normal
/// codegen path (it is restricted to the other side of the host/device
/// split, or is a device function not marked declare target).
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD);
    // Do not emit device_type(host) functions for the device.
    if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
      return true;
  }

  // Do not emit the declaration on the device unless it is marked as declare
  // target or is already recorded in AlreadyEmittedTargetDecls.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}
9759 
9760 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9761   if (!CGM.getLangOpts().OpenMPIsDevice)
9762     return false;
9763 
9764   // Check if there are Ctors/Dtors in this declaration and look for target
9765   // regions in it. We use the complete variant to produce the kernel name
9766   // mangling.
9767   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9768   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9769     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9770       StringRef ParentName =
9771           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9772       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9773     }
9774     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9775       StringRef ParentName =
9776           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9777       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9778     }
9779   }
9780 
9781   // Do not to emit variable if it is not marked as declare target.
9782   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9783       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9784           cast<VarDecl>(GD.getDecl()));
9785   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9786       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9787        HasRequiresUnifiedSharedMemory)) {
9788     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9789     return true;
9790   }
9791   return false;
9792 }
9793 
9794 llvm::Constant *
9795 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9796                                                 const VarDecl *VD) {
9797   assert(VD->getType().isConstant(CGM.getContext()) &&
9798          "Expected constant variable.");
9799   StringRef VarName;
9800   llvm::Constant *Addr;
9801   llvm::GlobalValue::LinkageTypes Linkage;
9802   QualType Ty = VD->getType();
9803   SmallString<128> Buffer;
9804   {
9805     unsigned DeviceID;
9806     unsigned FileID;
9807     unsigned Line;
9808     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9809                              FileID, Line);
9810     llvm::raw_svector_ostream OS(Buffer);
9811     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9812        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9813     VarName = OS.str();
9814   }
9815   Linkage = llvm::GlobalValue::InternalLinkage;
9816   Addr =
9817       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9818                                   getDefaultFirstprivateAddressSpace());
9819   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9820   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9821   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9822   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9823       VarName, Addr, VarSize,
9824       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9825   return Addr;
9826 }
9827 
9828 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9829                                                    llvm::Constant *Addr) {
9830   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9831       !CGM.getLangOpts().OpenMPIsDevice)
9832     return;
9833   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9834       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9835   if (!Res) {
9836     if (CGM.getLangOpts().OpenMPIsDevice) {
9837       // Register non-target variables being emitted in device code (debug info
9838       // may cause this).
9839       StringRef VarName = CGM.getMangledName(VD);
9840       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9841     }
9842     return;
9843   }
9844   // Register declare target variables.
9845   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9846   StringRef VarName;
9847   CharUnits VarSize;
9848   llvm::GlobalValue::LinkageTypes Linkage;
9849 
9850   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9851       !HasRequiresUnifiedSharedMemory) {
9852     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9853     VarName = CGM.getMangledName(VD);
9854     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9855       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9856       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9857     } else {
9858       VarSize = CharUnits::Zero();
9859     }
9860     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9861     // Temp solution to prevent optimizations of the internal variables.
9862     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9863       std::string RefName = getName({VarName, "ref"});
9864       if (!CGM.GetGlobalValue(RefName)) {
9865         llvm::Constant *AddrRef =
9866             getOrCreateInternalVariable(Addr->getType(), RefName);
9867         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9868         GVAddrRef->setConstant(/*Val=*/true);
9869         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9870         GVAddrRef->setInitializer(Addr);
9871         CGM.addCompilerUsedGlobal(GVAddrRef);
9872       }
9873     }
9874   } else {
9875     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9876             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9877              HasRequiresUnifiedSharedMemory)) &&
9878            "Declare target attribute must link or to with unified memory.");
9879     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9880       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9881     else
9882       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9883 
9884     if (CGM.getLangOpts().OpenMPIsDevice) {
9885       VarName = Addr->getName();
9886       Addr = nullptr;
9887     } else {
9888       VarName = getAddrOfDeclareTargetVar(VD).getName();
9889       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9890     }
9891     VarSize = CGM.getPointerSize();
9892     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9893   }
9894 
9895   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9896       VarName, Addr, VarSize, Flags, Linkage);
9897 }
9898 
9899 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9900   if (isa<FunctionDecl>(GD.getDecl()) ||
9901       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9902     return emitTargetFunctions(GD);
9903 
9904   return emitTargetGlobalVariable(GD);
9905 }
9906 
9907 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9908   for (const VarDecl *VD : DeferredGlobalVariables) {
9909     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9910         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9911     if (!Res)
9912       continue;
9913     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9914         !HasRequiresUnifiedSharedMemory) {
9915       CGM.EmitGlobal(VD);
9916     } else {
9917       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9918               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9919                HasRequiresUnifiedSharedMemory)) &&
9920              "Expected link clause or to clause with unified memory.");
9921       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9922     }
9923   }
9924 }
9925 
9926 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9927     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9928   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9929          " Expected target-based directive.");
9930 }
9931 
9932 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9933   for (const OMPClause *Clause : D->clauselists()) {
9934     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9935       HasRequiresUnifiedSharedMemory = true;
9936     } else if (const auto *AC =
9937                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
9938       switch (AC->getAtomicDefaultMemOrderKind()) {
9939       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
9940         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
9941         break;
9942       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
9943         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
9944         break;
9945       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
9946         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
9947         break;
9948       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
9949         break;
9950       }
9951     }
9952   }
9953 }
9954 
/// Returns the default atomic ordering for this TU, as recorded from any
/// 'atomic_default_mem_order' clause seen by processRequiresDirective.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
9958 
9959 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9960                                                        LangAS &AS) {
9961   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9962     return false;
9963   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9964   switch(A->getAllocatorType()) {
9965   case OMPAllocateDeclAttr::OMPNullMemAlloc:
9966   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9967   // Not supported, fallback to the default mem space.
9968   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9969   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9970   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9971   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9972   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9973   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9974   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9975     AS = LangAS::Default;
9976     return true;
9977   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9978     llvm_unreachable("Expected predefined allocator for the variables with the "
9979                      "static storage.");
9980   }
9981   return false;
9982 }
9983 
/// True if a 'unified_shared_memory' requires clause was seen in this TU
/// (recorded by processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9987 
// RAII that temporarily disables auto-marking of declarations as declare
// target during device compilation. Saves the current ShouldMarkAsGlobal
// flag and clears it; the destructor restores the saved value. Host
// compilations are unaffected.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Save the flag first so the destructor can restore it exactly.
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}
9996 
// Restore the ShouldMarkAsGlobal flag saved by the constructor; only the
// device compilation path touched it, so only that path restores it.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10001 
10002 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10003   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10004     return true;
10005 
10006   const auto *D = cast<FunctionDecl>(GD.getDecl());
10007   // Do not to emit function if it is marked as declare target as it was already
10008   // emitted.
10009   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10010     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10011       if (auto *F = dyn_cast_or_null<llvm::Function>(
10012               CGM.GetGlobalValue(CGM.getMangledName(GD))))
10013         return !F->isDeclaration();
10014       return false;
10015     }
10016     return true;
10017   }
10018 
10019   return !AlreadyEmittedTargetDecls.insert(D).second;
10020 }
10021 
/// Emit a global init-style function that reports the effects of any
/// 'requires' clauses to the offloading runtime via __tgt_register_requires.
/// Returns nullptr when no registration is needed (no offload targets,
/// simd-only mode, device compilation, or no target regions/entries in this
/// TU).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    // The function name is built from the "omp_offloading"/"requires_reg"
    // components by the runtime's name helper.
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // The flags are passed to the runtime as a single i64 bitmask.
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
10063 
10064 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10065                                     const OMPExecutableDirective &D,
10066                                     SourceLocation Loc,
10067                                     llvm::Function *OutlinedFn,
10068                                     ArrayRef<llvm::Value *> CapturedVars) {
10069   if (!CGF.HaveInsertPoint())
10070     return;
10071 
10072   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10073   CodeGenFunction::RunCleanupsScope Scope(CGF);
10074 
10075   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10076   llvm::Value *Args[] = {
10077       RTLoc,
10078       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10079       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10080   llvm::SmallVector<llvm::Value *, 16> RealArgs;
10081   RealArgs.append(std::begin(Args), std::end(Args));
10082   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10083 
10084   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10085       CGM.getModule(), OMPRTL___kmpc_fork_teams);
10086   CGF.EmitRuntimeCall(RTLFn, RealArgs);
10087 }
10088 
10089 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10090                                          const Expr *NumTeams,
10091                                          const Expr *ThreadLimit,
10092                                          SourceLocation Loc) {
10093   if (!CGF.HaveInsertPoint())
10094     return;
10095 
10096   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10097 
10098   llvm::Value *NumTeamsVal =
10099       NumTeams
10100           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10101                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10102           : CGF.Builder.getInt32(0);
10103 
10104   llvm::Value *ThreadLimitVal =
10105       ThreadLimit
10106           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10107                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
10108           : CGF.Builder.getInt32(0);
10109 
10110   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10111   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10112                                      ThreadLimitVal};
10113   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10114                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10115                       PushNumTeamsArgs);
10116 }
10117 
/// Emit the runtime calls bracketing a '#pragma omp target data' region:
/// __tgt_target_data_begin before the body and __tgt_target_data_end after
/// it, honoring the optional 'if' and 'device' clauses. The region body is
/// emitted through \p CodeGen; \p Info carries the offloading arrays from
/// the begin call to the end call.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    // Reuse the arrays recorded in Info by the begin lambda above.
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Emit the begin call, guarded by the 'if' clause when present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Emit the end call, again guarded by the 'if' clause when present.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
10246 
/// Emit the single runtime call for a standalone data-movement directive
/// ('target enter data', 'target exit data', or 'target update'), selecting
/// the matching __tgt_target_data_* entry point (with a _nowait variant when
/// a nowait clause is present) and honoring 'if', 'device', and 'depend'
/// clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  // InputInfo and MapTypesArray are captured by reference: TargetThenGen
  // below fills them in before this lambda runs.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait
                        : OMPRTL___tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait
                        : OMPRTL___tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait
                        : OMPRTL___tgt_target_data_update;
      break;
    // All other directive kinds are listed explicitly (rather than folded
    // into 'default') so that adding a new kind produces a -Wswitch
    // diagnostic here; none of them can reach this function (see the assert
    // at the top).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Build the offloading arrays from the map clauses, then run ThenGen
  // (directly, or wrapped in a task when a depend clause is present).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays to the captures ThenGen reads.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // An 'if' clause guards the whole sequence; the else branch does nothing.
  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10412 
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; defaults to Vector.
    ParamKindTy Kind = Vector;
    /// For Linear/LinearWithVarStride parameters: the stride value emitted
    /// into the vector-variant mangled name (omitted for Linear when == 1).
    llvm::APSInt StrideOrArg;
    /// Alignment emitted as the 'a<N>' mangling token; a zero/unset value
    /// suppresses the token.
    llvm::APSInt Alignment;
  };
} // namespace
10423 
10424 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10425                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10426   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10427   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10428   // of that clause. The VLEN value must be power of 2.
10429   // In other case the notion of the function`s "characteristic data type" (CDT)
10430   // is used to compute the vector length.
10431   // CDT is defined in the following order:
10432   //   a) For non-void function, the CDT is the return type.
10433   //   b) If the function has any non-uniform, non-linear parameters, then the
10434   //   CDT is the type of the first such parameter.
10435   //   c) If the CDT determined by a) or b) above is struct, union, or class
10436   //   type which is pass-by-value (except for the type that maps to the
10437   //   built-in complex data type), the characteristic data type is int.
10438   //   d) If none of the above three cases is applicable, the CDT is int.
10439   // The VLEN is then determined based on the CDT and the size of vector
10440   // register of that ISA for which current vector version is generated. The
10441   // VLEN is computed using the formula below:
10442   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10443   // where vector register size specified in section 3.2.1 Registers and the
10444   // Stack Frame of original AMD64 ABI document.
10445   QualType RetType = FD->getReturnType();
10446   if (RetType.isNull())
10447     return 0;
10448   ASTContext &C = FD->getASTContext();
10449   QualType CDT;
10450   if (!RetType.isNull() && !RetType->isVoidType()) {
10451     CDT = RetType;
10452   } else {
10453     unsigned Offset = 0;
10454     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10455       if (ParamAttrs[Offset].Kind == Vector)
10456         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10457       ++Offset;
10458     }
10459     if (CDT.isNull()) {
10460       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10461         if (ParamAttrs[I + Offset].Kind == Vector) {
10462           CDT = FD->getParamDecl(I)->getType();
10463           break;
10464         }
10465       }
10466     }
10467   }
10468   if (CDT.isNull())
10469     CDT = C.IntTy;
10470   CDT = CDT->getCanonicalTypeUnqualified();
10471   if (CDT->isRecordType() || CDT->isUnionType())
10472     CDT = C.IntTy;
10473   return C.getTypeSize(CDT);
10474 }
10475 
10476 static void
10477 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10478                            const llvm::APSInt &VLENVal,
10479                            ArrayRef<ParamAttrTy> ParamAttrs,
10480                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10481   struct ISADataTy {
10482     char ISA;
10483     unsigned VecRegSize;
10484   };
10485   ISADataTy ISAData[] = {
10486       {
10487           'b', 128
10488       }, // SSE
10489       {
10490           'c', 256
10491       }, // AVX
10492       {
10493           'd', 256
10494       }, // AVX2
10495       {
10496           'e', 512
10497       }, // AVX512
10498   };
10499   llvm::SmallVector<char, 2> Masked;
10500   switch (State) {
10501   case OMPDeclareSimdDeclAttr::BS_Undefined:
10502     Masked.push_back('N');
10503     Masked.push_back('M');
10504     break;
10505   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10506     Masked.push_back('N');
10507     break;
10508   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10509     Masked.push_back('M');
10510     break;
10511   }
10512   for (char Mask : Masked) {
10513     for (const ISADataTy &Data : ISAData) {
10514       SmallString<256> Buffer;
10515       llvm::raw_svector_ostream Out(Buffer);
10516       Out << "_ZGV" << Data.ISA << Mask;
10517       if (!VLENVal) {
10518         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10519         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10520         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10521       } else {
10522         Out << VLENVal;
10523       }
10524       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10525         switch (ParamAttr.Kind){
10526         case LinearWithVarStride:
10527           Out << 's' << ParamAttr.StrideOrArg;
10528           break;
10529         case Linear:
10530           Out << 'l';
10531           if (ParamAttr.StrideOrArg != 1)
10532             Out << ParamAttr.StrideOrArg;
10533           break;
10534         case Uniform:
10535           Out << 'u';
10536           break;
10537         case Vector:
10538           Out << 'v';
10539           break;
10540         }
10541         if (!!ParamAttr.Alignment)
10542           Out << 'a' << ParamAttr.Alignment;
10543       }
10544       Out << '_' << Fn->getName();
10545       Fn->addFnAttr(Out.str());
10546     }
10547   }
10548 }
10549 
// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available
// at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10555 
10556 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10557 ///
10558 /// TODO: Need to implement the behavior for reference marked with a
10559 /// var or no linear modifiers (1.b in the section). For this, we
10560 /// need to extend ParamKindTy to support the linear modifiers.
10561 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10562   QT = QT.getCanonicalType();
10563 
10564   if (QT->isVoidType())
10565     return false;
10566 
10567   if (Kind == ParamKindTy::Uniform)
10568     return false;
10569 
10570   if (Kind == ParamKindTy::Linear)
10571     return false;
10572 
10573   // TODO: Handle linear references with modifiers
10574 
10575   if (Kind == ParamKindTy::LinearWithVarStride)
10576     return false;
10577 
10578   return true;
10579 }
10580 
10581 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10582 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10583   QT = QT.getCanonicalType();
10584   unsigned Size = C.getTypeSize(QT);
10585 
10586   // Only scalars and complex within 16 bytes wide set PVB to true.
10587   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10588     return false;
10589 
10590   if (QT->isFloatingType())
10591     return true;
10592 
10593   if (QT->isIntegerType())
10594     return true;
10595 
10596   if (QT->isPointerType())
10597     return true;
10598 
10599   // TODO: Add support for complex types (section 3.1.2, item 2).
10600 
10601   return false;
10602 }
10603 
10604 /// Computes the lane size (LS) of a return type or of an input parameter,
10605 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10606 /// TODO: Add support for references, section 3.2.1, item 1.
10607 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10608   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10609     QualType PTy = QT.getCanonicalType()->getPointeeType();
10610     if (getAArch64PBV(PTy, C))
10611       return C.getTypeSize(PTy);
10612   }
10613   if (getAArch64PBV(QT, C))
10614     return C.getTypeSize(QT);
10615 
10616   return C.getTypeSize(C.getUIntPtrType());
10617 }
10618 
10619 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10620 // signature of the scalar function, as defined in 3.2.2 of the
10621 // AAVFABI.
10622 static std::tuple<unsigned, unsigned, bool>
10623 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10624   QualType RetType = FD->getReturnType().getCanonicalType();
10625 
10626   ASTContext &C = FD->getASTContext();
10627 
10628   bool OutputBecomesInput = false;
10629 
10630   llvm::SmallVector<unsigned, 8> Sizes;
10631   if (!RetType->isVoidType()) {
10632     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10633     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10634       OutputBecomesInput = true;
10635   }
10636   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10637     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10638     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10639   }
10640 
10641   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10642   // The LS of a function parameter / return value can only be a power
10643   // of 2, starting from 8 bits, up to 128.
10644   assert(std::all_of(Sizes.begin(), Sizes.end(),
10645                      [](unsigned Size) {
10646                        return Size == 8 || Size == 16 || Size == 32 ||
10647                               Size == 64 || Size == 128;
10648                      }) &&
10649          "Invalid size");
10650 
10651   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10652                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10653                          OutputBecomesInput);
10654 }
10655 
10656 /// Mangle the parameter part of the vector function name according to
10657 /// their OpenMP classification. The mangling function is defined in
10658 /// section 3.5 of the AAVFABI.
10659 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10660   SmallString<256> Buffer;
10661   llvm::raw_svector_ostream Out(Buffer);
10662   for (const auto &ParamAttr : ParamAttrs) {
10663     switch (ParamAttr.Kind) {
10664     case LinearWithVarStride:
10665       Out << "ls" << ParamAttr.StrideOrArg;
10666       break;
10667     case Linear:
10668       Out << 'l';
10669       // Don't print the step value if it is not present or if it is
10670       // equal to 1.
10671       if (ParamAttr.StrideOrArg != 1)
10672         Out << ParamAttr.StrideOrArg;
10673       break;
10674     case Uniform:
10675       Out << 'u';
10676       break;
10677     case Vector:
10678       Out << 'v';
10679       break;
10680     }
10681 
10682     if (!!ParamAttr.Alignment)
10683       Out << 'a' << ParamAttr.Alignment;
10684   }
10685 
10686   return std::string(Out.str());
10687 }
10688 
// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  // Build the attribute string: <prefix><isa><mask><vlen>[v]<params>_<name>.
  Out << Prefix << ISA << LMask << VLEN;
  // The extra 'v' token marks the return value also being passed as an
  // input (see getNDSWDS).
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}
10705 
10706 // Helper function to generate the Advanced SIMD names depending on
10707 // the value of the NDS when simdlen is not present.
10708 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10709                                       StringRef Prefix, char ISA,
10710                                       StringRef ParSeq, StringRef MangledName,
10711                                       bool OutputBecomesInput,
10712                                       llvm::Function *Fn) {
10713   switch (NDS) {
10714   case 8:
10715     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10716                          OutputBecomesInput, Fn);
10717     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10718                          OutputBecomesInput, Fn);
10719     break;
10720   case 16:
10721     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10722                          OutputBecomesInput, Fn);
10723     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10724                          OutputBecomesInput, Fn);
10725     break;
10726   case 32:
10727     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10728                          OutputBecomesInput, Fn);
10729     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10730                          OutputBecomesInput, Fn);
10731     break;
10732   case 64:
10733   case 128:
10734     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10735                          OutputBecomesInput, Fn);
10736     break;
10737   default:
10738     llvm_unreachable("Scalar type is too wide.");
10739   }
10740 }
10741 
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
///
/// Validates the user-provided `simdlen` (UserVLEN, 0 if absent) against
/// the target ISA ('n' = Advanced SIMD, 's' = SVE), then attaches one or
/// more "_ZGV..." vector-variant attributes to \p Fn.
// NOTE(review): VecRegSize is accepted but never read in this body —
// confirm whether it is still needed by callers.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No branch-state clause: emit both the unmasked ("N") and
        // masked ("M") variants.
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      // Scalable vectors use the "x" token instead of a numeric VLEN.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10850 
10851 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10852                                               llvm::Function *Fn) {
10853   ASTContext &C = CGM.getContext();
10854   FD = FD->getMostRecentDecl();
10855   // Map params to their positions in function decl.
10856   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10857   if (isa<CXXMethodDecl>(FD))
10858     ParamPositions.try_emplace(FD, 0);
10859   unsigned ParamPos = ParamPositions.size();
10860   for (const ParmVarDecl *P : FD->parameters()) {
10861     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10862     ++ParamPos;
10863   }
10864   while (FD) {
10865     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10866       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10867       // Mark uniform parameters.
10868       for (const Expr *E : Attr->uniforms()) {
10869         E = E->IgnoreParenImpCasts();
10870         unsigned Pos;
10871         if (isa<CXXThisExpr>(E)) {
10872           Pos = ParamPositions[FD];
10873         } else {
10874           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10875                                 ->getCanonicalDecl();
10876           Pos = ParamPositions[PVD];
10877         }
10878         ParamAttrs[Pos].Kind = Uniform;
10879       }
10880       // Get alignment info.
10881       auto NI = Attr->alignments_begin();
10882       for (const Expr *E : Attr->aligneds()) {
10883         E = E->IgnoreParenImpCasts();
10884         unsigned Pos;
10885         QualType ParmTy;
10886         if (isa<CXXThisExpr>(E)) {
10887           Pos = ParamPositions[FD];
10888           ParmTy = E->getType();
10889         } else {
10890           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10891                                 ->getCanonicalDecl();
10892           Pos = ParamPositions[PVD];
10893           ParmTy = PVD->getType();
10894         }
10895         ParamAttrs[Pos].Alignment =
10896             (*NI)
10897                 ? (*NI)->EvaluateKnownConstInt(C)
10898                 : llvm::APSInt::getUnsigned(
10899                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10900                           .getQuantity());
10901         ++NI;
10902       }
10903       // Mark linear parameters.
10904       auto SI = Attr->steps_begin();
10905       auto MI = Attr->modifiers_begin();
10906       for (const Expr *E : Attr->linears()) {
10907         E = E->IgnoreParenImpCasts();
10908         unsigned Pos;
10909         // Rescaling factor needed to compute the linear parameter
10910         // value in the mangled name.
10911         unsigned PtrRescalingFactor = 1;
10912         if (isa<CXXThisExpr>(E)) {
10913           Pos = ParamPositions[FD];
10914         } else {
10915           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10916                                 ->getCanonicalDecl();
10917           Pos = ParamPositions[PVD];
10918           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10919             PtrRescalingFactor = CGM.getContext()
10920                                      .getTypeSizeInChars(P->getPointeeType())
10921                                      .getQuantity();
10922         }
10923         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10924         ParamAttr.Kind = Linear;
10925         // Assuming a stride of 1, for `linear` without modifiers.
10926         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
10927         if (*SI) {
10928           Expr::EvalResult Result;
10929           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10930             if (const auto *DRE =
10931                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10932               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10933                 ParamAttr.Kind = LinearWithVarStride;
10934                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10935                     ParamPositions[StridePVD->getCanonicalDecl()]);
10936               }
10937             }
10938           } else {
10939             ParamAttr.StrideOrArg = Result.Val.getInt();
10940           }
10941         }
10942         // If we are using a linear clause on a pointer, we need to
10943         // rescale the value of linear_step with the byte size of the
10944         // pointee type.
10945         if (Linear == ParamAttr.Kind)
10946           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
10947         ++SI;
10948         ++MI;
10949       }
10950       llvm::APSInt VLENVal;
10951       SourceLocation ExprLoc;
10952       const Expr *VLENExpr = Attr->getSimdlen();
10953       if (VLENExpr) {
10954         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10955         ExprLoc = VLENExpr->getExprLoc();
10956       }
10957       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10958       if (CGM.getTriple().isX86()) {
10959         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10960       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10961         unsigned VLEN = VLENVal.getExtValue();
10962         StringRef MangledName = Fn->getName();
10963         if (CGM.getTarget().hasFeature("sve"))
10964           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10965                                          MangledName, 's', 128, Fn, ExprLoc);
10966         if (CGM.getTarget().hasFeature("neon"))
10967           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10968                                          MangledName, 'n', 128, Fn, ExprLoc);
10969       }
10970     }
10971     FD = FD->getPreviousDecl();
10972   }
10973 }
10974 
namespace {
/// Cleanup action for doacross support.
///
/// Stores the runtime finalization callee and its two arguments (an ident_t*
/// location and the thread id — see emitDoacrossInit) and emits the call
/// when the cleanup scope is exited.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // No insertion point means the surrounding code is unreachable; emit
    // nothing.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
10999 
// Emits the __kmpc_doacross_init call for an ordered(n) loop nest, filling a
// temporary array of kmp_dim descriptors (one per collapsed loop), and pushes
// a cleanup that emits the matching __kmpc_doacross_fini at region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build (once, then cache in KmpDimTy) the implicit record type used to
  // describe each loop dimension to the runtime.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the dims array; the lower bound therefore stays 0 and
  // only upper bound and stride are written below.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // Convert the iteration count to kmp_int64 before storing.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching __kmpc_doacross_fini to run when the region ends
  // (normal exit or exception).
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
11070 
11071 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11072                                           const OMPDependClause *C) {
11073   QualType Int64Ty =
11074       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11075   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11076   QualType ArrayTy = CGM.getContext().getConstantArrayType(
11077       Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11078   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11079   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11080     const Expr *CounterVal = C->getLoopData(I);
11081     assert(CounterVal);
11082     llvm::Value *CntVal = CGF.EmitScalarConversion(
11083         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11084         CounterVal->getExprLoc());
11085     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11086                           /*Volatile=*/false, Int64Ty);
11087   }
11088   llvm::Value *Args[] = {
11089       emitUpdateLocation(CGF, C->getBeginLoc()),
11090       getThreadID(CGF, C->getBeginLoc()),
11091       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11092   llvm::FunctionCallee RTLFn;
11093   if (C->getDependencyKind() == OMPC_DEPEND_source) {
11094     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11095                                                   OMPRTL___kmpc_doacross_post);
11096   } else {
11097     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11098     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11099                                                   OMPRTL___kmpc_doacross_wait);
11100   }
11101   CGF.EmitRuntimeCall(RTLFn, Args);
11102 }
11103 
11104 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11105                                llvm::FunctionCallee Callee,
11106                                ArrayRef<llvm::Value *> Args) const {
11107   assert(Loc.isValid() && "Outlined function call location must be valid.");
11108   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11109 
11110   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11111     if (Fn->doesNotThrow()) {
11112       CGF.EmitNounwindRuntimeCall(Fn, Args);
11113       return;
11114     }
11115   }
11116   CGF.EmitRuntimeCall(Callee, Args);
11117 }
11118 
// Default implementation: forward directly to emitCall with the same
// arguments.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
11124 
11125 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11126   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11127     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11128       HasEmittedDeclareTargetRegion = true;
11129 }
11130 
// Base implementation: the target parameter is ignored and the address of
// the native parameter's local copy is returned unchanged.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
11136 
namespace {
/// Cleanup action for allocate support.
///
/// Stores the deallocation callee and its three arguments (thread id,
/// allocated address, allocator — see getAddressOfLocalVariable) and emits
/// the call when the cleanup scope is exited.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int CleanupArgs = 3;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[CleanupArgs];

public:
  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                       ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == CleanupArgs &&
           "Size of arguments does not match.");
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // No insertion point means the surrounding code is unreachable; emit
    // nothing.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
11162 
// For a variable marked with an `omp allocate` attribute, emit a
// __kmpc_alloc call (with a matching __kmpc_free cleanup) and return the
// resulting address; return Address::invalid() when the default allocation
// should be used instead.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
       AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA-like types: the size is a runtime value that must be rounded up
    // to the declared alignment.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Constant-size types: round up at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy)
;
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, getName({CVD->getName(), ".void.addr"}));
  // Free the allocation with the same thread id and allocator on scope exit.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw void* returned by the runtime to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getName({CVD->getName(), ".addr"}));
  return Address(Addr, Align);
}
11219 
// Pushes a set of the declarations referenced by the directive's
// nontemporal clauses onto NontemporalDeclsStack; the destructor pops it.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  // Nothing to track when the directive has no nontemporal clauses.
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Non-DeclRefExpr references must be members of the current class.
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
11245 
11246 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11247   if (!NeedToPush)
11248     return;
11249   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11250 }
11251 
11252 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11253   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11254 
11255   return llvm::any_of(
11256       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11257       [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11258 }
11259 
11260 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11261     const OMPExecutableDirective &S,
11262     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11263     const {
11264   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11265   // Vars in target/task regions must be excluded completely.
11266   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11267       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11268     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11269     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11270     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11271     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11272       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11273         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11274     }
11275   }
11276   // Exclude vars in private clauses.
11277   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11278     for (const Expr *Ref : C->varlists()) {
11279       if (!Ref->getType()->isScalarType())
11280         continue;
11281       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11282       if (!DRE)
11283         continue;
11284       NeedToCheckForLPCs.insert(DRE->getDecl());
11285     }
11286   }
11287   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11288     for (const Expr *Ref : C->varlists()) {
11289       if (!Ref->getType()->isScalarType())
11290         continue;
11291       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11292       if (!DRE)
11293         continue;
11294       NeedToCheckForLPCs.insert(DRE->getDecl());
11295     }
11296   }
11297   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11298     for (const Expr *Ref : C->varlists()) {
11299       if (!Ref->getType()->isScalarType())
11300         continue;
11301       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11302       if (!DRE)
11303         continue;
11304       NeedToCheckForLPCs.insert(DRE->getDecl());
11305     }
11306   }
11307   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11308     for (const Expr *Ref : C->varlists()) {
11309       if (!Ref->getType()->isScalarType())
11310         continue;
11311       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11312       if (!DRE)
11313         continue;
11314       NeedToCheckForLPCs.insert(DRE->getDecl());
11315     }
11316   }
11317   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11318     for (const Expr *Ref : C->varlists()) {
11319       if (!Ref->getType()->isScalarType())
11320         continue;
11321       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11322       if (!DRE)
11323         continue;
11324       NeedToCheckForLPCs.insert(DRE->getDecl());
11325     }
11326   }
11327   for (const Decl *VD : NeedToCheckForLPCs) {
11328     for (const LastprivateConditionalData &Data :
11329          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11330       if (Data.DeclToUniqueName.count(VD) > 0) {
11331         if (!Data.Disabled)
11332           NeedToAddForLPCsAsDisabled.insert(VD);
11333         break;
11334       }
11335     }
11336   }
11337 }
11338 
// If the directive has a `lastprivate(conditional:)` clause (OpenMP >= 5.0),
// pushes a record mapping each listed declaration to a unique name, together
// with the iteration-variable lvalue, onto LastprivateConditionalStack.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    // Only `conditional` lastprivates participate.
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      // Each decl gets a unique "pl_cond" name used for the helper globals.
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11370 
/// Constructs RAII that disables lastprivate-conditional analysis for a
/// nested region: tryToDisableInnerAnalysis collects declarations tracked by
/// enclosing lastprivate(conditional:) regions that this directive handles
/// itself (the visible part of that helper scans reduction and linear
/// clauses, among others); those decls are pushed as a "Disabled" stack entry
/// so that stores to them inside this region are ignored by the analysis.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Unique names are irrelevant for disabled entries - only membership in
    // DeclToUniqueName is checked by the reference checker.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
11389 
/// Factory returning an RAII object that disables lastprivate-conditional
/// analysis inside the region of directive \p S (delegates to the
/// disabling constructor above).
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
11395 
11396 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11397   if (CGM.getLangOpts().OpenMP < 50)
11398     return;
11399   if (Action == ActionToDo::DisableLastprivateConditional) {
11400     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11401            "Expected list of disabled private vars.");
11402     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11403   }
11404   if (Action == ActionToDo::PushAsLastprivateConditional) {
11405     assert(
11406         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11407         "Expected list of lastprivate conditional vars.");
11408     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11409   }
11410 }
11411 
/// Creates (once per (function, decl) pair) a private helper structure
/// { <decl type> value; char Fired; } used to signal that a lastprivate
/// conditional variable was updated in an inner region. Always resets the
/// Fired flag to 0 and returns the address of the value field, which serves
/// as the private copy of \p VD.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache of helper record types/temporaries.
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build the implicit record and a
    // stack temporary of that type.
    // NOTE(review): "lasprivate" looks like a typo for "lastprivate" (cf. the
    // "struct.lastprivate.conditional" comment further down in this file),
    // but the name appears in emitted IR - confirm no tests depend on it
    // before renaming.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Reuse the previously created record type and temporary.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired = 0; it is set to 1 only when the variable is updated from an inner
  // parallel region (see checkAndEmitLastprivateConditional).
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
11446 
11447 namespace {
11448 /// Checks if the lastprivate conditional variable is referenced in LHS.
11449 class LastprivateConditionalRefChecker final
11450     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11451   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11452   const Expr *FoundE = nullptr;
11453   const Decl *FoundD = nullptr;
11454   StringRef UniqueDeclName;
11455   LValue IVLVal;
11456   llvm::Function *FoundFn = nullptr;
11457   SourceLocation Loc;
11458 
11459 public:
11460   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11461     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11462          llvm::reverse(LPM)) {
11463       auto It = D.DeclToUniqueName.find(E->getDecl());
11464       if (It == D.DeclToUniqueName.end())
11465         continue;
11466       if (D.Disabled)
11467         return false;
11468       FoundE = E;
11469       FoundD = E->getDecl()->getCanonicalDecl();
11470       UniqueDeclName = It->second;
11471       IVLVal = D.IVLVal;
11472       FoundFn = D.Fn;
11473       break;
11474     }
11475     return FoundE == E;
11476   }
11477   bool VisitMemberExpr(const MemberExpr *E) {
11478     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11479       return false;
11480     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11481          llvm::reverse(LPM)) {
11482       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11483       if (It == D.DeclToUniqueName.end())
11484         continue;
11485       if (D.Disabled)
11486         return false;
11487       FoundE = E;
11488       FoundD = E->getMemberDecl()->getCanonicalDecl();
11489       UniqueDeclName = It->second;
11490       IVLVal = D.IVLVal;
11491       FoundFn = D.Fn;
11492       break;
11493     }
11494     return FoundE == E;
11495   }
11496   bool VisitStmt(const Stmt *S) {
11497     for (const Stmt *Child : S->children()) {
11498       if (!Child)
11499         continue;
11500       if (const auto *E = dyn_cast<Expr>(Child))
11501         if (!E->isGLValue())
11502           continue;
11503       if (Visit(Child))
11504         return true;
11505     }
11506     return false;
11507   }
11508   explicit LastprivateConditionalRefChecker(
11509       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11510       : LPM(LPM) {}
11511   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11512   getFoundData() const {
11513     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11514   }
11515 };
11516 } // namespace
11517 
/// Emits the conditional update of the global "last value" copy of a
/// lastprivate conditional variable:
///   #pragma omp critical(<UniqueDeclName>)
///   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
/// where last_iv and last_a are internal globals named from
/// \p UniqueDeclName, iv is the current loop iteration (\p IVLVal) and
/// priv_a is the private copy (\p LVal). In SIMD-only mode the critical
/// region is skipped since no parallel region can exist.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
11604 
/// Called after a store through \p LHS: if LHS refers to a variable tracked
/// by an active lastprivate(conditional:) region, emits the required
/// bookkeeping. When the region belongs to the current function, performs the
/// critical "last value" update; when the variable is owned by an outer
/// function (inner parallel region), instead atomically sets the Fired flag
/// of the helper structure created by emitLastprivateConditionalInit.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    // The private copy's address is the value field of the helper struct, so
    // casting it back to the struct type gives access to the Fired flag.
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    // Atomic store: the flag may be written concurrently by several threads.
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
11647 
11648 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11649     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11650     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11651   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11652     return;
11653   auto Range = llvm::reverse(LastprivateConditionalStack);
11654   auto It = llvm::find_if(
11655       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11656   if (It == Range.end() || It->Fn != CGF.CurFn)
11657     return;
11658   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11659   assert(LPCI != LastprivateConditionalToTypes.end() &&
11660          "Lastprivates must be registered already.");
11661   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11662   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11663   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11664   for (const auto &Pair : It->DeclToUniqueName) {
11665     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11666     if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
11667       continue;
11668     auto I = LPCI->getSecond().find(Pair.first);
11669     assert(I != LPCI->getSecond().end() &&
11670            "Lastprivate must be rehistered already.");
11671     // bool Cmp = priv_a.Fired != 0;
11672     LValue BaseLVal = std::get<3>(I->getSecond());
11673     LValue FiredLVal =
11674         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11675     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11676     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11677     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11678     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11679     // if (Cmp) {
11680     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11681     CGF.EmitBlock(ThenBB);
11682     Address Addr = CGF.GetAddrOfLocalVar(VD);
11683     LValue LVal;
11684     if (VD->getType()->isReferenceType())
11685       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11686                                            AlignmentSource::Decl);
11687     else
11688       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11689                                 AlignmentSource::Decl);
11690     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11691                                      D.getBeginLoc());
11692     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11693     CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11694     // }
11695   }
11696 }
11697 
11698 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11699     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11700     SourceLocation Loc) {
11701   if (CGF.getLangOpts().OpenMP < 50)
11702     return;
11703   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11704   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11705          "Unknown lastprivate conditional variable.");
11706   StringRef UniqueName = It->second;
11707   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11708   // The variable was not updated in the region - exit.
11709   if (!GV)
11710     return;
11711   LValue LPLVal = CGF.MakeAddrLValue(
11712       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11713   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11714   CGF.EmitStoreOfScalar(Res, PrivLVal);
11715 }
11716 
// CGOpenMPSIMDRuntime is the runtime used in SIMD-only mode (see
// LangOpts.OpenMPSimd): the outlining entry points below must never be
// reached and abort if they are.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11736 
// Region/worksharing entry points not reachable in SIMD-only mode - abort if
// called.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11843 
// Threadprivate/flush/task entry points not reachable in SIMD-only mode -
// abort if called.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11884 
/// In SIMD-only mode only simple reductions may occur (asserted below), so
/// delegate directly to the default lowering in CGOpenMPRuntime.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
11893 
// Task-reduction and cancellation entry points not reachable in SIMD-only
// mode - abort if called.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11936 
// Target/offloading entry points not reachable in SIMD-only mode - abort if
// called.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
11961 
/// In SIMD-only mode no global requires target-specific handling; report
/// that the global was not emitted by this runtime.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
11965 
// Teams/target-data/doacross/parameter-translation entry points not reachable
// in SIMD-only mode - abort if called.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12015 }
12016