//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
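
  // Roughly (an illustrative sketch, not emitted verbatim), for an untied
  // task with two scheduling parts, Enter() and emitUntiedSwitch() above
  // cooperate to produce the following structure in the task body:
  //
  //   switch (*part_id) {
  //   case 0: goto .untied.jmp.0;   // initial entry
  //   case 1: goto .untied.jmp.1;   // resume after first re-enqueue
  //   default: goto .untied.done.;  // branches to the return block
  //   }
  //   .untied.jmp.0:                // part 0 of the task body
  //     ...
  //     *part_id = 1;               // switching point: record resume part,
  //     __kmpc_omp_task(...);       // re-enqueue this task, and
  //     return;                     // give the thread back to the runtime
  //   .untied.jmp.1:                // part 1 runs when the task is
  //     ...                         // scheduled again
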
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in the list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application, so it is provided by the client, because only the
/// client has the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
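
// Illustrative usage (a sketch, not code from this file): while the RAII
// object is live, CGF's captured-statement info is replaced by an inlined
// region info so that lookups defer to the enclosing outlined region:
//
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     CGF.EmitStmt(AssociatedStmt); // emitted inside the inlined region
//   } // original CapturedStmtInfo restored here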

/// Values for bit flags used in the ident_t structure to describe its fields.
/// All enumerator values are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
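
// For example (illustrative), the flags word for an implicit barrier at the
// end of a worksharing loop would combine as
//   OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR
// while an explicit '#pragma omp barrier' would use
//   OMP_IDENT_KMPC | OMP_IDENT_BARRIER_EXPL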

/// Describes the ident_t structure that carries a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
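
// A sketch of the payload referenced by IdentField_PSource: the runtime
// expects a semicolon-separated string of the form
//   ";<file>;<function>;<line>;<column>;;"
// e.g. ";main.c;foo;12;3;;", or the ";unknown;unknown;0;0;;" default when no
// location information is available.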

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
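
// Illustrative mapping (a sketch) from 'schedule' clauses to the enumerators
// above:
//   schedule(static)             -> OMP_sch_static
//   schedule(static, 4)          -> OMP_sch_static_chunked
//   schedule(dynamic[, 4])       -> OMP_sch_dynamic_chunked
//   schedule(monotonic: dynamic) -> OMP_sch_dynamic_chunked
//                                   | OMP_sch_modifier_monotonic
//   schedule(static) + ordered   -> OMP_ord_static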

/// A basic class for pre- and post-actions in an advanced codegen sequence
/// for an OpenMP region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check whether the combiner is a call to a UDR combiner and, if so, return
/// the UDR decl used for the reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
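
// For example (illustrative), given
//   #pragma omp declare reduction(merge : int : omp_out += omp_in) \
//       initializer(omp_priv = 0)
// the ReductionOp of a 'reduction(merge : x)' clause is a CallExpr whose
// callee is an OpaqueValueExpr whose source expression is a DeclRefExpr
// naming the OMPDeclareReductionDecl; that is what the chain of dyn_casts
// above unwraps.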

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit True if the initializer must be emitted
/// via the 'declare reduction' initializer instead of the default one.
/// \param Init Initializer expression for the array elements.
/// \param DRD 'declare reduction' declaration used for initialization, if any.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.src.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
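
// Schematically (a sketch; block names match those created above), the
// emitted initialization loop for a destination array looks like:
//
//   entry:
//     %isempty = icmp eq %dest.begin, %dest.end
//     br %isempty, label %omp.arrayinit.done, label %omp.arrayinit.body
//   omp.arrayinit.body:
//     %cur = phi [ %dest.begin, %entry ], [ %next, %omp.arrayinit.body ]
//     ... initialize *%cur (default init or UDR initializer) ...
//     %next = getelementptr %cur, 1
//     %done = icmp eq %next, %dest.end
//     br %done, label %omp.arrayinit.done, label %omp.arrayinit.body
//   omp.arrayinit.done: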

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in the current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
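
// For example (illustrative): for 'reduction(+ : a[1:n])' over 'int *a', the
// item is an array section, so the sizes are computed as
//   Size        = (&a[1 + n - 1] - &a[1]) + 1   // element count: UB - LB + 1
//   SizeInChars = Size * sizeof(int)
// whereas for a non-section VLA item SizeInChars comes from getTypeSize and
// the element count is recovered by the exact division above.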

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
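
// For example (illustrative): with 'double *p' and a reduction over the
// section 'p[0:n]', BaseTy is 'double *' while ElTy is 'double'; the loop
// above loads 'p' once, so the returned lvalue designates the first 'double'
// of the pointee rather than the pointer variable itself.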

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
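
// Illustrative example of the adjustment above: for 'reduction(+ : a[2:8])'
// the private copy holds only the eight section elements, but code in the
// region still addresses them through the original base 'a'. With
//   Adjustment = &a[0] - &a[2]   // a negative element offset
// the returned address is 'PrivateAddr + Adjustment', so indexing it with the
// original subscripts (e.g. [2]) lands on the first private element.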

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if 'omp requires
  // unified_shared_memory' had been specified.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean up non-target variable declarations that may be used only in debug
  // info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *omp_out, Ty *omp_in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
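
// For example (illustrative), for
//   #pragma omp declare reduction(merge : T : omp_out += omp_in)
// the combiner emitted above is, schematically,
//   void .omp_combiner.(T *restrict omp_out, T *restrict omp_in) {
//     *omp_out += *omp_in;
//   }
// with 'omp_in'/'omp_out' privatized to the pointees of the two parameters.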

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means that if we have variables to finalize, we split the
    // block at IP, use the new block (=BB) as the destination to build a
    // JumpDest (via getJumpDestInCurrentScope(BB)), which is then fed to
    // EmitBranchThroughCleanup. Furthermore, there will no longer be a need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but only at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
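
// The helper generated above follows the kmpc microtask convention; a sketch
// (the parameter list beyond the first two depends on the captures):
//   void <OutlinedHelperName>(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                             /* captured variables... */);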
1242 
1243 std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1244   std::string Suffix = getName({"omp_outlined"});
1245   return (Name + Suffix).str();
1246 }
1247 
1248 std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
1249   return getOutlinedHelperName(CGF.CurFn->getName());
1250 }
1251 
1252 std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1253   std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1254   return (Name + Suffix).str();
1255 }
1256 
1257 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1258     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1259     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1260     const RegionCodeGenTy &CodeGen) {
1261   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1262   return emitParallelOrTeamsOutlinedFunction(
1263       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1264       CodeGen);
1265 }
1266 
1267 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1268     CodeGenFunction &CGF, const OMPExecutableDirective &D,
1269     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1270     const RegionCodeGenTy &CodeGen) {
1271   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1272   return emitParallelOrTeamsOutlinedFunction(
1273       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1274       CodeGen);
1275 }
1276 
1277 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1278     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1279     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1280     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1281     bool Tied, unsigned &NumberOfParts) {
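  // For untied tasks, the lambda below re-enqueues the task with
  // __kmpc_omp_task(loc, gtid, task_t) at every untied switch point so the
  // task can resume later in its next part.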
1282   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1283                                               PrePostActionTy &) {
1284     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1285     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1286     llvm::Value *TaskArgs[] = {
1287         UpLoc, ThreadID,
1288         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1289                                     TaskTVar->getType()->castAs<PointerType>())
1290             .getPointer(CGF)};
1291     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1292                             CGM.getModule(), OMPRTL___kmpc_omp_task),
1293                         TaskArgs);
1294   };
1295   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1296                                                             UntiedCodeGen);
1297   CodeGen.setAction(Action);
1298   assert(!ThreadIDVar->getType()->isPointerType() &&
1299          "thread id variable must be of type kmp_int32 for tasks");
1300   const OpenMPDirectiveKind Region =
1301       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1302                                                       : OMPD_task;
1303   const CapturedStmt *CS = D.getCapturedStmt(Region);
1304   bool HasCancel = false;
1305   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1306     HasCancel = TD->hasCancel();
1307   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1308     HasCancel = TD->hasCancel();
1309   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1310     HasCancel = TD->hasCancel();
1311   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1312     HasCancel = TD->hasCancel();
1313 
1314   CodeGenFunction CGF(CGM, true);
1315   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1316                                         InnermostKind, HasCancel, Action);
1317   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1318   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1319   if (!Tied)
1320     NumberOfParts = Action.getNumberOfParts();
1321   return Res;
1322 }
1323 
1324 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1325                                              bool AtCurrentPoint) {
1326   auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1327   assert(!Elem.ServiceInsertPt && "Insert point is set already.");
1328 
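  // A no-op instruction (a bitcast of undef to i32) acts as a movable
  // placeholder marking where service calls such as __kmpc_global_thread_num
  // should later be inserted; it is erased in clearLocThreadIdInsertPt.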
1329   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1330   if (AtCurrentPoint) {
1331     Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
1332                                                  CGF.Builder.GetInsertBlock());
1333   } else {
1334     Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1335     Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
1336   }
1337 }
1338 
1339 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1340   auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1341   if (Elem.ServiceInsertPt) {
1342     llvm::Instruction *Ptr = Elem.ServiceInsertPt;
1343     Elem.ServiceInsertPt = nullptr;
1344     Ptr->eraseFromParent();
1345   }
1346 }
1347 
1348 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1349                                                   SourceLocation Loc,
1350                                                   SmallString<128> &Buffer) {
1351   llvm::raw_svector_ostream OS(Buffer);
1352   // Build the debug location string in the ";file;function;line;col;;" form.
1353   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1354   OS << ";" << PLoc.getFilename() << ";";
1355   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1356     OS << FD->getQualifiedNameAsString();
1357   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1358   return OS.str();
1359 }
1360 
1361 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1362                                                  SourceLocation Loc,
1363                                                  unsigned Flags, bool EmitLoc) {
1364   uint32_t SrcLocStrSize;
1365   llvm::Constant *SrcLocStr;
1366   if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1367                        llvm::codegenoptions::NoDebugInfo) ||
1368       Loc.isInvalid()) {
1369     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1370   } else {
1371     std::string FunctionName;
1372     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1373       FunctionName = FD->getQualifiedNameAsString();
1374     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1375     const char *FileName = PLoc.getFilename();
1376     unsigned Line = PLoc.getLine();
1377     unsigned Column = PLoc.getColumn();
1378     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1379                                                 Column, SrcLocStrSize);
1380   }
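  // The returned value points at a uniqued ident_t global whose layout is
  // roughly { i32 reserved, i32 flags, i32 reserved, i32 reserved, ptr psource },
  // with psource referencing the ";file;function;line;col;;" string.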
1381   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1382   return OMPBuilder.getOrCreateIdent(
1383       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1384 }
1385 
1386 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1387                                           SourceLocation Loc) {
1388   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1389   // If the OpenMPIRBuilder is used, we need to use it for all thread-id
1390   // calls, as the clang invariants used below might be broken.
1391   if (CGM.getLangOpts().OpenMPIRBuilder) {
1392     SmallString<128> Buffer;
1393     OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1394     uint32_t SrcLocStrSize;
1395     auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1396         getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1397     return OMPBuilder.getOrCreateThreadID(
1398         OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1399   }
1400 
1401   llvm::Value *ThreadID = nullptr;
1402   // Check whether we've already cached a load of the thread id in this
1403   // function.
1404   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1405   if (I != OpenMPLocThreadIDMap.end()) {
1406     ThreadID = I->second.ThreadID;
1407     if (ThreadID != nullptr)
1408       return ThreadID;
1409   }
1410   // If exceptions are enabled, do not use parameter to avoid possible crash.
1411   if (auto *OMPRegionInfo =
1412           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1413     if (OMPRegionInfo->getThreadIDVariable()) {
1414       // Check if this is an outlined function with the thread id as an argument.
1415       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1416       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1417       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1418           !CGF.getLangOpts().CXXExceptions ||
1419           CGF.Builder.GetInsertBlock() == TopBlock ||
1420           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1421           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1422               TopBlock ||
1423           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1424               CGF.Builder.GetInsertBlock()) {
1425         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1426         // If value loaded in entry block, cache it and use it everywhere in
1427         // function.
1428         if (CGF.Builder.GetInsertBlock() == TopBlock)
1429           OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
1430         return ThreadID;
1431       }
1432     }
1433   }
1434 
1435   // This is not an outlined function region - need to call kmp_int32
1436   // __kmpc_global_thread_num(ident_t *loc).
1437   // Generate the thread id value and cache it for use across the
1438   // function.
1439   auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1440   if (!Elem.ServiceInsertPt)
1441     setLocThreadIdInsertPt(CGF);
1442   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1443   CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
1444   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
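  // The emitted call looks roughly like
  //   %gtid = call i32 @__kmpc_global_thread_num(ptr @<ident>)
  // and is placed at the service insert point so it dominates later uses.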
1445   llvm::CallInst *Call = CGF.Builder.CreateCall(
1446       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1447                                             OMPRTL___kmpc_global_thread_num),
1448       emitUpdateLocation(CGF, Loc));
1449   Call->setCallingConv(CGF.getRuntimeCC());
1450   Elem.ThreadID = Call;
1451   return Call;
1452 }
1453 
1454 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1455   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1456   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1457     clearLocThreadIdInsertPt(CGF);
1458     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1459   }
1460   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1461     for (const auto *D : FunctionUDRMap[CGF.CurFn])
1462       UDRMap.erase(D);
1463     FunctionUDRMap.erase(CGF.CurFn);
1464   }
1465   auto I = FunctionUDMMap.find(CGF.CurFn);
1466   if (I != FunctionUDMMap.end()) {
1467     for (const auto *D : I->second)
1468       UDMMap.erase(D);
1469     FunctionUDMMap.erase(I);
1470   }
1471   LastprivateConditionalToTypes.erase(CGF.CurFn);
1472   FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1473 }
1474 
1475 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1476   return OMPBuilder.IdentPtr;
1477 }
1478 
1479 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1480   if (!Kmpc_MicroTy) {
1481     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1482     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1483                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1484     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1485   }
1486   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1487 }
1488 
1489 static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1490 convertDeviceClause(const VarDecl *VD) {
1491   std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1492       OMPDeclareTargetDeclAttr::getDeviceType(VD);
1493   if (!DevTy)
1494     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1495 
1496   switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1497   case OMPDeclareTargetDeclAttr::DT_Host:
1498     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1500   case OMPDeclareTargetDeclAttr::DT_NoHost:
1501     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1503   case OMPDeclareTargetDeclAttr::DT_Any:
1504     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1506   default:
1507     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1509   }
1510 }
1511 
1512 static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1513 convertCaptureClause(const VarDecl *VD) {
1514   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1515       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1516   if (!MapType)
1517     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1518   switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1519   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1520     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1522   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1523     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1525   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1526     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1528   default:
1529     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1531   }
1532 }
1533 
1534 static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1535     CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1536     SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1537 
1538   auto FileInfoCallBack = [&]() {
1539     SourceManager &SM = CGM.getContext().getSourceManager();
1540     PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1541 
1542     llvm::sys::fs::UniqueID ID;
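    // If the presumed filename (which honors #line directives) does not name
    // a real file, fall back to the actual location so the unique ID is valid.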
1543     if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1544       PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1545     }
1546 
1547     return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1548   };
1549 
1550   return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1551 }
1552 
1553 ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1554   auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1555 
1556   auto LinkageForVariable = [&VD, this]() {
1557     return CGM.getLLVMLinkageVarDefinition(VD);
1558   };
1559 
1560   std::vector<llvm::GlobalVariable *> GeneratedRefs;
1561 
1562   llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1563       CGM.getContext().getPointerType(VD->getType()));
1564   llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1565       convertCaptureClause(VD), convertDeviceClause(VD),
1566       VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1567       VD->isExternallyVisible(),
1568       getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1569                                   VD->getCanonicalDecl()->getBeginLoc()),
1570       CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1571       CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1572       LinkageForVariable);
1573 
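  // For 'declare target link' variables this yields a reference global
  // (conventionally suffixed '_decl_tgt_ref_ptr') rather than the variable
  // itself; a null result means no declare-target address is required here.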
1574   if (!addr)
1575     return ConstantAddress::invalid();
1576   return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1577 }
1578 
1579 llvm::Constant *
1580 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1581   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1582          !CGM.getContext().getTargetInfo().isTLSSupported());
1583   // Lookup the entry, lazily creating it if necessary.
1584   std::string Suffix = getName({"cache", ""});
1585   return OMPBuilder.getOrCreateInternalVariable(
1586       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1587 }
1588 
1589 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1590                                                 const VarDecl *VD,
1591                                                 Address VDAddr,
1592                                                 SourceLocation Loc) {
1593   if (CGM.getLangOpts().OpenMPUseTLS &&
1594       CGM.getContext().getTargetInfo().isTLSSupported())
1595     return VDAddr;
1596 
1597   llvm::Type *VarTy = VDAddr.getElementType();
1598   llvm::Value *Args[] = {
1599       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1600       CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1601       CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1602       getOrCreateThreadPrivateCache(VD)};
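  // Emits, roughly:
  //   %p = call ptr @__kmpc_threadprivate_cached(ptr @loc, i32 %gtid, ptr %var,
  //                                              i64 <size>, ptr @<cache var>)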
1603   return Address(
1604       CGF.EmitRuntimeCall(
1605           OMPBuilder.getOrCreateRuntimeFunction(
1606               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1607           Args),
1608       CGF.Int8Ty, VDAddr.getAlignment());
1609 }
1610 
1611 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1612     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1613     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1614   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1615   // library.
1616   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1617   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1618                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1619                       OMPLoc);
1620   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1621   // to register constructor/destructor for variable.
1622   llvm::Value *Args[] = {
1623       OMPLoc,
1624       CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1625       Ctor, CopyCtor, Dtor};
1626   CGF.EmitRuntimeCall(
1627       OMPBuilder.getOrCreateRuntimeFunction(
1628           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1629       Args);
1630 }
1631 
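// For 'static T x; #pragma omp threadprivate(x)' with a dynamic C++
// initializer, this emits (a sketch) a '__kmpc_global_ctor_' helper running
// the initializer, a '__kmpc_global_dtor_' helper for destruction, and
// registers both with __kmpc_threadprivate_register from an init function.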
1632 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1633     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1634     bool PerformInit, CodeGenFunction *CGF) {
1635   if (CGM.getLangOpts().OpenMPUseTLS &&
1636       CGM.getContext().getTargetInfo().isTLSSupported())
1637     return nullptr;
1638 
1639   VD = VD->getDefinition(CGM.getContext());
1640   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1641     QualType ASTTy = VD->getType();
1642 
1643     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1644     const Expr *Init = VD->getAnyInitializer();
1645     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1646       // Generate a function that re-emits the declaration's initializer into
1647       // the threadprivate copy of the variable VD.
1648       CodeGenFunction CtorCGF(CGM);
1649       FunctionArgList Args;
1650       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1651                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1652                             ImplicitParamKind::Other);
1653       Args.push_back(&Dst);
1654 
1655       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1656           CGM.getContext().VoidPtrTy, Args);
1657       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1658       std::string Name = getName({"__kmpc_global_ctor_", ""});
1659       llvm::Function *Fn =
1660           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1661       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1662                             Args, Loc, Loc);
1663       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1664           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1665           CGM.getContext().VoidPtrTy, Dst.getLocation());
1666       Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1667                   VDAddr.getAlignment());
1668       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1669                                /*IsInitializer=*/true);
1670       ArgVal = CtorCGF.EmitLoadOfScalar(
1671           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1672           CGM.getContext().VoidPtrTy, Dst.getLocation());
1673       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1674       CtorCGF.FinishFunction();
1675       Ctor = Fn;
1676     }
1677     if (VD->getType().isDestructedType() != QualType::DK_none) {
1678       // Generate a function that emits the destructor call for the
1679       // threadprivate copy of the variable VD.
1680       CodeGenFunction DtorCGF(CGM);
1681       FunctionArgList Args;
1682       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1683                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1684                             ImplicitParamKind::Other);
1685       Args.push_back(&Dst);
1686 
1687       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1688           CGM.getContext().VoidTy, Args);
1689       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1690       std::string Name = getName({"__kmpc_global_dtor_", ""});
1691       llvm::Function *Fn =
1692           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1693       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1694       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1695                             Loc, Loc);
1696       // Create a scope with an artificial location for the body of this function.
1697       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1698       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1699           DtorCGF.GetAddrOfLocalVar(&Dst),
1700           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1701       DtorCGF.emitDestroy(
1702           Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1703           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1704           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1705       DtorCGF.FinishFunction();
1706       Dtor = Fn;
1707     }
1708     // Do not emit the init function if it is not required.
1709     if (!Ctor && !Dtor)
1710       return nullptr;
1711 
1712     // Copying constructor for the threadprivate variable.
1713     // Must be NULL - the runtime reserves this parameter and currently
1714     // requires it to always be NULL; otherwise it fires an assertion.
1715     CopyCtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1716     if (Ctor == nullptr) {
1717       Ctor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1718     }
1719     if (Dtor == nullptr) {
1720       Dtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1721     }
1722     if (!CGF) {
1723       auto *InitFunctionTy =
1724           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1725       std::string Name = getName({"__omp_threadprivate_init_", ""});
1726       llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1727           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1728       CodeGenFunction InitCGF(CGM);
1729       FunctionArgList ArgList;
1730       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1731                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1732                             Loc, Loc);
1733       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1734       InitCGF.FinishFunction();
1735       return InitFunction;
1736     }
1737     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1738   }
1739   return nullptr;
1740 }
1741 
1742 void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1743                                                 llvm::GlobalValue *GV) {
1744   std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1745       OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1746 
1747   // We only need to handle active 'indirect' declare target functions.
1748   if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1749     return;
1750 
1751   // Get a mangled name to store the new device global in.
1752   llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1753       CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1754   SmallString<128> Name;
1755   OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1756 
1757   // We need to generate a new global to hold the address of the indirectly
1758   // called device function. Doing this allows us to keep the visibility and
1759   // linkage of the associated function unchanged while allowing the runtime to
1760   // access its value.
1761   llvm::GlobalValue *Addr = GV;
1762   if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1763     Addr = new llvm::GlobalVariable(
1764         CGM.getModule(), CGM.VoidPtrTy,
1765         /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1766         nullptr, llvm::GlobalValue::NotThreadLocal,
1767         CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1768     Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1769   }
1770 
1771   OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1772       Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1773       llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1774       llvm::GlobalValue::WeakODRLinkage);
1775 }
1776 
1777 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1778                                                           QualType VarType,
1779                                                           StringRef Name) {
1780   std::string Suffix = getName({"artificial", ""});
1781   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1782   llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1783       VarLVType, Twine(Name).concat(Suffix).str());
1784   if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1785       CGM.getTarget().isTLSSupported()) {
1786     GAddr->setThreadLocal(/*Val=*/true);
1787     return Address(GAddr, GAddr->getValueType(),
1788                    CGM.getContext().getTypeAlignInChars(VarType));
1789   }
1790   std::string CacheSuffix = getName({"cache", ""});
1791   llvm::Value *Args[] = {
1792       emitUpdateLocation(CGF, SourceLocation()),
1793       getThreadID(CGF, SourceLocation()),
1794       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1795       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1796                                 /*isSigned=*/false),
1797       OMPBuilder.getOrCreateInternalVariable(
1798           CGM.VoidPtrPtrTy,
1799           Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1800   return Address(
1801       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1802           CGF.EmitRuntimeCall(
1803               OMPBuilder.getOrCreateRuntimeFunction(
1804                   CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1805               Args),
1806           CGF.Builder.getPtrTy(0)),
1807       VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1808 }
1809 
1810 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1811                                    const RegionCodeGenTy &ThenGen,
1812                                    const RegionCodeGenTy &ElseGen) {
1813   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1814 
1815   // If the condition constant folds and can be elided, try to avoid emitting
1816   // the condition and the dead arm of the if/else.
1817   bool CondConstant;
1818   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1819     if (CondConstant)
1820       ThenGen(CGF);
1821     else
1822       ElseGen(CGF);
1823     return;
1824   }
1825 
1826   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1827   // emit the conditional branch.
1828   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1829   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1830   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1831   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1832 
1833   // Emit the 'then' code.
1834   CGF.EmitBlock(ThenBlock);
1835   ThenGen(CGF);
1836   CGF.EmitBranch(ContBlock);
1837   // Emit the 'else' code if present.
1838   // There is no need to emit line number for unconditional branch.
1839   (void)ApplyDebugLocation::CreateEmpty(CGF);
1840   CGF.EmitBlock(ElseBlock);
1841   ElseGen(CGF);
1842   // There is no need to emit line number for unconditional branch.
1843   (void)ApplyDebugLocation::CreateEmpty(CGF);
1844   CGF.EmitBranch(ContBlock);
1845   // Emit the continuation block for code after the if.
1846   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1847 }
1848 
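// For '#pragma omp parallel if(c)' this emits, roughly:
//   if (c)
//     __kmpc_fork_call(&loc, <n captures>, @outlined, <captures>...);
//   else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     @outlined(&gtid, &.bound.zero.addr, <captures>...);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }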
1849 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1850                                        llvm::Function *OutlinedFn,
1851                                        ArrayRef<llvm::Value *> CapturedVars,
1852                                        const Expr *IfCond,
1853                                        llvm::Value *NumThreads) {
1854   if (!CGF.HaveInsertPoint())
1855     return;
1856   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1857   auto &M = CGM.getModule();
1858   auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1859                     this](CodeGenFunction &CGF, PrePostActionTy &) {
1860     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1861     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1862     llvm::Value *Args[] = {
1863         RTLoc,
1864         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1865         CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1866     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1867     RealArgs.append(std::begin(Args), std::end(Args));
1868     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1869 
1870     llvm::FunctionCallee RTLFn =
1871         OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1872     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1873   };
1874   auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1875                     this](CodeGenFunction &CGF, PrePostActionTy &) {
1876     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1877     llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1878     // Build calls:
1879     // __kmpc_serialized_parallel(&Loc, GTid);
1880     llvm::Value *Args[] = {RTLoc, ThreadID};
1881     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1882                             M, OMPRTL___kmpc_serialized_parallel),
1883                         Args);
1884 
1885     // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1886     Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1887     RawAddress ZeroAddrBound =
1888         CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1889                                          /*Name=*/".bound.zero.addr");
1890     CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1891     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1892     // ThreadId for serialized parallels is 0.
1893     OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1894     OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1895     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1896 
1897     // Ensure we do not inline the function. This is trivially true for the ones
1898     // passed to __kmpc_fork_call but the ones called in serialized regions
1899     // could be inlined. This is not perfect, but it is closer to the invariant
1900     // we want, namely, every data environment starts with a new function.
1901     // TODO: We should pass the if condition to the runtime function and do the
1902     //       handling there. Much cleaner code.
1903     OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1904     OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1905     RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1906 
1907     // __kmpc_end_serialized_parallel(&Loc, GTid);
1908     llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1909     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1910                             M, OMPRTL___kmpc_end_serialized_parallel),
1911                         EndArgs);
1912   };
1913   if (IfCond) {
1914     emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1915   } else {
1916     RegionCodeGenTy ThenRCG(ThenGen);
1917     ThenRCG(CGF);
1918   }
1919 }
1920 
1921 // If we're inside an (outlined) parallel region, use the region info's
1922 // thread-ID variable (it is passed as the first argument of the outlined
1923 // function, as "kmp_int32 *gtid"). Otherwise, if we're in a regular serial
1924 // code region, get the thread ID by calling kmp_int32
1925 // __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
1926 // temporary and return the address of that temp.
1927 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1928                                              SourceLocation Loc) {
1929   if (auto *OMPRegionInfo =
1930           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1931     if (OMPRegionInfo->getThreadIDVariable())
1932       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1933 
1934   llvm::Value *ThreadID = getThreadID(CGF, Loc);
1935   QualType Int32Ty =
1936       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1937   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1938   CGF.EmitStoreOfScalar(ThreadID,
1939                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1940 
1941   return ThreadIDTemp;
1942 }
1943 
1944 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1945   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1946   std::string Name = getName({Prefix, "var"});
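  // For '#pragma omp critical(foo)' the lock is an internal global named
  // roughly '.gomp_critical_user_foo.var' (separators depend on getName).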
1947   return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1948 }
1949 
1950 namespace {
1951 /// Common pre(post)-action for different OpenMP constructs.
1952 class CommonActionTy final : public PrePostActionTy {
1953   llvm::FunctionCallee EnterCallee;
1954   ArrayRef<llvm::Value *> EnterArgs;
1955   llvm::FunctionCallee ExitCallee;
1956   ArrayRef<llvm::Value *> ExitArgs;
1957   bool Conditional;
1958   llvm::BasicBlock *ContBlock = nullptr;
1959 
1960 public:
1961   CommonActionTy(llvm::FunctionCallee EnterCallee,
1962                  ArrayRef<llvm::Value *> EnterArgs,
1963                  llvm::FunctionCallee ExitCallee,
1964                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1965       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1966         ExitArgs(ExitArgs), Conditional(Conditional) {}
1967   void Enter(CodeGenFunction &CGF) override {
1968     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1969     if (Conditional) {
1970       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1971       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1972       ContBlock = CGF.createBasicBlock("omp_if.end");
1973       // Generate the branch (If-stmt)
1974       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1975       CGF.EmitBlock(ThenBlock);
1976     }
1977   }
1978   void Done(CodeGenFunction &CGF) {
1979     // Emit the rest of blocks/branches
1980     CGF.EmitBranch(ContBlock);
1981     CGF.EmitBlock(ContBlock, true);
1982   }
1983   void Exit(CodeGenFunction &CGF) override {
1984     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1985   }
1986 };
1987 } // anonymous namespace
1988 
1989 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1990                                          StringRef CriticalName,
1991                                          const RegionCodeGenTy &CriticalOpGen,
1992                                          SourceLocation Loc, const Expr *Hint) {
1993   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1994   // CriticalOpGen();
1995   // __kmpc_end_critical(ident_t *, gtid, Lock);
1996   // Prepare arguments and build a call to __kmpc_critical
1997   if (!CGF.HaveInsertPoint())
1998     return;
1999   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2000                          getCriticalRegionLock(CriticalName)};
2001   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2002                                                 std::end(Args));
2003   if (Hint) {
2004     EnterArgs.push_back(CGF.Builder.CreateIntCast(
2005         CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2006   }
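  // With a 'hint' clause, e.g. 'hint(omp_sync_hint_contended)', the entry
  // call becomes __kmpc_critical_with_hint with the evaluated hint appended
  // as an extra argument.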
2007   CommonActionTy Action(
2008       OMPBuilder.getOrCreateRuntimeFunction(
2009           CGM.getModule(),
2010           Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2011       EnterArgs,
2012       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2013                                             OMPRTL___kmpc_end_critical),
2014       Args);
2015   CriticalOpGen.setAction(Action);
2016   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2017 }
2018 
2019 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2020                                        const RegionCodeGenTy &MasterOpGen,
2021                                        SourceLocation Loc) {
2022   if (!CGF.HaveInsertPoint())
2023     return;
2024   // if(__kmpc_master(ident_t *, gtid)) {
2025   //   MasterOpGen();
2026   //   __kmpc_end_master(ident_t *, gtid);
2027   // }
2028   // Prepare arguments and build a call to __kmpc_master
2029   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2030   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2031                             CGM.getModule(), OMPRTL___kmpc_master),
2032                         Args,
2033                         OMPBuilder.getOrCreateRuntimeFunction(
2034                             CGM.getModule(), OMPRTL___kmpc_end_master),
2035                         Args,
2036                         /*Conditional=*/true);
2037   MasterOpGen.setAction(Action);
2038   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2039   Action.Done(CGF);
2040 }
2041 
2042 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2043                                        const RegionCodeGenTy &MaskedOpGen,
2044                                        SourceLocation Loc, const Expr *Filter) {
2045   if (!CGF.HaveInsertPoint())
2046     return;
2047   // if(__kmpc_masked(ident_t *, gtid, filter)) {
2048   //   MaskedOpGen();
2049   //   __kmpc_end_masked(ident_t *, gtid);
2050   // }
2051   // Prepare arguments and build a call to __kmpc_masked
2052   llvm::Value *FilterVal = Filter
2053                                ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2054                                : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2055   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2056                          FilterVal};
2057   llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2058                             getThreadID(CGF, Loc)};
2059   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2060                             CGM.getModule(), OMPRTL___kmpc_masked),
2061                         Args,
2062                         OMPBuilder.getOrCreateRuntimeFunction(
2063                             CGM.getModule(), OMPRTL___kmpc_end_masked),
2064                         ArgsEnd,
2065                         /*Conditional=*/true);
2066   MaskedOpGen.setAction(Action);
2067   emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2068   Action.Done(CGF);
2069 }
2070 
2071 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2072                                         SourceLocation Loc) {
2073   if (!CGF.HaveInsertPoint())
2074     return;
2075   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2076     OMPBuilder.createTaskyield(CGF.Builder);
2077   } else {
2078     // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2079     llvm::Value *Args[] = {
2080         emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2081         llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2082     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2083                             CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2084                         Args);
2085   }
2086 
2087   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2088     Region->emitUntiedSwitch(CGF);
2089 }
2090 
2091 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2092                                           const RegionCodeGenTy &TaskgroupOpGen,
2093                                           SourceLocation Loc) {
2094   if (!CGF.HaveInsertPoint())
2095     return;
2096   // __kmpc_taskgroup(ident_t *, gtid);
2097   // TaskgroupOpGen();
2098   // __kmpc_end_taskgroup(ident_t *, gtid);
2099   // Prepare arguments and build a call to __kmpc_taskgroup
2100   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2101   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2102                             CGM.getModule(), OMPRTL___kmpc_taskgroup),
2103                         Args,
2104                         OMPBuilder.getOrCreateRuntimeFunction(
2105                             CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2106                         Args);
2107   TaskgroupOpGen.setAction(Action);
2108   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2109 }
2110 
2111 /// Given an array of pointers to variables, project the address of a
2112 /// given variable.
2113 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2114                                       unsigned Index, const VarDecl *Var) {
2115   // Pull out the pointer to the variable.
2116   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2117   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2118 
2119   llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2120   return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2121 }
2122 
2123 static llvm::Value *emitCopyprivateCopyFunction(
2124     CodeGenModule &CGM, llvm::Type *ArgsElemType,
2125     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2126     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2127     SourceLocation Loc) {
2128   ASTContext &C = CGM.getContext();
2129   // void copy_func(void *LHSArg, void *RHSArg);
2130   FunctionArgList Args;
2131   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2132                            ImplicitParamKind::Other);
2133   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2134                            ImplicitParamKind::Other);
2135   Args.push_back(&LHSArg);
2136   Args.push_back(&RHSArg);
2137   const auto &CGFI =
2138       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2139   std::string Name =
2140       CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2141   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2142                                     llvm::GlobalValue::InternalLinkage, Name,
2143                                     &CGM.getModule());
2144   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2145   Fn->setDoesNotRecurse();
2146   CodeGenFunction CGF(CGM);
2147   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2148   // Dest = (void*[n])(LHSArg);
2149   // Src = (void*[n])(RHSArg);
2150   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2151                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2152                   CGF.Builder.getPtrTy(0)),
2153               ArgsElemType, CGF.getPointerAlign());
2154   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2155                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2156                   CGF.Builder.getPtrTy(0)),
2157               ArgsElemType, CGF.getPointerAlign());
2158   // *(Type0*)Dst[0] = *(Type0*)Src[0];
2159   // *(Type1*)Dst[1] = *(Type1*)Src[1];
2160   // ...
2161   // *(Typen*)Dst[n] = *(Typen*)Src[n];
2162   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2163     const auto *DestVar =
2164         cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2165     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2166 
2167     const auto *SrcVar =
2168         cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2169     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2170 
2171     const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2172     QualType Type = VD->getType();
2173     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2174   }
2175   CGF.FinishFunction();
2176   return Fn;
2177 }
2178 
2179 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2180                                        const RegionCodeGenTy &SingleOpGen,
2181                                        SourceLocation Loc,
2182                                        ArrayRef<const Expr *> CopyprivateVars,
2183                                        ArrayRef<const Expr *> SrcExprs,
2184                                        ArrayRef<const Expr *> DstExprs,
2185                                        ArrayRef<const Expr *> AssignmentOps) {
2186   if (!CGF.HaveInsertPoint())
2187     return;
2188   assert(CopyprivateVars.size() == SrcExprs.size() &&
2189          CopyprivateVars.size() == DstExprs.size() &&
2190          CopyprivateVars.size() == AssignmentOps.size());
2191   ASTContext &C = CGM.getContext();
2192   // int32 did_it = 0;
2193   // if(__kmpc_single(ident_t *, gtid)) {
2194   //   SingleOpGen();
2195   //   __kmpc_end_single(ident_t *, gtid);
2196   //   did_it = 1;
2197   // }
2198   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2199   // <copy_func>, did_it);
2200 
2201   Address DidIt = Address::invalid();
2202   if (!CopyprivateVars.empty()) {
2203     // int32 did_it = 0;
2204     QualType KmpInt32Ty =
2205         C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2206     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2207     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2208   }
2209   // Prepare arguments and build a call to __kmpc_single
2210   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2211   CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2212                             CGM.getModule(), OMPRTL___kmpc_single),
2213                         Args,
2214                         OMPBuilder.getOrCreateRuntimeFunction(
2215                             CGM.getModule(), OMPRTL___kmpc_end_single),
2216                         Args,
2217                         /*Conditional=*/true);
2218   SingleOpGen.setAction(Action);
2219   emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2220   if (DidIt.isValid()) {
2221     // did_it = 1;
2222     CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2223   }
2224   Action.Done(CGF);
2225   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2226   // <copy_func>, did_it);
2227   if (DidIt.isValid()) {
2228     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2229     QualType CopyprivateArrayTy = C.getConstantArrayType(
2230         C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2231         /*IndexTypeQuals=*/0);
2232     // Create a list of all private variables for copyprivate.
2233     Address CopyprivateList =
2234         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2235     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2236       Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2237       CGF.Builder.CreateStore(
2238           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2239               CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2240               CGF.VoidPtrTy),
2241           Elem);
2242     }
2243     // Build function that copies private values from single region to all other
2244     // threads in the corresponding parallel region.
2245     llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2246         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2247         SrcExprs, DstExprs, AssignmentOps, Loc);
2248     llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2249     Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2250         CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2251     llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2252     llvm::Value *Args[] = {
2253         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2254         getThreadID(CGF, Loc),        // i32 <gtid>
2255         BufSize,                      // size_t <buf_size>
2256         CL.emitRawPointer(CGF),       // void *<copyprivate list>
2257         CpyFn,                        // void (*) (void *, void *) <copy_func>
2258         DidItVal                      // i32 did_it
2259     };
2260     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2261                             CGM.getModule(), OMPRTL___kmpc_copyprivate),
2262                         Args);
2263   }
2264 }
2265 
2266 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2267                                         const RegionCodeGenTy &OrderedOpGen,
2268                                         SourceLocation Loc, bool IsThreads) {
2269   if (!CGF.HaveInsertPoint())
2270     return;
2271   // __kmpc_ordered(ident_t *, gtid);
2272   // OrderedOpGen();
2273   // __kmpc_end_ordered(ident_t *, gtid);
2274   // Prepare arguments and build a call to __kmpc_ordered
2275   if (IsThreads) {
2276     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2277     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2278                               CGM.getModule(), OMPRTL___kmpc_ordered),
2279                           Args,
2280                           OMPBuilder.getOrCreateRuntimeFunction(
2281                               CGM.getModule(), OMPRTL___kmpc_end_ordered),
2282                           Args);
2283     OrderedOpGen.setAction(Action);
2284     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2285     return;
2286   }
2287   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2288 }
2289 
2290 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2291   unsigned Flags;
2292   if (Kind == OMPD_for)
2293     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2294   else if (Kind == OMPD_sections)
2295     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2296   else if (Kind == OMPD_single)
2297     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2298   else if (Kind == OMPD_barrier)
2299     Flags = OMP_IDENT_BARRIER_EXPL;
2300   else
2301     Flags = OMP_IDENT_BARRIER_IMPL;
2302   return Flags;
2303 }
2304 
2305 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2306     CodeGenFunction &CGF, const OMPLoopDirective &S,
2307     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2308   // Check if the loop directive is actually a doacross loop directive (it has
2309   // an 'ordered(n)' clause with a parameter); choose a schedule(static, 1).
2310   if (llvm::any_of(
2311           S.getClausesOfKind<OMPOrderedClause>(),
2312           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2313     ScheduleKind = OMPC_SCHEDULE_static;
2314     // Chunk size is 1 in this case.
2315     llvm::APInt ChunkSize(32, 1);
2316     ChunkExpr = IntegerLiteral::Create(
2317         CGF.getContext(), ChunkSize,
2318         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2319         SourceLocation());
2320   }
2321 }
2322 
2323 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2324                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2325                                       bool ForceSimpleCall) {
2326   // Check if we should use the OMPBuilder
2327   auto *OMPRegionInfo =
2328       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2329   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2330     llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2331         cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2332                                           EmitChecks));
2333     CGF.Builder.restoreIP(AfterIP);
2334     return;
2335   }
2336 
2337   if (!CGF.HaveInsertPoint())
2338     return;
2341   unsigned Flags = getDefaultFlagsForBarriers(Kind);
2342   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2343   // thread_id);
2344   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2345                          getThreadID(CGF, Loc)};
2346   if (OMPRegionInfo) {
2347     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2348       llvm::Value *Result = CGF.EmitRuntimeCall(
2349           OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2350                                                 OMPRTL___kmpc_cancel_barrier),
2351           Args);
2352       if (EmitChecks) {
2353         // if (__kmpc_cancel_barrier()) {
2354         //   exit from construct;
2355         // }
2356         llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2357         llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2358         llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2359         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2360         CGF.EmitBlock(ExitBB);
2361         //   exit from construct;
2362         CodeGenFunction::JumpDest CancelDestination =
2363             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2364         CGF.EmitBranchThroughCleanup(CancelDestination);
2365         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2366       }
2367       return;
2368     }
2369   }
2370   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2371                           CGM.getModule(), OMPRTL___kmpc_barrier),
2372                       Args);
2373 }
2374 
2375 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2376                                     Expr *ME, bool IsFatal) {
2377   llvm::Value *MVL =
2378       ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2379          : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2380   // Build call void __kmpc_error(ident_t *loc, int severity, const char
2381   // *message)
2382   llvm::Value *Args[] = {
2383       emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2384       llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2385       CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2386   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2387                           CGM.getModule(), OMPRTL___kmpc_error),
2388                       Args);
2389 }
2390 
2391 /// Map the OpenMP loop schedule to the runtime enumeration.
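/// For example, 'schedule(dynamic)' maps to OMP_sch_dynamic_chunked (the
/// runtime supplies its default chunk size when none is given), and
/// 'schedule(static)' under an 'ordered' clause maps to OMP_ord_static.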
2392 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2393                                           bool Chunked, bool Ordered) {
2394   switch (ScheduleKind) {
2395   case OMPC_SCHEDULE_static:
2396     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2397                    : (Ordered ? OMP_ord_static : OMP_sch_static);
2398   case OMPC_SCHEDULE_dynamic:
2399     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2400   case OMPC_SCHEDULE_guided:
2401     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2402   case OMPC_SCHEDULE_runtime:
2403     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2404   case OMPC_SCHEDULE_auto:
2405     return Ordered ? OMP_ord_auto : OMP_sch_auto;
2406   case OMPC_SCHEDULE_unknown:
2407     assert(!Chunked && "chunk was specified but schedule kind not known");
2408     return Ordered ? OMP_ord_static : OMP_sch_static;
2409   }
2410   llvm_unreachable("Unexpected runtime schedule");
2411 }
2412 
2413 /// Map the OpenMP distribute schedule to the runtime enumeration.
2414 static OpenMPSchedType
2415 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2416   // Only static is allowed for dist_schedule.
2417   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2418 }
2419 
2420 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2421                                          bool Chunked) const {
2422   OpenMPSchedType Schedule =
2423       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2424   return Schedule == OMP_sch_static;
2425 }
2426 
2427 bool CGOpenMPRuntime::isStaticNonchunked(
2428     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2429   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2430   return Schedule == OMP_dist_sch_static;
2431 }
2432 
2433 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2434                                       bool Chunked) const {
2435   OpenMPSchedType Schedule =
2436       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2437   return Schedule == OMP_sch_static_chunked;
2438 }
2439 
2440 bool CGOpenMPRuntime::isStaticChunked(
2441     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2442   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2443   return Schedule == OMP_dist_sch_static_chunked;
2444 }
2445 
2446 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2447   OpenMPSchedType Schedule =
2448       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2449   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2450   return Schedule != OMP_sch_static;
2451 }
2452 
2453 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2454                                   OpenMPScheduleClauseModifier M1,
2455                                   OpenMPScheduleClauseModifier M2) {
2456   int Modifier = 0;
2457   switch (M1) {
2458   case OMPC_SCHEDULE_MODIFIER_monotonic:
2459     Modifier = OMP_sch_modifier_monotonic;
2460     break;
2461   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2462     Modifier = OMP_sch_modifier_nonmonotonic;
2463     break;
2464   case OMPC_SCHEDULE_MODIFIER_simd:
2465     if (Schedule == OMP_sch_static_chunked)
2466       Schedule = OMP_sch_static_balanced_chunked;
2467     break;
2468   case OMPC_SCHEDULE_MODIFIER_last:
2469   case OMPC_SCHEDULE_MODIFIER_unknown:
2470     break;
2471   }
2472   switch (M2) {
2473   case OMPC_SCHEDULE_MODIFIER_monotonic:
2474     Modifier = OMP_sch_modifier_monotonic;
2475     break;
2476   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2477     Modifier = OMP_sch_modifier_nonmonotonic;
2478     break;
2479   case OMPC_SCHEDULE_MODIFIER_simd:
2480     if (Schedule == OMP_sch_static_chunked)
2481       Schedule = OMP_sch_static_balanced_chunked;
2482     break;
2483   case OMPC_SCHEDULE_MODIFIER_last:
2484   case OMPC_SCHEDULE_MODIFIER_unknown:
2485     break;
2486   }
2487   // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2488   // If the static schedule kind is specified or if the ordered clause is
2489   // specified, and if the nonmonotonic modifier is not specified, the effect is
2490   // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2491   // modifier is specified, the effect is as if the nonmonotonic modifier is
2492   // specified.
2493   if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2494     if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2495           Schedule == OMP_sch_static_balanced_chunked ||
2496           Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2497           Schedule == OMP_dist_sch_static_chunked ||
2498           Schedule == OMP_dist_sch_static))
2499       Modifier = OMP_sch_modifier_nonmonotonic;
2500   }
2501   return Schedule | Modifier;
2502 }
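
// The modifier is OR-ed into the schedule value, so a single kmp_int32 carries
// both. For example (a sketch of the encoding, exact enum values aside):
//   schedule(nonmonotonic: dynamic, 4)
//     -> OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
// and the runtime decodes kind and modifier from that one 'schedtype' word.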
2503 
2504 void CGOpenMPRuntime::emitForDispatchInit(
2505     CodeGenFunction &CGF, SourceLocation Loc,
2506     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2507     bool Ordered, const DispatchRTInput &DispatchValues) {
2508   if (!CGF.HaveInsertPoint())
2509     return;
2510   OpenMPSchedType Schedule = getRuntimeSchedule(
2511       ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2512   assert(Ordered ||
2513          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2514           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2515           Schedule != OMP_sch_static_balanced_chunked));
2516   // Call __kmpc_dispatch_init(
2517   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2518   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2519   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2520 
2521   // If the Chunk was not specified in the clause, use the default value 1.
2522   llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2523                                             : CGF.Builder.getIntN(IVSize, 1);
2524   llvm::Value *Args[] = {
2525       emitUpdateLocation(CGF, Loc),
2526       getThreadID(CGF, Loc),
2527       CGF.Builder.getInt32(addMonoNonMonoModifier(
2528           CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2529       DispatchValues.LB,                                     // Lower
2530       DispatchValues.UB,                                     // Upper
2531       CGF.Builder.getIntN(IVSize, 1),                        // Stride
2532       Chunk                                                  // Chunk
2533   };
2534   CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2535                       Args);
2536 }
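
// As a concrete sketch, '#pragma omp for schedule(dynamic, 4)' with a 32-bit
// IV yields a call resembling (illustrative bounds, the real ones come from
// DispatchValues):
//   __kmpc_dispatch_init_4(&loc, tid, schedtype, /*lower=*/0, /*upper=*/N - 1,
//                          /*stride=*/1, /*chunk=*/4);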
2537 
2538 void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2539                                             SourceLocation Loc) {
2540   if (!CGF.HaveInsertPoint())
2541     return;
2542   // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2543   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2544   CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2545 }
2546 
2547 static void emitForStaticInitCall(
2548     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2549     llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2550     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2551     const CGOpenMPRuntime::StaticRTInput &Values) {
2552   if (!CGF.HaveInsertPoint())
2553     return;
2554 
2555   assert(!Values.Ordered);
2556   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2557          Schedule == OMP_sch_static_balanced_chunked ||
2558          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2559          Schedule == OMP_dist_sch_static ||
2560          Schedule == OMP_dist_sch_static_chunked);
2561 
2562   // Call __kmpc_for_static_init(
2563   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2564   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2565   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2566   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2567   llvm::Value *Chunk = Values.Chunk;
2568   if (Chunk == nullptr) {
2569     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2570             Schedule == OMP_dist_sch_static) &&
2571            "expected static non-chunked schedule");
2572     // If the Chunk was not specified in the clause, use the default value 1.
2573     Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2574   } else {
2575     assert((Schedule == OMP_sch_static_chunked ||
2576             Schedule == OMP_sch_static_balanced_chunked ||
2577             Schedule == OMP_ord_static_chunked ||
2578             Schedule == OMP_dist_sch_static_chunked) &&
2579            "expected static chunked schedule");
2580   }
2581   llvm::Value *Args[] = {
2582       UpdateLocation,
2583       ThreadId,
2584       CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2585                                                   M2)), // Schedule type
2586       Values.IL.emitRawPointer(CGF),                    // &isLastIter
2587       Values.LB.emitRawPointer(CGF),                    // &LB
2588       Values.UB.emitRawPointer(CGF),                    // &UB
2589       Values.ST.emitRawPointer(CGF),                    // &Stride
2590       CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2591       Chunk                                             // Chunk
2592   };
2593   CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2594 }
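
// Worked example of the __kmpc_for_static_init_* contract (illustrative
// values): with *p_lower=0, *p_upper=99, 4 threads, and OMP_sch_static, the
// runtime rewrites the bounds in place so that, e.g., thread 1 sees
// *p_lower=25 and *p_upper=49 and executes only that slice; *p_lastiter is
// set for the thread that owns the final iteration.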
2595 
2596 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2597                                         SourceLocation Loc,
2598                                         OpenMPDirectiveKind DKind,
2599                                         const OpenMPScheduleTy &ScheduleKind,
2600                                         const StaticRTInput &Values) {
2601   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2602       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2603   assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2604          "Expected loop-based or sections-based directive.");
2605   llvm::Value *UpdatedLocation =
2606       emitUpdateLocation(CGF, Loc, isOpenMPLoopDirective(DKind)
2607                                        ? OMP_IDENT_WORK_LOOP
2608                                        : OMP_IDENT_WORK_SECTIONS);
2609   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2610   llvm::FunctionCallee StaticInitFunction =
2611       OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2612                                              /*IsGPUDistribute=*/false);
2613   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2614   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2615                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2616 }
2617 
2618 void CGOpenMPRuntime::emitDistributeStaticInit(
2619     CodeGenFunction &CGF, SourceLocation Loc,
2620     OpenMPDistScheduleClauseKind SchedKind,
2621     const CGOpenMPRuntime::StaticRTInput &Values) {
2622   OpenMPSchedType ScheduleNum =
2623       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2624   llvm::Value *UpdatedLocation =
2625       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2626   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2627   llvm::FunctionCallee StaticInitFunction;
2628   bool isGPUDistribute =
2629       CGM.getLangOpts().OpenMPIsTargetDevice &&
2630       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2631   StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2632       Values.IVSize, Values.IVSigned, isGPUDistribute);
2633 
2634   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2635                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2636                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
2637 }
2638 
2639 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2640                                           SourceLocation Loc,
2641                                           OpenMPDirectiveKind DKind) {
2642   assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2643           DKind == OMPD_sections) &&
2644          "Expected distribute, for, or sections directive kind");
2645   if (!CGF.HaveInsertPoint())
2646     return;
2647   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2648   llvm::Value *Args[] = {
2649       emitUpdateLocation(CGF, Loc,
2650                          isOpenMPDistributeDirective(DKind) ||
2651                                  (DKind == OMPD_target_teams_loop)
2652                              ? OMP_IDENT_WORK_DISTRIBUTE
2653                          : isOpenMPLoopDirective(DKind)
2654                              ? OMP_IDENT_WORK_LOOP
2655                              : OMP_IDENT_WORK_SECTIONS),
2656       getThreadID(CGF, Loc)};
2657   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2658   if (isOpenMPDistributeDirective(DKind) &&
2659       CGM.getLangOpts().OpenMPIsTargetDevice &&
2660       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2661     CGF.EmitRuntimeCall(
2662         OMPBuilder.getOrCreateRuntimeFunction(
2663             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2664         Args);
2665   else
2666     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2667                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2668                         Args);
2669 }
2670 
2671 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2672                                                  SourceLocation Loc,
2673                                                  unsigned IVSize,
2674                                                  bool IVSigned) {
2675   if (!CGF.HaveInsertPoint())
2676     return;
2677   // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2678   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2679   CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2680                       Args);
2681 }
2682 
2683 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2684                                           SourceLocation Loc, unsigned IVSize,
2685                                           bool IVSigned, Address IL,
2686                                           Address LB, Address UB,
2687                                           Address ST) {
2688   // Call __kmpc_dispatch_next(
2689   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2690   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2691   //          kmp_int[32|64] *p_stride);
2692   llvm::Value *Args[] = {
2693       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2694       IL.emitRawPointer(CGF), // &isLastIter
2695       LB.emitRawPointer(CGF), // &Lower
2696       UB.emitRawPointer(CGF), // &Upper
2697       ST.emitRawPointer(CGF)  // &Stride
2698   };
2699   llvm::Value *Call = CGF.EmitRuntimeCall(
2700       OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2701   return CGF.EmitScalarConversion(
2702       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2703       CGF.getContext().BoolTy, Loc);
2704 }
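
// The converted flag typically drives a dispatch loop emitted by the caller,
// of the form (sketch):
//   while (__kmpc_dispatch_next_4(&loc, tid, &last, &lb, &ub, &st)) {
//     for (i = lb; i <= ub; i += st)
//       <loop body>;
//   }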
2705 
2706 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2707                                            llvm::Value *NumThreads,
2708                                            SourceLocation Loc) {
2709   if (!CGF.HaveInsertPoint())
2710     return;
2711   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2712   llvm::Value *Args[] = {
2713       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2714       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2715   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2716                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2717                       Args);
2718 }
2719 
2720 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2721                                          ProcBindKind ProcBind,
2722                                          SourceLocation Loc) {
2723   if (!CGF.HaveInsertPoint())
2724     return;
2725   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2726   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2727   llvm::Value *Args[] = {
2728       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2729       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2730   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2731                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2732                       Args);
2733 }
2734 
2735 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2736                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
2737   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2738     OMPBuilder.createFlush(CGF.Builder);
2739   } else {
2740     if (!CGF.HaveInsertPoint())
2741       return;
2742     // Build call void __kmpc_flush(ident_t *loc)
2743     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2744                             CGM.getModule(), OMPRTL___kmpc_flush),
2745                         emitUpdateLocation(CGF, Loc));
2746   }
2747 }
2748 
2749 namespace {
2750 /// Indexes of fields for type kmp_task_t.
2751 enum KmpTaskTFields {
2752   /// List of shared variables.
2753   KmpTaskTShareds,
2754   /// Task routine.
2755   KmpTaskTRoutine,
2756   /// Partition id for the untied tasks.
2757   KmpTaskTPartId,
2758   /// Function with call of destructors for private variables.
2759   Data1,
2760   /// Task priority.
2761   Data2,
2762   /// (Taskloops only) Lower bound.
2763   KmpTaskTLowerBound,
2764   /// (Taskloops only) Upper bound.
2765   KmpTaskTUpperBound,
2766   /// (Taskloops only) Stride.
2767   KmpTaskTStride,
2768   /// (Taskloops only) Is last iteration flag.
2769   KmpTaskTLastIter,
2770   /// (Taskloops only) Reduction data.
2771   KmpTaskTReductions,
2772 };
2773 } // anonymous namespace
2774 
2775 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2776   // If we are in simd mode or there are no entries, we don't need to do
2777   // anything.
2778   if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2779     return;
2780 
2781   llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2782       [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2783              const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2784     SourceLocation Loc;
2785     if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2786       for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2787                 E = CGM.getContext().getSourceManager().fileinfo_end();
2788            I != E; ++I) {
2789         if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2790             I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2791           Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2792               I->getFirst(), EntryInfo.Line, 1);
2793           break;
2794         }
2795       }
2796     }
2797     switch (Kind) {
2798     case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2799       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2800           DiagnosticsEngine::Error, "Offloading entry for target region in "
2801                                     "%0 is incorrect: either the "
2802                                     "address or the ID is invalid.");
2803       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2804     } break;
2805     case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2806       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2807           DiagnosticsEngine::Error, "Offloading entry for declare target "
2808                                     "variable %0 is incorrect: the "
2809                                     "address is invalid.");
2810       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2811     } break;
2812     case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2813       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2814           DiagnosticsEngine::Error,
2815           "Offloading entry for declare target variable is incorrect: the "
2816           "address is invalid.");
2817       CGM.getDiags().Report(DiagID);
2818     } break;
2819     }
2820   };
2821 
2822   OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2823 }
2824 
2825 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2826   if (!KmpRoutineEntryPtrTy) {
2827     // Build the typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *) type.
2828     ASTContext &C = CGM.getContext();
2829     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2830     FunctionProtoType::ExtProtoInfo EPI;
2831     KmpRoutineEntryPtrQTy = C.getPointerType(
2832         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2833     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2834   }
2835 }
2836 
2837 namespace {
2838 struct PrivateHelpersTy {
2839   PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2840                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2841       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2842         PrivateElemInit(PrivateElemInit) {}
2843   PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2844   const Expr *OriginalRef = nullptr;
2845   const VarDecl *Original = nullptr;
2846   const VarDecl *PrivateCopy = nullptr;
2847   const VarDecl *PrivateElemInit = nullptr;
2848   bool isLocalPrivate() const {
2849     return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2850   }
2851 };
2852 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2853 } // anonymous namespace
2854 
2855 static bool isAllocatableDecl(const VarDecl *VD) {
2856   const VarDecl *CVD = VD->getCanonicalDecl();
2857   if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2858     return false;
2859   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2860   // Use the default allocation.
2861   return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2862            !AA->getAllocator());
2863 }
2864 
2865 static RecordDecl *
2866 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2867   if (!Privates.empty()) {
2868     ASTContext &C = CGM.getContext();
2869     // Build struct .kmp_privates_t. {
2870     //         /*  private vars  */
2871     //       };
2872     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2873     RD->startDefinition();
2874     for (const auto &Pair : Privates) {
2875       const VarDecl *VD = Pair.second.Original;
2876       QualType Type = VD->getType().getNonReferenceType();
2877       // If the private variable is a local variable with lvalue ref type,
2878       // allocate the pointer instead of the pointee type.
2879       if (Pair.second.isLocalPrivate()) {
2880         if (VD->getType()->isLValueReferenceType())
2881           Type = C.getPointerType(Type);
2882         if (isAllocatableDecl(VD))
2883           Type = C.getPointerType(Type);
2884       }
2885       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2886       if (VD->hasAttrs()) {
2887         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2888              E(VD->getAttrs().end());
2889              I != E; ++I)
2890           FD->addAttr(*I);
2891       }
2892     }
2893     RD->completeDefinition();
2894     return RD;
2895   }
2896   return nullptr;
2897 }
2898 
2899 static RecordDecl *
2900 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2901                          QualType KmpInt32Ty,
2902                          QualType KmpRoutineEntryPointerQTy) {
2903   ASTContext &C = CGM.getContext();
2904   // Build struct kmp_task_t {
2905   //         void *              shareds;
2906   //         kmp_routine_entry_t routine;
2907   //         kmp_int32           part_id;
2908   //         kmp_cmplrdata_t data1;
2909   //         kmp_cmplrdata_t data2;
2910   // For taskloops additional fields:
2911   //         kmp_uint64          lb;
2912   //         kmp_uint64          ub;
2913   //         kmp_int64           st;
2914   //         kmp_int32           liter;
2915   //         void *              reductions;
2916   //       };
2917   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2918   UD->startDefinition();
2919   addFieldToRecordDecl(C, UD, KmpInt32Ty);
2920   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2921   UD->completeDefinition();
2922   QualType KmpCmplrdataTy = C.getRecordType(UD);
2923   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2924   RD->startDefinition();
2925   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2926   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2927   addFieldToRecordDecl(C, RD, KmpInt32Ty);
2928   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2929   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2930   if (isOpenMPTaskLoopDirective(Kind)) {
2931     QualType KmpUInt64Ty =
2932         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2933     QualType KmpInt64Ty =
2934         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2935     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2936     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2937     addFieldToRecordDecl(C, RD, KmpInt64Ty);
2938     addFieldToRecordDecl(C, RD, KmpInt32Ty);
2939     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2940   }
2941   RD->completeDefinition();
2942   return RD;
2943 }
2944 
2945 static RecordDecl *
2946 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2947                                      ArrayRef<PrivateDataTy> Privates) {
2948   ASTContext &C = CGM.getContext();
2949   // Build struct kmp_task_t_with_privates {
2950   //         kmp_task_t task_data;
2951   //         .kmp_privates_t. privates;
2952   //       };
2953   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2954   RD->startDefinition();
2955   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2956   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2957     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2958   RD->completeDefinition();
2959   return RD;
2960 }
2961 
2962 /// Emit a proxy function which accepts kmp_task_t as the second
2963 /// argument.
2964 /// \code
2965 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2966 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
2967 ///   For taskloops:
2968 ///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
2969 ///   tt->reductions, tt->shareds);
2970 ///   return 0;
2971 /// }
2972 /// \endcode
2973 static llvm::Function *
2974 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2975                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
2976                       QualType KmpTaskTWithPrivatesPtrQTy,
2977                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2978                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
2979                       llvm::Value *TaskPrivatesMap) {
2980   ASTContext &C = CGM.getContext();
2981   FunctionArgList Args;
2982   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
2983                             ImplicitParamKind::Other);
2984   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2985                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
2986                                 ImplicitParamKind::Other);
2987   Args.push_back(&GtidArg);
2988   Args.push_back(&TaskTypeArg);
2989   const auto &TaskEntryFnInfo =
2990       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
2991   llvm::FunctionType *TaskEntryTy =
2992       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
2993   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
2994   auto *TaskEntry = llvm::Function::Create(
2995       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
2996   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
2997   TaskEntry->setDoesNotRecurse();
2998   CodeGenFunction CGF(CGM);
2999   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3000                     Loc, Loc);
3001 
3002   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3003   // tt,
3004   // For taskloops:
3005   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3006   // tt->task_data.shareds);
3007   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3008       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3009   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3010       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3011       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3012   const auto *KmpTaskTWithPrivatesQTyRD =
3013       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3014   LValue Base =
3015       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3016   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3017   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3018   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3019   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3020 
3021   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3022   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3023   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3024       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3025       CGF.ConvertTypeForMem(SharedsPtrTy));
3026 
3027   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3028   llvm::Value *PrivatesParam;
3029   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3030     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3031     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3032         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3033   } else {
3034     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3035   }
3036 
3037   llvm::Value *CommonArgs[] = {
3038       GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3039       CGF.Builder
3040           .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3041                                                CGF.VoidPtrTy, CGF.Int8Ty)
3042           .emitRawPointer(CGF)};
3043   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3044                                           std::end(CommonArgs));
3045   if (isOpenMPTaskLoopDirective(Kind)) {
3046     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3047     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3048     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3049     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3050     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3051     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3052     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3053     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3054     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3055     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3056     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3057     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3058     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3059     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3060     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3061     CallArgs.push_back(LBParam);
3062     CallArgs.push_back(UBParam);
3063     CallArgs.push_back(StParam);
3064     CallArgs.push_back(LIParam);
3065     CallArgs.push_back(RParam);
3066   }
3067   CallArgs.push_back(SharedsParam);
3068 
3069   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3070                                                   CallArgs);
3071   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3072                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3073   CGF.FinishFunction();
3074   return TaskEntry;
3075 }
3076 
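/// Emit a helper that runs the destructors of the privates embedded in the
/// task. A sketch of the generated function (the explicit dtor call stands in
/// for the per-field pushDestroy cleanups emitted below):
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
///                                 kmp_task_t_with_privates *tt) {
///   tt->privates.~PrivatesTy();
///   return 0;
/// }
/// \endcode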
3077 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3078                                             SourceLocation Loc,
3079                                             QualType KmpInt32Ty,
3080                                             QualType KmpTaskTWithPrivatesPtrQTy,
3081                                             QualType KmpTaskTWithPrivatesQTy) {
3082   ASTContext &C = CGM.getContext();
3083   FunctionArgList Args;
3084   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3085                             ImplicitParamKind::Other);
3086   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3087                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3088                                 ImplicitParamKind::Other);
3089   Args.push_back(&GtidArg);
3090   Args.push_back(&TaskTypeArg);
3091   const auto &DestructorFnInfo =
3092       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3093   llvm::FunctionType *DestructorFnTy =
3094       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3095   std::string Name =
3096       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3097   auto *DestructorFn =
3098       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3099                              Name, &CGM.getModule());
3100   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3101                                     DestructorFnInfo);
3102   DestructorFn->setDoesNotRecurse();
3103   CodeGenFunction CGF(CGM);
3104   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3105                     Args, Loc, Loc);
3106 
3107   LValue Base = CGF.EmitLoadOfPointerLValue(
3108       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3109       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3110   const auto *KmpTaskTWithPrivatesQTyRD =
3111       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3112   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3113   Base = CGF.EmitLValueForField(Base, *FI);
3114   for (const auto *Field :
3115        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3116     if (QualType::DestructionKind DtorKind =
3117             Field->getType().isDestructedType()) {
3118       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3119       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3120     }
3121   }
3122   CGF.FinishFunction();
3123   return DestructorFn;
3124 }
3125 
3126 /// Emit a privates mapping function for correct handling of private and
3127 /// firstprivate variables.
3128 /// \code
3129 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3130 /// **noalias priv1,...,  <tyn> **noalias privn) {
3131 ///   *priv1 = &.privates.priv1;
3132 ///   ...;
3133 ///   *privn = &.privates.privn;
3134 /// }
3135 /// \endcode
3136 static llvm::Value *
3137 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3138                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
3139                                ArrayRef<PrivateDataTy> Privates) {
3140   ASTContext &C = CGM.getContext();
3141   FunctionArgList Args;
3142   ImplicitParamDecl TaskPrivatesArg(
3143       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3144       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3145       ImplicitParamKind::Other);
3146   Args.push_back(&TaskPrivatesArg);
3147   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3148   unsigned Counter = 1;
3149   for (const Expr *E : Data.PrivateVars) {
3150     Args.push_back(ImplicitParamDecl::Create(
3151         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3152         C.getPointerType(C.getPointerType(E->getType()))
3153             .withConst()
3154             .withRestrict(),
3155         ImplicitParamKind::Other));
3156     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3157     PrivateVarsPos[VD] = Counter;
3158     ++Counter;
3159   }
3160   for (const Expr *E : Data.FirstprivateVars) {
3161     Args.push_back(ImplicitParamDecl::Create(
3162         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3163         C.getPointerType(C.getPointerType(E->getType()))
3164             .withConst()
3165             .withRestrict(),
3166         ImplicitParamKind::Other));
3167     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3168     PrivateVarsPos[VD] = Counter;
3169     ++Counter;
3170   }
3171   for (const Expr *E : Data.LastprivateVars) {
3172     Args.push_back(ImplicitParamDecl::Create(
3173         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3174         C.getPointerType(C.getPointerType(E->getType()))
3175             .withConst()
3176             .withRestrict(),
3177         ImplicitParamKind::Other));
3178     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3179     PrivateVarsPos[VD] = Counter;
3180     ++Counter;
3181   }
3182   for (const VarDecl *VD : Data.PrivateLocals) {
3183     QualType Ty = VD->getType().getNonReferenceType();
3184     if (VD->getType()->isLValueReferenceType())
3185       Ty = C.getPointerType(Ty);
3186     if (isAllocatableDecl(VD))
3187       Ty = C.getPointerType(Ty);
3188     Args.push_back(ImplicitParamDecl::Create(
3189         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3190         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3191         ImplicitParamKind::Other));
3192     PrivateVarsPos[VD] = Counter;
3193     ++Counter;
3194   }
3195   const auto &TaskPrivatesMapFnInfo =
3196       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3197   llvm::FunctionType *TaskPrivatesMapTy =
3198       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3199   std::string Name =
3200       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3201   auto *TaskPrivatesMap = llvm::Function::Create(
3202       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3203       &CGM.getModule());
3204   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3205                                     TaskPrivatesMapFnInfo);
3206   if (CGM.getLangOpts().Optimize) {
3207     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3208     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3209     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3210   }
3211   CodeGenFunction CGF(CGM);
3212   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3213                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3214 
3215   // *privi = &.privates.privi;
3216   LValue Base = CGF.EmitLoadOfPointerLValue(
3217       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3218       TaskPrivatesArg.getType()->castAs<PointerType>());
3219   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3220   Counter = 0;
3221   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3222     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3223     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3224     LValue RefLVal =
3225         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3226     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3227         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3228     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3229     ++Counter;
3230   }
3231   CGF.FinishFunction();
3232   return TaskPrivatesMap;
3233 }
3234 
3235 /// Emit initialization for private variables in task-based directives.
3236 static void emitPrivatesInit(CodeGenFunction &CGF,
3237                              const OMPExecutableDirective &D,
3238                              Address KmpTaskSharedsPtr, LValue TDBase,
3239                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3240                              QualType SharedsTy, QualType SharedsPtrTy,
3241                              const OMPTaskDataTy &Data,
3242                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3243   ASTContext &C = CGF.getContext();
3244   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3245   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3246   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3247                                  ? OMPD_taskloop
3248                                  : OMPD_task;
3249   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3250   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3251   LValue SrcBase;
3252   bool IsTargetTask =
3253       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3254       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3255   // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3256   // PointersArray, SizesArray, and MappersArray. The original variables for
3257   // these arrays are not captured and we get their addresses explicitly.
3258   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3259       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3260     SrcBase = CGF.MakeAddrLValue(
3261         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3262             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3263             CGF.ConvertTypeForMem(SharedsTy)),
3264         SharedsTy);
3265   }
3266   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3267   for (const PrivateDataTy &Pair : Privates) {
3268     // Do not initialize private locals.
3269     if (Pair.second.isLocalPrivate()) {
3270       ++FI;
3271       continue;
3272     }
3273     const VarDecl *VD = Pair.second.PrivateCopy;
3274     const Expr *Init = VD->getAnyInitializer();
3275     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3276                              !CGF.isTrivialInitializer(Init)))) {
3277       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3278       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3279         const VarDecl *OriginalVD = Pair.second.Original;
3280         // Check if the variable is the target-based BasePointersArray,
3281         // PointersArray, SizesArray, or MappersArray.
3282         LValue SharedRefLValue;
3283         QualType Type = PrivateLValue.getType();
3284         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3285         if (IsTargetTask && !SharedField) {
3286           assert(isa<ImplicitParamDecl>(OriginalVD) &&
3287                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3288                  cast<CapturedDecl>(OriginalVD->getDeclContext())
3289                          ->getNumParams() == 0 &&
3290                  isa<TranslationUnitDecl>(
3291                      cast<CapturedDecl>(OriginalVD->getDeclContext())
3292                          ->getDeclContext()) &&
3293                  "Expected artificial target data variable.");
3294           SharedRefLValue =
3295               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3296         } else if (ForDup) {
3297           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3298           SharedRefLValue = CGF.MakeAddrLValue(
3299               SharedRefLValue.getAddress().withAlignment(
3300                   C.getDeclAlign(OriginalVD)),
3301               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3302               SharedRefLValue.getTBAAInfo());
3303         } else if (CGF.LambdaCaptureFields.count(
3304                        Pair.second.Original->getCanonicalDecl()) > 0 ||
3305                    isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3306           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3307         } else {
3308           // Processing for implicitly captured variables.
3309           InlinedOpenMPRegionRAII Region(
3310               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3311               /*HasCancel=*/false, /*NoInheritance=*/true);
3312           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3313         }
3314         if (Type->isArrayType()) {
3315           // Initialize firstprivate array.
3316           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3317             // Perform simple memcpy.
3318             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3319           } else {
3320             // Initialize firstprivate array using element-by-element
3321             // initialization.
3322             CGF.EmitOMPAggregateAssign(
3323                 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3324                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3325                                                   Address SrcElement) {
3326                   // Clean up any temporaries needed by the initialization.
3327                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3328                   InitScope.addPrivate(Elem, SrcElement);
3329                   (void)InitScope.Privatize();
3330                   // Emit initialization for single element.
3331                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3332                       CGF, &CapturesInfo);
3333                   CGF.EmitAnyExprToMem(Init, DestElement,
3334                                        Init->getType().getQualifiers(),
3335                                        /*IsInitializer=*/false);
3336                 });
3337           }
3338         } else {
3339           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3340           InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3341           (void)InitScope.Privatize();
3342           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3343           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3344                              /*capturedByInit=*/false);
3345         }
3346       } else {
3347         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3348       }
3349     }
3350     ++FI;
3351   }
3352 }
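
// Roughly, for a firstprivate 'T a' the loop above produces either a plain
// memcpy from the captured shareds (trivial case) or, sketched in C++ terms:
//   tt->privates.a = T(src_shareds->a); // copy-construct from the shared copy
// Private locals and privates without an initializer are left untouched here.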
3353 
3354 /// Check if duplication function is required for taskloops.
3355 static bool checkInitIsRequired(CodeGenFunction &CGF,
3356                                 ArrayRef<PrivateDataTy> Privates) {
3357   bool InitRequired = false;
3358   for (const PrivateDataTy &Pair : Privates) {
3359     if (Pair.second.isLocalPrivate())
3360       continue;
3361     const VarDecl *VD = Pair.second.PrivateCopy;
3362     const Expr *Init = VD->getAnyInitializer();
3363     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3364                                     !CGF.isTrivialInitializer(Init));
3365     if (InitRequired)
3366       break;
3367   }
3368   return InitRequired;
3369 }
3370 
3372 /// Emit task_dup function (for initialization of
3373 /// private/firstprivate/lastprivate vars and last_iter flag)
3374 /// \code
3375 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3376 /// lastpriv) {
3377 /// // setup lastprivate flag
3378 ///    task_dst->last = lastpriv;
3379 /// // could be constructor calls here...
3380 /// }
3381 /// \endcode
3382 static llvm::Value *
3383 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3384                     const OMPExecutableDirective &D,
3385                     QualType KmpTaskTWithPrivatesPtrQTy,
3386                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3387                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3388                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3389                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3390   ASTContext &C = CGM.getContext();
3391   FunctionArgList Args;
3392   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3393                            KmpTaskTWithPrivatesPtrQTy,
3394                            ImplicitParamKind::Other);
3395   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3396                            KmpTaskTWithPrivatesPtrQTy,
3397                            ImplicitParamKind::Other);
3398   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3399                                 ImplicitParamKind::Other);
3400   Args.push_back(&DstArg);
3401   Args.push_back(&SrcArg);
3402   Args.push_back(&LastprivArg);
3403   const auto &TaskDupFnInfo =
3404       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3405   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3406   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3407   auto *TaskDup = llvm::Function::Create(
3408       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3409   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3410   TaskDup->setDoesNotRecurse();
3411   CodeGenFunction CGF(CGM);
3412   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3413                     Loc);
3414 
3415   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3416       CGF.GetAddrOfLocalVar(&DstArg),
3417       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3418   // task_dst->liter = lastpriv;
3419   if (WithLastIter) {
3420     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3421     LValue Base = CGF.EmitLValueForField(
3422         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3423     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3424     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3425         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3426     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3427   }
3428 
3429   // Emit initial values for private copies (if any).
3430   assert(!Privates.empty());
3431   Address KmpTaskSharedsPtr = Address::invalid();
3432   if (!Data.FirstprivateVars.empty()) {
3433     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3434         CGF.GetAddrOfLocalVar(&SrcArg),
3435         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3436     LValue Base = CGF.EmitLValueForField(
3437         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3438     KmpTaskSharedsPtr = Address(
3439         CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3440                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3441                                                   KmpTaskTShareds)),
3442                              Loc),
3443         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3444   }
3445   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3446                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3447   CGF.FinishFunction();
3448   return TaskDup;
3449 }
3450 
3451 /// Checks if destructor function is required to be generated.
3452 /// \return true if cleanups are required, false otherwise.
3453 static bool
3454 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3455                          ArrayRef<PrivateDataTy> Privates) {
3456   for (const PrivateDataTy &P : Privates) {
3457     if (P.second.isLocalPrivate())
3458       continue;
3459     QualType Ty = P.second.Original->getType().getNonReferenceType();
3460     if (Ty.isDestructedType())
3461       return true;
3462   }
3463   return false;
3464 }
3465 
3466 namespace {
3467 /// Loop generator for OpenMP iterator expression.
3468 class OMPIteratorGeneratorScope final
3469     : public CodeGenFunction::OMPPrivateScope {
3470   CodeGenFunction &CGF;
3471   const OMPIteratorExpr *E = nullptr;
3472   SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3473   SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3474   OMPIteratorGeneratorScope() = delete;
3475   OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3476 
3477 public:
3478   OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3479       : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3480     if (!E)
3481       return;
3482     SmallVector<llvm::Value *, 4> Uppers;
3483     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3484       Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3485       const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3486       addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3487       const OMPIteratorHelperData &HelperData = E->getHelper(I);
3488       addPrivate(
3489           HelperData.CounterVD,
3490           CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3491     }
3492     Privatize();
3493 
3494     for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3495       const OMPIteratorHelperData &HelperData = E->getHelper(I);
3496       LValue CLVal =
3497           CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3498                              HelperData.CounterVD->getType());
3499       // Counter = 0;
3500       CGF.EmitStoreOfScalar(
3501           llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3502           CLVal);
3503       CodeGenFunction::JumpDest &ContDest =
3504           ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3505       CodeGenFunction::JumpDest &ExitDest =
3506           ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3507       // N = <number-of_iterations>;
3508       llvm::Value *N = Uppers[I];
3509       // cont:
3510       // if (Counter < N) goto body; else goto exit;
3511       CGF.EmitBlock(ContDest.getBlock());
3512       auto *CVal =
3513           CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3514       llvm::Value *Cmp =
3515           HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3516               ? CGF.Builder.CreateICmpSLT(CVal, N)
3517               : CGF.Builder.CreateICmpULT(CVal, N);
3518       llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3519       CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3520       // body:
3521       CGF.EmitBlock(BodyBB);
3522       // Iteri = Begini + Counter * Stepi;
3523       CGF.EmitIgnoredExpr(HelperData.Update);
3524     }
3525   }
3526   ~OMPIteratorGeneratorScope() {
3527     if (!E)
3528       return;
3529     for (unsigned I = E->numOfIterators(); I > 0; --I) {
3530       // Counter = Counter + 1;
3531       const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3532       CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3533       // goto cont;
3534       CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3535       // exit:
3536       CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3537     }
3538   }
3539 };
3540 } // namespace
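
// For a clause such as 'iterator(it = 0 : n)', the scope above emits control
// flow equivalent to this sketch:
//   counter = 0;
// cont:
//   if (!(counter < n)) goto exit;
//   it = begin + counter * step;
//   <construct body>;
//   counter = counter + 1;
//   goto cont;
// exit: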
3541 
3542 static std::pair<llvm::Value *, llvm::Value *>
3543 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3544   const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3545   llvm::Value *Addr;
3546   if (OASE) {
3547     const Expr *Base = OASE->getBase();
3548     Addr = CGF.EmitScalarExpr(Base);
3549   } else {
3550     Addr = CGF.EmitLValue(E).getPointer(CGF);
3551   }
3552   llvm::Value *SizeVal;
3553   QualType Ty = E->getType();
3554   if (OASE) {
3555     SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3556     for (const Expr *SE : OASE->getDimensions()) {
3557       llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3558       Sz = CGF.EmitScalarConversion(
3559           Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3560       SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3561     }
3562   } else if (const auto *ASE =
3563                  dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3564     LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3565     Address UpAddrAddress = UpAddrLVal.getAddress();
3566     llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3567         UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3568         /*Idx0=*/1);
3569     llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3570     llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3571     SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3572   } else {
3573     SizeVal = CGF.getTypeSize(Ty);
3574   }
3575   return std::make_pair(Addr, SizeVal);
3576 }
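
// Examples of the size computation above (sketch): for a shaping expression
// '([n][m])p' the size is sizeof(*p) * n * m; for an array section 'a[lo:len]'
// it is ((char *)(&a[lo + len - 1] + 1)) - ((char *)&a[lo]).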
3577 
3578 /// Builds kmp_task_affinity_info_t, if it is not built yet, and builds the flags type.
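/// A sketch of the resulting record (field names are illustrative; only the
/// field types below are what the code adds):
/// \code
/// struct kmp_task_affinity_info_t {
///   intptr_t base_addr;
///   size_t len;
///   uint32_t flags;
/// };
/// \endcode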
3579 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3580   QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3581   if (KmpTaskAffinityInfoTy.isNull()) {
3582     RecordDecl *KmpAffinityInfoRD =
3583         C.buildImplicitRecord("kmp_task_affinity_info_t");
3584     KmpAffinityInfoRD->startDefinition();
3585     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3586     addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3587     addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3588     KmpAffinityInfoRD->completeDefinition();
3589     KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3590   }
3591 }
3592 
3593 CGOpenMPRuntime::TaskResultTy
3594 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3595                               const OMPExecutableDirective &D,
3596                               llvm::Function *TaskFunction, QualType SharedsTy,
3597                               Address Shareds, const OMPTaskDataTy &Data) {
3598   ASTContext &C = CGM.getContext();
3599   llvm::SmallVector<PrivateDataTy, 4> Privates;
3600   // Aggregate privates and sort them by alignment, in descending order.
3601   const auto *I = Data.PrivateCopies.begin();
3602   for (const Expr *E : Data.PrivateVars) {
3603     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3604     Privates.emplace_back(
3605         C.getDeclAlign(VD),
3606         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3607                          /*PrivateElemInit=*/nullptr));
3608     ++I;
3609   }
3610   I = Data.FirstprivateCopies.begin();
3611   const auto *IElemInitRef = Data.FirstprivateInits.begin();
3612   for (const Expr *E : Data.FirstprivateVars) {
3613     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3614     Privates.emplace_back(
3615         C.getDeclAlign(VD),
3616         PrivateHelpersTy(
3617             E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3618             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3619     ++I;
3620     ++IElemInitRef;
3621   }
3622   I = Data.LastprivateCopies.begin();
3623   for (const Expr *E : Data.LastprivateVars) {
3624     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3625     Privates.emplace_back(
3626         C.getDeclAlign(VD),
3627         PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3628                          /*PrivateElemInit=*/nullptr));
3629     ++I;
3630   }
3631   for (const VarDecl *VD : Data.PrivateLocals) {
3632     if (isAllocatableDecl(VD))
3633       Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3634     else
3635       Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3636   }
3637   llvm::stable_sort(Privates,
3638                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
3639                       return L.first > R.first;
3640                     });
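  // E.g., for privates {int a; double b; char c} the sort yields the order
  // {b (align 8), a (align 4), c (align 1)}, which minimizes padding in the
  // privates block.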
3641   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3642   // Build type kmp_routine_entry_t (if not built yet).
3643   emitKmpRoutineEntryT(KmpInt32Ty);
3644   // Build type kmp_task_t (if not built yet).
3645   if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3646     if (SavedKmpTaskloopTQTy.isNull()) {
3647       SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3648           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3649     }
3650     KmpTaskTQTy = SavedKmpTaskloopTQTy;
3651   } else {
3652     assert((D.getDirectiveKind() == OMPD_task ||
3653             isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3654             isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3655            "Expected taskloop, task or target directive");
3656     if (SavedKmpTaskTQTy.isNull()) {
3657       SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3658           CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3659     }
3660     KmpTaskTQTy = SavedKmpTaskTQTy;
3661   }
3662   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3663   // Build particular struct kmp_task_t for the given task.
3664   const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3665       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3666   QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3667   QualType KmpTaskTWithPrivatesPtrQTy =
3668       C.getPointerType(KmpTaskTWithPrivatesQTy);
3669   llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3670   llvm::Value *KmpTaskTWithPrivatesTySize =
3671       CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3672   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3673 
3674   // Emit initial values for private copies (if any).
3675   llvm::Value *TaskPrivatesMap = nullptr;
3676   llvm::Type *TaskPrivatesMapTy =
3677       std::next(TaskFunction->arg_begin(), 3)->getType();
3678   if (!Privates.empty()) {
3679     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3680     TaskPrivatesMap =
3681         emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3682     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3683         TaskPrivatesMap, TaskPrivatesMapTy);
3684   } else {
3685     TaskPrivatesMap = llvm::ConstantPointerNull::get(
3686         cast<llvm::PointerType>(TaskPrivatesMapTy));
3687   }
3688   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3689   // kmp_task_t *tt);
3690   llvm::Function *TaskEntry = emitProxyTaskFunction(
3691       CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3692       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3693       TaskPrivatesMap);
3694 
3695   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3696   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3697   // kmp_routine_entry_t *task_entry);
3698   // Task flags. Format is taken from
3699   // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3700   // description of kmp_tasking_flags struct.
3701   enum {
3702     TiedFlag = 0x1,
3703     FinalFlag = 0x2,
3704     DestructorsFlag = 0x8,
3705     PriorityFlag = 0x20,
3706     DetachableFlag = 0x40,
3707   };
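  // For example, a tied task with a priority clause and privates that require
  // destructors gets TiedFlag | DestructorsFlag | PriorityFlag == 0x29.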
3708   unsigned Flags = Data.Tied ? TiedFlag : 0;
3709   bool NeedsCleanup = false;
3710   if (!Privates.empty()) {
3711     NeedsCleanup =
3712         checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3713     if (NeedsCleanup)
3714       Flags = Flags | DestructorsFlag;
3715   }
3716   if (Data.Priority.getInt())
3717     Flags = Flags | PriorityFlag;
3718   if (D.hasClausesOfKind<OMPDetachClause>())
3719     Flags = Flags | DetachableFlag;
3720   llvm::Value *TaskFlags =
3721       Data.Final.getPointer()
3722           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3723                                      CGF.Builder.getInt32(FinalFlag),
3724                                      CGF.Builder.getInt32(/*C=*/0))
3725           : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3726   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3727   llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3728   SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3729       getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3730       SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3731           TaskEntry, KmpRoutineEntryPtrTy)};
3732   llvm::Value *NewTask;
3733   if (D.hasClausesOfKind<OMPNowaitClause>()) {
3734     // Check if we have any device clause associated with the directive.
3735     const Expr *Device = nullptr;
3736     if (auto *C = D.getSingleClause<OMPDeviceClause>())
3737       Device = C->getDevice();
3738     // Emit the device ID if present; otherwise use the default value.
3739     llvm::Value *DeviceID;
3740     if (Device)
3741       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3742                                            CGF.Int64Ty, /*isSigned=*/true);
3743     else
3744       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3745     AllocArgs.push_back(DeviceID);
3746     NewTask = CGF.EmitRuntimeCall(
3747         OMPBuilder.getOrCreateRuntimeFunction(
3748             CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3749         AllocArgs);
3750   } else {
3751     NewTask =
3752         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3753                                 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3754                             AllocArgs);
3755   }
3756   // Emit detach clause initialization.
3757   // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3758   // task_descriptor);
3759   if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3760     const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3761     LValue EvtLVal = CGF.EmitLValue(Evt);
3762 
3763     // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3764     // int gtid, kmp_task_t *task);
3765     llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3766     llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3767     Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3768     llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3769         OMPBuilder.getOrCreateRuntimeFunction(
3770             CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3771         {Loc, Tid, NewTask});
3772     EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3773                                       Evt->getExprLoc());
3774     CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3775   }
3776   // Process affinity clauses.
3777   if (D.hasClausesOfKind<OMPAffinityClause>()) {
3778     // Process list of affinity data.
3779     ASTContext &C = CGM.getContext();
3780     Address AffinitiesArray = Address::invalid();
3781     // Calculate number of elements to form the array of affinity data.
3782     llvm::Value *NumOfElements = nullptr;
3783     unsigned NumAffinities = 0;
3784     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3785       if (const Expr *Modifier = C->getModifier()) {
3786         const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3787         for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3788           llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3789           Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3790           NumOfElements =
3791               NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3792         }
3793       } else {
3794         NumAffinities += C->varlist_size();
3795       }
3796     }
3797     getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3798     // Field ids in the kmp_task_affinity_info record.
3799     enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3800 
3801     QualType KmpTaskAffinityInfoArrayTy;
3802     if (NumOfElements) {
3803       NumOfElements = CGF.Builder.CreateNUWAdd(
3804           llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3805       auto *OVE = new (C) OpaqueValueExpr(
3806           Loc,
3807           C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3808           VK_PRValue);
3809       CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3810                                                     RValue::get(NumOfElements));
3811       KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3812           KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3813           /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
3814       // Properly emit variable-sized array.
3815       auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3816                                            ImplicitParamKind::Other);
3817       CGF.EmitVarDecl(*PD);
3818       AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3819       NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3820                                                 /*isSigned=*/false);
3821     } else {
3822       KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3823           KmpTaskAffinityInfoTy,
3824           llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3825           ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3826       AffinitiesArray =
3827           CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3828       AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3829       NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3830                                              /*isSigned=*/false);
3831     }
3832 
3833     const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3834     // Fill the array with elements that have no iterator modifier.
3835     unsigned Pos = 0;
3836     bool HasIterator = false;
3837     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3838       if (C->getModifier()) {
3839         HasIterator = true;
3840         continue;
3841       }
3842       for (const Expr *E : C->varlist()) {
3843         llvm::Value *Addr;
3844         llvm::Value *Size;
3845         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3846         LValue Base =
3847             CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3848                                KmpTaskAffinityInfoTy);
3849         // affs[i].base_addr = &<Affinities[i].second>;
3850         LValue BaseAddrLVal = CGF.EmitLValueForField(
3851             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3852         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3853                               BaseAddrLVal);
3854         // affs[i].len = sizeof(<Affinities[i].second>);
3855         LValue LenLVal = CGF.EmitLValueForField(
3856             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3857         CGF.EmitStoreOfScalar(Size, LenLVal);
3858         ++Pos;
3859       }
3860     }
3861     LValue PosLVal;
3862     if (HasIterator) {
3863       PosLVal = CGF.MakeAddrLValue(
3864           CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3865           C.getSizeType());
3866       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3867     }
3868     // Process elements with iterators.
3869     for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3870       const Expr *Modifier = C->getModifier();
3871       if (!Modifier)
3872         continue;
3873       OMPIteratorGeneratorScope IteratorScope(
3874           CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3875       for (const Expr *E : C->varlist()) {
3876         llvm::Value *Addr;
3877         llvm::Value *Size;
3878         std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3879         llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3880         LValue Base =
3881             CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3882                                KmpTaskAffinityInfoTy);
3883         // affs[i].base_addr = &<Affinities[i].second>;
3884         LValue BaseAddrLVal = CGF.EmitLValueForField(
3885             Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3886         CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3887                               BaseAddrLVal);
3888         // affs[i].len = sizeof(<Affinities[i].second>);
3889         LValue LenLVal = CGF.EmitLValueForField(
3890             Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3891         CGF.EmitStoreOfScalar(Size, LenLVal);
3892         Idx = CGF.Builder.CreateNUWAdd(
3893             Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3894         CGF.EmitStoreOfScalar(Idx, PosLVal);
3895       }
3896     }
3897     // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3898     // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3899     // naffins, kmp_task_affinity_info_t *affin_list);
3900     llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3901     llvm::Value *GTid = getThreadID(CGF, Loc);
3902     llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3903         AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3904     // FIXME: Emit the call and ignore its result for now, until the
3905     // runtime function is properly implemented.
3906     (void)CGF.EmitRuntimeCall(
3907         OMPBuilder.getOrCreateRuntimeFunction(
3908             CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3909         {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3910   }
3911   llvm::Value *NewTaskNewTaskTTy =
3912       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3913           NewTask, KmpTaskTWithPrivatesPtrTy);
3914   LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3915                                                   KmpTaskTWithPrivatesQTy);
3916   LValue TDBase =
3917       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3918   // Fill the data in the resulting kmp_task_t record.
3919   // Copy shareds if there are any.
3920   Address KmpTaskSharedsPtr = Address::invalid();
3921   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3922     KmpTaskSharedsPtr = Address(
3923         CGF.EmitLoadOfScalar(
3924             CGF.EmitLValueForField(
3925                 TDBase,
3926                 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3927             Loc),
3928         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3929     LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3930     LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3931     CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3932   }
3933   // Emit initial values for private copies (if any).
3934   TaskResultTy Result;
3935   if (!Privates.empty()) {
3936     emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3937                      SharedsTy, SharedsPtrTy, Data, Privates,
3938                      /*ForDup=*/false);
3939     if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3940         (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3941       Result.TaskDupFn = emitTaskDupFunction(
3942           CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3943           KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3944           /*WithLastIter=*/!Data.LastprivateVars.empty());
3945     }
3946   }
3947   // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3948   enum { Priority = 0, Destructors = 1 };
3949   // Provide pointer to function with destructors for privates.
3950   auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3951   const RecordDecl *KmpCmplrdataUD =
3952       (*FI)->getType()->getAsUnionType()->getDecl();
3953   if (NeedsCleanup) {
3954     llvm::Value *DestructorFn = emitDestructorsFunction(
3955         CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3956         KmpTaskTWithPrivatesQTy);
3957     LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3958     LValue DestructorsLV = CGF.EmitLValueForField(
3959         Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3960     CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3961                               DestructorFn, KmpRoutineEntryPtrTy),
3962                           DestructorsLV);
3963   }
3964   // Set priority.
3965   if (Data.Priority.getInt()) {
3966     LValue Data2LV = CGF.EmitLValueForField(
3967         TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3968     LValue PriorityLV = CGF.EmitLValueForField(
3969         Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3970     CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3971   }
3972   Result.NewTask = NewTask;
3973   Result.TaskEntry = TaskEntry;
3974   Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3975   Result.TDBase = TDBase;
3976   Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3977   return Result;
3978 }
3979 
3980 /// Translates internal dependency kind into the runtime kind.
3981 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3982   RTLDependenceKindTy DepKind;
3983   switch (K) {
3984   case OMPC_DEPEND_in:
3985     DepKind = RTLDependenceKindTy::DepIn;
3986     break;
3987   // Out and InOut dependencies must use the same code.
3988   case OMPC_DEPEND_out:
3989   case OMPC_DEPEND_inout:
3990     DepKind = RTLDependenceKindTy::DepInOut;
3991     break;
3992   case OMPC_DEPEND_mutexinoutset:
3993     DepKind = RTLDependenceKindTy::DepMutexInOutSet;
3994     break;
3995   case OMPC_DEPEND_inoutset:
3996     DepKind = RTLDependenceKindTy::DepInOutSet;
3997     break;
3998   case OMPC_DEPEND_outallmemory:
3999     DepKind = RTLDependenceKindTy::DepOmpAllMem;
4000     break;
4001   case OMPC_DEPEND_source:
4002   case OMPC_DEPEND_sink:
4003   case OMPC_DEPEND_depobj:
4004   case OMPC_DEPEND_inoutallmemory:
4005   case OMPC_DEPEND_unknown:
4006     llvm_unreachable("Unknown task dependence type");
4007   }
4008   return DepKind;
4009 }
4010 
4011 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4012 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4013                            QualType &FlagsTy) {
4014   FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4015   if (KmpDependInfoTy.isNull()) {
4016     RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4017     KmpDependInfoRD->startDefinition();
4018     addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4019     addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4020     addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4021     KmpDependInfoRD->completeDefinition();
4022     KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4023   }
4024 }
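// For reference, the record built above corresponds roughly to the runtime's
// dependency descriptor (an illustrative sketch; field names mirror
// RTLDependInfoFields):
//
//   struct kmp_depend_info {
//     intptr_t base_addr;  // address of the dependent object
//     size_t len;          // size of the dependent object in bytes
//     unsigned char flags; // bool-width field holding RTLDependenceKindTy
//   };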
4025 
4026 std::pair<llvm::Value *, LValue>
4027 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4028                                    SourceLocation Loc) {
4029   ASTContext &C = CGM.getContext();
4030   QualType FlagsTy;
4031   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4032   RecordDecl *KmpDependInfoRD =
4033       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4034   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4035   LValue Base = CGF.EmitLoadOfPointerLValue(
4036       DepobjLVal.getAddress().withElementType(
4037           CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4038       KmpDependInfoPtrTy->castAs<PointerType>());
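  // The depobj handle points at element 1 of the underlying allocation;
  // element 0 (at index -1 from here) stores the number of dependencies in
  // its base_addr field (see emitDepobjDependClause).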
4039   Address DepObjAddr = CGF.Builder.CreateGEP(
4040       CGF, Base.getAddress(),
4041       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4042   LValue NumDepsBase = CGF.MakeAddrLValue(
4043       DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4044   // NumDeps = deps[-1].base_addr, i.e. the count stored before the array.
4045   LValue BaseAddrLVal = CGF.EmitLValueForField(
4046       NumDepsBase,
4047       *std::next(KmpDependInfoRD->field_begin(),
4048                  static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4049   llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4050   return std::make_pair(NumDeps, Base);
4051 }
4052 
4053 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4054                            llvm::PointerUnion<unsigned *, LValue *> Pos,
4055                            const OMPTaskDataTy::DependData &Data,
4056                            Address DependenciesArray) {
4057   CodeGenModule &CGM = CGF.CGM;
4058   ASTContext &C = CGM.getContext();
4059   QualType FlagsTy;
4060   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4061   RecordDecl *KmpDependInfoRD =
4062       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4063   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4064 
4065   OMPIteratorGeneratorScope IteratorScope(
4066       CGF, cast_or_null<OMPIteratorExpr>(
4067                Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4068                                  : nullptr));
4069   for (const Expr *E : Data.DepExprs) {
4070     llvm::Value *Addr;
4071     llvm::Value *Size;
4072 
4073     // The expression will be a nullptr in the 'omp_all_memory' case.
4074     if (E) {
4075       std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4076       Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4077     } else {
4078       Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4079       Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4080     }
4081     LValue Base;
4082     if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4083       Base = CGF.MakeAddrLValue(
4084           CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4085     } else {
4086       assert(E && "Expected a non-null expression");
4087       LValue &PosLVal = *cast<LValue *>(Pos);
4088       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4089       Base = CGF.MakeAddrLValue(
4090           CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4091     }
4092     // deps[i].base_addr = &<Dependencies[i].second>;
4093     LValue BaseAddrLVal = CGF.EmitLValueForField(
4094         Base,
4095         *std::next(KmpDependInfoRD->field_begin(),
4096                    static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4097     CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4098     // deps[i].len = sizeof(<Dependencies[i].second>);
4099     LValue LenLVal = CGF.EmitLValueForField(
4100         Base, *std::next(KmpDependInfoRD->field_begin(),
4101                          static_cast<unsigned int>(RTLDependInfoFields::Len)));
4102     CGF.EmitStoreOfScalar(Size, LenLVal);
4103     // deps[i].flags = <Dependencies[i].first>;
4104     RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4105     LValue FlagsLVal = CGF.EmitLValueForField(
4106         Base,
4107         *std::next(KmpDependInfoRD->field_begin(),
4108                    static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4109     CGF.EmitStoreOfScalar(
4110         llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4111         FlagsLVal);
4112     if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4113       ++(*P);
4114     } else {
4115       LValue &PosLVal = *cast<LValue *>(Pos);
4116       llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4117       Idx = CGF.Builder.CreateNUWAdd(Idx,
4118                                      llvm::ConstantInt::get(Idx->getType(), 1));
4119       CGF.EmitStoreOfScalar(Idx, PosLVal);
4120     }
4121   }
4122 }
4123 
4124 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4125     CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4126     const OMPTaskDataTy::DependData &Data) {
4127   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4128          "Expected depobj dependency kind.");
4129   SmallVector<llvm::Value *, 4> Sizes;
4130   SmallVector<LValue, 4> SizeLVals;
4131   ASTContext &C = CGF.getContext();
4132   {
4133     OMPIteratorGeneratorScope IteratorScope(
4134         CGF, cast_or_null<OMPIteratorExpr>(
4135                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4136                                    : nullptr));
4137     for (const Expr *E : Data.DepExprs) {
4138       llvm::Value *NumDeps;
4139       LValue Base;
4140       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4141       std::tie(NumDeps, Base) =
4142           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4143       LValue NumLVal = CGF.MakeAddrLValue(
4144           CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4145           C.getUIntPtrType());
4146       CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4147                               NumLVal.getAddress());
4148       llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4149       llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4150       CGF.EmitStoreOfScalar(Add, NumLVal);
4151       SizeLVals.push_back(NumLVal);
4152     }
4153   }
4154   for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4155     llvm::Value *Size =
4156         CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4157     Sizes.push_back(Size);
4158   }
4159   return Sizes;
4160 }
4161 
4162 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4163                                          QualType &KmpDependInfoTy,
4164                                          LValue PosLVal,
4165                                          const OMPTaskDataTy::DependData &Data,
4166                                          Address DependenciesArray) {
4167   assert(Data.DepKind == OMPC_DEPEND_depobj &&
4168          "Expected depobj dependency kind.");
4169   llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4170   {
4171     OMPIteratorGeneratorScope IteratorScope(
4172         CGF, cast_or_null<OMPIteratorExpr>(
4173                  Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4174                                    : nullptr));
4175     for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4176       const Expr *E = Data.DepExprs[I];
4177       llvm::Value *NumDeps;
4178       LValue Base;
4179       LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4180       std::tie(NumDeps, Base) =
4181           getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4182 
4183       // Copy the dependency data via memcpy.
4184       llvm::Value *Size = CGF.Builder.CreateNUWMul(
4185           ElSize,
4186           CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4187       llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4188       Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4189       CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4190 
4191       // Advance pos past the copied elements.
4192       // pos += numDeps;
4193       llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4194       CGF.EmitStoreOfScalar(Add, PosLVal);
4195     }
4196   }
4197 }
4198 
4199 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4200     CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4201     SourceLocation Loc) {
4202   if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4203         return D.DepExprs.empty();
4204       }))
4205     return std::make_pair(nullptr, Address::invalid());
4206   // Process list of dependencies.
4207   ASTContext &C = CGM.getContext();
4208   Address DependenciesArray = Address::invalid();
4209   llvm::Value *NumOfElements = nullptr;
4210   unsigned NumDependencies = std::accumulate(
4211       Dependencies.begin(), Dependencies.end(), 0,
4212       [](unsigned V, const OMPTaskDataTy::DependData &D) {
4213         return D.DepKind == OMPC_DEPEND_depobj
4214                    ? V
4215                    : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4216       });
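  // E.g., 'depend(in: a, b) depend(depobj: d) depend(iterator(i=0:n), out: x[i])'
  // yields NumDependencies == 2 here; the depobj and iterator contributions
  // are counted dynamically below.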
4217   QualType FlagsTy;
4218   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4219   bool HasDepobjDeps = false;
4220   bool HasRegularWithIterators = false;
4221   llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4222   llvm::Value *NumOfRegularWithIterators =
4223       llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4224   // Calculate the number of depobj dependencies and of regular
4225   // dependencies with iterator modifiers.
4226   for (const OMPTaskDataTy::DependData &D : Dependencies) {
4227     if (D.DepKind == OMPC_DEPEND_depobj) {
4228       SmallVector<llvm::Value *, 4> Sizes =
4229           emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4230       for (llvm::Value *Size : Sizes) {
4231         NumOfDepobjElements =
4232             CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4233       }
4234       HasDepobjDeps = true;
4235       continue;
4236     }
4237     // Include the trip count of the iterator modifier, if any.
4238 
4239     if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4240       llvm::Value *ClauseIteratorSpace =
4241           llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4242       for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4243         llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4244         Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4245         ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4246       }
4247       llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4248           ClauseIteratorSpace,
4249           llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4250       NumOfRegularWithIterators =
4251           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4252       HasRegularWithIterators = true;
4253       continue;
4254     }
4255   }
4256 
4257   QualType KmpDependInfoArrayTy;
4258   if (HasDepobjDeps || HasRegularWithIterators) {
4259     NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4260                                            /*isSigned=*/false);
4261     if (HasDepobjDeps) {
4262       NumOfElements =
4263           CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4264     }
4265     if (HasRegularWithIterators) {
4266       NumOfElements =
4267           CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4268     }
4269     auto *OVE = new (C) OpaqueValueExpr(
4270         Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4271         VK_PRValue);
4272     CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4273                                                   RValue::get(NumOfElements));
4274     KmpDependInfoArrayTy =
4275         C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4276                                /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4277     // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4278     // Properly emit variable-sized array.
4279     auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4280                                          ImplicitParamKind::Other);
4281     CGF.EmitVarDecl(*PD);
4282     DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4283     NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4284                                               /*isSigned=*/false);
4285   } else {
4286     KmpDependInfoArrayTy = C.getConstantArrayType(
4287         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4288         ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4289     DependenciesArray =
4290         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4291     DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4292     NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4293                                            /*isSigned=*/false);
4294   }
4295   unsigned Pos = 0;
4296   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4297     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4298         Dependencies[I].IteratorExpr)
4299       continue;
4300     emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4301                    DependenciesArray);
4302   }
4303   // Copy regular dependencies with iterators.
4304   LValue PosLVal = CGF.MakeAddrLValue(
4305       CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4306   CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4307   for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4308     if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4309         !Dependencies[I].IteratorExpr)
4310       continue;
4311     emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4312                    DependenciesArray);
4313   }
4314   // Finally, copy the depobj dependency arrays.
4315   if (HasDepobjDeps) {
4316     for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4317       if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4318         continue;
4319       emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4320                          DependenciesArray);
4321     }
4322   }
4323   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4324       DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4325   return std::make_pair(NumOfElements, DependenciesArray);
4326 }
4327 
4328 Address CGOpenMPRuntime::emitDepobjDependClause(
4329     CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4330     SourceLocation Loc) {
4331   if (Dependencies.DepExprs.empty())
4332     return Address::invalid();
4333   // Process list of dependencies.
4334   ASTContext &C = CGM.getContext();
4335   Address DependenciesArray = Address::invalid();
4336   unsigned NumDependencies = Dependencies.DepExprs.size();
4337   QualType FlagsTy;
4338   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4339   RecordDecl *KmpDependInfoRD =
4340       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4341 
4342   llvm::Value *Size;
4343   // Define type kmp_depend_info[<Dependencies.size()>];
4344   // For depobj, reserve one extra element to store the number of elements.
4345   // This is required to handle the 'depobj(x) update(in)' construct.
4346   // kmp_depend_info[<Dependencies.size()>] deps;
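  // The resulting allocation layout is roughly:
  //   deps[0].base_addr = <number of dependencies>  (count slot)
  //   deps[1 .. N]      = <dependency entries>
  // and the address returned to the user points at deps[1].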
4347   llvm::Value *NumDepsVal;
4348   CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4349   if (const auto *IE =
4350           cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4351     NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4352     for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4353       llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4354       Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4355       NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4356     }
4357     Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4358                                     NumDepsVal);
4359     CharUnits SizeInBytes =
4360         C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4361     llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4362     Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4363     NumDepsVal =
4364         CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4365   } else {
4366     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4367         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4368         nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4369     CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4370     Size = CGM.getSize(Sz.alignTo(Align));
4371     NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4372   }
4373   // The array needs to be allocated in dynamic memory.
4374   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4375   // Use default allocator.
4376   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4377   llvm::Value *Args[] = {ThreadID, Size, Allocator};
4378 
4379   llvm::Value *Addr =
4380       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4381                               CGM.getModule(), OMPRTL___kmpc_alloc),
4382                           Args, ".dep.arr.addr");
4383   llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4384   Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4385       Addr, CGF.Builder.getPtrTy(0));
4386   DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4387   // Write number of elements in the first element of array for depobj.
4388   LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4389   // deps[i].base_addr = NumDependencies;
4390   LValue BaseAddrLVal = CGF.EmitLValueForField(
4391       Base,
4392       *std::next(KmpDependInfoRD->field_begin(),
4393                  static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4394   CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4395   llvm::PointerUnion<unsigned *, LValue *> Pos;
4396   unsigned Idx = 1;
4397   LValue PosLVal;
4398   if (Dependencies.IteratorExpr) {
4399     PosLVal = CGF.MakeAddrLValue(
4400         CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4401         C.getSizeType());
4402     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4403                           /*IsInit=*/true);
4404     Pos = &PosLVal;
4405   } else {
4406     Pos = &Idx;
4407   }
4408   emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4409   DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4410       CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4411       CGF.Int8Ty);
4412   return DependenciesArray;
4413 }
4414 
4415 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4416                                         SourceLocation Loc) {
4417   ASTContext &C = CGM.getContext();
4418   QualType FlagsTy;
4419   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4420   LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4421                                             C.VoidPtrTy.castAs<PointerType>());
4422   QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4423   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4424       Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4425       CGF.ConvertTypeForMem(KmpDependInfoTy));
4426   llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4427       Addr.getElementType(), Addr.emitRawPointer(CGF),
4428       llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4429   DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4430                                                                CGF.VoidPtrTy);
4431   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4432   // Use default allocator.
4433   llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4434   llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4435 
4436   // __kmpc_free(gtid, addr, nullptr);
4437   (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4438                                 CGM.getModule(), OMPRTL___kmpc_free),
4439                             Args);
4440 }
4441 
4442 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4443                                        OpenMPDependClauseKind NewDepKind,
4444                                        SourceLocation Loc) {
4445   ASTContext &C = CGM.getContext();
4446   QualType FlagsTy;
4447   getDependTypes(C, KmpDependInfoTy, FlagsTy);
4448   RecordDecl *KmpDependInfoRD =
4449       cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4450   llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4451   llvm::Value *NumDeps;
4452   LValue Base;
4453   std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4454 
4455   Address Begin = Base.getAddress();
4456   // Compute the end pointer: one element past the last dependency entry.
4457   llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4458                                            Begin.emitRawPointer(CGF), NumDeps);
4459   // The basic structure here is a do-while loop; the body runs at least once.
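  // In pseudo-C, the emitted loop is roughly:
  //   kmp_depend_info *p = begin;
  //   do { p->flags = <new kind>; ++p; } while (p != end);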
4460   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4461   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4462   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4463   CGF.EmitBlock(BodyBB);
4464   llvm::PHINode *ElementPHI =
4465       CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4466   ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4467   Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4468   Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4469                             Base.getTBAAInfo());
4470   // deps[i].flags = NewDepKind;
4471   RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4472   LValue FlagsLVal = CGF.EmitLValueForField(
4473       Base, *std::next(KmpDependInfoRD->field_begin(),
4474                        static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4475   CGF.EmitStoreOfScalar(
4476       llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4477       FlagsLVal);
4478 
4479   // Shift the address forward by one element.
4480   llvm::Value *ElementNext =
4481       CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4482           .emitRawPointer(CGF);
4483   ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4484   llvm::Value *IsEmpty =
4485       CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4486   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4487   // Done.
4488   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4489 }
4490 
4491 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4492                                    const OMPExecutableDirective &D,
4493                                    llvm::Function *TaskFunction,
4494                                    QualType SharedsTy, Address Shareds,
4495                                    const Expr *IfCond,
4496                                    const OMPTaskDataTy &Data) {
4497   if (!CGF.HaveInsertPoint())
4498     return;
4499 
4500   TaskResultTy Result =
4501       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4502   llvm::Value *NewTask = Result.NewTask;
4503   llvm::Function *TaskEntry = Result.TaskEntry;
4504   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4505   LValue TDBase = Result.TDBase;
4506   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4507   // Process list of dependences.
4508   Address DependenciesArray = Address::invalid();
4509   llvm::Value *NumOfElements;
4510   std::tie(NumOfElements, DependenciesArray) =
4511       emitDependClause(CGF, Data.Dependences, Loc);
4512 
4513   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4514   // libcall.
4515   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4516   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4517   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4518   // list is not empty
4519   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4520   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4521   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4522   llvm::Value *DepTaskArgs[7];
4523   if (!Data.Dependences.empty()) {
4524     DepTaskArgs[0] = UpLoc;
4525     DepTaskArgs[1] = ThreadID;
4526     DepTaskArgs[2] = NewTask;
4527     DepTaskArgs[3] = NumOfElements;
4528     DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4529     DepTaskArgs[5] = CGF.Builder.getInt32(0);
4530     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4531   }
4532   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4533                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4534     if (!Data.Tied) {
4535       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4536       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4537       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4538     }
4539     if (!Data.Dependences.empty()) {
4540       CGF.EmitRuntimeCall(
4541           OMPBuilder.getOrCreateRuntimeFunction(
4542               CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4543           DepTaskArgs);
4544     } else {
4545       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4546                               CGM.getModule(), OMPRTL___kmpc_omp_task),
4547                           TaskArgs);
4548     }
4549     // If the parent region is untied, emit the switch point for the untied task.
4550     if (auto *Region =
4551             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4552       Region->emitUntiedSwitch(CGF);
4553   };
4554 
4555   llvm::Value *DepWaitTaskArgs[7];
4556   if (!Data.Dependences.empty()) {
4557     DepWaitTaskArgs[0] = UpLoc;
4558     DepWaitTaskArgs[1] = ThreadID;
4559     DepWaitTaskArgs[2] = NumOfElements;
4560     DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4561     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4562     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4563     DepWaitTaskArgs[6] =
4564         llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4565   }
4566   auto &M = CGM.getModule();
4567   auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4568                         TaskEntry, &Data, &DepWaitTaskArgs,
4569                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4570     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4571     // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
4572     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
4573     // kmp_depend_info_t *noalias_dep_list, kmp_int32 has_no_wait); if
4574     // dependence info is specified.
4575     if (!Data.Dependences.empty())
4576       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4577                               M, OMPRTL___kmpc_omp_taskwait_deps_51),
4578                           DepWaitTaskArgs);
4579     // Call proxy_task_entry(gtid, new_task);
4580     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4581                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4582       Action.Enter(CGF);
4583       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4584       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4585                                                           OutlinedFnArgs);
4586     };
4587 
4588     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4589     // kmp_task_t *new_task);
4590     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4591     // kmp_task_t *new_task);
4592     RegionCodeGenTy RCG(CodeGen);
4593     CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4594                               M, OMPRTL___kmpc_omp_task_begin_if0),
4595                           TaskArgs,
4596                           OMPBuilder.getOrCreateRuntimeFunction(
4597                               M, OMPRTL___kmpc_omp_task_complete_if0),
4598                           TaskArgs);
4599     RCG.setAction(Action);
4600     RCG(CGF);
4601   };
4602 
4603   if (IfCond) {
4604     emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4605   } else {
4606     RegionCodeGenTy ThenRCG(ThenCodeGen);
4607     ThenRCG(CGF);
4608   }
4609 }
4610 
4611 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4612                                        const OMPLoopDirective &D,
4613                                        llvm::Function *TaskFunction,
4614                                        QualType SharedsTy, Address Shareds,
4615                                        const Expr *IfCond,
4616                                        const OMPTaskDataTy &Data) {
4617   if (!CGF.HaveInsertPoint())
4618     return;
4619   TaskResultTy Result =
4620       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4621   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4622   // libcall.
4623   // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4624   // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4625   // sched, kmp_uint64 grainsize, void *task_dup);
4626   llvm::Value *ThreadID = getThreadID(CGF, Loc);
4627   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4628   llvm::Value *IfVal;
4629   if (IfCond) {
4630     IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4631                                       /*isSigned=*/true);
4632   } else {
4633     IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4634   }
4635 
4636   LValue LBLVal = CGF.EmitLValueForField(
4637       Result.TDBase,
4638       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4639   const auto *LBVar =
4640       cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4641   CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4642                        /*IsInitializer=*/true);
4643   LValue UBLVal = CGF.EmitLValueForField(
4644       Result.TDBase,
4645       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4646   const auto *UBVar =
4647       cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4648   CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4649                        /*IsInitializer=*/true);
4650   LValue StLVal = CGF.EmitLValueForField(
4651       Result.TDBase,
4652       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4653   const auto *StVar =
4654       cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4655   CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4656                        /*IsInitializer=*/true);
4657   // Store reductions address.
4658   LValue RedLVal = CGF.EmitLValueForField(
4659       Result.TDBase,
4660       *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4661   if (Data.Reductions) {
4662     CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4663   } else {
4664     CGF.EmitNullInitialization(RedLVal.getAddress(),
4665                                CGF.getContext().VoidPtrTy);
4666   }
4667   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
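  // E.g., 'grainsize(4)' lowers to sched == Grainsize with a schedule value
  // of 4, 'num_tasks(8)' to sched == NumTasks with 8, and the absence of
  // either clause to sched == NoSchedule with 0.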
4668   llvm::SmallVector<llvm::Value *, 12> TaskArgs{
4669       UpLoc,
4670       ThreadID,
4671       Result.NewTask,
4672       IfVal,
4673       LBLVal.getPointer(CGF),
4674       UBLVal.getPointer(CGF),
4675       CGF.EmitLoadOfScalar(StLVal, Loc),
4676       llvm::ConstantInt::getSigned(
4677           CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4678       llvm::ConstantInt::getSigned(
4679           CGF.IntTy, Data.Schedule.getPointer()
4680                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
4681                          : NoSchedule),
4682       Data.Schedule.getPointer()
4683           ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4684                                       /*isSigned=*/false)
4685           : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
4686   if (Data.HasModifier)
4687     TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4688 
4689   TaskArgs.push_back(Result.TaskDupFn
4690                          ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4691                                Result.TaskDupFn, CGF.VoidPtrTy)
4692                          : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4693   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4694                           CGM.getModule(), Data.HasModifier
4695                                                ? OMPRTL___kmpc_taskloop_5
4696                                                : OMPRTL___kmpc_taskloop),
4697                       TaskArgs);
4698 }
4699 
4700 /// Emit the reduction operation for each element of the array (required for
4701 /// array sections): LHS op= RHS.
4702 /// \param Type Type of array.
4703 /// \param LHSVar Variable on the left side of the reduction operation
4704 /// (references element of array in original variable).
4705 /// \param RHSVar Variable on the right side of the reduction operation
4706 /// (references element of array in original variable).
4707 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4708 /// RHSVar.
4709 static void EmitOMPAggregateReduction(
4710     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4711     const VarDecl *RHSVar,
4712     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4713                                   const Expr *, const Expr *)> &RedOpGen,
4714     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4715     const Expr *UpExpr = nullptr) {
4716   // Perform element-by-element initialization.
4717   QualType ElementTy;
4718   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4719   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4720 
4721   // Drill down to the base element type on both arrays.
4722   const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4723   llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4724 
4725   llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4726   llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4727   // Compute the end address of the LHS array: one past its last element.
4728   llvm::Value *LHSEnd =
4729       CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4730   // The basic structure here is a while-do loop.
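  // In pseudo-C, the emitted loop is roughly:
  //   if (lhs != lhsEnd)
  //     do { *lhs = *lhs <op> *rhs; ++lhs; ++rhs; } while (lhs != lhsEnd);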
4731   llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4732   llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4733   llvm::Value *IsEmpty =
4734       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4735   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4736 
4737   // Enter the loop body, making that address the current address.
4738   llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4739   CGF.EmitBlock(BodyBB);
4740 
4741   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4742 
4743   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4744       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4745   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4746   Address RHSElementCurrent(
4747       RHSElementPHI, RHSAddr.getElementType(),
4748       RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4749 
4750   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4751       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4752   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4753   Address LHSElementCurrent(
4754       LHSElementPHI, LHSAddr.getElementType(),
4755       LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4756 
4757   // Emit copy.
4758   CodeGenFunction::OMPPrivateScope Scope(CGF);
4759   Scope.addPrivate(LHSVar, LHSElementCurrent);
4760   Scope.addPrivate(RHSVar, RHSElementCurrent);
4761   Scope.Privatize();
4762   RedOpGen(CGF, XExpr, EExpr, UpExpr);
4763   Scope.ForceCleanup();
4764 
4765   // Shift the address forward by one element.
4766   llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4767       LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4768       "omp.arraycpy.dest.element");
4769   llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4770       RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4771       "omp.arraycpy.src.element");
4772   // Check whether we've reached the end.
4773   llvm::Value *Done =
4774       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4775   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4776   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4777   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4778 
4779   // Done.
4780   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4781 }
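// A minimal sketch of the while-do structure emitted above, using the
// basic-block names created in this function (the combiner body varies per
// reduction operation):
//   if (lhs.begin == lhs.end) goto omp.arraycpy.done; // omp.arraycpy.isempty
//   omp.arraycpy.body:
//     RedOpGen(<privatized LHS element>, <privatized RHS element>);
//     advance both element pointers by one;
//     if (lhs.element != lhs.end) goto omp.arraycpy.body;
//   omp.arraycpy.done: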
4782 
4783 /// Emit reduction combiner. If the combiner is a simple expression emit it as
4784 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4785 /// UDR combiner function.
4786 static void emitReductionCombiner(CodeGenFunction &CGF,
4787                                   const Expr *ReductionOp) {
4788   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4789     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4790       if (const auto *DRE =
4791               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4792         if (const auto *DRD =
4793                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4794           std::pair<llvm::Function *, llvm::Function *> Reduction =
4795               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4796           RValue Func = RValue::get(Reduction.first);
4797           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4798           CGF.EmitIgnoredExpr(ReductionOp);
4799           return;
4800         }
4801   CGF.EmitIgnoredExpr(ReductionOp);
4802 }
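// For a user-defined reduction declared, for example, as
//   #pragma omp declare reduction(merge : T : omp_out = f(omp_out, omp_in))
// the reduction op is a call whose callee is an OpaqueValueExpr; the mapping
// above rebinds that opaque callee to the UDR combiner function, so the same
// EmitIgnoredExpr path serves both built-in and user-defined combiners.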
4803 
4804 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4805     StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4806     ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4807     ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4808   ASTContext &C = CGM.getContext();
4809 
4810   // void reduction_func(void *LHSArg, void *RHSArg);
4811   FunctionArgList Args;
4812   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4813                            ImplicitParamKind::Other);
4814   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4815                            ImplicitParamKind::Other);
4816   Args.push_back(&LHSArg);
4817   Args.push_back(&RHSArg);
4818   const auto &CGFI =
4819       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4820   std::string Name = getReductionFuncName(ReducerName);
4821   auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4822                                     llvm::GlobalValue::InternalLinkage, Name,
4823                                     &CGM.getModule());
4824   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4825   Fn->setDoesNotRecurse();
4826   CodeGenFunction CGF(CGM);
4827   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4828 
4829   // Dst = (void*[n])(LHSArg);
4830   // Src = (void*[n])(RHSArg);
4831   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4832                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4833                   CGF.Builder.getPtrTy(0)),
4834               ArgsElemType, CGF.getPointerAlign());
4835   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4836                   CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4837                   CGF.Builder.getPtrTy(0)),
4838               ArgsElemType, CGF.getPointerAlign());
4839 
4840   //  ...
4841   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4842   //  ...
4843   CodeGenFunction::OMPPrivateScope Scope(CGF);
4844   const auto *IPriv = Privates.begin();
4845   unsigned Idx = 0;
4846   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4847     const auto *RHSVar =
4848         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4849     Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4850     const auto *LHSVar =
4851         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4852     Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4853     QualType PrivTy = (*IPriv)->getType();
4854     if (PrivTy->isVariablyModifiedType()) {
4855       // Get array size and emit VLA type.
4856       ++Idx;
4857       Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4858       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4859       const VariableArrayType *VLA =
4860           CGF.getContext().getAsVariableArrayType(PrivTy);
4861       const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4862       CodeGenFunction::OpaqueValueMapping OpaqueMap(
4863           CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4864       CGF.EmitVariablyModifiedType(PrivTy);
4865     }
4866   }
4867   Scope.Privatize();
4868   IPriv = Privates.begin();
4869   const auto *ILHS = LHSExprs.begin();
4870   const auto *IRHS = RHSExprs.begin();
4871   for (const Expr *E : ReductionOps) {
4872     if ((*IPriv)->getType()->isArrayType()) {
4873       // Emit reduction for array section.
4874       const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4875       const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4876       EmitOMPAggregateReduction(
4877           CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4878           [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4879             emitReductionCombiner(CGF, E);
4880           });
4881     } else {
4882       // Emit reduction for array subscript or single variable.
4883       emitReductionCombiner(CGF, E);
4884     }
4885     ++IPriv;
4886     ++ILHS;
4887     ++IRHS;
4888   }
4889   Scope.ForceCleanup();
4890   CGF.FinishFunction();
4891   return Fn;
4892 }
4893 
4894 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4895                                                   const Expr *ReductionOp,
4896                                                   const Expr *PrivateRef,
4897                                                   const DeclRefExpr *LHS,
4898                                                   const DeclRefExpr *RHS) {
4899   if (PrivateRef->getType()->isArrayType()) {
4900     // Emit reduction for array section.
4901     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4902     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4903     EmitOMPAggregateReduction(
4904         CGF, PrivateRef->getType(), LHSVar, RHSVar,
4905         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4906           emitReductionCombiner(CGF, ReductionOp);
4907         });
4908   } else {
4909     // Emit reduction for array subscript or single variable.
4910     emitReductionCombiner(CGF, ReductionOp);
4911   }
4912 }
4913 
4914 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4915                                     ArrayRef<const Expr *> Privates,
4916                                     ArrayRef<const Expr *> LHSExprs,
4917                                     ArrayRef<const Expr *> RHSExprs,
4918                                     ArrayRef<const Expr *> ReductionOps,
4919                                     ReductionOptionsTy Options) {
4920   if (!CGF.HaveInsertPoint())
4921     return;
4922 
4923   bool WithNowait = Options.WithNowait;
4924   bool SimpleReduction = Options.SimpleReduction;
4925 
4926   // The following code should be emitted for reduction:
4927   //
4928   // static kmp_critical_name lock = { 0 };
4929   //
4930   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4931   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4932   //  ...
4933   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4934   //  *(Type<n>-1*)rhs[<n>-1]);
4935   // }
4936   //
4937   // ...
4938   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4939   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4940   // RedList, reduce_func, &<lock>)) {
4941   // case 1:
4942   //  ...
4943   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4944   //  ...
4945   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4946   // break;
4947   // case 2:
4948   //  ...
4949   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4950   //  ...
4951   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4952   // break;
4953   // default:;
4954   // }
4955   //
4956   // If SimpleReduction is true, only the following code is generated:
4957   //  ...
4958   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4959   //  ...
4960 
4961   ASTContext &C = CGM.getContext();
4962 
4963   if (SimpleReduction) {
4964     CodeGenFunction::RunCleanupsScope Scope(CGF);
4965     const auto *IPriv = Privates.begin();
4966     const auto *ILHS = LHSExprs.begin();
4967     const auto *IRHS = RHSExprs.begin();
4968     for (const Expr *E : ReductionOps) {
4969       emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4970                                   cast<DeclRefExpr>(*IRHS));
4971       ++IPriv;
4972       ++ILHS;
4973       ++IRHS;
4974     }
4975     return;
4976   }
4977 
4978   // 1. Build a list of reduction variables.
4979   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4980   auto Size = RHSExprs.size();
4981   for (const Expr *E : Privates) {
4982     if (E->getType()->isVariablyModifiedType())
4983       // Reserve space for the array size.
4984       ++Size;
4985   }
4986   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4987   QualType ReductionArrayTy = C.getConstantArrayType(
4988       C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
4989       /*IndexTypeQuals=*/0);
4990   RawAddress ReductionList =
4991       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4992   const auto *IPriv = Privates.begin();
4993   unsigned Idx = 0;
4994   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4995     Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
4996     CGF.Builder.CreateStore(
4997         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4998             CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
4999         Elem);
5000     if ((*IPriv)->getType()->isVariablyModifiedType()) {
5001       // Store array size.
5002       ++Idx;
5003       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5004       llvm::Value *Size = CGF.Builder.CreateIntCast(
5005           CGF.getVLASize(
5006                  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5007               .NumElts,
5008           CGF.SizeTy, /*isSigned=*/false);
5009       CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5010                               Elem);
5011     }
5012   }
5013 
5014   // 2. Emit reduce_func().
5015   llvm::Function *ReductionFn = emitReductionFunction(
5016       CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5017       Privates, LHSExprs, RHSExprs, ReductionOps);
5018 
5019   // 3. Create static kmp_critical_name lock = { 0 };
5020   std::string Name = getName({"reduction"});
5021   llvm::Value *Lock = getCriticalRegionLock(Name);
5022 
5023   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5024   // RedList, reduce_func, &<lock>);
5025   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5026   llvm::Value *ThreadId = getThreadID(CGF, Loc);
5027   llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5028   llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5029       ReductionList.getPointer(), CGF.VoidPtrTy);
5030   llvm::Value *Args[] = {
5031       IdentTLoc,                             // ident_t *<loc>
5032       ThreadId,                              // i32 <gtid>
5033       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5034       ReductionArrayTySize,                  // size_type sizeof(RedList)
5035       RL,                                    // void *RedList
5036       ReductionFn, // void (*) (void *, void *) <reduce_func>
5037       Lock         // kmp_critical_name *&<lock>
5038   };
5039   llvm::Value *Res = CGF.EmitRuntimeCall(
5040       OMPBuilder.getOrCreateRuntimeFunction(
5041           CGM.getModule(),
5042           WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5043       Args);
5044 
5045   // 5. Build switch(res)
5046   llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5047   llvm::SwitchInst *SwInst =
5048       CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5049 
5050   // 6. Build case 1:
5051   //  ...
5052   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5053   //  ...
5054   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5055   // break;
5056   llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5057   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5058   CGF.EmitBlock(Case1BB);
5059 
5060   // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5061   llvm::Value *EndArgs[] = {
5062       IdentTLoc, // ident_t *<loc>
5063       ThreadId,  // i32 <gtid>
5064       Lock       // kmp_critical_name *&<lock>
5065   };
5066   auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5067                        CodeGenFunction &CGF, PrePostActionTy &Action) {
5068     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5069     const auto *IPriv = Privates.begin();
5070     const auto *ILHS = LHSExprs.begin();
5071     const auto *IRHS = RHSExprs.begin();
5072     for (const Expr *E : ReductionOps) {
5073       RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5074                                      cast<DeclRefExpr>(*IRHS));
5075       ++IPriv;
5076       ++ILHS;
5077       ++IRHS;
5078     }
5079   };
5080   RegionCodeGenTy RCG(CodeGen);
5081   CommonActionTy Action(
5082       nullptr, {},
5083       OMPBuilder.getOrCreateRuntimeFunction(
5084           CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5085                                       : OMPRTL___kmpc_end_reduce),
5086       EndArgs);
5087   RCG.setAction(Action);
5088   RCG(CGF);
5089 
5090   CGF.EmitBranch(DefaultBB);
5091 
5092   // 7. Build case 2:
5093   //  ...
5094   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5095   //  ...
5096   // break;
5097   llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5098   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5099   CGF.EmitBlock(Case2BB);
5100 
5101   auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5102                              CodeGenFunction &CGF, PrePostActionTy &Action) {
5103     const auto *ILHS = LHSExprs.begin();
5104     const auto *IRHS = RHSExprs.begin();
5105     const auto *IPriv = Privates.begin();
5106     for (const Expr *E : ReductionOps) {
5107       const Expr *XExpr = nullptr;
5108       const Expr *EExpr = nullptr;
5109       const Expr *UpExpr = nullptr;
5110       BinaryOperatorKind BO = BO_Comma;
5111       if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5112         if (BO->getOpcode() == BO_Assign) {
5113           XExpr = BO->getLHS();
5114           UpExpr = BO->getRHS();
5115         }
5116       }
5117       // Try to emit update expression as a simple atomic.
5118       const Expr *RHSExpr = UpExpr;
5119       if (RHSExpr) {
5120         // Analyze RHS part of the whole expression.
5121         if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5122                 RHSExpr->IgnoreParenImpCasts())) {
5123           // If this is a conditional operator, analyze its condition for
5124           // min/max reduction operator.
5125           RHSExpr = ACO->getCond();
5126         }
5127         if (const auto *BORHS =
5128                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5129           EExpr = BORHS->getRHS();
5130           BO = BORHS->getOpcode();
5131         }
5132       }
5133       if (XExpr) {
5134         const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5135         auto &&AtomicRedGen = [BO, VD,
5136                                Loc](CodeGenFunction &CGF, const Expr *XExpr,
5137                                     const Expr *EExpr, const Expr *UpExpr) {
5138           LValue X = CGF.EmitLValue(XExpr);
5139           RValue E;
5140           if (EExpr)
5141             E = CGF.EmitAnyExpr(EExpr);
5142           CGF.EmitOMPAtomicSimpleUpdateExpr(
5143               X, E, BO, /*IsXLHSInRHSPart=*/true,
5144               llvm::AtomicOrdering::Monotonic, Loc,
5145               [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5146                 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5147                 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5148                 CGF.emitOMPSimpleStore(
5149                     CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5150                     VD->getType().getNonReferenceType(), Loc);
5151                 PrivateScope.addPrivate(VD, LHSTemp);
5152                 (void)PrivateScope.Privatize();
5153                 return CGF.EmitAnyExpr(UpExpr);
5154               });
5155         };
5156         if ((*IPriv)->getType()->isArrayType()) {
5157           // Emit atomic reduction for array section.
5158           const auto *RHSVar =
5159               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5160           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5161                                     AtomicRedGen, XExpr, EExpr, UpExpr);
5162         } else {
5163           // Emit atomic reduction for array subscript or single variable.
5164           AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5165         }
5166       } else {
5167         // Emit as a critical region.
5168         auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5169                                      const Expr *, const Expr *) {
5170           CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5171           std::string Name = RT.getName({"atomic_reduction"});
5172           RT.emitCriticalRegion(
5173               CGF, Name,
5174               [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5175                 Action.Enter(CGF);
5176                 emitReductionCombiner(CGF, E);
5177               },
5178               Loc);
5179         };
5180         if ((*IPriv)->getType()->isArrayType()) {
5181           const auto *LHSVar =
5182               cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5183           const auto *RHSVar =
5184               cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5185           EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5186                                     CritRedGen);
5187         } else {
5188           CritRedGen(CGF, nullptr, nullptr, nullptr);
5189         }
5190       }
5191       ++ILHS;
5192       ++IRHS;
5193       ++IPriv;
5194     }
5195   };
5196   RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5197   if (!WithNowait) {
5198     // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5199     llvm::Value *EndArgs[] = {
5200         IdentTLoc, // ident_t *<loc>
5201         ThreadId,  // i32 <gtid>
5202         Lock       // kmp_critical_name *&<lock>
5203     };
5204     CommonActionTy Action(nullptr, {},
5205                           OMPBuilder.getOrCreateRuntimeFunction(
5206                               CGM.getModule(), OMPRTL___kmpc_end_reduce),
5207                           EndArgs);
5208     AtomicRCG.setAction(Action);
5209     AtomicRCG(CGF);
5210   } else {
5211     AtomicRCG(CGF);
5212   }
5213 
5214   CGF.EmitBranch(DefaultBB);
5215   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5216 }
5217 
5218 /// Generates unique name for artificial threadprivate variables.
5219 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5220 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5221                                       const Expr *Ref) {
5222   SmallString<256> Buffer;
5223   llvm::raw_svector_ostream Out(Buffer);
5224   const clang::DeclRefExpr *DE;
5225   const VarDecl *D = ::getBaseDecl(Ref, DE);
5226   if (!D)
5227     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5228   D = D->getCanonicalDecl();
5229   std::string Name = CGM.getOpenMPRuntime().getName(
5230       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5231   Out << Prefix << Name << "_"
5232       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5233   return std::string(Out.str());
5234 }
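// Illustrative example (the raw location encoding is build-dependent): for a
// global variable 'a' whose canonical declaration starts at raw location
// 1234, a "reduction_size" prefix would yield something like
// "reduction_size.a_1234".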
5235 
5236 /// Emits reduction initializer function:
5237 /// \code
5238 /// void @.red_init(void* %arg, void* %orig) {
5239 /// %0 = bitcast void* %arg to <type>*
5240 /// store <type> <init>, <type>* %0
5241 /// ret void
5242 /// }
5243 /// \endcode
5244 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5245                                            SourceLocation Loc,
5246                                            ReductionCodeGen &RCG, unsigned N) {
5247   ASTContext &C = CGM.getContext();
5248   QualType VoidPtrTy = C.VoidPtrTy;
5249   VoidPtrTy.addRestrict();
5250   FunctionArgList Args;
5251   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5252                           ImplicitParamKind::Other);
5253   ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5254                               ImplicitParamKind::Other);
5255   Args.emplace_back(&Param);
5256   Args.emplace_back(&ParamOrig);
5257   const auto &FnInfo =
5258       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5259   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5260   std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5261   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5262                                     Name, &CGM.getModule());
5263   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5264   Fn->setDoesNotRecurse();
5265   CodeGenFunction CGF(CGM);
5266   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5267   QualType PrivateType = RCG.getPrivateType(N);
5268   Address PrivateAddr = CGF.EmitLoadOfPointer(
5269       CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5270       C.getPointerType(PrivateType)->castAs<PointerType>());
5271   llvm::Value *Size = nullptr;
5272   // If the size of the reduction item is non-constant, load it from the
5273   // global threadprivate variable.
5274   if (RCG.getSizes(N).second) {
5275     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5276         CGF, CGM.getContext().getSizeType(),
5277         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5278     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5279                                 CGM.getContext().getSizeType(), Loc);
5280   }
5281   RCG.emitAggregateType(CGF, N, Size);
5282   Address OrigAddr = Address::invalid();
5283   // If the initializer comes from a declare reduction construct, emit a
5284   // pointer to the address of the original reduction item (required by the
5285   // reduction initializer).
5286   if (RCG.usesReductionInitializer(N)) {
5287     Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5288     OrigAddr = CGF.EmitLoadOfPointer(
5289         SharedAddr,
5290         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5291   }
5292   // Emit the initializer:
5293   // %0 = bitcast void* %arg to <type>*
5294   // store <type> <init>, <type>* %0
5295   RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5296                          [](CodeGenFunction &) { return false; });
5297   CGF.FinishFunction();
5298   return Fn;
5299 }
5300 
5301 /// Emits reduction combiner function:
5302 /// \code
5303 /// void @.red_comb(void* %arg0, void* %arg1) {
5304 /// %lhs = bitcast void* %arg0 to <type>*
5305 /// %rhs = bitcast void* %arg1 to <type>*
5306 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5307 /// store <type> %2, <type>* %lhs
5308 /// ret void
5309 /// }
5310 /// \endcode
5311 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5312                                            SourceLocation Loc,
5313                                            ReductionCodeGen &RCG, unsigned N,
5314                                            const Expr *ReductionOp,
5315                                            const Expr *LHS, const Expr *RHS,
5316                                            const Expr *PrivateRef) {
5317   ASTContext &C = CGM.getContext();
5318   const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5319   const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5320   FunctionArgList Args;
5321   ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5322                                C.VoidPtrTy, ImplicitParamKind::Other);
5323   ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5324                             ImplicitParamKind::Other);
5325   Args.emplace_back(&ParamInOut);
5326   Args.emplace_back(&ParamIn);
5327   const auto &FnInfo =
5328       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5329   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5330   std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5331   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5332                                     Name, &CGM.getModule());
5333   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5334   Fn->setDoesNotRecurse();
5335   CodeGenFunction CGF(CGM);
5336   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5337   llvm::Value *Size = nullptr;
5338   // If the size of the reduction item is non-constant, load it from the
5339   // global threadprivate variable.
5340   if (RCG.getSizes(N).second) {
5341     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5342         CGF, CGM.getContext().getSizeType(),
5343         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5344     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5345                                 CGM.getContext().getSizeType(), Loc);
5346   }
5347   RCG.emitAggregateType(CGF, N, Size);
5348   // Remap lhs and rhs variables to the addresses of the function arguments.
5349   // %lhs = bitcast void* %arg0 to <type>*
5350   // %rhs = bitcast void* %arg1 to <type>*
5351   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5352   PrivateScope.addPrivate(
5353       LHSVD,
5354       // Pull out the pointer to the variable.
5355       CGF.EmitLoadOfPointer(
5356           CGF.GetAddrOfLocalVar(&ParamInOut)
5357               .withElementType(CGF.Builder.getPtrTy(0)),
5358           C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5359   PrivateScope.addPrivate(
5360       RHSVD,
5361       // Pull out the pointer to the variable.
5362       CGF.EmitLoadOfPointer(
5363           CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5364               CGF.Builder.getPtrTy(0)),
5365           C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5366   PrivateScope.Privatize();
5367   // Emit the combiner body:
5368   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5369   // store <type> %2, <type>* %lhs
5370   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5371       CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5372       cast<DeclRefExpr>(RHS));
5373   CGF.FinishFunction();
5374   return Fn;
5375 }
5376 
5377 /// Emits reduction finalizer function:
5378 /// \code
5379 /// void @.red_fini(void* %arg) {
5380 /// %0 = bitcast void* %arg to <type>*
5381 /// <destroy>(<type>* %0)
5382 /// ret void
5383 /// }
5384 /// \endcode
5385 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5386                                            SourceLocation Loc,
5387                                            ReductionCodeGen &RCG, unsigned N) {
5388   if (!RCG.needCleanups(N))
5389     return nullptr;
5390   ASTContext &C = CGM.getContext();
5391   FunctionArgList Args;
5392   ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5393                           ImplicitParamKind::Other);
5394   Args.emplace_back(&Param);
5395   const auto &FnInfo =
5396       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5397   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5398   std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5399   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5400                                     Name, &CGM.getModule());
5401   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5402   Fn->setDoesNotRecurse();
5403   CodeGenFunction CGF(CGM);
5404   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5405   Address PrivateAddr = CGF.EmitLoadOfPointer(
5406       CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5407   llvm::Value *Size = nullptr;
5408   // If the size of the reduction item is non-constant, load it from the
5409   // global threadprivate variable.
5410   if (RCG.getSizes(N).second) {
5411     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5412         CGF, CGM.getContext().getSizeType(),
5413         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5414     Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5415                                 CGM.getContext().getSizeType(), Loc);
5416   }
5417   RCG.emitAggregateType(CGF, N, Size);
5418   // Emit the finalizer body:
5419   // <destroy>(<type>* %0)
5420   RCG.emitCleanups(CGF, N, PrivateAddr);
5421   CGF.FinishFunction(Loc);
5422   return Fn;
5423 }
5424 
5425 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5426     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5427     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5428   if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5429     return nullptr;
5430 
5431   // Build typedef struct:
5432   // kmp_taskred_input {
5433   //   void *reduce_shar; // shared reduction item
5434   //   void *reduce_orig; // original reduction item used for initialization
5435   //   size_t reduce_size; // size of data item
5436   //   void *reduce_init; // data initialization routine
5437   //   void *reduce_fini; // data finalization routine
5438   //   void *reduce_comb; // data combiner routine
5439   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
5440   // } kmp_taskred_input_t;
5441   ASTContext &C = CGM.getContext();
5442   RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5443   RD->startDefinition();
5444   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5445   const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5446   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5447   const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5448   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5449   const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5450   const FieldDecl *FlagsFD = addFieldToRecordDecl(
5451       C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5452   RD->completeDefinition();
5453   QualType RDType = C.getRecordType(RD);
5454   unsigned Size = Data.ReductionVars.size();
5455   llvm::APInt ArraySize(/*numBits=*/64, Size);
5456   QualType ArrayRDType =
5457       C.getConstantArrayType(RDType, ArraySize, nullptr,
5458                              ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5459   // kmp_taskred_input_t .rd_input.[Size];
5460   RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5461   ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5462                        Data.ReductionCopies, Data.ReductionOps);
5463   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5464     // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
5465     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5466                            llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5467     llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5468         TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5469         /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5470         ".rd_input.gep.");
5471     LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5472     // ElemLVal.reduce_shar = &Shareds[Cnt];
5473     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5474     RCG.emitSharedOrigLValue(CGF, Cnt);
5475     llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5476     CGF.EmitStoreOfScalar(Shared, SharedLVal);
5477     // ElemLVal.reduce_orig = &Origs[Cnt];
5478     LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5479     llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5480     CGF.EmitStoreOfScalar(Orig, OrigLVal);
5481     RCG.emitAggregateType(CGF, Cnt);
5482     llvm::Value *SizeValInChars;
5483     llvm::Value *SizeVal;
5484     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5485     // We use delayed creation/initialization for VLAs and array sections. It
5486     // is required because the runtime does not provide a way to pass the
5487     // sizes of VLAs/array sections to the initializer/combiner/finalizer
5488     // functions. Instead, threadprivate global variables are used to store
5489     // these values and make them available to those functions.
5490     bool DelayedCreation = !!SizeVal;
5491     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5492                                                /*isSigned=*/false);
5493     LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5494     CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5495     // ElemLVal.reduce_init = init;
5496     LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5497     llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5498     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5499     // ElemLVal.reduce_fini = fini;
5500     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5501     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5502     llvm::Value *FiniAddr =
5503         Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5504     CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5505     // ElemLVal.reduce_comb = comb;
5506     LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5507     llvm::Value *CombAddr = emitReduceCombFunction(
5508         CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5509         RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5510     CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5511     // ElemLVal.flags = 0;
5512     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5513     if (DelayedCreation) {
5514       CGF.EmitStoreOfScalar(
5515           llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5516           FlagsLVal);
5517     } else
5518       CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5519   }
5520   if (Data.IsReductionWithTaskMod) {
5521     // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5522     // is_ws, int num, void *data);
5523     llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5524     llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5525                                                   CGM.IntTy, /*isSigned=*/true);
5526     llvm::Value *Args[] = {
5527         IdentTLoc, GTid,
5528         llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5529                                /*isSigned=*/true),
5530         llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5531         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5532             TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5533     return CGF.EmitRuntimeCall(
5534         OMPBuilder.getOrCreateRuntimeFunction(
5535             CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5536         Args);
5537   }
5538   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5539   llvm::Value *Args[] = {
5540       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5541                                 /*isSigned=*/true),
5542       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5543       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5544                                                       CGM.VoidPtrTy)};
5545   return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5546                                  CGM.getModule(), OMPRTL___kmpc_taskred_init),
5547                              Args);
5548 }
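// A hedged end-to-end sketch (field values illustrative) of what the loop
// above produces for '#pragma omp taskgroup task_reduction(+ : x)' with a
// fixed-size scalar 'x' that needs no cleanups:
//   kmp_taskred_input_t .rd_input.[1] = {{
//       /*reduce_shar=*/&x, /*reduce_orig=*/&x, /*reduce_size=*/sizeof(x),
//       /*reduce_init=*/.red_init., /*reduce_fini=*/nullptr,
//       /*reduce_comb=*/.red_comb., /*flags=*/0}};
//   void *tg = __kmpc_taskred_init(gtid, /*num_data=*/1, .rd_input.);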
5549 
5550 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5551                                             SourceLocation Loc,
5552                                             bool IsWorksharingReduction) {
5553   // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
5554   // int gtid, int is_ws);
5555   llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5556   llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5557                                                 CGM.IntTy, /*isSigned=*/true);
5558   llvm::Value *Args[] = {IdentTLoc, GTid,
5559                          llvm::ConstantInt::get(CGM.IntTy,
5560                                                 IsWorksharingReduction ? 1 : 0,
5561                                                 /*isSigned=*/true)};
5562   (void)CGF.EmitRuntimeCall(
5563       OMPBuilder.getOrCreateRuntimeFunction(
5564           CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5565       Args);
5566 }
5567 
5568 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5569                                               SourceLocation Loc,
5570                                               ReductionCodeGen &RCG,
5571                                               unsigned N) {
5572   auto Sizes = RCG.getSizes(N);
5573   // Emit a threadprivate global variable if the size is non-constant
5574   // (Sizes.second != nullptr).
5575   if (Sizes.second) {
5576     llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5577                                                      /*isSigned=*/false);
5578     Address SizeAddr = getAddrOfArtificialThreadPrivate(
5579         CGF, CGM.getContext().getSizeType(),
5580         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5581     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5582   }
5583 }
5584 
5585 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5586                                               SourceLocation Loc,
5587                                               llvm::Value *ReductionsPtr,
5588                                               LValue SharedLVal) {
5589   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5590   // *d);
5591   llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5592                                                    CGM.IntTy,
5593                                                    /*isSigned=*/true),
5594                          ReductionsPtr,
5595                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5596                              SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5597   return Address(
5598       CGF.EmitRuntimeCall(
5599           OMPBuilder.getOrCreateRuntimeFunction(
5600               CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5601           Args),
5602       CGF.Int8Ty, SharedLVal.getAlignment());
5603 }
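// Hedged usage sketch: inside a task with 'in_reduction(+ : x)', the address
// of the thread-specific copy of 'x' is obtained as
//   void *p = __kmpc_task_reduction_get_th_data(gtid, tg, &x);
// where 'tg' is the value returned by the matching taskred init call, and
// references to 'x' in the task body are redirected through 'p'.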
5604 
5605 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5606                                        const OMPTaskDataTy &Data) {
5607   if (!CGF.HaveInsertPoint())
5608     return;
5609 
5610   if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5611     // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5612     OMPBuilder.createTaskwait(CGF.Builder);
5613   } else {
5614     llvm::Value *ThreadID = getThreadID(CGF, Loc);
5615     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5616     auto &M = CGM.getModule();
5617     Address DependenciesArray = Address::invalid();
5618     llvm::Value *NumOfElements;
5619     std::tie(NumOfElements, DependenciesArray) =
5620         emitDependClause(CGF, Data.Dependences, Loc);
5621     if (!Data.Dependences.empty()) {
5622       llvm::Value *DepWaitTaskArgs[7];
5623       DepWaitTaskArgs[0] = UpLoc;
5624       DepWaitTaskArgs[1] = ThreadID;
5625       DepWaitTaskArgs[2] = NumOfElements;
5626       DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5627       DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5628       DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5629       DepWaitTaskArgs[6] =
5630           llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5631 
5632       CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5633 
5634       // Build call void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32
5635       // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5636       // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5637       // kmp_int32 has_no_wait), used when dependence info is specified.
5638       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5639                               M, OMPRTL___kmpc_omp_taskwait_deps_51),
5640                           DepWaitTaskArgs);
5641 
5642     } else {
5643 
5644       // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5645       // global_tid);
5646       llvm::Value *Args[] = {UpLoc, ThreadID};
5647       // Ignore return result until untied tasks are supported.
5648       CGF.EmitRuntimeCall(
5649           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5650           Args);
5651     }
5652   }
5653 
5654   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5655     Region->emitUntiedSwitch(CGF);
5656 }
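// Hedged sketch of the two lowerings above (dependence-array contents
// illustrative):
//   #pragma omp taskwait depend(in : x)
//     -> __kmpc_omp_taskwait_deps_51(loc, gtid, /*ndeps=*/1, deps,
//            /*ndeps_noalias=*/0, /*noalias_dep_list=*/nullptr,
//            /*has_no_wait=*/0);
//   #pragma omp taskwait
//     -> __kmpc_omp_taskwait(loc, gtid);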
5657 
5658 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5659                                            OpenMPDirectiveKind InnerKind,
5660                                            const RegionCodeGenTy &CodeGen,
5661                                            bool HasCancel) {
5662   if (!CGF.HaveInsertPoint())
5663     return;
5664   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5665                                  InnerKind != OMPD_critical &&
5666                                      InnerKind != OMPD_master &&
5667                                      InnerKind != OMPD_masked);
5668   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5669 }
5670 
5671 namespace {
5672 enum RTCancelKind {
5673   CancelNoreq = 0,
5674   CancelParallel = 1,
5675   CancelLoop = 2,
5676   CancelSections = 3,
5677   CancelTaskgroup = 4
5678 };
5679 } // anonymous namespace
5680 
5681 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5682   RTCancelKind CancelKind = CancelNoreq;
5683   if (CancelRegion == OMPD_parallel)
5684     CancelKind = CancelParallel;
5685   else if (CancelRegion == OMPD_for)
5686     CancelKind = CancelLoop;
5687   else if (CancelRegion == OMPD_sections)
5688     CancelKind = CancelSections;
5689   else {
5690     assert(CancelRegion == OMPD_taskgroup);
5691     CancelKind = CancelTaskgroup;
5692   }
5693   return CancelKind;
5694 }
5695 
5696 void CGOpenMPRuntime::emitCancellationPointCall(
5697     CodeGenFunction &CGF, SourceLocation Loc,
5698     OpenMPDirectiveKind CancelRegion) {
5699   if (!CGF.HaveInsertPoint())
5700     return;
5701   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5702   // global_tid, kmp_int32 cncl_kind);
5703   if (auto *OMPRegionInfo =
5704           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5705     // For 'cancellation point taskgroup', the task region info may not have a
5706     // cancel. This may instead happen in another adjacent task.
5707     if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5708       llvm::Value *Args[] = {
5709           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5710           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5711       // Ignore return result until untied tasks are supported.
5712       llvm::Value *Result = CGF.EmitRuntimeCall(
5713           OMPBuilder.getOrCreateRuntimeFunction(
5714               CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5715           Args);
5716       // if (__kmpc_cancellationpoint()) {
5717       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5718       //   exit from construct;
5719       // }
5720       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5721       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5722       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5723       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5724       CGF.EmitBlock(ExitBB);
5725       if (CancelRegion == OMPD_parallel)
5726         emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5727       // exit from construct;
5728       CodeGenFunction::JumpDest CancelDest =
5729           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5730       CGF.EmitBranchThroughCleanup(CancelDest);
5731       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5732     }
5733   }
5734 }
5735 
5736 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5737                                      const Expr *IfCond,
5738                                      OpenMPDirectiveKind CancelRegion) {
5739   if (!CGF.HaveInsertPoint())
5740     return;
5741   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5742   // kmp_int32 cncl_kind);
5743   auto &M = CGM.getModule();
5744   if (auto *OMPRegionInfo =
5745           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5746     auto &&ThenGen = [this, &M, Loc, CancelRegion,
5747                       OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5748       CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5749       llvm::Value *Args[] = {
5750           RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5751           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5752       // Ignore return result until untied tasks are supported.
5753       llvm::Value *Result = CGF.EmitRuntimeCall(
5754           OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5755       // if (__kmpc_cancel()) {
5756       //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5757       //   exit from construct;
5758       // }
5759       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5760       llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5761       llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5762       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5763       CGF.EmitBlock(ExitBB);
5764       if (CancelRegion == OMPD_parallel)
5765         RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5766       // exit from construct;
5767       CodeGenFunction::JumpDest CancelDest =
5768           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5769       CGF.EmitBranchThroughCleanup(CancelDest);
5770       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5771     };
5772     if (IfCond) {
5773       emitIfClause(CGF, IfCond, ThenGen,
5774                    [](CodeGenFunction &, PrePostActionTy &) {});
5775     } else {
5776       RegionCodeGenTy ThenRCG(ThenGen);
5777       ThenRCG(CGF);
5778     }
5779   }
5780 }
5781 
5782 namespace {
5783 /// Cleanup action for uses_allocators support.
5784 class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5785   ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5786 
5787 public:
5788   OMPUsesAllocatorsActionTy(
5789       ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5790       : Allocators(Allocators) {}
5791   void Enter(CodeGenFunction &CGF) override {
5792     if (!CGF.HaveInsertPoint())
5793       return;
5794     for (const auto &AllocatorData : Allocators) {
5795       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5796           CGF, AllocatorData.first, AllocatorData.second);
5797     }
5798   }
5799   void Exit(CodeGenFunction &CGF) override {
5800     if (!CGF.HaveInsertPoint())
5801       return;
5802     for (const auto &AllocatorData : Allocators) {
5803       CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5804                                                         AllocatorData.first);
5805     }
5806   }
5807 };
5808 } // namespace
5809 
5810 void CGOpenMPRuntime::emitTargetOutlinedFunction(
5811     const OMPExecutableDirective &D, StringRef ParentName,
5812     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5813     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5814   assert(!ParentName.empty() && "Invalid target entry parent name!");
5815   HasEmittedTargetRegion = true;
5816   SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5817   for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5818     for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5819       const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5820       if (!D.AllocatorTraits)
5821         continue;
5822       Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5823     }
5824   }
5825   OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5826   CodeGen.setAction(UsesAllocatorAction);
5827   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5828                                    IsOffloadEntry, CodeGen);
5829 }
5830 
5831 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
5832                                              const Expr *Allocator,
5833                                              const Expr *AllocatorTraits) {
5834   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5835   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5836   // Use default memspace handle.
5837   llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5838   llvm::Value *NumTraits = llvm::ConstantInt::get(
5839       CGF.IntTy, cast<ConstantArrayType>(
5840                      AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5841                      ->getSize()
5842                      .getLimitedValue());
5843   LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
5844   Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5845       AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
5846   AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
5847                                            AllocatorTraitsLVal.getBaseInfo(),
5848                                            AllocatorTraitsLVal.getTBAAInfo());
5849   llvm::Value *Traits = Addr.emitRawPointer(CGF);
5850 
5851   llvm::Value *AllocatorVal =
5852       CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5853                               CGM.getModule(), OMPRTL___kmpc_init_allocator),
5854                           {ThreadId, MemSpaceHandle, NumTraits, Traits});
5855   // Store to allocator.
5856   CGF.EmitAutoVarAlloca(*cast<VarDecl>(
5857       cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
5858   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5859   AllocatorVal =
5860       CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
5861                                Allocator->getType(), Allocator->getExprLoc());
5862   CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
5863 }
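// Hedged sketch for a target construct with 'uses_allocators(a(traits))',
// where 'a' is a user-declared omp_allocator_handle_t variable (names
// illustrative):
//   void *h = __kmpc_init_allocator(gtid, /*memspace=*/nullptr,
//                                   /*ntraits=*/N, traits_array);
//   a = (omp_allocator_handle_t)h; // the EmitStoreOfScalar above
// with a matching __kmpc_destroy_allocator(gtid, a) emitted on region exit.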
5864 
5865 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5866                                              const Expr *Allocator) {
5867   llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5868   ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5869   LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5870   llvm::Value *AllocatorVal =
5871       CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
5872   AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
5873                                           CGF.getContext().VoidPtrTy,
5874                                           Allocator->getExprLoc());
5875   (void)CGF.EmitRuntimeCall(
5876       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
5877                                             OMPRTL___kmpc_destroy_allocator),
5878       {ThreadId, AllocatorVal});
5879 }
5880 
5881 void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
5882     const OMPExecutableDirective &D, CodeGenFunction &CGF,
5883     llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
5884   assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
5885          "invalid default attrs structure");
5886   int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
5887   int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
5888 
5889   getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
5890   getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
5891                                       /*UpperBoundOnly=*/true);
5892 
5893   for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5894     for (auto *A : C->getAttrs()) {
5895       int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
5896       int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
5897       if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
5898         CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
5899                                        &AttrMinBlocksVal, &AttrMaxBlocksVal);
5900       else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
5901         CGM.handleAMDGPUFlatWorkGroupSizeAttr(
5902             nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
5903             &AttrMaxThreadsVal);
5904       else
5905         continue;
5906 
5907       Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
5908       if (AttrMaxThreadsVal > 0)
5909         MaxThreadsVal = MaxThreadsVal > 0
5910                             ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
5911                             : AttrMaxThreadsVal;
5912       Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
5913       if (AttrMaxBlocksVal > 0)
5914         MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
5915                                       : AttrMaxBlocksVal;
5916     }
5917   }
5918 }
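
// For illustration: a directive carrying, e.g., a CUDA launch_bounds(128, 2)
// attribute via ompx_attribute makes the clamping above cap MaxThreadsVal at
// 128 and raise Attrs.MinTeams to at least 2, narrowing whatever bounds the
// num_teams/num_threads analysis produced.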

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  cantFail(OMPBuilder.emitTargetRegionFunction(
      EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
      OutlinedFnID));

  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}
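
// For example, a literal or a simple arithmetic expression such as `x + 1` is
// trivial here (no calls, no side effects), while a call to an arbitrary
// user-defined function, or any expression with possible side effects, is not.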

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot return a single child.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
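
// For illustration: in
//   #pragma omp target
//   {
//     int Unused;
//     ;
//     #pragma omp teams
//     ...
//   }
// the teams directive is the single compound child; the unused local
// declaration and the null statement are skipped by the walk above.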

const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
                                     ->getNumTeams()
                                     .front();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region.
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
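
// For illustration: `#pragma omp target teams num_teams(4)` sets
// MinTeamsVal = MaxTeamsVal = 4 and returns the num_teams expression, while a
// bare `#pragma omp target parallel` sets both values to 1 and returns
// nullptr.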

llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams, /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams, /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  assert(MinNT == MaxNT && "Num teams ranges require handling here.");
  return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}

/// Check for a num threads constant value (stored in \p UpperBound), or
/// expression (stored in \p E). If the value is conditional (via an if-clause),
/// store the condition in \p CondVal. If \p E and \p CondVal, respectively, are
/// nullptr, no expression evaluation is performed.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle the if clause. If it is present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
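    // For example, for `#pragma omp parallel if(c) num_threads(n)` the
    // computed upper bound is n when c holds and 1 when it does not.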
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
            *CondVal = CGF.EvaluateExprAsBool(CondExpr);
          }
        }
      }
    }
    // Check the value of the num_threads clause only if the if clause was not
    // specified or does not evaluate to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound =
              UpperBound
                  ? Constant->getZExtValue()
                  : std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()));
      // If we haven't found an upper bound, remember we saw a thread-limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      if (!E)
        return;
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}

const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           int32_t(Constant->getZExtValue()));
    }
    // If we haven't found an upper bound, remember we saw a thread-limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    //       let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF,
              ThreadLimitClause->getThreadLimit().front()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
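
// For illustration: for `#pragma omp target parallel thread_limit(8)
// num_threads(4)` the switch above records the constant bounds from both
// clauses and returns the num_threads expression; the caller below then takes
// the minimum of the two emitted values.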

llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. We have already handled the thread limit
    // expression.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0", which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle the if clause. If it is present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If the thread limit and num threads expressions were both present, take
  // the minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}
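
// Roughly, the value produced above is
//   min(thread_limit, cond ? (num_threads ? num_threads : 0) : 1),
// with absent operands dropping out of the expression and 0 meaning "let the
// runtime choose".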

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }
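  // For example, assuming OMP_MAP_MEMBER_OF occupies the topmost 16 bits of
  // the 64-bit flag word (mask 0xffff000000000000), the loop above counts its
  // trailing zero bits and returns 48, so a member position n is encoded as
  // `n << getFlagMemberOffset()`.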

  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    MapExprsArrayTy Exprs;
    MapValueDeclsArrayTy Mappers;
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
                            CurInfo.DevicePtrDecls.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Information gathered for a single mappable expression: its components,
  /// map type and modifiers, and whether a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all firstprivate variables in the current directive.
  /// The bool data is true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, it means we are using the whole length
      // of the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
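  // For illustration: given `int a[100]`, the section `a[2:10]` yields
  // 10 * sizeof(int), `a[3:]` yields (100 - 3) * sizeof(int), and a plain
  // `a` falls through to CGF.getTypeSize, i.e. 100 * sizeof(int).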

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }
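  // For example, an explicit `map(always, close, tofrom: x)` yields
  // OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE; the
  // OMP_MAP_IMPLICIT bit is added only for implicitly generated maps.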

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<ArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length, we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is a pointer.
    if (!Length) {
      QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSExtSize() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
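  // For illustration: `a[3]` has no colon and is never final; `a[0:1]` has a
  // provable length of one and is not final; `a[0:n]` is final because n
  // cannot be proved to be one at compile time.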

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo,
      MapCombinedInfoTy &StructBaseCombinedInfo,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool GenerateAllInfoForClauses,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = {},
      bool AreBothBasePtrAndPteeMapped = false) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    // int **a = &i;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map((*a)[0:3])
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(**a)
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)
    //
    // map(p, p[:100])
    // ===> map(p[:100])
    // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
      return;
    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer; it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (!AreBothBasePtrAndPteeMapped &&
            (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
             !VD || VD->hasLocalStorage()))
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so it is MEMBER_OF the base struct.
    // ps(3) is the pointee of ps(2), which is not a member of struct s, so it
    // should not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    bool IsPartialMapped =
        !PartialStruct.PreliminaryMapData.BasePointers.empty();

    // We need to check if we will be encountering any member expressions
    // (MEs). If we do not encounter any, it means we will be mapping the whole
    // struct. In that case we need to skip adding an entry for the struct to
    // the CombinedInfo list and instead add an entry to the
    // StructBaseCombinedInfo list only when generating all info for clauses.
7128     bool IsMappingWholeStruct = true;
7129     if (!GenerateAllInfoForClauses) {
7130       IsMappingWholeStruct = false;
7131     } else {
7132       for (auto TempI = I; TempI != CE; ++TempI) {
7133         const MemberExpr *PossibleME =
7134             dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7135         if (PossibleME) {
7136           IsMappingWholeStruct = false;
7137           break;
7138         }
7139       }
7140     }
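    // For illustration (hypothetical user code, not from this file):
    //   struct S { int x; double y; } s;
    //   #pragma omp target map(s)   // no MemberExpr -> whole struct mapped
    //   #pragma omp target map(s.x) // MemberExpr    -> partial struct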
7141 
7142     for (; I != CE; ++I) {
7143       // If the current component is member of a struct (parent struct) mark it.
7144       if (!EncounteredME) {
7145         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7146         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7147         // as MEMBER_OF the parent struct.
7148         if (EncounteredME) {
7149           ShouldBeMemberOf = true;
7150           // Do not emit as a complex pointer if this is actually not an
7151           // array-like expression.
7152           if (FirstPointerInComplexData) {
7153             QualType Ty = std::prev(I)
7154                               ->getAssociatedDeclaration()
7155                               ->getType()
7156                               .getNonReferenceType();
7157             BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7158             FirstPointerInComplexData = false;
7159           }
7160         }
7161       }
7162 
7163       auto Next = std::next(I);
7164 
7165       // We need to generate the addresses and sizes if this is the last
7166       // component, if the component is a pointer or if it is an array section
7167       // whose length can't be proved to be one. If this is a pointer, it
7168       // becomes the base address for the following components.
7169 
7170       // A final array section, is one whose length can't be proved to be one.
7171       // A final array section is one whose length can't be proved to be one.
7172       // as final array section.
7173       bool IsFinalArraySection =
7174           !IsNonContiguous &&
7175           isFinalArraySectionExpression(I->getAssociatedExpression());
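      // For illustration (hypothetical user code): given 'int a[10]' and a
      // runtime value 'n', 'a[0:n]' is a final array section since its length
      // cannot be proved to be one, while 'a[3:1]' is not.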
7176 
7177       // If we have a declaration for the mapping use that, otherwise use
7178       // If we have a declaration for the mapping, use that; otherwise use
7179       const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7180                                      ? I->getAssociatedDeclaration()
7181                                      : BaseDecl;
7182       MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7183                                                : MapExpr;
7184 
7185       // Get information on whether the element is a pointer. We have to treat
7186       // array sections specially, given that they are built-in
7187       // types.
7188       const auto *OASE =
7189           dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7190       const auto *OAShE =
7191           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7192       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7193       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7194       bool IsPointer =
7195           OAShE ||
7196           (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7197                        .getCanonicalType()
7198                        ->isAnyPointerType()) ||
7199           I->getAssociatedExpression()->getType()->isAnyPointerType();
7200       bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7201                                MapDecl &&
7202                                MapDecl->getType()->isLValueReferenceType();
7203       bool IsNonDerefPointer = IsPointer &&
7204                                !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7205                                !IsNonContiguous;
7206 
7207       if (OASE)
7208         ++DimSize;
7209 
7210       if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7211           IsFinalArraySection) {
7212         // If this is not the last component, we expect the pointer to be
7213         // associated with an array expression or member expression.
7214         assert((Next == CE ||
7215                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7216                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7217                 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7218                 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7219                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7220                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7221                "Unexpected expression");
7222 
7223         Address LB = Address::invalid();
7224         Address LowestElem = Address::invalid();
7225         auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7226                                        const MemberExpr *E) {
7227           const Expr *BaseExpr = E->getBase();
7228           // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a
7229           // scalar.
7230           LValue BaseLV;
7231           if (E->isArrow()) {
7232             LValueBaseInfo BaseInfo;
7233             TBAAAccessInfo TBAAInfo;
7234             Address Addr =
7235                 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7236             QualType PtrTy = BaseExpr->getType()->getPointeeType();
7237             BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7238           } else {
7239             BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7240           }
7241           return BaseLV;
7242         };
7243         if (OAShE) {
7244           LowestElem = LB =
7245               Address(CGF.EmitScalarExpr(OAShE->getBase()),
7246                       CGF.ConvertTypeForMem(
7247                           OAShE->getBase()->getType()->getPointeeType()),
7248                       CGF.getContext().getTypeAlignInChars(
7249                           OAShE->getBase()->getType()));
7250         } else if (IsMemberReference) {
7251           const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7252           LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7253           LowestElem = CGF.EmitLValueForFieldInitialization(
7254                               BaseLVal, cast<FieldDecl>(MapDecl))
7255                            .getAddress();
7256           LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7257                    .getAddress();
7258         } else {
7259           LowestElem = LB =
7260               CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7261                   .getAddress();
7262         }
7263 
7264         // If this component is a pointer inside the base struct then we don't
7265         // need to create any entry for it - it will be combined with the object
7266         // it is pointing to into a single PTR_AND_OBJ entry.
7267         bool IsMemberPointerOrAddr =
7268             EncounteredME &&
7269             (((IsPointer || ForDeviceAddr) &&
7270               I->getAssociatedExpression() == EncounteredME) ||
7271              (IsPrevMemberReference && !IsPointer) ||
7272              (IsMemberReference && Next != CE &&
7273               !Next->getAssociatedExpression()->getType()->isPointerType()));
7274         if (!OverlappedElements.empty() && Next == CE) {
7275           // Handle base element with the info for overlapped elements.
7276           assert(!PartialStruct.Base.isValid() && "The base element is set.");
7277           assert(!IsPointer &&
7278                  "Unexpected base element with the pointer type.");
7279           // Mark the whole struct as the struct that requires allocation on the
7280           // device.
7281           PartialStruct.LowestElem = {0, LowestElem};
7282           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7283               I->getAssociatedExpression()->getType());
7284           Address HB = CGF.Builder.CreateConstGEP(
7285               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7286                   LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7287               TypeSize.getQuantity() - 1);
7288           PartialStruct.HighestElem = {
7289               std::numeric_limits<decltype(
7290                   PartialStruct.HighestElem.first)>::max(),
7291               HB};
7292           PartialStruct.Base = BP;
7293           PartialStruct.LB = LB;
7294           assert(
7295               PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7296               "Overlapped elements must be used only once for the variable.");
7297           std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7298           // Emit map entries for the non-overlapped data.
7299           OpenMPOffloadMappingFlags Flags =
7300               OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7301               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7302                              /*AddPtrFlag=*/false,
7303                              /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7304           llvm::Value *Size = nullptr;
7305           // Do bitcopy of all non-overlapped structure elements.
7306           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7307                    Component : OverlappedElements) {
7308             Address ComponentLB = Address::invalid();
7309             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7310                  Component) {
7311               if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7312                 const auto *FD = dyn_cast<FieldDecl>(VD);
7313                 if (FD && FD->getType()->isLValueReferenceType()) {
7314                   const auto *ME =
7315                       cast<MemberExpr>(MC.getAssociatedExpression());
7316                   LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7317                   ComponentLB =
7318                       CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7319                           .getAddress();
7320                 } else {
7321                   ComponentLB =
7322                       CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7323                           .getAddress();
7324                 }
7325                 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7326                 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7327                 Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7328                                                  LBPtr);
7329                 break;
7330               }
7331             }
7332             assert(Size && "Failed to determine structure size");
7333             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7334             CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7335             CombinedInfo.DevicePtrDecls.push_back(nullptr);
7336             CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7337             CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7338             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7339                 Size, CGF.Int64Ty, /*isSigned=*/true));
7340             CombinedInfo.Types.push_back(Flags);
7341             CombinedInfo.Mappers.push_back(nullptr);
7342             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7343                                                                       : 1);
7344             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7345           }
7346           CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7347           CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7348           CombinedInfo.DevicePtrDecls.push_back(nullptr);
7349           CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7350           CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7351           llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7352           Size = CGF.Builder.CreatePtrDiff(
7353               CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7354               LBPtr);
7355           CombinedInfo.Sizes.push_back(
7356               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7357           CombinedInfo.Types.push_back(Flags);
7358           CombinedInfo.Mappers.push_back(nullptr);
7359           CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7360                                                                     : 1);
7361           break;
7362         }
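        // For illustration (hypothetical types; the overlap analysis itself
        // happens elsewhere): given
        //   struct T { int a; int *p; int b; } t;
        // with t.p recorded as an overlapped element, the block above emits
        // bitcopy entries for [&t, &t.p) and [&t.p + 1, one past the end of
        // t), skipping the overlapped pointer itself.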
7363         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7364         // Skip adding an entry in the CurInfo of this combined entry if the
7365         // whole struct is currently being mapped. The struct needs to be added
7366         // in the first position, before any data internal to the struct is
7367         // mapped.
7368         // Also skip adding an entry if
7369         // PartialStruct.PreliminaryMapData.BasePointers has already been mapped.
7370         if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
7371             (Next == CE && MapType != OMPC_MAP_unknown)) {
7372           if (!IsMappingWholeStruct) {
7373             CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7374             CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7375             CombinedInfo.DevicePtrDecls.push_back(nullptr);
7376             CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7377             CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7378             CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7379                 Size, CGF.Int64Ty, /*isSigned=*/true));
7380             CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7381                                                                       : 1);
7382           } else {
7383             StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7384             StructBaseCombinedInfo.BasePointers.push_back(
7385                 BP.emitRawPointer(CGF));
7386             StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7387             StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7388             StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7389             StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7390                 Size, CGF.Int64Ty, /*isSigned=*/true));
7391             StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7392                 IsNonContiguous ? DimSize : 1);
7393           }
7394 
7395           // If Mapper is valid, the last component inherits the mapper.
7396           bool HasMapper = Mapper && Next == CE;
7397           if (!IsMappingWholeStruct)
7398             CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7399           else
7400             StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7401                                                                : nullptr);
7402 
7403           // We need to add a pointer flag for each map that comes from the
7404           // same expression, except for the first one. We also need to signal
7405           // that this map is the first one that relates to the current capture
7406           // (there is a set of entries for each capture).
7407           OpenMPOffloadMappingFlags Flags =
7408               getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7409                              !IsExpressionFirstInfo || RequiresReference ||
7410                                  FirstPointerInComplexData || IsMemberReference,
7411                              AreBothBasePtrAndPteeMapped ||
7412                                  (IsCaptureFirstInfo && !RequiresReference),
7413                              IsNonContiguous);
7414 
7415           if (!IsExpressionFirstInfo || IsMemberReference) {
7416             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7417             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7418             if (IsPointer || (IsMemberReference && Next != CE))
7419               Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7420                          OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7421                          OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7422                          OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7423                          OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7424 
7425             if (ShouldBeMemberOf) {
7426               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7427               // should be later updated with the correct value of MEMBER_OF.
7428               Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7429               // From now on, all subsequent PTR_AND_OBJ entries should not be
7430               // marked as MEMBER_OF.
7431               ShouldBeMemberOf = false;
7432             }
7433           }
7434 
7435           if (!IsMappingWholeStruct)
7436             CombinedInfo.Types.push_back(Flags);
7437           else
7438             StructBaseCombinedInfo.Types.push_back(Flags);
7439         }
7440 
7441         // If we have encountered a member expression so far, keep track of the
7442         // mapped member. If the parent is "*this", then the value declaration
7443         // is nullptr.
7444         if (EncounteredME) {
7445           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7446           unsigned FieldIndex = FD->getFieldIndex();
7447 
7448           // Update info about the lowest and highest elements for this struct.
7449           if (!PartialStruct.Base.isValid()) {
7450             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7451             if (IsFinalArraySection) {
7452               Address HB =
7453                   CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7454                       .getAddress();
7455               PartialStruct.HighestElem = {FieldIndex, HB};
7456             } else {
7457               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7458             }
7459             PartialStruct.Base = BP;
7460             PartialStruct.LB = BP;
7461           } else if (FieldIndex < PartialStruct.LowestElem.first) {
7462             PartialStruct.LowestElem = {FieldIndex, LowestElem};
7463           } else if (FieldIndex > PartialStruct.HighestElem.first) {
7464             if (IsFinalArraySection) {
7465               Address HB =
7466                   CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7467                       .getAddress();
7468               PartialStruct.HighestElem = {FieldIndex, HB};
7469             } else {
7470               PartialStruct.HighestElem = {FieldIndex, LowestElem};
7471             }
7472           }
7473         }
7474 
7475         // Need to emit combined struct for array sections.
7476         if (IsFinalArraySection || IsNonContiguous)
7477           PartialStruct.IsArraySection = true;
7478 
7479         // If we have a final array section, we are done with this expression.
7480         if (IsFinalArraySection)
7481           break;
7482 
7483         // The pointer becomes the base for the next element.
7484         if (Next != CE)
7485           BP = IsMemberReference ? LowestElem : LB;
7486         if (!IsPartialMapped)
7487           IsExpressionFirstInfo = false;
7488         IsCaptureFirstInfo = false;
7489         FirstPointerInComplexData = false;
7490         IsPrevMemberReference = IsMemberReference;
7491       } else if (FirstPointerInComplexData) {
7492         QualType Ty = Components.rbegin()
7493                           ->getAssociatedDeclaration()
7494                           ->getType()
7495                           .getNonReferenceType();
7496         BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7497         FirstPointerInComplexData = false;
7498       }
7499     }
7500     // If we ran into the whole component, allocate the space for the whole
7501     // record.
7502     if (!EncounteredME)
7503       PartialStruct.HasCompleteRecord = true;
7504 
7505     if (!IsNonContiguous)
7506       return;
7507 
7508     const ASTContext &Context = CGF.getContext();
7509 
7510     // To support strides in array sections, we need to initialize the first
7511     // dimension size as 1, the first offset as 0, and the first count as 1.
7512     MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7513     MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7514     MapValuesArrayTy CurStrides;
7515     MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7516     uint64_t ElementTypeSize;
7517 
7518     // Collect Size information for each dimension and get the element size as
7519     // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7520     // should be [10, 10] and the first stride is 4 bytes.
7521     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7522          Components) {
7523       const Expr *AssocExpr = Component.getAssociatedExpression();
7524       const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7525 
7526       if (!OASE)
7527         continue;
7528 
7529       QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7530       auto *CAT = Context.getAsConstantArrayType(Ty);
7531       auto *VAT = Context.getAsVariableArrayType(Ty);
7532 
7533       // We need all the dimension sizes except for the last dimension.
7534       assert((VAT || CAT || &Component == &*Components.begin()) &&
7535              "Should be either ConstantArray or VariableArray if not the "
7536              "first Component");
7537 
7538       // Get element size if CurStrides is empty.
7539       if (CurStrides.empty()) {
7540         const Type *ElementType = nullptr;
7541         if (CAT)
7542           ElementType = CAT->getElementType().getTypePtr();
7543         else if (VAT)
7544           ElementType = VAT->getElementType().getTypePtr();
7545         else
7546           assert(&Component == &*Components.begin() &&
7547                  "Only expect pointer (non CAT or VAT) when this is the "
7548                  "first Component");
7549         // If ElementType is null, then it means the base is a pointer
7550         // (neither CAT nor VAT) and we'll attempt to get ElementType again
7551         // on the next iteration.
7552         if (ElementType) {
7553           // When the base is a pointer, we need to remove one
7554           // level of indirection.
7555           if (&Component != &*Components.begin())
7556             ElementType = ElementType->getPointeeOrArrayElementType();
7557           ElementTypeSize =
7558               Context.getTypeSizeInChars(ElementType).getQuantity();
7559           CurStrides.push_back(
7560               llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7561         }
7562       }
7563       // Get the dimension value, except for the last dimension, since we
7564       // don't need it.
7565       if (DimSizes.size() < Components.size() - 1) {
7566         if (CAT)
7567           DimSizes.push_back(
7568               llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7569         else if (VAT)
7570           DimSizes.push_back(CGF.Builder.CreateIntCast(
7571               CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7572               /*IsSigned=*/false));
7573       }
7574     }
7575 
7576     // Skip the dummy dimension since we already have its information.
7577     auto *DI = DimSizes.begin() + 1;
7578     // Running product of the dimensions.
7579     llvm::Value *DimProd =
7580         llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7581 
7582     // Collect the non-contiguity info. Notice that offset, count, and stride
7583     // are only meaningful for array sections, so we insert a null for anything
7584     // other than an array section.
7585     // Also, the sizes of the offset, count, and stride lists are not the same
7586     // as those of pointers, base_pointers, sizes, or dims. Instead, they match
7587     // the number of non-contiguous declarations in the target update to/from
7588     // clause.
7589     for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7590          Components) {
7591       const Expr *AssocExpr = Component.getAssociatedExpression();
7592 
7593       if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7594         llvm::Value *Offset = CGF.Builder.CreateIntCast(
7595             CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7596             /*isSigned=*/false);
7597         CurOffsets.push_back(Offset);
7598         CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7599         CurStrides.push_back(CurStrides.back());
7600         continue;
7601       }
7602 
7603       const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7604 
7605       if (!OASE)
7606         continue;
7607 
7608       // Offset
7609       const Expr *OffsetExpr = OASE->getLowerBound();
7610       llvm::Value *Offset = nullptr;
7611       if (!OffsetExpr) {
7612         // If offset is absent, then we just set it to zero.
7613         Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7614       } else {
7615         Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7616                                            CGF.Int64Ty,
7617                                            /*isSigned=*/false);
7618       }
7619       CurOffsets.push_back(Offset);
7620 
7621       // Count
7622       const Expr *CountExpr = OASE->getLength();
7623       llvm::Value *Count = nullptr;
7624       if (!CountExpr) {
7625         // In Clang, once a higher dimension is an array section, all the lower
7626         // dimensions are constructed as array sections too. However, for a case
7627         // like arr[0:2][2], Clang constructs the inner dimension as an array
7628         // section even though it is not one according to the spec.
7629         if (!OASE->getColonLocFirst().isValid() &&
7630             !OASE->getColonLocSecond().isValid()) {
7631           Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7632         } else {
7633           // OpenMP 5.0, 2.1.5 Array Sections, Description.
7634           // When the length is absent it defaults to ⌈(size −
7635           // lower-bound)/stride⌉, where size is the size of the array
7636           // dimension.
7637           const Expr *StrideExpr = OASE->getStride();
7638           llvm::Value *Stride =
7639               StrideExpr
7640                   ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7641                                               CGF.Int64Ty, /*isSigned=*/false)
7642                   : nullptr;
7643           if (Stride)
7644             Count = CGF.Builder.CreateUDiv(
7645                 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7646           else
7647             Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7648         }
7649       } else {
7650         Count = CGF.EmitScalarExpr(CountExpr);
7651       }
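      // Worked example of the default length above: for an array dimension of
      // size 10 with lower-bound 2 and stride 2, the length defaults to
      // ceil((10 - 2) / 2) = 4.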
7652       Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7653       CurCounts.push_back(Count);
7654 
7655       // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7656       // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7657       //              Offset      Count     Stride
7658       //    D0          0           1         4    (int)    <- dummy dimension
7659       //    D1          0           2         8    (2 * (1) * 4)
7660       //    D2          1           2         20   (1 * (1 * 5) * 4)
7661       //    D3          0           2         200  (2 * (1 * 5 * 5) * 4)
7662       const Expr *StrideExpr = OASE->getStride();
7663       llvm::Value *Stride =
7664           StrideExpr
7665               ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7666                                           CGF.Int64Ty, /*isSigned=*/false)
7667               : nullptr;
7668       DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7669       if (Stride)
7670         CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7671       else
7672         CurStrides.push_back(DimProd);
7673       if (DI != DimSizes.end())
7674         ++DI;
7675     }
7676 
7677     CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7678     CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7679     CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7680   }
7681 
7682   /// Return the adjusted map modifiers if the declaration a capture refers to
7683   /// appears in a first-private clause. This is expected to be used only with
7684   /// directives that start with 'target'.
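  /// For illustration, the cases below summarize to:
  ///   firstprivate pointer     -> OMP_MAP_TO | OMP_MAP_PTR_AND_OBJ
  ///   firstprivate non-pointer -> OMP_MAP_PRIVATE | OMP_MAP_TO
  ///   map(to: lambda)          -> the user-specified map-type bits
  ///   otherwise                -> OMP_MAP_TO | OMP_MAP_FROM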
7685   OpenMPOffloadMappingFlags
7686   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7687     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7688 
7689     // A firstprivate variable captured by reference will use only the
7690     // 'private ptr' and 'map to' flags. Return the right flags if the captured
7691     // declaration is known as firstprivate in this handler.
7692     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7693       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7694         return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7695                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7696       return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7697              OpenMPOffloadMappingFlags::OMP_MAP_TO;
7698     }
7699     auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7700     if (I != LambdasMap.end())
7701       // For map(to: lambda): use the user-specified map type.
7702       return getMapTypeBits(
7703           I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7704           /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
7705           /*AddPtrFlag=*/false,
7706           /*AddIsTargetParamFlag=*/false,
7707           /*isNonContiguous=*/false);
7708     return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7709            OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7710   }
7711 
7712   void getPlainLayout(const CXXRecordDecl *RD,
7713                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7714                       bool AsBase) const {
7715     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7716 
7717     llvm::StructType *St =
7718         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7719 
7720     unsigned NumElements = St->getNumElements();
7721     llvm::SmallVector<
7722         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7723         RecordLayout(NumElements);
7724 
7725     // Fill in non-virtual bases.
7726     for (const auto &I : RD->bases()) {
7727       if (I.isVirtual())
7728         continue;
7729 
7730       QualType BaseTy = I.getType();
7731       const auto *Base = BaseTy->getAsCXXRecordDecl();
7732       // Ignore empty bases.
7733       if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
7734           CGF.getContext()
7735               .getASTRecordLayout(Base)
7736               .getNonVirtualSize()
7737               .isZero())
7738         continue;
7739 
7740       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7741       RecordLayout[FieldIndex] = Base;
7742     }
7743     // Fill in virtual bases.
7744     for (const auto &I : RD->vbases()) {
7745       QualType BaseTy = I.getType();
7746       // Ignore empty bases.
7747       if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
7748         continue;
7749 
7750       const auto *Base = BaseTy->getAsCXXRecordDecl();
7751       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7752       if (RecordLayout[FieldIndex])
7753         continue;
7754       RecordLayout[FieldIndex] = Base;
7755     }
7756     // Fill in all the fields.
7757     assert(!RD->isUnion() && "Unexpected union.");
7758     for (const auto *Field : RD->fields()) {
7759       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7760       // will fill in later.)
7761       if (!Field->isBitField() &&
7762           !isEmptyFieldForLayout(CGF.getContext(), Field)) {
7763         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7764         RecordLayout[FieldIndex] = Field;
7765       }
7766     }
7767     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7768              &Data : RecordLayout) {
7769       if (Data.isNull())
7770         continue;
7771       if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
7772         getPlainLayout(Base, Layout, /*AsBase=*/true);
7773       else
7774         Layout.push_back(cast<const FieldDecl *>(Data));
7775     }
7776   }
7777 
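
  // For illustration (a hypothetical example): given
  //   struct B { int x; };
  //   struct D : B { int y; };
  // getPlainLayout(D, Layout, /*AsBase=*/false) appends B::x (recursing into
  // the non-virtual base) and then D::y; bitfields and empty records are
  // skipped.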
7778   /// Generate all the base pointers, section pointers, sizes, map types, and
7779   /// mappers for the extracted mappable expressions (all included in \a
7780   /// CombinedInfo). Also, for each item that relates with a device pointer, a
7781   /// pair of the relevant declaration and index where it occurs is appended to
7782   /// the device pointers info array.
7783   void generateAllInfoForClauses(
7784       ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
7785       llvm::OpenMPIRBuilder &OMPBuilder,
7786       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
7787           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
7788     // We have to process the component lists that relate to the same
7789     // declaration in a single chunk so that we can generate the map flags
7790     // correctly. Therefore, we organize all lists in a map.
7791     enum MapKind { Present, Allocs, Other, Total };
7792     llvm::MapVector<CanonicalDeclPtr<const Decl>,
7793                     SmallVector<SmallVector<MapInfo, 8>, 4>>
7794         Info;
7795 
7796     // Helper function to fill the information map for the different supported
7797     // clauses.
7798     auto &&InfoGen =
7799         [&Info, &SkipVarSet](
7800             const ValueDecl *D, MapKind Kind,
7801             OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7802             OpenMPMapClauseKind MapType,
7803             ArrayRef<OpenMPMapModifierKind> MapModifiers,
7804             ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7805             bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
7806             const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
7807           if (SkipVarSet.contains(D))
7808             return;
7809           auto It = Info.try_emplace(D, Total).first;
7810           It->second[Kind].emplace_back(
7811               L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
7812               IsImplicit, Mapper, VarRef, ForDeviceAddr);
7813         };
7814 
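    // For illustration (hypothetical clauses): after the loops below process
    // 'map(present, to: x) map(alloc: y) map(from: z)', Info[x] holds one
    // MapInfo in its Present bucket, Info[y] one in its Allocs bucket, and
    // Info[z] one in its Other bucket.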
7815     for (const auto *Cl : Clauses) {
7816       const auto *C = dyn_cast<OMPMapClause>(Cl);
7817       if (!C)
7818         continue;
7819       MapKind Kind = Other;
7820       if (llvm::is_contained(C->getMapTypeModifiers(),
7821                              OMPC_MAP_MODIFIER_present))
7822         Kind = Present;
7823       else if (C->getMapType() == OMPC_MAP_alloc)
7824         Kind = Allocs;
7825       const auto *EI = C->getVarRefs().begin();
7826       for (const auto L : C->component_lists()) {
7827         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
7828         InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7829                 C->getMapTypeModifiers(), {},
7830                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7831                 E);
7832         ++EI;
7833       }
7834     }
7835     for (const auto *Cl : Clauses) {
7836       const auto *C = dyn_cast<OMPToClause>(Cl);
7837       if (!C)
7838         continue;
7839       MapKind Kind = Other;
7840       if (llvm::is_contained(C->getMotionModifiers(),
7841                              OMPC_MOTION_MODIFIER_present))
7842         Kind = Present;
7843       const auto *EI = C->getVarRefs().begin();
7844       for (const auto L : C->component_lists()) {
7845         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
7846                 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7847                 C->isImplicit(), std::get<2>(L), *EI);
7848         ++EI;
7849       }
7850     }
7851     for (const auto *Cl : Clauses) {
7852       const auto *C = dyn_cast<OMPFromClause>(Cl);
7853       if (!C)
7854         continue;
7855       MapKind Kind = Other;
7856       if (llvm::is_contained(C->getMotionModifiers(),
7857                              OMPC_MOTION_MODIFIER_present))
7858         Kind = Present;
7859       const auto *EI = C->getVarRefs().begin();
7860       for (const auto L : C->component_lists()) {
7861         InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
7862                 C->getMotionModifiers(),
7863                 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7864                 *EI);
7865         ++EI;
7866       }
7867     }
7868 
7869     // Look at the use_device_ptr and use_device_addr clause information and
7870     // mark the existing map entries as such. If there is no map information for
7871     // an entry in the use_device_ptr or use_device_addr list, we create one
7872     // with map type 'alloc' and a zero-size section. It is the user's fault if
7873     // that was not mapped before. If there is no map information and the
7874     // pointer is a struct member, then we defer the emission of that entry
7875     // until the whole struct has been processed.
7876     llvm::MapVector<CanonicalDeclPtr<const Decl>,
7877                     SmallVector<DeferredDevicePtrEntryTy, 4>>
7878         DeferredInfo;
7879     MapCombinedInfoTy UseDeviceDataCombinedInfo;
7880 
7881     auto &&UseDeviceDataCombinedInfoGen =
7882         [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7883                                      CodeGenFunction &CGF, bool IsDevAddr) {
7884           UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7885           UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7886           UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7887           UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7888               IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7889           UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7890           UseDeviceDataCombinedInfo.Sizes.push_back(
7891               llvm::Constant::getNullValue(CGF.Int64Ty));
7892           UseDeviceDataCombinedInfo.Types.push_back(
7893               OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7894           UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7895         };
7896 
7897     auto &&MapInfoGen =
7898         [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7899          &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7900                    OMPClauseMappableExprCommon::MappableExprComponentListRef
7901                        Components,
7902                    bool IsImplicit, bool IsDevAddr) {
7903           // We didn't find any match in our map information; generate a
7904           // zero-size array section. If the pointer is a struct member, we
7905           // defer this action until the whole struct has been processed.
7906           if (isa<MemberExpr>(IE)) {
7907             // Insert the pointer into Info to be processed by
7908             // generateInfoForComponentList. Because it is a member pointer
7909             // without a pointee, no entry will be generated for it, therefore
7910             // we need to generate one after the whole struct has been
7911             // processed. Nonetheless, generateInfoForComponentList must be
7912             // called to take the pointer into account for the calculation of
7913             // the range of the partial struct.
7914             InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
7915                     /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
7916                     IsDevAddr);
7917             DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7918           } else {
7919             llvm::Value *Ptr;
7920             if (IsDevAddr) {
7921               if (IE->isGLValue())
7922                 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7923               else
7924                 Ptr = CGF.EmitScalarExpr(IE);
7925             } else {
7926               Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7927             }
7928             UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7929           }
7930         };
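    // For illustration (hypothetical user code): in
    //   #pragma omp target data use_device_ptr(p)
    // with no matching map clause, 'p' is not a member expression, so the
    // else branch above loads its value and records a zero-size RETURN_PARAM
    // entry for it.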
7931 
7932     auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7933                                     const Expr *IE, bool IsDevAddr) -> bool {
7934       // We potentially have map information for this declaration already.
7935       // Look for the first set of components that refer to it. If found,
7936       // return true.
7937       // If the first component is a member expression, we have to look into
7938       // 'this', which maps to null in the map of map information. Otherwise
7939       // look directly for the information.
7940       auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7941       if (It != Info.end()) {
7942         bool Found = false;
7943         for (auto &Data : It->second) {
7944           auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7945             return MI.Components.back().getAssociatedDeclaration() == VD;
7946           });
7947           // If we found a map entry, signal that the pointer has to be
7948           // returned and move on to the next declaration. Exclude cases where
7949           // the base pointer is mapped as an array subscript, array section, or
7950           // array shaping. The base address is passed as a pointer to base in
7951           // this case and cannot be used as a base for a use_device_ptr list
7952           // item.
7953           if (CI != Data.end()) {
7954             if (IsDevAddr) {
7955               CI->ForDeviceAddr = IsDevAddr;
7956               CI->ReturnDevicePointer = true;
7957               Found = true;
7958               break;
7959             } else {
7960               auto PrevCI = std::next(CI->Components.rbegin());
7961               const auto *VarD = dyn_cast<VarDecl>(VD);
7962               if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7963                   isa<MemberExpr>(IE) ||
7964                   !VD->getType().getNonReferenceType()->isPointerType() ||
7965                   PrevCI == CI->Components.rend() ||
7966                   isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7967                   VarD->hasLocalStorage()) {
7968                 CI->ForDeviceAddr = IsDevAddr;
7969                 CI->ReturnDevicePointer = true;
7970                 Found = true;
7971                 break;
7972               }
7973             }
7974           }
7975         }
7976         return Found;
7977       }
7978       return false;
7979     };
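    // For illustration (hypothetical user code): for a global pointer 'p',
    //   #pragma omp target data map(p[0:10]) use_device_ptr(p)
    // finds map information for 'p', but its base pointer is only mapped as
    // an array section, so the entry is not marked here and a separate
    // zero-size entry is generated for the use_device_ptr item instead.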
7980 
7981     // Look at the use_device_ptr clause information and mark the existing map
7982     // entries as such. If there is no map information for an entry in the
7983     // use_device_ptr list, we create one with map type 'alloc' and a zero-size
7984     // section. It is the user's fault if that was not mapped before. If there
7985     // is no map information and the pointer is a struct member, then we defer
7986     // the emission of that entry until the whole struct has been processed.
7987     for (const auto *Cl : Clauses) {
7988       const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
7989       if (!C)
7990         continue;
7991       for (const auto L : C->component_lists()) {
7992         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
7993             std::get<1>(L);
7994         assert(!Components.empty() &&
7995                "Not expecting empty list of components!");
7996         const ValueDecl *VD = Components.back().getAssociatedDeclaration();
7997         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7998         const Expr *IE = Components.back().getAssociatedExpression();
7999         if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8000           continue;
8001         MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8002                    /*IsDevAddr=*/false);
8003       }
8004     }
8005 
8006     llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8007     for (const auto *Cl : Clauses) {
8008       const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8009       if (!C)
8010         continue;
8011       for (const auto L : C->component_lists()) {
8012         OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8013             std::get<1>(L);
8014         assert(!std::get<1>(L).empty() &&
8015                "Not expecting empty list of components!");
8016         const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8017         if (!Processed.insert(VD).second)
8018           continue;
8019         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8020         const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8021         if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8022           continue;
8023         MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8024                    /*IsDevAddr=*/true);
8025       }
8026     }
8027 
8028     for (const auto &Data : Info) {
8029       StructRangeInfoTy PartialStruct;
8030       // Current struct information:
8031       MapCombinedInfoTy CurInfo;
8032       // Current struct base information:
8033       MapCombinedInfoTy StructBaseCurInfo;
8034       const Decl *D = Data.first;
8035       const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8036       bool HasMapBasePtr = false;
8037       bool HasMapArraySec = false;
8038       if (VD && VD->getType()->isAnyPointerType()) {
8039         for (const auto &M : Data.second) {
8040           HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8041             return isa_and_present<DeclRefExpr>(L.VarRef);
8042           });
8043           HasMapArraySec = any_of(M, [](const MapInfo &L) {
8044             return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8045                 L.VarRef);
8046           });
8047           if (HasMapBasePtr && HasMapArraySec)
8048             break;
8049         }
8050       }
8051       for (const auto &M : Data.second) {
8052         for (const MapInfo &L : M) {
8053           assert(!L.Components.empty() &&
8054                  "Not expecting declaration with no component lists.");
8055 
8056           // Remember the current base pointer index.
8057           unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8058           unsigned StructBasePointersIdx =
8059               StructBaseCurInfo.BasePointers.size();
8060           CurInfo.NonContigInfo.IsNonContiguous =
8061               L.Components.back().isNonContiguous();
8062           generateInfoForComponentList(
8063               L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8064               CurInfo, StructBaseCurInfo, PartialStruct,
8065               /*IsFirstComponentList=*/false, L.IsImplicit,
8066               /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8067               L.VarRef, /*OverlappedElements*/ {},
8068               HasMapBasePtr && HasMapArraySec);
8069 
8070           // If this entry relates to a device pointer, set the relevant
8071           // declaration and add the 'return pointer' flag.
8072           if (L.ReturnDevicePointer) {
8073             // Check whether a value was added to either CurInfo or
8074             // StructBaseCurInfo and error if no value was added to either of
8075             // them:
8076             assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8077                     StructBasePointersIdx <
8078                         StructBaseCurInfo.BasePointers.size()) &&
8079                    "Unexpected number of mapped base pointers.");
8080 
8081             // Choose a base pointer index which is always valid:
8082             const ValueDecl *RelevantVD =
8083                 L.Components.back().getAssociatedDeclaration();
8084             assert(RelevantVD &&
8085                    "No relevant declaration related with device pointer??");
8086 
8087             // If StructBaseCurInfo has been updated this iteration, then work
8088             // on the first new entry added to it, i.e. make sure that when
8089             // multiple values are added to any of the lists, the first value
8090             // added is the one modified by the assignments below (not the last
8091             // value added).
8092             if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8093               StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8094                   RelevantVD;
8095               StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8096                   L.ForDeviceAddr ? DeviceInfoTy::Address
8097                                   : DeviceInfoTy::Pointer;
8098               StructBaseCurInfo.Types[StructBasePointersIdx] |=
8099                   OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8100             } else {
8101               CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8102               CurInfo.DevicePointers[CurrentBasePointersIdx] =
8103                   L.ForDeviceAddr ? DeviceInfoTy::Address
8104                                   : DeviceInfoTy::Pointer;
8105               CurInfo.Types[CurrentBasePointersIdx] |=
8106                   OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8107             }
8108           }
8109         }
8110       }
8111 
8112       // Append any pending zero-length pointers which are struct members and
8113       // are used with use_device_ptr or use_device_addr.
8114       auto CI = DeferredInfo.find(Data.first);
8115       if (CI != DeferredInfo.end()) {
8116         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8117           llvm::Value *BasePtr;
8118           llvm::Value *Ptr;
8119           if (L.ForDeviceAddr) {
8120             if (L.IE->isGLValue())
8121               Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8122             else
8123               Ptr = this->CGF.EmitScalarExpr(L.IE);
8124             BasePtr = Ptr;
8125             // Entry is RETURN_PARAM. Also, set the placeholder value
8126             // MEMBER_OF=FFFF so that the entry is later updated with the
8127             // correct value of MEMBER_OF.
8128             CurInfo.Types.push_back(
8129                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8130                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8131           } else {
8132             BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8133             Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8134                                              L.IE->getExprLoc());
8135             // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8136             // placeholder value MEMBER_OF=FFFF so that the entry is later
8137             // updated with the correct value of MEMBER_OF.
8138             CurInfo.Types.push_back(
8139                 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8140                 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8141                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8142           }
8143           CurInfo.Exprs.push_back(L.VD);
8144           CurInfo.BasePointers.emplace_back(BasePtr);
8145           CurInfo.DevicePtrDecls.emplace_back(L.VD);
8146           CurInfo.DevicePointers.emplace_back(
8147               L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8148           CurInfo.Pointers.push_back(Ptr);
8149           CurInfo.Sizes.push_back(
8150               llvm::Constant::getNullValue(this->CGF.Int64Ty));
8151           CurInfo.Mappers.push_back(nullptr);
8152         }
8153       }
8154 
8155       // Unify entries in one list, making sure the struct mapping precedes the
8156       // individual fields:
8157       MapCombinedInfoTy UnionCurInfo;
8158       UnionCurInfo.append(StructBaseCurInfo);
8159       UnionCurInfo.append(CurInfo);
8160 
8161       // If there is an entry in PartialStruct it means we have a struct with
8162       // individual members mapped. Emit an extra combined entry.
8163       if (PartialStruct.Base.isValid()) {
8164         UnionCurInfo.NonContigInfo.Dims.push_back(0);
8165         // Emit a combined entry:
8166         emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8167                           /*IsMapThis*/ !VD, OMPBuilder, VD);
8168       }
8169 
8170       // We need to append the results of this capture to what we already have.
8171       CombinedInfo.append(UnionCurInfo);
8172     }
8173     // Append data for use_device_ptr clauses.
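    // At this point, for each captured declaration the combined struct entry
    // (if any) precedes its struct-base entries, which in turn precede the
    // remaining per-component entries.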
8174     CombinedInfo.append(UseDeviceDataCombinedInfo);
8175   }
8176 
8177 public:
8178   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8179       : CurDir(&Dir), CGF(CGF) {
8180     // Extract firstprivate clause information.
8181     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8182       for (const auto *D : C->varlist())
8183         FirstPrivateDecls.try_emplace(
8184             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8185     // Extract implicit firstprivates from uses_allocators clauses.
8186     for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8187       for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8188         OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8189         if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8190           FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8191                                         /*Implicit=*/true);
8192         else if (const auto *VD = dyn_cast<VarDecl>(
8193                      cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8194                          ->getDecl()))
8195           FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8196       }
8197     }
8198     // Extract device pointer clause information.
8199     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8200       for (auto L : C->component_lists())
8201         DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8202     // Extract device addr clause information.
8203     for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8204       for (auto L : C->component_lists())
8205         HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8206     // Extract map information.
8207     for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8208       if (C->getMapType() != OMPC_MAP_to)
8209         continue;
8210       for (auto L : C->component_lists()) {
8211         const ValueDecl *VD = std::get<0>(L);
8212         const auto *RD = VD ? VD->getType()
8213                                   .getCanonicalType()
8214                                   .getNonReferenceType()
8215                                   ->getAsCXXRecordDecl()
8216                             : nullptr;
8217         if (RD && RD->isLambda())
8218           LambdasMap.try_emplace(std::get<0>(L), C);
8219       }
8220     }
8221   }
8222 
8223   /// Constructor for the declare mapper directive.
8224   MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8225       : CurDir(&Dir), CGF(CGF) {}
8226 
8227   /// Generate code for the combined entry if we have a partially mapped struct
8228   /// and take care of the mapping flags of the arguments corresponding to
8229   /// individual struct members.
8230   void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8231                          MapFlagsArrayTy &CurTypes,
8232                          const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8233                          llvm::OpenMPIRBuilder &OMPBuilder,
8234                          const ValueDecl *VD = nullptr,
8235                          bool NotTargetParams = true) const {
8236     if (CurTypes.size() == 1 &&
8237         ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8238          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8239         !PartialStruct.IsArraySection)
8240       return;
8241     Address LBAddr = PartialStruct.LowestElem.second;
8242     Address HBAddr = PartialStruct.HighestElem.second;
8243     if (PartialStruct.HasCompleteRecord) {
8244       LBAddr = PartialStruct.LB;
8245       HBAddr = PartialStruct.LB;
8246     }
8247     CombinedInfo.Exprs.push_back(VD);
8248     // Base is the base of the struct
8249     CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8250     CombinedInfo.DevicePtrDecls.push_back(nullptr);
8251     CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8252     // Pointer is the address of the lowest element
8253     llvm::Value *LB = LBAddr.emitRawPointer(CGF);
8254     const CXXMethodDecl *MD =
8255         CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8256     const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8257     bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8258     // There should not be a mapper for a combined entry.
8259     if (HasBaseClass) {
8260       // OpenMP 5.2 148:21:
8261       // If the target construct is within a class non-static member function,
8262       // and a variable is an accessible data member of the object for which the
8263       // non-static data member function is invoked, the variable is treated as
8264       // if the this[:1] expression had appeared in a map clause with a map-type
8265       // of tofrom.
8266       // Emit this[:1]
8267       CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8268       QualType Ty = MD->getFunctionObjectParameterType();
8269       llvm::Value *Size =
8270           CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8271                                     /*isSigned=*/true);
8272       CombinedInfo.Sizes.push_back(Size);
8273     } else {
8274       CombinedInfo.Pointers.push_back(LB);
8275       // Size is (addr of {highest+1} element) - (addr of lowest element)
8276       llvm::Value *HB = HBAddr.emitRawPointer(CGF);
8277       llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8278           HBAddr.getElementType(), HB, /*Idx0=*/1);
8279       llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8280       llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8281       llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8282       llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8283                                                     /*isSigned=*/false);
8284       CombinedInfo.Sizes.push_back(Size);
8285     }
8286     CombinedInfo.Mappers.push_back(nullptr);
8287     // The map type is always TARGET_PARAM when generating info for captures.
8288     CombinedInfo.Types.push_back(
8289         NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8290         : !PartialStruct.PreliminaryMapData.BasePointers.empty()
8291             ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
8292             : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8293     // If any element has the present modifier, then make sure the runtime
8294     // doesn't attempt to allocate the struct.
8295     if (CurTypes.end() !=
8296         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8297           return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8298               Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8299         }))
8300       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8301     // Remove the TARGET_PARAM flag from the first element.
8302     (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8303     // If any element has the ompx_hold modifier, then make sure the runtime
8304     // uses the hold reference count for the struct as a whole so that it won't
8305     // be unmapped by an extra dynamic reference count decrement.  Add it to all
8306     // elements as well so the runtime knows which reference count to check
8307     // when determining whether it's time for device-to-host transfers of
8308     // individual elements.
8309     if (CurTypes.end() !=
8310         llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8311           return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8312               Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8313         })) {
8314       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8315       for (auto &M : CurTypes)
8316         M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8317     }
8318 
8319     // All other current entries will be MEMBER_OF the combined entry
8320     // (except for PTR_AND_OBJ entries which do not have a placeholder value
8321     // 0xFFFF in the MEMBER_OF field).
8322     OpenMPOffloadMappingFlags MemberOfFlag =
8323         OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8324     for (auto &M : CurTypes)
8325       OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8326   }
8327 
8328   /// Generate all the base pointers, section pointers, sizes, map types, and
8329   /// mappers for the extracted mappable expressions (all included in \a
8330   /// CombinedInfo). Also, for each item that relates to a device pointer, a
8331   /// pair of the relevant declaration and index where it occurs is appended to
8332   /// the device pointers info array.
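  /// For example (illustrative only), for:
  /// \code
  /// int *P;
  /// #pragma omp target map(to: P[0:10])
  /// \endcode
  /// this produces roughly a base pointer (&P), a section pointer (&P[0]), a
  /// size of 10 * sizeof(int), and TO map-type flags for the entry.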
8333   void generateAllInfo(
8334       MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8335       const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8336           llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8337     assert(isa<const OMPExecutableDirective *>(CurDir) &&
8338            "Expect an executable directive");
8339     const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8340     generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8341                               SkipVarSet);
8342   }
8343 
8344   /// Generate all the base pointers, section pointers, sizes, map types, and
8345   /// mappers for the extracted map clauses of a user-defined mapper (all included
8346   /// in \a CombinedInfo).
8347   void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8348                                 llvm::OpenMPIRBuilder &OMPBuilder) const {
8349     assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
8350            "Expect a declare mapper directive");
8351     const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
8352     generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8353                               OMPBuilder);
8354   }
8355 
8356   /// Emit capture info for lambdas, for variables captured by reference.
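  /// A minimal sketch of the situation being handled (illustrative only):
  /// \code
  /// int X = 0;
  /// auto L = [&X]() { X++; };
  /// #pragma omp target map(to: L)
  /// \endcode
  /// The pointer to 'X' stored inside 'L' must itself be mapped so that the
  /// device copy of 'L' refers to the device copy of 'X'.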
8357   void generateInfoForLambdaCaptures(
8358       const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8359       llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8360     QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8361     const auto *RD = VDType->getAsCXXRecordDecl();
8362     if (!RD || !RD->isLambda())
8363       return;
8364     Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8365                    CGF.getContext().getDeclAlign(VD));
8366     LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8367     llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8368     FieldDecl *ThisCapture = nullptr;
8369     RD->getCaptureFields(Captures, ThisCapture);
8370     if (ThisCapture) {
8371       LValue ThisLVal =
8372           CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8373       LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8374       LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8375                                  VDLVal.getPointer(CGF));
8376       CombinedInfo.Exprs.push_back(VD);
8377       CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8378       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8379       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8380       CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8381       CombinedInfo.Sizes.push_back(
8382           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8383                                     CGF.Int64Ty, /*isSigned=*/true));
8384       CombinedInfo.Types.push_back(
8385           OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8386           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8387           OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8388           OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8389       CombinedInfo.Mappers.push_back(nullptr);
8390     }
8391     for (const LambdaCapture &LC : RD->captures()) {
8392       if (!LC.capturesVariable())
8393         continue;
8394       const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8395       if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8396         continue;
8397       auto It = Captures.find(VD);
8398       assert(It != Captures.end() && "Found lambda capture without field.");
8399       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8400       if (LC.getCaptureKind() == LCK_ByRef) {
8401         LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8402         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8403                                    VDLVal.getPointer(CGF));
8404         CombinedInfo.Exprs.push_back(VD);
8405         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8406         CombinedInfo.DevicePtrDecls.push_back(nullptr);
8407         CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8408         CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8409         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8410             CGF.getTypeSize(
8411                 VD->getType().getCanonicalType().getNonReferenceType()),
8412             CGF.Int64Ty, /*isSigned=*/true));
8413       } else {
8414         RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8415         LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8416                                    VDLVal.getPointer(CGF));
8417         CombinedInfo.Exprs.push_back(VD);
8418         CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8419         CombinedInfo.DevicePtrDecls.push_back(nullptr);
8420         CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8421         CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8422         CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8423       }
8424       CombinedInfo.Types.push_back(
8425           OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8426           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8427           OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8428           OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8429       CombinedInfo.Mappers.push_back(nullptr);
8430     }
8431   }
8432 
8433   /// Set correct indices for lambda captures.
8434   void adjustMemberOfForLambdaCaptures(
8435       llvm::OpenMPIRBuilder &OMPBuilder,
8436       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8437       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8438       MapFlagsArrayTy &Types) const {
8439     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8440       // Set correct member_of idx for all implicit lambda captures.
8441       if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8442                        OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8443                        OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8444                        OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8445         continue;
8446       llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8447       assert(BasePtr && "Unable to find base lambda address.");
8448       int TgtIdx = -1;
8449       for (unsigned J = I; J > 0; --J) {
8450         unsigned Idx = J - 1;
8451         if (Pointers[Idx] != BasePtr)
8452           continue;
8453         TgtIdx = Idx;
8454         break;
8455       }
8456       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8457       // All other current entries will be MEMBER_OF the combined entry
8458       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8459       // 0xFFFF in the MEMBER_OF field).
8460       OpenMPOffloadMappingFlags MemberOfFlag =
8461           OMPBuilder.getMemberOfFlag(TgtIdx);
8462       OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8463     }
8464   }
8465 
8466   /// Generate the base pointers, section pointers, sizes, map types, and
8467   /// mappers associated with a given capture (all included in \a CombinedInfo).
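  /// For example (an illustrative sketch), with:
  /// \code
  /// #pragma omp target map(tofrom: S1) map(to: S1.X)
  /// \endcode
  /// the component list for 'S1.X' overlaps the whole-struct list for 'S1',
  /// and the overlap handling below emits 'S1' in pieces around the
  /// overlapped member so that 'S1.X' can keep its own map type.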
8468   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8469                               llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8470                               StructRangeInfoTy &PartialStruct) const {
8471     assert(!Cap->capturesVariableArrayType() &&
8472            "Not expecting to generate map info for a variable array type!");
8473 
8474     // We need to know when we are generating information for the first component.
8475     const ValueDecl *VD = Cap->capturesThis()
8476                               ? nullptr
8477                               : Cap->getCapturedVar()->getCanonicalDecl();
8478 
8479     // For map(to: lambda): skip here; it is processed in
8480     // generateDefaultMapInfo.
8481     if (LambdasMap.count(VD))
8482       return;
8483 
8484     // If this declaration appears in an is_device_ptr clause, we just have to
8485     // pass the pointer by value. If it is a reference to a declaration, we just
8486     // pass its value.
8487     if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8488       CombinedInfo.Exprs.push_back(VD);
8489       CombinedInfo.BasePointers.emplace_back(Arg);
8490       CombinedInfo.DevicePtrDecls.emplace_back(VD);
8491       CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8492       CombinedInfo.Pointers.push_back(Arg);
8493       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8494           CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8495           /*isSigned=*/true));
8496       CombinedInfo.Types.push_back(
8497           OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8498           OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8499       CombinedInfo.Mappers.push_back(nullptr);
8500       return;
8501     }
8502 
8503     using MapData =
8504         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8505                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8506                    const ValueDecl *, const Expr *>;
8507     SmallVector<MapData, 4> DeclComponentLists;
8508     // For member fields listed in is_device_ptr, store them in
8509     // DeclComponentLists for generating component info.
8510     static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8511     auto It = DevPointersMap.find(VD);
8512     if (It != DevPointersMap.end())
8513       for (const auto &MCL : It->second)
8514         DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8515                                         /*IsImplicit=*/true, nullptr,
8516                                         nullptr);
8517     auto I = HasDevAddrsMap.find(VD);
8518     if (I != HasDevAddrsMap.end())
8519       for (const auto &MCL : I->second)
8520         DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8521                                         /*IsImplicit=*/true, nullptr,
8522                                         nullptr);
8523     assert(isa<const OMPExecutableDirective *>(CurDir) &&
8524            "Expect an executable directive");
8525     const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8526     bool HasMapBasePtr = false;
8527     bool HasMapArraySec = false;
8528     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8529       const auto *EI = C->getVarRefs().begin();
8530       for (const auto L : C->decl_component_lists(VD)) {
8531         const ValueDecl *VDecl, *Mapper;
8532         // The expression is not correct if the mapping is implicit.
8533         const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8534         OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8535         std::tie(VDecl, Components, Mapper) = L;
8536         assert(VDecl == VD && "We got information for the wrong declaration??");
8537         assert(!Components.empty() &&
8538                "Not expecting declaration with no component lists.");
8539         if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
8540           HasMapBasePtr = true;
8541         if (VD && E && VD->getType()->isAnyPointerType() &&
8542             (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
8543           HasMapArraySec = true;
8544         DeclComponentLists.emplace_back(Components, C->getMapType(),
8545                                         C->getMapTypeModifiers(),
8546                                         C->isImplicit(), Mapper, E);
8547         ++EI;
8548       }
8549     }
8550     llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8551                                              const MapData &RHS) {
8552       ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8553       OpenMPMapClauseKind MapType = std::get<1>(RHS);
8554       bool HasPresent =
8555           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8556       bool HasAllocs = MapType == OMPC_MAP_alloc;
8557       MapModifiers = std::get<2>(RHS);
8558       MapType = std::get<1>(LHS);
8559       bool HasPresentR =
8560           llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8561       bool HasAllocsR = MapType == OMPC_MAP_alloc;
8562       return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8563     });
8564 
8565     // Find overlapping elements (including the offset from the base element).
8566     llvm::SmallDenseMap<
8567         const MapData *,
8568         llvm::SmallVector<
8569             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8570         4>
8571         OverlappedData;
8572     size_t Count = 0;
8573     for (const MapData &L : DeclComponentLists) {
8574       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8575       OpenMPMapClauseKind MapType;
8576       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8577       bool IsImplicit;
8578       const ValueDecl *Mapper;
8579       const Expr *VarRef;
8580       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8581           L;
8582       ++Count;
8583       for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8584         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8585         std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8586                  VarRef) = L1;
8587         auto CI = Components.rbegin();
8588         auto CE = Components.rend();
8589         auto SI = Components1.rbegin();
8590         auto SE = Components1.rend();
8591         for (; CI != CE && SI != SE; ++CI, ++SI) {
8592           if (CI->getAssociatedExpression()->getStmtClass() !=
8593               SI->getAssociatedExpression()->getStmtClass())
8594             break;
8595           // Are we dealing with different variables/fields?
8596           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8597             break;
8598         }
8599         // We found an overlap if, for at least one of the component lists,
8600         // we reached the head of the list.
8601         if (CI == CE || SI == SE) {
8602           // Ignore it if it is the same component.
8603           if (CI == CE && SI == SE)
8604             continue;
8605           const auto It = (SI == SE) ? CI : SI;
8606           // If one component is a pointer and another one is a kind of
8607           // dereference of this pointer (array subscript, section, dereference,
8608           // etc.), it is not an overlap.
8609           // Likewise, if one component is a base and another component is a
8610           // dereferenced pointer memberexpr with the same base.
8611           if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8612               (std::prev(It)->getAssociatedDeclaration() &&
8613                std::prev(It)
8614                    ->getAssociatedDeclaration()
8615                    ->getType()
8616                    ->isPointerType()) ||
8617               (It->getAssociatedDeclaration() &&
8618                It->getAssociatedDeclaration()->getType()->isPointerType() &&
8619                std::next(It) != CE && std::next(It) != SE))
8620             continue;
8621           const MapData &BaseData = CI == CE ? L : L1;
8622           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8623               SI == SE ? Components : Components1;
8624           OverlappedData[&BaseData].push_back(SubData);
8625         }
8626       }
8627     }
8628     // Sort the overlapped elements for each item.
8629     llvm::SmallVector<const FieldDecl *, 4> Layout;
8630     if (!OverlappedData.empty()) {
8631       const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8632       const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8633       while (BaseType != OrigType) {
8634         BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8635         OrigType = BaseType->getPointeeOrArrayElementType();
8636       }
8637 
8638       if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8639         getPlainLayout(CRD, Layout, /*AsBase=*/false);
8640       else {
8641         const auto *RD = BaseType->getAsRecordDecl();
8642         Layout.append(RD->field_begin(), RD->field_end());
8643       }
8644     }
8645     for (auto &Pair : OverlappedData) {
8646       llvm::stable_sort(
8647           Pair.getSecond(),
8648           [&Layout](
8649               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8650               OMPClauseMappableExprCommon::MappableExprComponentListRef
8651                   Second) {
8652             auto CI = First.rbegin();
8653             auto CE = First.rend();
8654             auto SI = Second.rbegin();
8655             auto SE = Second.rend();
8656             for (; CI != CE && SI != SE; ++CI, ++SI) {
8657               if (CI->getAssociatedExpression()->getStmtClass() !=
8658                   SI->getAssociatedExpression()->getStmtClass())
8659                 break;
8660               // Are we dealing with different variables/fields?
8661               if (CI->getAssociatedDeclaration() !=
8662                   SI->getAssociatedDeclaration())
8663                 break;
8664             }
8665 
8666             // Lists contain the same elements.
8667             if (CI == CE && SI == SE)
8668               return false;
8669 
8670             // A list with fewer elements is less than a list with more elements.
8671             if (CI == CE || SI == SE)
8672               return CI == CE;
8673 
8674             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8675             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8676             if (FD1->getParent() == FD2->getParent())
8677               return FD1->getFieldIndex() < FD2->getFieldIndex();
8678             const auto *It =
8679                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8680                   return FD == FD1 || FD == FD2;
8681                 });
8682             return *It == FD1;
8683           });
8684     }
8685 
8686     // These entries are associated with a capture, because the mapping flags
8687     // depend on it. First, go through all elements that have overlapped elements.
8688     bool IsFirstComponentList = true;
8689     MapCombinedInfoTy StructBaseCombinedInfo;
8690     for (const auto &Pair : OverlappedData) {
8691       const MapData &L = *Pair.getFirst();
8692       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8693       OpenMPMapClauseKind MapType;
8694       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8695       bool IsImplicit;
8696       const ValueDecl *Mapper;
8697       const Expr *VarRef;
8698       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8699           L;
8700       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8701           OverlappedComponents = Pair.getSecond();
8702       generateInfoForComponentList(
8703           MapType, MapModifiers, {}, Components, CombinedInfo,
8704           StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8705           IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8706           /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8707       IsFirstComponentList = false;
8708     }
8709     // Then go through the remaining elements without overlapped elements.
8710     for (const MapData &L : DeclComponentLists) {
8711       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8712       OpenMPMapClauseKind MapType;
8713       ArrayRef<OpenMPMapModifierKind> MapModifiers;
8714       bool IsImplicit;
8715       const ValueDecl *Mapper;
8716       const Expr *VarRef;
8717       std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8718           L;
8719       auto It = OverlappedData.find(&L);
8720       if (It == OverlappedData.end())
8721         generateInfoForComponentList(
8722             MapType, MapModifiers, {}, Components, CombinedInfo,
8723             StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8724             IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8725             /*ForDeviceAddr=*/false, VD, VarRef,
8726             /*OverlappedElements*/ {}, HasMapBasePtr && HasMapArraySec);
8727       IsFirstComponentList = false;
8728     }
8729   }
8730 
8731   /// Generate the default map information for a given capture \a CI,
8732   /// record field declaration \a RI and captured value \a CV.
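  /// For example (illustrative only), in:
  /// \code
  /// int N; double A[100];
  /// #pragma omp target
  /// A[N] = 1.0;
  /// \endcode
  /// 'N' is captured by copy and passed as a LITERAL target parameter, while
  /// 'A' is captured by reference and receives the default 'tofrom' mapping
  /// used for aggregates.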
8733   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8734                               const FieldDecl &RI, llvm::Value *CV,
8735                               MapCombinedInfoTy &CombinedInfo) const {
8736     bool IsImplicit = true;
8737     // Do the default mapping.
8738     if (CI.capturesThis()) {
8739       CombinedInfo.Exprs.push_back(nullptr);
8740       CombinedInfo.BasePointers.push_back(CV);
8741       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8742       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8743       CombinedInfo.Pointers.push_back(CV);
8744       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8745       CombinedInfo.Sizes.push_back(
8746           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8747                                     CGF.Int64Ty, /*isSigned=*/true));
8748       // Default map type.
8749       CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8750                                    OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8751     } else if (CI.capturesVariableByCopy()) {
8752       const VarDecl *VD = CI.getCapturedVar();
8753       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8754       CombinedInfo.BasePointers.push_back(CV);
8755       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8756       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8757       CombinedInfo.Pointers.push_back(CV);
8758       if (!RI.getType()->isAnyPointerType()) {
8759         // We have to signal to the runtime that captures passed by value are
8760         // not pointers.
8761         CombinedInfo.Types.push_back(
8762             OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8763         CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8764             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8765       } else {
8766         // Pointers are implicitly mapped with a zero size and no flags
8767         // (other than first map that is added for all implicit maps).
8768         CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8769         CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8770       }
8771       auto I = FirstPrivateDecls.find(VD);
8772       if (I != FirstPrivateDecls.end())
8773         IsImplicit = I->getSecond();
8774     } else {
8775       assert(CI.capturesVariable() && "Expected captured reference.");
8776       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8777       QualType ElementType = PtrTy->getPointeeType();
8778       CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8779           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8780       // The default map type for a scalar/complex type is 'to' because by
8781       // default the value doesn't have to be retrieved. For an aggregate
8782       // type, the default is 'tofrom'.
8783       CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8784       const VarDecl *VD = CI.getCapturedVar();
8785       auto I = FirstPrivateDecls.find(VD);
8786       CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8787       CombinedInfo.BasePointers.push_back(CV);
8788       CombinedInfo.DevicePtrDecls.push_back(nullptr);
8789       CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8790       if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8791         Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8792             CV, ElementType, CGF.getContext().getDeclAlign(VD),
8793             AlignmentSource::Decl));
8794         CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
8795       } else {
8796         CombinedInfo.Pointers.push_back(CV);
8797       }
8798       if (I != FirstPrivateDecls.end())
8799         IsImplicit = I->getSecond();
8800     }
8801     // Every default map produces a single argument which is a target parameter.
8802     CombinedInfo.Types.back() |=
8803         OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8804 
8805     // Add flag stating this is an implicit map.
8806     if (IsImplicit)
8807       CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8808 
8809     // No user-defined mapper for default mapping.
8810     CombinedInfo.Mappers.push_back(nullptr);
8811   }
8812 };
8813 } // anonymous namespace
8814 
8815 // Try to extract the base declaration from a `this->x` expression if possible.
8816 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
8817   if (!E)
8818     return nullptr;
8819 
8820   if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
8821     if (const MemberExpr *ME =
8822             dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8823       return ME->getMemberDecl();
8824   return nullptr;
8825 }
8826 
8827 /// Emit a string constant containing the names of the values mapped to the
8828 /// offloading runtime library.
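/// The string follows the usual source-location layout, i.e. something like
/// ";<file>;<mapped name>;<line>;<column>;;" (a descriptive note; the exact
/// format is owned by OMPBuilder.getOrCreateSrcLocStr).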
8829 static llvm::Constant *
8830 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8831                        MappableExprsHandler::MappingExprInfo &MapExprs) {
8832 
8833   uint32_t SrcLocStrSize;
8834   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8835     return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8836 
8837   SourceLocation Loc;
8838   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8839     if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8840       Loc = VD->getLocation();
8841     else
8842       Loc = MapExprs.getMapExpr()->getExprLoc();
8843   } else {
8844     Loc = MapExprs.getMapDecl()->getLocation();
8845   }
8846 
8847   std::string ExprName;
8848   if (MapExprs.getMapExpr()) {
8849     PrintingPolicy P(CGF.getContext().getLangOpts());
8850     llvm::raw_string_ostream OS(ExprName);
8851     MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8852   } else {
8853     ExprName = MapExprs.getMapDecl()->getNameAsString();
8854   }
8855 
8856   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8857   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8858                                          PLoc.getLine(), PLoc.getColumn(),
8859                                          SrcLocStrSize);
8860 }
8861 /// Emit the arrays used to pass the captures and map information to the
8862 /// offloading runtime library. If there is no map or capture information,
8863 /// return nullptr by reference.
8864 static void emitOffloadingArraysAndArgs(
8865     CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8866     CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8867     bool IsNonContiguous = false, bool ForEndCall = false) {
8868   CodeGenModule &CGM = CGF.CGM;
8869 
8870   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8871   InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8872                          CGF.AllocaInsertPt->getIterator());
8873   InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8874                           CGF.Builder.GetInsertPoint());
8875 
8876   auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8877     if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8878       Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8879     }
8880   };
8881 
8882   auto CustomMapperCB = [&](unsigned int I) {
8883     llvm::Value *MFunc = nullptr;
8884     if (CombinedInfo.Mappers[I]) {
8885       Info.HasMapper = true;
8886       MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8887           cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8888     }
8889     return MFunc;
8890   };
8891   OMPBuilder.emitOffloadingArraysAndArgs(
8892       AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, IsNonContiguous,
8893       ForEndCall, DeviceAddrCB, CustomMapperCB);
8894 }
8895 
8896 /// Check for an inner distribute directive.
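/// For example (illustrative only), for:
/// \code
/// #pragma omp target
/// #pragma omp teams distribute parallel for
/// for (int I = 0; I < N; ++I)
///   ;
/// \endcode
/// this returns the nested 'teams distribute parallel for' directive.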
8897 static const OMPExecutableDirective *
8898 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8899   const auto *CS = D.getInnermostCapturedStmt();
8900   const auto *Body =
8901       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8902   const Stmt *ChildStmt =
8903       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8904 
8905   if (const auto *NestedDir =
8906           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8907     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8908     switch (D.getDirectiveKind()) {
8909     case OMPD_target:
8910       // For now, treat 'target' with nested 'teams loop' as if it's
8911       // distributed (target teams distribute).
8912       if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8913         return NestedDir;
8914       if (DKind == OMPD_teams) {
8915         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8916             /*IgnoreCaptured=*/true);
8917         if (!Body)
8918           return nullptr;
8919         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8920         if (const auto *NND =
8921                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8922           DKind = NND->getDirectiveKind();
8923           if (isOpenMPDistributeDirective(DKind))
8924             return NND;
8925         }
8926       }
8927       return nullptr;
8928     case OMPD_target_teams:
8929       if (isOpenMPDistributeDirective(DKind))
8930         return NestedDir;
8931       return nullptr;
8932     case OMPD_target_parallel:
8933     case OMPD_target_simd:
8934     case OMPD_target_parallel_for:
8935     case OMPD_target_parallel_for_simd:
8936       return nullptr;
8937     case OMPD_target_teams_distribute:
8938     case OMPD_target_teams_distribute_simd:
8939     case OMPD_target_teams_distribute_parallel_for:
8940     case OMPD_target_teams_distribute_parallel_for_simd:
8941     case OMPD_parallel:
8942     case OMPD_for:
8943     case OMPD_parallel_for:
8944     case OMPD_parallel_master:
8945     case OMPD_parallel_sections:
8946     case OMPD_for_simd:
8947     case OMPD_parallel_for_simd:
8948     case OMPD_cancel:
8949     case OMPD_cancellation_point:
8950     case OMPD_ordered:
8951     case OMPD_threadprivate:
8952     case OMPD_allocate:
8953     case OMPD_task:
8954     case OMPD_simd:
8955     case OMPD_tile:
8956     case OMPD_unroll:
8957     case OMPD_sections:
8958     case OMPD_section:
8959     case OMPD_single:
8960     case OMPD_master:
8961     case OMPD_critical:
8962     case OMPD_taskyield:
8963     case OMPD_barrier:
8964     case OMPD_taskwait:
8965     case OMPD_taskgroup:
8966     case OMPD_atomic:
8967     case OMPD_flush:
8968     case OMPD_depobj:
8969     case OMPD_scan:
8970     case OMPD_teams:
8971     case OMPD_target_data:
8972     case OMPD_target_exit_data:
8973     case OMPD_target_enter_data:
8974     case OMPD_distribute:
8975     case OMPD_distribute_simd:
8976     case OMPD_distribute_parallel_for:
8977     case OMPD_distribute_parallel_for_simd:
8978     case OMPD_teams_distribute:
8979     case OMPD_teams_distribute_simd:
8980     case OMPD_teams_distribute_parallel_for:
8981     case OMPD_teams_distribute_parallel_for_simd:
8982     case OMPD_target_update:
8983     case OMPD_declare_simd:
8984     case OMPD_declare_variant:
8985     case OMPD_begin_declare_variant:
8986     case OMPD_end_declare_variant:
8987     case OMPD_declare_target:
8988     case OMPD_end_declare_target:
8989     case OMPD_declare_reduction:
8990     case OMPD_declare_mapper:
8991     case OMPD_taskloop:
8992     case OMPD_taskloop_simd:
8993     case OMPD_master_taskloop:
8994     case OMPD_master_taskloop_simd:
8995     case OMPD_parallel_master_taskloop:
8996     case OMPD_parallel_master_taskloop_simd:
8997     case OMPD_requires:
8998     case OMPD_metadirective:
8999     case OMPD_unknown:
9000     default:
9001       llvm_unreachable("Unexpected directive.");
9002     }
9003   }
9004 
9005   return nullptr;
9006 }
9007 
9008 /// Emit the user-defined mapper function. The code generation follows the
9009 /// pattern in the example below.
9010 /// \code
9011 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9012 ///                                           void *base, void *begin,
9013 ///                                           int64_t size, int64_t type,
9014 ///                                           void *name = nullptr) {
9015 ///   // Allocate space for an array section first or add a base/begin for
9016 ///   // pointer dereference.
9017 ///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9018 ///       !maptype.IsDelete)
9019 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9020 ///                                 size*sizeof(Ty), clearToFromMember(type));
9021 ///   // Map members.
9022 ///   for (unsigned i = 0; i < size; i++) {
9023 ///     // For each component specified by this mapper:
9024 ///     for (auto c : begin[i]->all_components) {
9025 ///       if (c.hasMapper())
9026 ///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9027 ///                       c.arg_type, c.arg_name);
9028 ///       else
9029 ///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9030 ///                                     c.arg_begin, c.arg_size, c.arg_type,
9031 ///                                     c.arg_name);
9032 ///     }
9033 ///   }
9034 ///   // Delete the array section.
9035 ///   if (size > 1 && maptype.IsDelete)
9036 ///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9037 ///                                 size*sizeof(Ty), clearToFromMember(type));
9038 /// }
9039 /// \endcode
9040 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9041                                             CodeGenFunction *CGF) {
9042   if (UDMMap.count(D) > 0)
9043     return;
9044   ASTContext &C = CGM.getContext();
9045   QualType Ty = D->getType();
9046   auto *MapperVarDecl =
9047       cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9048   CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9049   llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9050 
9051   CodeGenFunction MapperCGF(CGM);
9052   MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9053   auto PrivatizeAndGenMapInfoCB =
9054       [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
9055           llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
9056     MapperCGF.Builder.restoreIP(CodeGenIP);
9057 
9058     // Privatize the declared variable of mapper to be the current array
9059     // element.
9060     Address PtrCurrent(
9061         PtrPHI, ElemTy,
9062         Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
9063             .getAlignment()
9064             .alignmentOfArrayElement(ElementSize));
9065     CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9066     Scope.addPrivate(MapperVarDecl, PtrCurrent);
9067     (void)Scope.Privatize();
9068 
9069     // Get map clause information.
9070     MappableExprsHandler MEHandler(*D, MapperCGF);
9071     MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
9072 
9073     auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9074       return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
9075     };
9076     if (CGM.getCodeGenOpts().getDebugInfo() !=
9077         llvm::codegenoptions::NoDebugInfo) {
9078       CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9079       llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9080                       FillInfoMap);
9081     }
9082 
9083     return CombinedInfo;
9084   };
9085 
9086   auto CustomMapperCB = [&](unsigned I, llvm::Function **MapperFunc) {
9087     if (CombinedInfo.Mappers[I]) {
9088       // Call the corresponding mapper function.
9089       *MapperFunc = getOrCreateUserDefinedMapperFunc(
9090           cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9091       assert(*MapperFunc && "Expect a valid mapper function to be available.");
9092       return true;
9093     }
9094     return false;
9095   };
9096 
9097   SmallString<64> TyStr;
9098   llvm::raw_svector_ostream Out(TyStr);
9099   CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9100   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9101 
9102   auto *NewFn = OMPBuilder.emitUserDefinedMapper(PrivatizeAndGenMapInfoCB,
9103                                                  ElemTy, Name, CustomMapperCB);
9104   UDMMap.try_emplace(D, NewFn);
9105   if (CGF)
9106     FunctionUDMMap[CGF->CurFn].push_back(D);
9107 }
9108 
9109 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9110     const OMPDeclareMapperDecl *D) {
9111   auto I = UDMMap.find(D);
9112   if (I != UDMMap.end())
9113     return I->second;
9114   emitUserDefinedMapper(D);
9115   return UDMMap.lookup(D);
9116 }
9117 
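/// Emit the number of iterations of the loop nest associated with the target
/// directive, if any (looking through a nested distribute directive when
/// needed), so the runtime can use it as the kernel trip count; a zero
/// constant is emitted otherwise (a descriptive summary of the code below).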
9118 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9119     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9120     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9121                                      const OMPLoopDirective &D)>
9122         SizeEmitter) {
9123   OpenMPDirectiveKind Kind = D.getDirectiveKind();
9124   const OMPExecutableDirective *TD = &D;
9125   // Get nested teams distribute kind directive, if any. For now, treat
9126   // 'target_teams_loop' as if it's really a target_teams_distribute.
9127   if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9128       Kind != OMPD_target_teams_loop)
9129     TD = getNestedDistributeDirective(CGM.getContext(), D);
9130   if (!TD)
9131     return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9132 
9133   const auto *LD = cast<OMPLoopDirective>(TD);
9134   if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9135     return NumIterations;
9136   return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9137 }
9138 
9139 static void
9140 emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9141                        const OMPExecutableDirective &D,
9142                        llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9143                        bool RequiresOuterTask, const CapturedStmt &CS,
9144                        bool OffloadingMandatory, CodeGenFunction &CGF) {
9145   if (OffloadingMandatory) {
9146     CGF.Builder.CreateUnreachable();
9147   } else {
9148     if (RequiresOuterTask) {
9149       CapturedVars.clear();
9150       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9151     }
9152     OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9153                                          CapturedVars);
9154   }
9155 }
9156 
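/// Emit the device ID for the target region, e.g. from a 'device(2)' clause
/// (illustrative); when no device expression is present, the
/// OMP_DEVICEID_UNDEF sentinel is used instead.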
9157 static llvm::Value *emitDeviceID(
9158     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9159     CodeGenFunction &CGF) {
9160   // Emit device ID if any.
9161   llvm::Value *DeviceID;
9162   if (Device.getPointer()) {
9163     assert((Device.getInt() == OMPC_DEVICE_unknown ||
9164             Device.getInt() == OMPC_DEVICE_device_num) &&
9165            "Expected device_num modifier.");
9166     llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9167     DeviceID =
9168         CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9169   } else {
9170     DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9171   }
9172   return DeviceID;
9173 }
9174 
9175 static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
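/// Emit the dynamic cgroup memory size, e.g. from an extension clause such as
/// 'ompx_dyn_cgroup_mem(1024)' (illustrative); it defaults to 0 when the
/// clause is absent.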
9176                                       CodeGenFunction &CGF) {
9177   llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9178 
9179   if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9180     CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9181     llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9182         DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9183     DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9184                                              /*isSigned=*/false);
9185   }
9186   return DynCGroupMem;
9187 }
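
/// Generate map info for every capture of the target region: VLA sizes are
/// passed by copy, explicit map clauses are used where present, and default
/// mapping plus lambda-capture handling cover the rest (a descriptive summary
/// of the code below).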
9188 static void genMapInfoForCaptures(
9189     MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9190     const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9191     llvm::OpenMPIRBuilder &OMPBuilder,
9192     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
9193     MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9194 
9195   llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9196   auto RI = CS.getCapturedRecordDecl()->field_begin();
9197   auto *CV = CapturedVars.begin();
9198   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9199                                             CE = CS.capture_end();
9200        CI != CE; ++CI, ++RI, ++CV) {
9201     MappableExprsHandler::MapCombinedInfoTy CurInfo;
9202     MappableExprsHandler::StructRangeInfoTy PartialStruct;
9203 
9204     // VLA sizes are passed to the outlined region by copy and do not have map
9205     // information associated.
9206     if (CI->capturesVariableArrayType()) {
9207       CurInfo.Exprs.push_back(nullptr);
9208       CurInfo.BasePointers.push_back(*CV);
9209       CurInfo.DevicePtrDecls.push_back(nullptr);
9210       CurInfo.DevicePointers.push_back(
9211           MappableExprsHandler::DeviceInfoTy::None);
9212       CurInfo.Pointers.push_back(*CV);
9213       CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9214           CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9215       // Copy to the device as an argument. No need to retrieve it.
9216       CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9217                               OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9218                               OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9219       CurInfo.Mappers.push_back(nullptr);
9220     } else {
9221       // If we have any information in the map clause, we use it; otherwise we
9222       // just do a default mapping.
9223       MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9224       if (!CI->capturesThis())
9225         MappedVarSet.insert(CI->getCapturedVar());
9226       else
9227         MappedVarSet.insert(nullptr);
9228       if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9229         MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9230       // Generate correct mapping for variables captured by reference in
9231       // lambdas.
9232       if (CI->capturesVariable())
9233         MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9234                                                 CurInfo, LambdaPointers);
9235     }
9236     // We expect to have at least one element of information for this capture.
9237     assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9238            "Non-existing map pointer for capture!");
9239     assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9240            CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9241            CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9242            CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9243            "Inconsistent map information sizes!");
9244 
9245     // If there is an entry in PartialStruct, it means we have a struct with
9246     // individual members mapped. Emit an extra combined entry.
9247     if (PartialStruct.Base.isValid()) {
9248       CombinedInfo.append(PartialStruct.PreliminaryMapData);
9249       MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
9250                                   CI->capturesThis(), OMPBuilder, nullptr,
9251                                   /*NotTargetParams*/ false);
9252     }
9253 
9254     // We need to append the results of this capture to what we already have.
9255     CombinedInfo.append(CurInfo);
9256   }
9257   // Adjust MEMBER_OF flags for the lambdas captures.
9258   MEHandler.adjustMemberOfForLambdaCaptures(
9259       OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9260       CombinedInfo.Pointers, CombinedInfo.Types);
9261 }
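
/// Generate map info for map-clause list items that were not captured because
/// they are not referenced within the construct, and fill in the map names
/// when debug info is enabled (a descriptive summary of the code below).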
9262 static void
9263 genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9264            MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9265            llvm::OpenMPIRBuilder &OMPBuilder,
9266            const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
9267                llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
9268 
9269   CodeGenModule &CGM = CGF.CGM;
9270   // Map any list items in a map clause that were not captures because they
9271   // weren't referenced within the construct.
9272   MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
9273 
9274   auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9275     return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9276   };
9277   if (CGM.getCodeGenOpts().getDebugInfo() !=
9278       llvm::codegenoptions::NoDebugInfo) {
9279     CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9280     llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9281                     FillInfoMap);
9282   }
9283 }
9284 
9285 static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
9286                        const CapturedStmt &CS,
9287                        llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9288                        llvm::OpenMPIRBuilder &OMPBuilder,
9289                        MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9290   // Get mappable expression information.
9291   MappableExprsHandler MEHandler(D, CGF);
9292   llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9293 
9294   genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
9295                         MappedVarSet, CombinedInfo);
9296   genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
9297 }
9298 
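/// Emit the 32-bit values of the given clause's expressions for an
/// 'ompx_bare' target directive, e.g. (illustrative only):
/// \code
/// #pragma omp target teams ompx_bare num_teams(8) thread_limit(256)
/// \endcode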
9299 template <typename ClauseTy>
9300 static void
9301 emitClauseForBareTargetDirective(CodeGenFunction &CGF,
9302                                  const OMPExecutableDirective &D,
9303                                  llvm::SmallVectorImpl<llvm::Value *> &Values) {
9304   const auto *C = D.getSingleClause<ClauseTy>();
9305   assert(!C->varlist_empty() &&
9306          "ompx_bare requires explicit num_teams and thread_limit");
9307   CodeGenFunction::RunCleanupsScope Scope(CGF);
9308   for (auto *E : C->varlist()) {
9309     llvm::Value *V = CGF.EmitScalarExpr(E);
9310     Values.push_back(
9311         CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
9312   }
9313 }
9314 
9315 static void emitTargetCallKernelLaunch(
9316     CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9317     const OMPExecutableDirective &D,
9318     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9319     const CapturedStmt &CS, bool OffloadingMandatory,
9320     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9321     llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9322     llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9323     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9324                                      const OMPLoopDirective &D)>
9325         SizeEmitter,
9326     CodeGenFunction &CGF, CodeGenModule &CGM) {
9327   llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9328 
9329   // Fill up the arrays with all the captured variables.
9330   MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9331   CGOpenMPRuntime::TargetDataInfo Info;
9332   genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
9333 
9334   emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
9335                               /*IsNonContiguous=*/true, /*ForEndCall=*/false);
9336 
9337   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9338   InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9339                                         CGF.VoidPtrTy, CGM.getPointerAlign());
9340   InputInfo.PointersArray =
9341       Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9342   InputInfo.SizesArray =
9343       Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9344   InputInfo.MappersArray =
9345       Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9346   MapTypesArray = Info.RTArgs.MapTypesArray;
9347   MapNamesArray = Info.RTArgs.MapNamesArray;
9348 
9349   auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9350                     RequiresOuterTask, &CS, OffloadingMandatory, Device,
9351                     OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9352                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9353     bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9354 
9355     if (IsReverseOffloading) {
9356       // Reverse offloading is not supported, so just execute on the host.
9357       // FIXME: This fallback solution is incorrect since it ignores the
9358       // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9359       // assert here and ensure SEMA emits an error.
9360       emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9361                              RequiresOuterTask, CS, OffloadingMandatory, CGF);
9362       return;
9363     }
9364 
9365     bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9366     unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9367 
9368     llvm::Value *BasePointersArray =
9369         InputInfo.BasePointersArray.emitRawPointer(CGF);
9370     llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
9371     llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
9372     llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
9373 
9374     auto &&EmitTargetCallFallbackCB =
9375         [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9376          OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9377         -> llvm::OpenMPIRBuilder::InsertPointTy {
9378       CGF.Builder.restoreIP(IP);
9379       emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9380                              RequiresOuterTask, CS, OffloadingMandatory, CGF);
9381       return CGF.Builder.saveIP();
9382     };
9383 
9384     bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
9385     SmallVector<llvm::Value *, 3> NumTeams;
9386     SmallVector<llvm::Value *, 3> NumThreads;
9387     if (IsBare) {
9388       emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
9389       emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
9390                                                              NumThreads);
9391     } else {
9392       NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
9393       NumThreads.push_back(
9394           OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
9395     }
9396 
9397     llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9398     llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9399     llvm::Value *NumIterations =
9400         OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9401     llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9402     llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9403         CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9404 
9405     llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9406         BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9407         nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9408 
9409     llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9410         NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9411         DynCGGroupMem, HasNoWait);
9412 
9413     llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
9414         cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9415             CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
9416             RTLoc, AllocaIP));
9417     CGF.Builder.restoreIP(AfterIP);
9418   };
9419 
9420   if (RequiresOuterTask)
9421     CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9422   else
9423     OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9424 }
9425 
9426 static void
9427 emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9428                    const OMPExecutableDirective &D,
9429                    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9430                    bool RequiresOuterTask, const CapturedStmt &CS,
9431                    bool OffloadingMandatory, CodeGenFunction &CGF) {
9432 
9433   // Notify that the host version must be executed.
9434   auto &&ElseGen =
9435       [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9436        OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9437         emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9438                                RequiresOuterTask, CS, OffloadingMandatory, CGF);
9439       };
9440 
9441   if (RequiresOuterTask) {
9442     CodeGenFunction::OMPTargetDataInfo InputInfo;
9443     CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9444   } else {
9445     OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9446   }
9447 }
9448 
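// Illustrative example (assumed user code, not from the source): for
//
//   #pragma omp target if(use_gpu) depend(in : x)
//
// the depend clause makes RequiresOuterTask true below, and the if clause
// causes emitTargetCall to emit both branches: the kernel-launch path when
// use_gpu evaluates to true and the host-fallback path otherwise.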
9449 void CGOpenMPRuntime::emitTargetCall(
9450     CodeGenFunction &CGF, const OMPExecutableDirective &D,
9451     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9452     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9453     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9454                                      const OMPLoopDirective &D)>
9455         SizeEmitter) {
9456   if (!CGF.HaveInsertPoint())
9457     return;
9458 
9459   const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9460                                    CGM.getLangOpts().OpenMPOffloadMandatory;
9461 
9462   assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9463 
9464   const bool RequiresOuterTask =
9465       D.hasClausesOfKind<OMPDependClause>() ||
9466       D.hasClausesOfKind<OMPNowaitClause>() ||
9467       D.hasClausesOfKind<OMPInReductionClause>() ||
9468       (CGM.getLangOpts().OpenMP >= 51 &&
9469        needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9470        D.hasClausesOfKind<OMPThreadLimitClause>());
9471   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9472   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9473   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9474                                             PrePostActionTy &) {
9475     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9476   };
9477   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9478 
9479   CodeGenFunction::OMPTargetDataInfo InputInfo;
9480   llvm::Value *MapTypesArray = nullptr;
9481   llvm::Value *MapNamesArray = nullptr;
9482 
9483   auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9484                           RequiresOuterTask, &CS, OffloadingMandatory, Device,
9485                           OutlinedFnID, &InputInfo, &MapTypesArray,
9486                           &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9487                                                        PrePostActionTy &) {
9488     emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9489                                RequiresOuterTask, CS, OffloadingMandatory,
9490                                Device, OutlinedFnID, InputInfo, MapTypesArray,
9491                                MapNamesArray, SizeEmitter, CGF, CGM);
9492   };
9493 
9494   auto &&TargetElseGen =
9495       [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9496        OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9497         emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9498                            CS, OffloadingMandatory, CGF);
9499       };
9500 
9501   // If we have a target function ID it means that we need to support
9502   // offloading; otherwise, just execute on the host. We must execute on the
9503   // host regardless of the conditional in the if clause if, e.g., the user
9504   // does not specify any target triples.
9505   if (OutlinedFnID) {
9506     if (IfCond) {
9507       emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9508     } else {
9509       RegionCodeGenTy ThenRCG(TargetThenGen);
9510       ThenRCG(CGF);
9511     }
9512   } else {
9513     RegionCodeGenTy ElseRCG(TargetElseGen);
9514     ElseRCG(CGF);
9515   }
9516 }
9517 
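// Illustrative example (assumed user code, not from the source): given
//
//   void host() {
//   #pragma omp target
//     work();
//   }
//
// scanForTargetRegionsFunctions below encounters the target directive while
// walking the body of host() and emits its device function, recursing into
// nested statements and lambda bodies along the way.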
9518 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9519                                                     StringRef ParentName) {
9520   if (!S)
9521     return;
9522 
9523   // Codegen OMP target directives that offload compute to the device.
9524   bool RequiresDeviceCodegen =
9525       isa<OMPExecutableDirective>(S) &&
9526       isOpenMPTargetExecutionDirective(
9527           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9528 
9529   if (RequiresDeviceCodegen) {
9530     const auto &E = *cast<OMPExecutableDirective>(S);
9531 
9532     llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9533         CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9534 
9535     // Is this a target region that should not be emitted as an entry point?
9536     // If so, just signal that we are done with this target region.
9537     if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9538       return;
9539 
9540     switch (E.getDirectiveKind()) {
9541     case OMPD_target:
9542       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9543                                                    cast<OMPTargetDirective>(E));
9544       break;
9545     case OMPD_target_parallel:
9546       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9547           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9548       break;
9549     case OMPD_target_teams:
9550       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9551           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9552       break;
9553     case OMPD_target_teams_distribute:
9554       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9555           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9556       break;
9557     case OMPD_target_teams_distribute_simd:
9558       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9559           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9560       break;
9561     case OMPD_target_parallel_for:
9562       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9563           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9564       break;
9565     case OMPD_target_parallel_for_simd:
9566       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9567           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9568       break;
9569     case OMPD_target_simd:
9570       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9571           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9572       break;
9573     case OMPD_target_teams_distribute_parallel_for:
9574       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9575           CGM, ParentName,
9576           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9577       break;
9578     case OMPD_target_teams_distribute_parallel_for_simd:
9579       CodeGenFunction::
9580           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9581               CGM, ParentName,
9582               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9583       break;
9584     case OMPD_target_teams_loop:
9585       CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9586           CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9587       break;
9588     case OMPD_target_parallel_loop:
9589       CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9590           CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9591       break;
9592     case OMPD_parallel:
9593     case OMPD_for:
9594     case OMPD_parallel_for:
9595     case OMPD_parallel_master:
9596     case OMPD_parallel_sections:
9597     case OMPD_for_simd:
9598     case OMPD_parallel_for_simd:
9599     case OMPD_cancel:
9600     case OMPD_cancellation_point:
9601     case OMPD_ordered:
9602     case OMPD_threadprivate:
9603     case OMPD_allocate:
9604     case OMPD_task:
9605     case OMPD_simd:
9606     case OMPD_tile:
9607     case OMPD_unroll:
9608     case OMPD_sections:
9609     case OMPD_section:
9610     case OMPD_single:
9611     case OMPD_master:
9612     case OMPD_critical:
9613     case OMPD_taskyield:
9614     case OMPD_barrier:
9615     case OMPD_taskwait:
9616     case OMPD_taskgroup:
9617     case OMPD_atomic:
9618     case OMPD_flush:
9619     case OMPD_depobj:
9620     case OMPD_scan:
9621     case OMPD_teams:
9622     case OMPD_target_data:
9623     case OMPD_target_exit_data:
9624     case OMPD_target_enter_data:
9625     case OMPD_distribute:
9626     case OMPD_distribute_simd:
9627     case OMPD_distribute_parallel_for:
9628     case OMPD_distribute_parallel_for_simd:
9629     case OMPD_teams_distribute:
9630     case OMPD_teams_distribute_simd:
9631     case OMPD_teams_distribute_parallel_for:
9632     case OMPD_teams_distribute_parallel_for_simd:
9633     case OMPD_target_update:
9634     case OMPD_declare_simd:
9635     case OMPD_declare_variant:
9636     case OMPD_begin_declare_variant:
9637     case OMPD_end_declare_variant:
9638     case OMPD_declare_target:
9639     case OMPD_end_declare_target:
9640     case OMPD_declare_reduction:
9641     case OMPD_declare_mapper:
9642     case OMPD_taskloop:
9643     case OMPD_taskloop_simd:
9644     case OMPD_master_taskloop:
9645     case OMPD_master_taskloop_simd:
9646     case OMPD_parallel_master_taskloop:
9647     case OMPD_parallel_master_taskloop_simd:
9648     case OMPD_requires:
9649     case OMPD_metadirective:
9650     case OMPD_unknown:
9651     default:
9652       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9653     }
9654     return;
9655   }
9656 
9657   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9658     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9659       return;
9660 
9661     scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9662     return;
9663   }
9664 
9665   // If this is a lambda function, look into its body.
9666   if (const auto *L = dyn_cast<LambdaExpr>(S))
9667     S = L->getBody();
9668 
9669   // Keep looking for target regions recursively.
9670   for (const Stmt *II : S->children())
9671     scanForTargetRegionsFunctions(II, ParentName);
9672 }
9673 
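// Illustrative example (assumed user code, not from the source):
//
//   #pragma omp declare target device_type(nohost)
//   void device_only();
//
// isAssumedToBeNotEmitted below returns true for device_only when compiling
// for the host, so no host version is emitted; device_type(host) behaves
// symmetrically when compiling for the device.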
9674 static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9675   std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9676       OMPDeclareTargetDeclAttr::getDeviceType(VD);
9677   if (!DevTy)
9678     return false;
9679   // Do not emit device_type(nohost) functions for the host.
9680   if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9681     return true;
9682   // Do not emit device_type(host) functions for the device.
9683   if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9684     return true;
9685   return false;
9686 }
9687 
9688 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9689   // If emitting code for the host, we do not process FD here. Instead, we
9690   // do the normal code generation.
9691   if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9692     if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9693       if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9694                                   CGM.getLangOpts().OpenMPIsTargetDevice))
9695         return true;
9696     return false;
9697   }
9698 
9699   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9700   // Try to detect target regions in the function.
9701   if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9702     StringRef Name = CGM.getMangledName(GD);
9703     scanForTargetRegionsFunctions(FD->getBody(), Name);
9704     if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9705                                 CGM.getLangOpts().OpenMPIsTargetDevice))
9706       return true;
9707   }
9708 
9709   // Do not emit the function if it is not marked as declare target.
9710   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9711          AlreadyEmittedTargetDecls.count(VD) == 0;
9712 }
9713 
9714 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9715   if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9716                               CGM.getLangOpts().OpenMPIsTargetDevice))
9717     return true;
9718 
9719   if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9720     return false;
9721 
9722   // Check if there are Ctors/Dtors in this declaration and look for target
9723   // regions in it. We use the complete variant to produce the kernel name
9724   // mangling.
9725   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9726   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9727     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9728       StringRef ParentName =
9729           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9730       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9731     }
9732     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9733       StringRef ParentName =
9734           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9735       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9736     }
9737   }
9738 
9739   // Do not emit the variable if it is not marked as declare target.
9740   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9741       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9742           cast<VarDecl>(GD.getDecl()));
9743   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9744       ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9745         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9746        HasRequiresUnifiedSharedMemory)) {
9747     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9748     return true;
9749   }
9750   return false;
9751 }
9752 
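// Illustrative example (assumed user code, not from the source):
//
//   #pragma omp declare target link(Gbl)
//   int Gbl;
//
// registerTargetGlobalVariable below hands Gbl to the OpenMPIRBuilder,
// which registers the offloading entry; for the link clause, device code is
// expected to access the variable through a generated reference rather than
// directly.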
9753 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9754                                                    llvm::Constant *Addr) {
9755   if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9756       !CGM.getLangOpts().OpenMPIsTargetDevice)
9757     return;
9758 
9759   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9760       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9761 
9762   // If this is an 'extern' declaration we defer to the canonical definition and
9763   // do not emit an offloading entry.
9764   if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
9765       VD->hasExternalStorage())
9766     return;
9767 
9768   if (!Res) {
9769     if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9770       // Register non-target variables being emitted in device code (debug info
9771       // may cause this).
9772       StringRef VarName = CGM.getMangledName(VD);
9773       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9774     }
9775     return;
9776   }
9777 
9778   auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
9779   auto LinkageForVariable = [&VD, this]() {
9780     return CGM.getLLVMLinkageVarDefinition(VD);
9781   };
9782 
9783   std::vector<llvm::GlobalVariable *> GeneratedRefs;
9784   OMPBuilder.registerTargetGlobalVariable(
9785       convertCaptureClause(VD), convertDeviceClause(VD),
9786       VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
9787       VD->isExternallyVisible(),
9788       getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
9789                                   VD->getCanonicalDecl()->getBeginLoc()),
9790       CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
9791       CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
9792       CGM.getTypes().ConvertTypeForMem(
9793           CGM.getContext().getPointerType(VD->getType())),
9794       Addr);
9795 
9796   for (auto *Ref : GeneratedRefs)
9797     CGM.addCompilerUsedGlobal(Ref);
9798 }
9799 
9800 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9801   if (isa<FunctionDecl>(GD.getDecl()) ||
9802       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9803     return emitTargetFunctions(GD);
9804 
9805   return emitTargetGlobalVariable(GD);
9806 }
9807 
9808 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9809   for (const VarDecl *VD : DeferredGlobalVariables) {
9810     std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9811         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9812     if (!Res)
9813       continue;
9814     if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9815          *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9816         !HasRequiresUnifiedSharedMemory) {
9817       CGM.EmitGlobal(VD);
9818     } else {
9819       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9820               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9821                 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9822                HasRequiresUnifiedSharedMemory)) &&
9823              "Expected link clause or to clause with unified memory.");
9824       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9825     }
9826   }
9827 }
9828 
9829 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9830     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9831   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9832          " Expected target-based directive.");
9833 }
9834 
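// Illustrative example (assumed user code, not from the source):
//
//   #pragma omp requires atomic_default_mem_order(seq_cst)
//
// makes processRequiresDirective below record SequentiallyConsistent, which
// getDefaultMemoryOrdering() then reports for subsequent atomic constructs.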
9835 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
9836   for (const OMPClause *Clause : D->clauselists()) {
9837     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9838       HasRequiresUnifiedSharedMemory = true;
9839       OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
9840     } else if (const auto *AC =
9841                    dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
9842       switch (AC->getAtomicDefaultMemOrderKind()) {
9843       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
9844         RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
9845         break;
9846       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
9847         RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
9848         break;
9849       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
9850         RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
9851         break;
9852       case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
9853         break;
9854       }
9855     }
9856   }
9857 }
9858 
9859 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
9860   return RequiresAtomicOrdering;
9861 }
9862 
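// Illustrative example (assumed user code, not from the source):
//
//   int X;
//   #pragma omp allocate(X) allocator(omp_const_mem_alloc)
//
// hasAllocateAttributeForGlobalVar below returns true for X; since the
// predefined allocators are mapped to the default memory space here, AS is
// left as LangAS::Default.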
9863 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9864                                                        LangAS &AS) {
9865   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9866     return false;
9867   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9868   switch (A->getAllocatorType()) {
9869   case OMPAllocateDeclAttr::OMPNullMemAlloc:
9870   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9871   // Not supported; fall back to the default memory space.
9872   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9873   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9874   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9875   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9876   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9877   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9878   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9879     AS = LangAS::Default;
9880     return true;
9881   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9882     llvm_unreachable("Expected predefined allocator for the variables with the "
9883                      "static storage.");
9884   }
9885   return false;
9886 }
9887 
9888 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
9889   return HasRequiresUnifiedSharedMemory;
9890 }
9891 
9892 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9893     CodeGenModule &CGM)
9894     : CGM(CGM) {
9895   if (CGM.getLangOpts().OpenMPIsTargetDevice) {
9896     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9897     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9898   }
9899 }
9900 
9901 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9902   if (CGM.getLangOpts().OpenMPIsTargetDevice)
9903     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9904 }
9905 
9906 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9907   if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
9908     return true;
9909 
9910   const auto *D = cast<FunctionDecl>(GD.getDecl());
9911   // Do not emit the function if it is marked as declare target, as it was
9912   // already emitted.
9913   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9914     if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
9915       if (auto *F = dyn_cast_or_null<llvm::Function>(
9916               CGM.GetGlobalValue(CGM.getMangledName(GD))))
9917         return !F->isDeclaration();
9918       return false;
9919     }
9920     return true;
9921   }
9922 
9923   return !AlreadyEmittedTargetDecls.insert(D).second;
9924 }
9925 
9926 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9927                                     const OMPExecutableDirective &D,
9928                                     SourceLocation Loc,
9929                                     llvm::Function *OutlinedFn,
9930                                     ArrayRef<llvm::Value *> CapturedVars) {
9931   if (!CGF.HaveInsertPoint())
9932     return;
9933 
9934   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9935   CodeGenFunction::RunCleanupsScope Scope(CGF);
9936 
9937   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9938   llvm::Value *Args[] = {
9939       RTLoc,
9940       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9941       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9942   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9943   RealArgs.append(std::begin(Args), std::end(Args));
9944   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9945 
9946   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
9947       CGM.getModule(), OMPRTL___kmpc_fork_teams);
9948   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9949 }
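// Illustrative example (assumed user code, not from the source): for
//
//   #pragma omp teams num_teams(4) thread_limit(8)
//
// emitNumTeamsClause below emits
//
//   __kmpc_push_num_teams(&loc, tid, 4, 8);
//
// with any absent clause encoded as 0.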
9950 
9951 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9952                                          const Expr *NumTeams,
9953                                          const Expr *ThreadLimit,
9954                                          SourceLocation Loc) {
9955   if (!CGF.HaveInsertPoint())
9956     return;
9957 
9958   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9959 
9960   llvm::Value *NumTeamsVal =
9961       NumTeams
9962           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9963                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9964           : CGF.Builder.getInt32(0);
9965 
9966   llvm::Value *ThreadLimitVal =
9967       ThreadLimit
9968           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9969                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9970           : CGF.Builder.getInt32(0);
9971 
9972   // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
9973   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9974                                      ThreadLimitVal};
9975   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
9976                           CGM.getModule(), OMPRTL___kmpc_push_num_teams),
9977                       PushNumTeamsArgs);
9978 }
9979 
9980 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
9981                                             const Expr *ThreadLimit,
9982                                             SourceLocation Loc) {
9983   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9984   llvm::Value *ThreadLimitVal =
9985       ThreadLimit
9986           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9987                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9988           : CGF.Builder.getInt32(0);
9989 
9990   // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
9991   llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
9992                                     ThreadLimitVal};
9993   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
9994                           CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
9995                       ThreadLimitArgs);
9996 }
9997 
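// Illustrative example (assumed user code, not from the source):
//
//   #pragma omp target data map(tofrom : a[0 : n]) use_device_ptr(p)
//   { ... }
//
// emitTargetDataCalls below drives OpenMPIRBuilder::createTargetData; the
// use_device_ptr capture makes Info.CaptureDeviceAddrMap non-empty, so the
// region body is emitted through the privatizing BodyGenTy::Priv callback.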
9998 void CGOpenMPRuntime::emitTargetDataCalls(
9999     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10000     const Expr *Device, const RegionCodeGenTy &CodeGen,
10001     CGOpenMPRuntime::TargetDataInfo &Info) {
10002   if (!CGF.HaveInsertPoint())
10003     return;
10004 
10005   // Action used to replace the default codegen action and turn privatization
10006   // off.
10007   PrePostActionTy NoPrivAction;
10008 
10009   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10010 
10011   llvm::Value *IfCondVal = nullptr;
10012   if (IfCond)
10013     IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10014 
10015   // Emit device ID if any.
10016   llvm::Value *DeviceID = nullptr;
10017   if (Device) {
10018     DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10019                                          CGF.Int64Ty, /*isSigned=*/true);
10020   } else {
10021     DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10022   }
10023 
10024   // Fill up the arrays with all the mapped variables.
10025   MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10026   auto GenMapInfoCB =
10027       [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10028     CGF.Builder.restoreIP(CodeGenIP);
10029     // Get map clause information.
10030     MappableExprsHandler MEHandler(D, CGF);
10031     MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10032 
10033     auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10034       return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10035     };
10036     if (CGM.getCodeGenOpts().getDebugInfo() !=
10037         llvm::codegenoptions::NoDebugInfo) {
10038       CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10039       llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10040                       FillInfoMap);
10041     }
10042 
10043     return CombinedInfo;
10044   };
10045   using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10046   auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10047     CGF.Builder.restoreIP(CodeGenIP);
10048     switch (BodyGenType) {
10049     case BodyGenTy::Priv:
10050       if (!Info.CaptureDeviceAddrMap.empty())
10051         CodeGen(CGF);
10052       break;
10053     case BodyGenTy::DupNoPriv:
10054       if (!Info.CaptureDeviceAddrMap.empty()) {
10055         CodeGen.setAction(NoPrivAction);
10056         CodeGen(CGF);
10057       }
10058       break;
10059     case BodyGenTy::NoPriv:
10060       if (Info.CaptureDeviceAddrMap.empty()) {
10061         CodeGen.setAction(NoPrivAction);
10062         CodeGen(CGF);
10063       }
10064       break;
10065     }
10066     return InsertPointTy(CGF.Builder.GetInsertBlock(),
10067                          CGF.Builder.GetInsertPoint());
10068   };
10069 
10070   auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10071     if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10072       Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10073     }
10074   };
10075 
10076   auto CustomMapperCB = [&](unsigned int I) {
10077     llvm::Value *MFunc = nullptr;
10078     if (CombinedInfo.Mappers[I]) {
10079       Info.HasMapper = true;
10080       MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10081           cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10082     }
10083     return MFunc;
10084   };
10085 
10086   // Source location for the ident struct
10087   llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10088 
10089   InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10090                          CGF.AllocaInsertPt->getIterator());
10091   InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10092                           CGF.Builder.GetInsertPoint());
10093   llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10094   llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10095       cantFail(OMPBuilder.createTargetData(
10096           OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10097           /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
10098   CGF.Builder.restoreIP(AfterIP);
10099 }
10100 
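// Illustrative example (assumed user code, not from the source):
//
//   #pragma omp target enter data map(to : a[0 : n]) nowait
//
// is lowered by emitTargetDataStandAloneCall below to a call to
// __tgt_target_data_begin_nowait_mapper, with four null placeholder
// arguments appended because nowait is present.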
10101 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10102     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10103     const Expr *Device) {
10104   if (!CGF.HaveInsertPoint())
10105     return;
10106 
10107   assert((isa<OMPTargetEnterDataDirective>(D) ||
10108           isa<OMPTargetExitDataDirective>(D) ||
10109           isa<OMPTargetUpdateDirective>(D)) &&
10110          "Expecting either target enter, exit data, or update directives.");
10111 
10112   CodeGenFunction::OMPTargetDataInfo InputInfo;
10113   llvm::Value *MapTypesArray = nullptr;
10114   llvm::Value *MapNamesArray = nullptr;
10115   // Generate the code for the opening of the data environment.
10116   auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10117                     &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10118     // Emit device ID if any.
10119     llvm::Value *DeviceID = nullptr;
10120     if (Device) {
10121       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10122                                            CGF.Int64Ty, /*isSigned=*/true);
10123     } else {
10124       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10125     }
10126 
10127     // Emit the number of elements in the offloading arrays.
10128     llvm::Constant *PointerNum =
10129         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10130 
10131     // Source location for the ident struct
10132     llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10133 
10134     SmallVector<llvm::Value *, 13> OffloadingArgs(
10135         {RTLoc, DeviceID, PointerNum,
10136          InputInfo.BasePointersArray.emitRawPointer(CGF),
10137          InputInfo.PointersArray.emitRawPointer(CGF),
10138          InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
10139          InputInfo.MappersArray.emitRawPointer(CGF)});
10140 
10141     // Select the right runtime function call for each standalone
10142     // directive.
10143     const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10144     RuntimeFunction RTLFn;
10145     switch (D.getDirectiveKind()) {
10146     case OMPD_target_enter_data:
10147       RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10148                         : OMPRTL___tgt_target_data_begin_mapper;
10149       break;
10150     case OMPD_target_exit_data:
10151       RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10152                         : OMPRTL___tgt_target_data_end_mapper;
10153       break;
10154     case OMPD_target_update:
10155       RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10156                         : OMPRTL___tgt_target_data_update_mapper;
10157       break;
10158     case OMPD_parallel:
10159     case OMPD_for:
10160     case OMPD_parallel_for:
10161     case OMPD_parallel_master:
10162     case OMPD_parallel_sections:
10163     case OMPD_for_simd:
10164     case OMPD_parallel_for_simd:
10165     case OMPD_cancel:
10166     case OMPD_cancellation_point:
10167     case OMPD_ordered:
10168     case OMPD_threadprivate:
10169     case OMPD_allocate:
10170     case OMPD_task:
10171     case OMPD_simd:
10172     case OMPD_tile:
10173     case OMPD_unroll:
10174     case OMPD_sections:
10175     case OMPD_section:
10176     case OMPD_single:
10177     case OMPD_master:
10178     case OMPD_critical:
10179     case OMPD_taskyield:
10180     case OMPD_barrier:
10181     case OMPD_taskwait:
10182     case OMPD_taskgroup:
10183     case OMPD_atomic:
10184     case OMPD_flush:
10185     case OMPD_depobj:
10186     case OMPD_scan:
10187     case OMPD_teams:
10188     case OMPD_target_data:
10189     case OMPD_distribute:
10190     case OMPD_distribute_simd:
10191     case OMPD_distribute_parallel_for:
10192     case OMPD_distribute_parallel_for_simd:
10193     case OMPD_teams_distribute:
10194     case OMPD_teams_distribute_simd:
10195     case OMPD_teams_distribute_parallel_for:
10196     case OMPD_teams_distribute_parallel_for_simd:
10197     case OMPD_declare_simd:
10198     case OMPD_declare_variant:
10199     case OMPD_begin_declare_variant:
10200     case OMPD_end_declare_variant:
10201     case OMPD_declare_target:
10202     case OMPD_end_declare_target:
10203     case OMPD_declare_reduction:
10204     case OMPD_declare_mapper:
10205     case OMPD_taskloop:
10206     case OMPD_taskloop_simd:
10207     case OMPD_master_taskloop:
10208     case OMPD_master_taskloop_simd:
10209     case OMPD_parallel_master_taskloop:
10210     case OMPD_parallel_master_taskloop_simd:
10211     case OMPD_target:
10212     case OMPD_target_simd:
10213     case OMPD_target_teams_distribute:
10214     case OMPD_target_teams_distribute_simd:
10215     case OMPD_target_teams_distribute_parallel_for:
10216     case OMPD_target_teams_distribute_parallel_for_simd:
10217     case OMPD_target_teams:
10218     case OMPD_target_parallel:
10219     case OMPD_target_parallel_for:
10220     case OMPD_target_parallel_for_simd:
10221     case OMPD_requires:
10222     case OMPD_metadirective:
10223     case OMPD_unknown:
10224     default:
10225       llvm_unreachable("Unexpected standalone target data directive.");
10226       break;
10227     }
10228     if (HasNowait) {
10229       OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10230       OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10231       OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10232       OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10233     }
10234     CGF.EmitRuntimeCall(
10235         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10236         OffloadingArgs);
10237   };
10238 
10239   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10240                           &MapNamesArray](CodeGenFunction &CGF,
10241                                           PrePostActionTy &) {
10242     // Fill up the arrays with all the mapped variables.
10243     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10244     CGOpenMPRuntime::TargetDataInfo Info;
10245     MappableExprsHandler MEHandler(D, CGF);
10246     genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
10247     emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10248                                 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10249 
10250     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10251                              D.hasClausesOfKind<OMPNowaitClause>();
10252 
10253     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10254     InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10255                                           CGF.VoidPtrTy, CGM.getPointerAlign());
10256     InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10257                                       CGM.getPointerAlign());
10258     InputInfo.SizesArray =
10259         Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10260     InputInfo.MappersArray =
10261         Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10262     MapTypesArray = Info.RTArgs.MapTypesArray;
10263     MapNamesArray = Info.RTArgs.MapNamesArray;
10264     if (RequiresOuterTask)
10265       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10266     else
10267       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10268   };
10269 
10270   if (IfCond) {
10271     emitIfClause(CGF, IfCond, TargetThenGen,
10272                  [](CodeGenFunction &CGF, PrePostActionTy &) {});
10273   } else {
10274     RegionCodeGenTy ThenRCG(TargetThenGen);
10275     ThenRCG(CGF);
10276   }
10277 }
10278 
10279 namespace {
10280 /// Kind of parameter in a function with 'declare simd' directive.
10281 enum ParamKindTy {
10282   Linear,
10283   LinearRef,
10284   LinearUVal,
10285   LinearVal,
10286   Uniform,
10287   Vector,
10288 };
10289 /// Attribute set of the parameter.
10290 struct ParamAttrTy {
10291   ParamKindTy Kind = Vector;
10292   llvm::APSInt StrideOrArg;
10293   llvm::APSInt Alignment;
10294   bool HasVarStride = false;
10295 };
10296 } // namespace
10297 
10298 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10299                                 ArrayRef<ParamAttrTy> ParamAttrs) {
10300   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10301   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10302   // of that clause. The VLEN value must be a power of 2.
10303   // Otherwise, the notion of the function's "characteristic data type" (CDT)
10304   // is used to compute the vector length.
10305   // CDT is defined in the following order:
10306   //   a) For non-void function, the CDT is the return type.
10307   //   b) If the function has any non-uniform, non-linear parameters, then the
10308   //   CDT is the type of the first such parameter.
10309   //   c) If the CDT determined by a) or b) above is struct, union, or class
10310   //   type which is pass-by-value (except for the type that maps to the
10311   //   built-in complex data type), the characteristic data type is int.
10312   //   d) If none of the above three cases is applicable, the CDT is int.
10313   // The VLEN is then determined based on the CDT and the size of vector
10314   // register of that ISA for which current vector version is generated. The
10315   // VLEN is computed using the formula below:
10316   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
10317   // where the vector register size is specified in section 3.2.1 "Registers
10318   // and the Stack Frame" of the original AMD64 ABI document.
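  // Worked example (illustrative, not from the original comment): for
  //   double f(double x);   // x classified as vector ('v')
  // the CDT is double (the return type), so on SSE with 128-bit vector
  // registers VLEN = 128 / 64 = 2, yielding a variant such as "_ZGVbN2v_f".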
10319   QualType RetType = FD->getReturnType();
10320   if (RetType.isNull())
10321     return 0;
10322   ASTContext &C = FD->getASTContext();
10323   QualType CDT;
10324   if (!RetType.isNull() && !RetType->isVoidType()) {
10325     CDT = RetType;
10326   } else {
10327     unsigned Offset = 0;
10328     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10329       if (ParamAttrs[Offset].Kind == Vector)
10330         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10331       ++Offset;
10332     }
10333     if (CDT.isNull()) {
10334       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10335         if (ParamAttrs[I + Offset].Kind == Vector) {
10336           CDT = FD->getParamDecl(I)->getType();
10337           break;
10338         }
10339       }
10340     }
10341   }
10342   if (CDT.isNull())
10343     CDT = C.IntTy;
10344   CDT = CDT->getCanonicalTypeUnqualified();
10345   if (CDT->isRecordType() || CDT->isUnionType())
10346     CDT = C.IntTy;
10347   return C.getTypeSize(CDT);
10348 }
10349 
10350 /// Mangle the parameter part of the vector function name according to
10351 /// their OpenMP classification. The mangling function is defined in
10352 /// section 4.5 of the AAVFABI(2021Q1).
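/// For example (illustrative): parameters classified as (uniform, linear
/// with step 2, vector) mangle to "ul2v", and a linear parameter with a
/// negative step -1 mangles to "ln1".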
10353 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10354   SmallString<256> Buffer;
10355   llvm::raw_svector_ostream Out(Buffer);
10356   for (const auto &ParamAttr : ParamAttrs) {
10357     switch (ParamAttr.Kind) {
10358     case Linear:
10359       Out << 'l';
10360       break;
10361     case LinearRef:
10362       Out << 'R';
10363       break;
10364     case LinearUVal:
10365       Out << 'U';
10366       break;
10367     case LinearVal:
10368       Out << 'L';
10369       break;
10370     case Uniform:
10371       Out << 'u';
10372       break;
10373     case Vector:
10374       Out << 'v';
10375       break;
10376     }
10377     if (ParamAttr.HasVarStride)
10378       Out << "s" << ParamAttr.StrideOrArg;
10379     else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10380              ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10381       // Don't print the step value if it is not present or if it is
10382       // equal to 1.
10383       if (ParamAttr.StrideOrArg < 0)
10384         Out << 'n' << -ParamAttr.StrideOrArg;
10385       else if (ParamAttr.StrideOrArg != 1)
10386         Out << ParamAttr.StrideOrArg;
10387     }
10388 
10389     if (!!ParamAttr.Alignment)
10390       Out << 'a' << ParamAttr.Alignment;
10391   }
10392 
10393   return std::string(Out.str());
10394 }
10395 
10396 static void
10397 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10398                            const llvm::APSInt &VLENVal,
10399                            ArrayRef<ParamAttrTy> ParamAttrs,
10400                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
10401   struct ISADataTy {
10402     char ISA;
10403     unsigned VecRegSize;
10404   };
10405   ISADataTy ISAData[] = {
10406       {'b', 128}, // SSE
10407       {'c', 256}, // AVX
10408       {'d', 256}, // AVX2
10409       {'e', 512}, // AVX512
10410   };
10419   llvm::SmallVector<char, 2> Masked;
10420   switch (State) {
10421   case OMPDeclareSimdDeclAttr::BS_Undefined:
10422     Masked.push_back('N');
10423     Masked.push_back('M');
10424     break;
10425   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10426     Masked.push_back('N');
10427     break;
10428   case OMPDeclareSimdDeclAttr::BS_Inbranch:
10429     Masked.push_back('M');
10430     break;
10431   }
10432   for (char Mask : Masked) {
10433     for (const ISADataTy &Data : ISAData) {
10434       SmallString<256> Buffer;
10435       llvm::raw_svector_ostream Out(Buffer);
10436       Out << "_ZGV" << Data.ISA << Mask;
10437       if (!VLENVal) {
10438         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10439         assert(NumElts && "Non-zero simdlen/cdtsize expected");
10440         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10441       } else {
10442         Out << VLENVal;
10443       }
10444       Out << mangleVectorParameters(ParamAttrs);
10445       Out << '_' << Fn->getName();
10446       Fn->addFnAttr(Out.str());
10447     }
10448   }
10449 }
10450 
10451 // These are the functions needed to mangle the names of the
10452 // vector functions generated by the compiler, according to the rules
10453 // defined in the "Vector Function ABI specifications for AArch64",
10454 // available at
10455 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10456 
10457 /// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10458 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10459   QT = QT.getCanonicalType();
10460 
10461   if (QT->isVoidType())
10462     return false;
10463 
10464   if (Kind == ParamKindTy::Uniform)
10465     return false;
10466 
10467   if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10468     return false;
10469 
10470   if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10471       !QT->isReferenceType())
10472     return false;
10473 
10474   return true;
10475 }
10476 
10477 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10478 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10479   QT = QT.getCanonicalType();
10480   unsigned Size = C.getTypeSize(QT);
10481 
10482   // Only scalars and complex types at most 16 bytes wide set PBV to true.
10483   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10484     return false;
10485 
10486   if (QT->isFloatingType())
10487     return true;
10488 
10489   if (QT->isIntegerType())
10490     return true;
10491 
10492   if (QT->isPointerType())
10493     return true;
10494 
10495   // TODO: Add support for complex types (section 3.1.2, item 2).
10496 
10497   return false;
10498 }
10499 
10500 /// Computes the lane size (LS) of a return type or of an input parameter,
10501 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10502 /// TODO: Add support for references, section 3.2.1, item 1.
10503 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10504   if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10505     QualType PTy = QT.getCanonicalType()->getPointeeType();
10506     if (getAArch64PBV(PTy, C))
10507       return C.getTypeSize(PTy);
10508   }
10509   if (getAArch64PBV(QT, C))
10510     return C.getTypeSize(QT);
10511 
10512   return C.getTypeSize(C.getUIntPtrType());
10513 }
10514 
10515 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10516 // signature of the scalar function, as defined in 3.2.2 of the
10517 // AAVFABI.
10518 static std::tuple<unsigned, unsigned, bool>
10519 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10520   QualType RetType = FD->getReturnType().getCanonicalType();
10521 
10522   ASTContext &C = FD->getASTContext();
10523 
10524   bool OutputBecomesInput = false;
10525 
10526   llvm::SmallVector<unsigned, 8> Sizes;
10527   if (!RetType->isVoidType()) {
10528     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10529     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10530       OutputBecomesInput = true;
10531   }
10532   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10533     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10534     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10535   }
10536 
10537   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10538   // The LS of a function parameter / return value can only be a power
10539   // of 2, starting from 8 bits, up to 128.
10540   assert(llvm::all_of(Sizes,
10541                       [](unsigned Size) {
10542                         return Size == 8 || Size == 16 || Size == 32 ||
10543                                Size == 64 || Size == 128;
10544                       }) &&
10545          "Invalid size");
10546 
10547   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10548                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10549                          OutputBecomesInput);
10550 }
10551 
10552 // Function used to add the attribute. The parameter `VLEN` is
10553 // templated to allow the use of "x" when targeting scalable vector
10554 // functions for SVE.
10555 template <typename T>
10556 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10557                                  char ISA, StringRef ParSeq,
10558                                  StringRef MangledName, bool OutputBecomesInput,
10559                                  llvm::Function *Fn) {
10560   SmallString<256> Buffer;
10561   llvm::raw_svector_ostream Out(Buffer);
10562   Out << Prefix << ISA << LMask << VLEN;
10563   if (OutputBecomesInput)
10564     Out << "v";
10565   Out << ParSeq << "_" << MangledName;
10566   Fn->addFnAttr(Out.str());
10567 }
10568 
10569 // Helper function to generate the Advanced SIMD names depending on
10570 // the value of the NDS when simdlen is not present.
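// For example (illustrative), NDS == 32 yields both a 64-bit and a 128-bit
// Advanced SIMD variant, i.e. VLEN 2 and VLEN 4.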
10571 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10572                                       StringRef Prefix, char ISA,
10573                                       StringRef ParSeq, StringRef MangledName,
10574                                       bool OutputBecomesInput,
10575                                       llvm::Function *Fn) {
10576   switch (NDS) {
10577   case 8:
10578     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10579                          OutputBecomesInput, Fn);
10580     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10581                          OutputBecomesInput, Fn);
10582     break;
10583   case 16:
10584     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10585                          OutputBecomesInput, Fn);
10586     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10587                          OutputBecomesInput, Fn);
10588     break;
10589   case 32:
10590     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10591                          OutputBecomesInput, Fn);
10592     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10593                          OutputBecomesInput, Fn);
10594     break;
10595   case 64:
10596   case 128:
10597     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10598                          OutputBecomesInput, Fn);
10599     break;
10600   default:
10601     llvm_unreachable("Scalar type is too wide.");
10602   }
10603 }
10604 
10605 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10606 static void emitAArch64DeclareSimdFunction(
10607     CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10608     ArrayRef<ParamAttrTy> ParamAttrs,
10609     OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10610     char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10611 
10612   // Get basic data for building the vector signature.
10613   const auto Data = getNDSWDS(FD, ParamAttrs);
10614   const unsigned NDS = std::get<0>(Data);
10615   const unsigned WDS = std::get<1>(Data);
10616   const bool OutputBecomesInput = std::get<2>(Data);
10617 
10618   // Check the values provided via `simdlen` by the user.
10619   // 1. A `simdlen(1)` doesn't produce vector signatures,
10620   if (UserVLEN == 1) {
10621     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10622         DiagnosticsEngine::Warning,
10623         "The clause simdlen(1) has no effect when targeting aarch64.");
10624     CGM.getDiags().Report(SLoc, DiagID);
10625     return;
10626   }
10627 
10628   // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10629   // Advanced SIMD output.
10630   if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10631     unsigned DiagID = CGM.getDiags().getCustomDiagID(
10632         DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10633                                     "power of 2 when targeting Advanced SIMD.");
10634     CGM.getDiags().Report(SLoc, DiagID);
10635     return;
10636   }
10637 
10638   // 3. Section 3.4.1: SVE fixed-length vectors must obey the architectural
10639   // limits.
10640   if (ISA == 's' && UserVLEN != 0) {
10641     if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10642       unsigned DiagID = CGM.getDiags().getCustomDiagID(
10643           DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10644                                       "lanes in the architectural constraints "
10645                                       "for SVE (min is 128-bit, max is "
10646                                       "2048-bit, by steps of 128-bit)");
10647       CGM.getDiags().Report(SLoc, DiagID) << WDS;
10648       return;
10649     }
10650   }
10651 
10652   // Sort out parameter sequence.
10653   const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10654   StringRef Prefix = "_ZGV";
10655   // Generate simdlen from user input (if any).
10656   if (UserVLEN) {
10657     if (ISA == 's') {
10658       // SVE generates only a masked function.
10659       addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10660                            OutputBecomesInput, Fn);
10661     } else {
10662       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10663       // Advanced SIMD generates one or two functions, depending on
10664       // the `[not]inbranch` clause.
10665       switch (State) {
10666       case OMPDeclareSimdDeclAttr::BS_Undefined:
10667         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10668                              OutputBecomesInput, Fn);
10669         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10670                              OutputBecomesInput, Fn);
10671         break;
10672       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10673         addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10674                              OutputBecomesInput, Fn);
10675         break;
10676       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10677         addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10678                              OutputBecomesInput, Fn);
10679         break;
10680       }
10681     }
10682   } else {
10683     // If no user simdlen is provided, follow the AAVFABI rules for
10684     // generating the vector length.
10685     if (ISA == 's') {
10686       // SVE, section 3.4.1, item 1.
10687       addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10688                            OutputBecomesInput, Fn);
10689     } else {
10690       assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10691       // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10692       // two vector names depending on the use of the clause
10693       // `[not]inbranch`.
10694       switch (State) {
10695       case OMPDeclareSimdDeclAttr::BS_Undefined:
10696         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10697                                   OutputBecomesInput, Fn);
10698         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10699                                   OutputBecomesInput, Fn);
10700         break;
10701       case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10702         addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10703                                   OutputBecomesInput, Fn);
10704         break;
10705       case OMPDeclareSimdDeclAttr::BS_Inbranch:
10706         addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10707                                   OutputBecomesInput, Fn);
10708         break;
10709       }
10710     }
10711   }
10712 }
10713 
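      // Example (illustrative only): for
      //   #pragma omp declare simd simdlen(4) uniform(a) aligned(a : 32) \
      //       linear(i) notinbranch
      //   double foo(double *a, int i);
      // the loop below collects one ParamAttrTy per parameter (Uniform with
      // alignment 32 for `a`, Linear with stride 1 for `i`) and passes them,
      // together with the simdlen value and branch state, to the
      // target-specific emitters.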
10714 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10715                                               llvm::Function *Fn) {
10716   ASTContext &C = CGM.getContext();
10717   FD = FD->getMostRecentDecl();
10718   while (FD) {
10719     // Map params to their positions in function decl.
10720     llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10721     if (isa<CXXMethodDecl>(FD))
10722       ParamPositions.try_emplace(FD, 0);
10723     unsigned ParamPos = ParamPositions.size();
10724     for (const ParmVarDecl *P : FD->parameters()) {
10725       ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10726       ++ParamPos;
10727     }
10728     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10729       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10730       // Mark uniform parameters.
10731       for (const Expr *E : Attr->uniforms()) {
10732         E = E->IgnoreParenImpCasts();
10733         unsigned Pos;
10734         if (isa<CXXThisExpr>(E)) {
10735           Pos = ParamPositions[FD];
10736         } else {
10737           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10738                                 ->getCanonicalDecl();
10739           auto It = ParamPositions.find(PVD);
10740           assert(It != ParamPositions.end() && "Function parameter not found");
10741           Pos = It->second;
10742         }
10743         ParamAttrs[Pos].Kind = Uniform;
10744       }
10745       // Get alignment info.
10746       auto *NI = Attr->alignments_begin();
10747       for (const Expr *E : Attr->aligneds()) {
10748         E = E->IgnoreParenImpCasts();
10749         unsigned Pos;
10750         QualType ParmTy;
10751         if (isa<CXXThisExpr>(E)) {
10752           Pos = ParamPositions[FD];
10753           ParmTy = E->getType();
10754         } else {
10755           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10756                                 ->getCanonicalDecl();
10757           auto It = ParamPositions.find(PVD);
10758           assert(It != ParamPositions.end() && "Function parameter not found");
10759           Pos = It->second;
10760           ParmTy = PVD->getType();
10761         }
10762         ParamAttrs[Pos].Alignment =
10763             (*NI)
10764                 ? (*NI)->EvaluateKnownConstInt(C)
10765                 : llvm::APSInt::getUnsigned(
10766                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10767                           .getQuantity());
10768         ++NI;
10769       }
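            // E.g. (illustrative): `aligned(a : 32)` records an alignment of
            // 32, while a bare `aligned(a)` falls back to the target's default
            // SIMD alignment for the parameter type via
            // getOpenMPDefaultSimdAlign.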
10770       // Mark linear parameters.
10771       auto *SI = Attr->steps_begin();
10772       auto *MI = Attr->modifiers_begin();
10773       for (const Expr *E : Attr->linears()) {
10774         E = E->IgnoreParenImpCasts();
10775         unsigned Pos;
10776         bool IsReferenceType = false;
10777         // Rescaling factor needed to compute the linear parameter
10778         // value in the mangled name.
10779         unsigned PtrRescalingFactor = 1;
10780         if (isa<CXXThisExpr>(E)) {
10781           Pos = ParamPositions[FD];
10782           auto *P = cast<PointerType>(E->getType());
10783           PtrRescalingFactor = CGM.getContext()
10784                                    .getTypeSizeInChars(P->getPointeeType())
10785                                    .getQuantity();
10786         } else {
10787           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10788                                 ->getCanonicalDecl();
10789           auto It = ParamPositions.find(PVD);
10790           assert(It != ParamPositions.end() && "Function parameter not found");
10791           Pos = It->second;
10792           if (auto *P = dyn_cast<PointerType>(PVD->getType()))
10793             PtrRescalingFactor = CGM.getContext()
10794                                      .getTypeSizeInChars(P->getPointeeType())
10795                                      .getQuantity();
10796           else if (PVD->getType()->isReferenceType()) {
10797             IsReferenceType = true;
10798             PtrRescalingFactor =
10799                 CGM.getContext()
10800                     .getTypeSizeInChars(PVD->getType().getNonReferenceType())
10801                     .getQuantity();
10802           }
10803         }
10804         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10805         if (*MI == OMPC_LINEAR_ref)
10806           ParamAttr.Kind = LinearRef;
10807         else if (*MI == OMPC_LINEAR_uval)
10808           ParamAttr.Kind = LinearUVal;
10809         else if (IsReferenceType)
10810           ParamAttr.Kind = LinearVal;
10811         else
10812           ParamAttr.Kind = Linear;
10813         // Assume a stride of 1 for `linear` without modifiers.
10814         ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
10815         if (*SI) {
10816           Expr::EvalResult Result;
10817           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10818             if (const auto *DRE =
10819                     dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10820               if (const auto *StridePVD =
10821                       dyn_cast<ParmVarDecl>(DRE->getDecl())) {
10822                 ParamAttr.HasVarStride = true;
10823                 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
10824                 assert(It != ParamPositions.end() &&
10825                        "Function parameter not found");
10826                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
10827               }
10828             }
10829           } else {
10830             ParamAttr.StrideOrArg = Result.Val.getInt();
10831           }
10832         }
10833         // If we are using a linear clause on a pointer, we need to
10834         // rescale the value of linear_step with the byte size of the
10835         // pointee type.
10836         if (!ParamAttr.HasVarStride &&
10837             (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
10838           ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
10839         ++SI;
10840         ++MI;
10841       }
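            // E.g. (illustrative): for `int *p` with `linear(p : 2)`, the
            // mangled stride becomes 2 * sizeof(int) = 8, because linear steps
            // on pointers are expressed in bytes of the pointee type.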
10842       llvm::APSInt VLENVal;
10843       SourceLocation ExprLoc;
10844       const Expr *VLENExpr = Attr->getSimdlen();
10845       if (VLENExpr) {
10846         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10847         ExprLoc = VLENExpr->getExprLoc();
10848       }
10849       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10850       if (CGM.getTriple().isX86()) {
10851         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10852       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10853         unsigned VLEN = VLENVal.getExtValue();
10854         StringRef MangledName = Fn->getName();
10855         if (CGM.getTarget().hasFeature("sve"))
10856           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10857                                          MangledName, 's', 128, Fn, ExprLoc);
10858         else if (CGM.getTarget().hasFeature("neon"))
10859           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10860                                          MangledName, 'n', 128, Fn, ExprLoc);
10861       }
10862     }
10863     FD = FD->getPreviousDecl();
10864   }
10865 }
10866 
10867 namespace {
10868 /// Cleanup action for doacross support.
10869 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10870 public:
10871   static const int DoacrossFinArgs = 2;
10872 
10873 private:
10874   llvm::FunctionCallee RTLFn;
10875   llvm::Value *Args[DoacrossFinArgs];
10876 
10877 public:
10878   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10879                     ArrayRef<llvm::Value *> CallArgs)
10880       : RTLFn(RTLFn) {
10881     assert(CallArgs.size() == DoacrossFinArgs && "Unexpected number of args.");
10882     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10883   }
10884   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10885     if (!CGF.HaveInsertPoint())
10886       return;
10887     CGF.EmitRuntimeCall(RTLFn, Args);
10888   }
10889 };
10890 } // namespace
10891 
10892 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10893                                        const OMPLoopDirective &D,
10894                                        ArrayRef<Expr *> NumIterations) {
10895   if (!CGF.HaveInsertPoint())
10896     return;
10897 
10898   ASTContext &C = CGM.getContext();
10899   QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10900   RecordDecl *RD;
10901   if (KmpDimTy.isNull()) {
10902     // Build struct kmp_dim {  // loop bounds info cast to kmp_int64
10903     //  kmp_int64 lo; // lower
10904     //  kmp_int64 up; // upper
10905     //  kmp_int64 st; // stride
10906     // };
10907     RD = C.buildImplicitRecord("kmp_dim");
10908     RD->startDefinition();
10909     addFieldToRecordDecl(C, RD, Int64Ty);
10910     addFieldToRecordDecl(C, RD, Int64Ty);
10911     addFieldToRecordDecl(C, RD, Int64Ty);
10912     RD->completeDefinition();
10913     KmpDimTy = C.getRecordType(RD);
10914   } else {
10915     RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10916   }
10917   llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10918   QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
10919                                             ArraySizeModifier::Normal, 0);
10920 
10921   Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10922   CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10923   enum { LowerFD = 0, UpperFD, StrideFD };
10924   // Fill dims with data.
10925   for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10926     LValue DimsLVal = CGF.MakeAddrLValue(
10927         CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10928     // dims.upper = num_iterations;
10929     LValue UpperLVal = CGF.EmitLValueForField(
10930         DimsLVal, *std::next(RD->field_begin(), UpperFD));
10931     llvm::Value *NumIterVal = CGF.EmitScalarConversion(
10932         CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
10933         Int64Ty, NumIterations[I]->getExprLoc());
10934     CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
10935     // dims.stride = 1;
10936     LValue StrideLVal = CGF.EmitLValueForField(
10937         DimsLVal, *std::next(RD->field_begin(), StrideFD));
10938     CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
10939                           StrideLVal);
10940   }
10941 
10942   // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
10943   // kmp_int32 num_dims, struct kmp_dim * dims);
10944   llvm::Value *Args[] = {
10945       emitUpdateLocation(CGF, D.getBeginLoc()),
10946       getThreadID(CGF, D.getBeginLoc()),
10947       llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
10948       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10949           CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
10950           CGM.VoidPtrTy)};
10951 
10952   llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10953       CGM.getModule(), OMPRTL___kmpc_doacross_init);
10954   CGF.EmitRuntimeCall(RTLFn, Args);
10955   llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
10956       emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
10957   llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10958       CGM.getModule(), OMPRTL___kmpc_doacross_fini);
10959   CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10960                                              llvm::ArrayRef(FiniArgs));
10961 }
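      // Conceptually (an illustrative sketch, not literal IR), for a loop nest
      // with `ordered(2)` the code above corresponds to:
      //   struct kmp_dim dims[2] = {};
      //   for (int i = 0; i < 2; ++i) { dims[i].up = niters[i]; dims[i].st = 1; }
      //   __kmpc_doacross_init(&loc, gtid, /*num_dims=*/2, dims);
      //   ...                               // loop body with post/wait calls
      //   __kmpc_doacross_fini(&loc, gtid); // pushed as an EH cleanup above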
10962 
10963 template <typename T>
10964 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
10965                                 const T *C, llvm::Value *ULoc,
10966                                 llvm::Value *ThreadID) {
10967   QualType Int64Ty =
10968       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10969   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10970   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10971       Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
10972   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10973   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10974     const Expr *CounterVal = C->getLoopData(I);
10975     assert(CounterVal);
10976     llvm::Value *CntVal = CGF.EmitScalarConversion(
10977         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10978         CounterVal->getExprLoc());
10979     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10980                           /*Volatile=*/false, Int64Ty);
10981   }
10982   llvm::Value *Args[] = {
10983       ULoc, ThreadID,
10984       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
10985   llvm::FunctionCallee RTLFn;
10986   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
10987   OMPDoacrossKind<T> ODK;
10988   if (ODK.isSource(C)) {
10989     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10990                                                   OMPRTL___kmpc_doacross_post);
10991   } else {
10992     assert(ODK.isSink(C) && "Expect sink modifier.");
10993     RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10994                                                   OMPRTL___kmpc_doacross_wait);
10995   }
10996   CGF.EmitRuntimeCall(RTLFn, Args);
10997 }
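      // E.g. (illustrative): inside an ordered loop, `#pragma omp ordered
      // depend(source)` lowers to __kmpc_doacross_post(&loc, gtid, cnt), and
      // `depend(sink : i - 1)` lowers to __kmpc_doacross_wait(&loc, gtid, cnt),
      // where cnt holds the loop counters converted to kmp_int64.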
10998 
10999 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11000                                           const OMPDependClause *C) {
11001   return EmitDoacrossOrdered<OMPDependClause>(
11002       CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11003       getThreadID(CGF, C->getBeginLoc()));
11004 }
11005 
11006 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11007                                           const OMPDoacrossClause *C) {
11008   return EmitDoacrossOrdered<OMPDoacrossClause>(
11009       CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11010       getThreadID(CGF, C->getBeginLoc()));
11011 }
11012 
11013 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11014                                llvm::FunctionCallee Callee,
11015                                ArrayRef<llvm::Value *> Args) const {
11016   assert(Loc.isValid() && "Outlined function call location must be valid.");
11017   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11018 
11019   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11020     if (Fn->doesNotThrow()) {
11021       CGF.EmitNounwindRuntimeCall(Fn, Args);
11022       return;
11023     }
11024   }
11025   CGF.EmitRuntimeCall(Callee, Args);
11026 }
11027 
11028 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11029     CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11030     ArrayRef<llvm::Value *> Args) const {
11031   emitCall(CGF, Loc, OutlinedFn, Args);
11032 }
11033 
11034 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11035   if (const auto *FD = dyn_cast<FunctionDecl>(D))
11036     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11037       HasEmittedDeclareTargetRegion = true;
11038 }
11039 
11040 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11041                                              const VarDecl *NativeParam,
11042                                              const VarDecl *TargetParam) const {
11043   return CGF.GetAddrOfLocalVar(NativeParam);
11044 }
11045 
11046 /// Return the allocator value from an expression, or a null allocator (the
11047 /// default when no allocator is specified).
11048 static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11049                                     const Expr *Allocator) {
11050   llvm::Value *AllocVal;
11051   if (Allocator) {
11052     AllocVal = CGF.EmitScalarExpr(Allocator);
11053     // According to the standard, the original allocator type is an enum
11054     // (integer). Convert to a pointer type if required.
11055     AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11056                                         CGF.getContext().VoidPtrTy,
11057                                         Allocator->getExprLoc());
11058   } else {
11059     // If no allocator specified, it defaults to the null allocator.
11060     AllocVal = llvm::Constant::getNullValue(
11061         CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11062   }
11063   return AllocVal;
11064 }
11065 
11066 /// Return the alignment from an allocate directive if present.
11067 static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11068   std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11069 
11070   if (!AllocateAlignment)
11071     return nullptr;
11072 
11073   return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11074 }
11075 
11076 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11077                                                    const VarDecl *VD) {
11078   if (!VD)
11079     return Address::invalid();
11080   Address UntiedAddr = Address::invalid();
11081   Address UntiedRealAddr = Address::invalid();
11082   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11083   if (It != FunctionToUntiedTaskStackMap.end()) {
11084     const UntiedLocalVarsAddressesMap &UntiedData =
11085         UntiedLocalVarsStack[It->second];
11086     auto I = UntiedData.find(VD);
11087     if (I != UntiedData.end()) {
11088       UntiedAddr = I->second.first;
11089       UntiedRealAddr = I->second.second;
11090     }
11091   }
11092   const VarDecl *CVD = VD->getCanonicalDecl();
11093   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11094     // Use the default allocation.
11095     if (!isAllocatableDecl(VD))
11096       return UntiedAddr;
11097     llvm::Value *Size;
11098     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11099     if (CVD->getType()->isVariablyModifiedType()) {
11100       Size = CGF.getTypeSize(CVD->getType());
11101       // Align the size: ((size + align - 1) / align) * align
11102       Size = CGF.Builder.CreateNUWAdd(
11103           Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11104       Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11105       Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11106     } else {
11107       CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11108       Size = CGM.getSize(Sz.alignTo(Align));
11109     }
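          // E.g. (illustrative): a 10-byte type with 8-byte alignment is
          // rounded up to ((10 + 8 - 1) / 8) * 8 == 16 bytes before the
          // allocation call.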
11110     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11111     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11112     const Expr *Allocator = AA->getAllocator();
11113     llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11114     llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11115     SmallVector<llvm::Value *, 4> Args;
11116     Args.push_back(ThreadID);
11117     if (Alignment)
11118       Args.push_back(Alignment);
11119     Args.push_back(Size);
11120     Args.push_back(AllocVal);
11121     llvm::omp::RuntimeFunction FnID =
11122         Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11123     llvm::Value *Addr = CGF.EmitRuntimeCall(
11124         OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11125         getName({CVD->getName(), ".void.addr"}));
11126     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11127         CGM.getModule(), OMPRTL___kmpc_free);
11128     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11129     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11130         Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11131     if (UntiedAddr.isValid())
11132       CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11133 
11134     // Cleanup action for allocate support.
11135     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11136       llvm::FunctionCallee RTLFn;
11137       SourceLocation::UIntTy LocEncoding;
11138       Address Addr;
11139       const Expr *AllocExpr;
11140 
11141     public:
11142       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11143                            SourceLocation::UIntTy LocEncoding, Address Addr,
11144                            const Expr *AllocExpr)
11145           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11146             AllocExpr(AllocExpr) {}
11147       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11148         if (!CGF.HaveInsertPoint())
11149           return;
11150         llvm::Value *Args[3];
11151         Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11152             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11153         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11154             Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
11155         llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11156         Args[2] = AllocVal;
11157         CGF.EmitRuntimeCall(RTLFn, Args);
11158       }
11159     };
11160     Address VDAddr =
11161         UntiedRealAddr.isValid()
11162             ? UntiedRealAddr
11163             : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11164     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11165         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11166         VDAddr, Allocator);
11167     if (UntiedRealAddr.isValid())
11168       if (auto *Region =
11169               dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11170         Region->emitUntiedSwitch(CGF);
11171     return VDAddr;
11172   }
11173   return UntiedAddr;
11174 }
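      // E.g. (illustrative): for
      //   int x;
      //   #pragma omp allocate(x) allocator(omp_high_bw_mem_alloc) align(64)
      // the code above emits __kmpc_aligned_alloc(gtid, 64, size, allocator)
      // and pushes a cleanup that releases the storage with __kmpc_free.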
11175 
11176 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11177                                              const VarDecl *VD) const {
11178   auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11179   if (It == FunctionToUntiedTaskStackMap.end())
11180     return false;
11181   return UntiedLocalVarsStack[It->second].count(VD) > 0;
11182 }
11183 
11184 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11185     CodeGenModule &CGM, const OMPLoopDirective &S)
11186     : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11187   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11188   if (!NeedToPush)
11189     return;
11190   NontemporalDeclsSet &DS =
11191       CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11192   for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11193     for (const Stmt *Ref : C->private_refs()) {
11194       const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11195       const ValueDecl *VD;
11196       if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11197         VD = DRE->getDecl();
11198       } else {
11199         const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11200         assert((ME->isImplicitCXXThis() ||
11201                 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11202                "Expected member of current class.");
11203         VD = ME->getMemberDecl();
11204       }
11205       DS.insert(VD);
11206     }
11207   }
11208 }
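      // E.g. (illustrative): `#pragma omp simd nontemporal(a, b)` pushes
      // {a, b} onto NontemporalDeclsStack for the directive's lifetime, so
      // isNontemporalDecl() can answer queries during loop-body codegen.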
11209 
11210 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11211   if (!NeedToPush)
11212     return;
11213   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11214 }
11215 
11216 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11217     CodeGenFunction &CGF,
11218     const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11219                           std::pair<Address, Address>> &LocalVars)
11220     : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11221   if (!NeedToPush)
11222     return;
11223   CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11224       CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11225   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11226 }
11227 
11228 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11229   if (!NeedToPush)
11230     return;
11231   CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11232 }
11233 
11234 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11235   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11236 
11237   return llvm::any_of(
11238       CGM.getOpenMPRuntime().NontemporalDeclsStack,
11239       [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11240 }
11241 
11242 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11243     const OMPExecutableDirective &S,
11244     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11245     const {
11246   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11247   // Vars in target/task regions must be excluded completely.
11248   if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11249       isOpenMPTaskingDirective(S.getDirectiveKind())) {
11250     SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11251     getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11252     const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11253     for (const CapturedStmt::Capture &Cap : CS->captures()) {
11254       if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11255         NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11256     }
11257   }
11258   // Exclude vars in private clauses.
11259   for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11260     for (const Expr *Ref : C->varlist()) {
11261       if (!Ref->getType()->isScalarType())
11262         continue;
11263       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11264       if (!DRE)
11265         continue;
11266       NeedToCheckForLPCs.insert(DRE->getDecl());
11267     }
11268   }
11269   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11270     for (const Expr *Ref : C->varlist()) {
11271       if (!Ref->getType()->isScalarType())
11272         continue;
11273       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11274       if (!DRE)
11275         continue;
11276       NeedToCheckForLPCs.insert(DRE->getDecl());
11277     }
11278   }
11279   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11280     for (const Expr *Ref : C->varlist()) {
11281       if (!Ref->getType()->isScalarType())
11282         continue;
11283       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11284       if (!DRE)
11285         continue;
11286       NeedToCheckForLPCs.insert(DRE->getDecl());
11287     }
11288   }
11289   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11290     for (const Expr *Ref : C->varlist()) {
11291       if (!Ref->getType()->isScalarType())
11292         continue;
11293       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11294       if (!DRE)
11295         continue;
11296       NeedToCheckForLPCs.insert(DRE->getDecl());
11297     }
11298   }
11299   for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11300     for (const Expr *Ref : C->varlist()) {
11301       if (!Ref->getType()->isScalarType())
11302         continue;
11303       const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11304       if (!DRE)
11305         continue;
11306       NeedToCheckForLPCs.insert(DRE->getDecl());
11307     }
11308   }
11309   for (const Decl *VD : NeedToCheckForLPCs) {
11310     for (const LastprivateConditionalData &Data :
11311          llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11312       if (Data.DeclToUniqueName.count(VD) > 0) {
11313         if (!Data.Disabled)
11314           NeedToAddForLPCsAsDisabled.insert(VD);
11315         break;
11316       }
11317     }
11318   }
11319 }
11320 
11321 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11322     CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11323     : CGM(CGF.CGM),
11324       Action((CGM.getLangOpts().OpenMP >= 50 &&
11325               llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11326                            [](const OMPLastprivateClause *C) {
11327                              return C->getKind() ==
11328                                     OMPC_LASTPRIVATE_conditional;
11329                            }))
11330                  ? ActionToDo::PushAsLastprivateConditional
11331                  : ActionToDo::DoNotPush) {
11332   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11333   if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11334     return;
11335   assert(Action == ActionToDo::PushAsLastprivateConditional &&
11336          "Expected a push action.");
11337   LastprivateConditionalData &Data =
11338       CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11339   for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11340     if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11341       continue;
11342 
11343     for (const Expr *Ref : C->varlist()) {
11344       Data.DeclToUniqueName.insert(std::make_pair(
11345           cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11346           SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11347     }
11348   }
11349   Data.IVLVal = IVLVal;
11350   Data.Fn = CGF.CurFn;
11351 }
11352 
11353 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11354     CodeGenFunction &CGF, const OMPExecutableDirective &S)
11355     : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11356   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11357   if (CGM.getLangOpts().OpenMP < 50)
11358     return;
11359   llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11360   tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11361   if (!NeedToAddForLPCsAsDisabled.empty()) {
11362     Action = ActionToDo::DisableLastprivateConditional;
11363     LastprivateConditionalData &Data =
11364         CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11365     for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11366       Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11367     Data.Fn = CGF.CurFn;
11368     Data.Disabled = true;
11369   }
11370 }
11371 
11372 CGOpenMPRuntime::LastprivateConditionalRAII
11373 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11374     CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11375   return LastprivateConditionalRAII(CGF, S);
11376 }
11377 
11378 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11379   if (CGM.getLangOpts().OpenMP < 50)
11380     return;
11381   if (Action == ActionToDo::DisableLastprivateConditional) {
11382     assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11383            "Expected list of disabled private vars.");
11384     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11385   }
11386   if (Action == ActionToDo::PushAsLastprivateConditional) {
11387     assert(
11388         !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11389         "Expected list of lastprivate conditional vars.");
11390     CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11391   }
11392 }
11393 
11394 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11395                                                         const VarDecl *VD) {
11396   ASTContext &C = CGM.getContext();
11397   auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11398   QualType NewType;
11399   const FieldDecl *VDField;
11400   const FieldDecl *FiredField;
11401   LValue BaseLVal;
11402   auto VI = I->getSecond().find(VD);
11403   if (VI == I->getSecond().end()) {
11404     RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
11405     RD->startDefinition();
11406     VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11407     FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11408     RD->completeDefinition();
11409     NewType = C.getRecordType(RD);
11410     Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11411     BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11412     I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11413   } else {
11414     NewType = std::get<0>(VI->getSecond());
11415     VDField = std::get<1>(VI->getSecond());
11416     FiredField = std::get<2>(VI->getSecond());
11417     BaseLVal = std::get<3>(VI->getSecond());
11418   }
11419   LValue FiredLVal =
11420       CGF.EmitLValueForField(BaseLVal, FiredField);
11421   CGF.EmitStoreOfScalar(
11422       llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11423       FiredLVal);
11424   return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
11425 }
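      // Illustrative shape of the implicit record built above for a variable
      // `a` under `lastprivate(conditional : a)`:
      //   struct lastprivate.conditional { decltype(a) a; char Fired; };
      // Fired is zeroed here and set to 1 when an inner region updates `a`.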
11426 
11427 namespace {
11428 /// Checks if the lastprivate conditional variable is referenced in LHS.
11429 class LastprivateConditionalRefChecker final
11430     : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11431   ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11432   const Expr *FoundE = nullptr;
11433   const Decl *FoundD = nullptr;
11434   StringRef UniqueDeclName;
11435   LValue IVLVal;
11436   llvm::Function *FoundFn = nullptr;
11437   SourceLocation Loc;
11438 
11439 public:
11440   bool VisitDeclRefExpr(const DeclRefExpr *E) {
11441     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11442          llvm::reverse(LPM)) {
11443       auto It = D.DeclToUniqueName.find(E->getDecl());
11444       if (It == D.DeclToUniqueName.end())
11445         continue;
11446       if (D.Disabled)
11447         return false;
11448       FoundE = E;
11449       FoundD = E->getDecl()->getCanonicalDecl();
11450       UniqueDeclName = It->second;
11451       IVLVal = D.IVLVal;
11452       FoundFn = D.Fn;
11453       break;
11454     }
11455     return FoundE == E;
11456   }
11457   bool VisitMemberExpr(const MemberExpr *E) {
11458     if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11459       return false;
11460     for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11461          llvm::reverse(LPM)) {
11462       auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11463       if (It == D.DeclToUniqueName.end())
11464         continue;
11465       if (D.Disabled)
11466         return false;
11467       FoundE = E;
11468       FoundD = E->getMemberDecl()->getCanonicalDecl();
11469       UniqueDeclName = It->second;
11470       IVLVal = D.IVLVal;
11471       FoundFn = D.Fn;
11472       break;
11473     }
11474     return FoundE == E;
11475   }
11476   bool VisitStmt(const Stmt *S) {
11477     for (const Stmt *Child : S->children()) {
11478       if (!Child)
11479         continue;
11480       if (const auto *E = dyn_cast<Expr>(Child))
11481         if (!E->isGLValue())
11482           continue;
11483       if (Visit(Child))
11484         return true;
11485     }
11486     return false;
11487   }
11488   explicit LastprivateConditionalRefChecker(
11489       ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11490       : LPM(LPM) {}
11491   std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11492   getFoundData() const {
11493     return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11494   }
11495 };
11496 } // namespace
11497 
11498 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11499                                                        LValue IVLVal,
11500                                                        StringRef UniqueDeclName,
11501                                                        LValue LVal,
11502                                                        SourceLocation Loc) {
11503   // Last updated loop counter for the lastprivate conditional var.
11504   // int<xx> last_iv = 0;
11505   llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11506   llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11507       LLIVTy, getName({UniqueDeclName, "iv"}));
11508   cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11509       IVLVal.getAlignment().getAsAlign());
11510   LValue LastIVLVal =
11511       CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
11512 
11513   // Last value of the lastprivate conditional.
11514   // decltype(priv_a) last_a;
11515   llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11516       CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11517   cast<llvm::GlobalVariable>(Last)->setAlignment(
11518       LVal.getAlignment().getAsAlign());
11519   LValue LastLVal =
11520       CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11521 
11522   // Global loop counter. Required to handle inner parallel-for regions.
11523   // iv
11524   llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11525 
11526   // #pragma omp critical(a)
11527   // if (last_iv <= iv) {
11528   //   last_iv = iv;
11529   //   last_a = priv_a;
11530   // }
11531   auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11532                     Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11533     Action.Enter(CGF);
11534     llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11535     // Check whether the variable was updated (last_iv <= iv) and, if so,
11536     // store the new value in the global var.
11537     llvm::Value *CmpRes;
11538     if (IVLVal.getType()->isSignedIntegerType()) {
11539       CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11540     } else {
11541       assert(IVLVal.getType()->isUnsignedIntegerType() &&
11542              "Loop iteration variable must be integer.");
11543       CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11544     }
11545     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11546     llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11547     CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11548     // {
11549     CGF.EmitBlock(ThenBB);
11550 
11551     //   last_iv = iv;
11552     CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11553 
11554     //   last_a = priv_a;
11555     switch (CGF.getEvaluationKind(LVal.getType())) {
11556     case TEK_Scalar: {
11557       llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11558       CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11559       break;
11560     }
11561     case TEK_Complex: {
11562       CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11563       CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11564       break;
11565     }
11566     case TEK_Aggregate:
11567       llvm_unreachable(
11568           "Aggregates are not supported in lastprivate conditional.");
11569     }
11570     // }
11571     CGF.EmitBranch(ExitBB);
11572     // There is no need to emit line number for unconditional branch.
11573     // There is no need to emit a line number for the unconditional branch.
11574     CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11575   };
11576 
11577   if (CGM.getLangOpts().OpenMPSimd) {
11578     // Do not emit a critical region, as no parallel region can be emitted.
11579     RegionCodeGenTy ThenRCG(CodeGen);
11580     ThenRCG(CGF);
11581   } else {
11582     emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11583   }
11584 }
11585 
11586 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11587                                                          const Expr *LHS) {
11588   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11589     return;
11590   LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11591   if (!Checker.Visit(LHS))
11592     return;
11593   const Expr *FoundE;
11594   const Decl *FoundD;
11595   StringRef UniqueDeclName;
11596   LValue IVLVal;
11597   llvm::Function *FoundFn;
11598   std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11599       Checker.getFoundData();
11600   if (FoundFn != CGF.CurFn) {
11601     // Special codegen for inner parallel regions.
11602     // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11603     auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11604     assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11605            "Lastprivate conditional is not found in outer region.");
11606     QualType StructTy = std::get<0>(It->getSecond());
11607     const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
11608     LValue PrivLVal = CGF.EmitLValue(FoundE);
11609     Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11610         PrivLVal.getAddress(),
11611         CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
11612         CGF.ConvertTypeForMem(StructTy));
11613     LValue BaseLVal =
11614         CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11615     LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11616     CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11617                             CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11618                         FiredLVal, llvm::AtomicOrdering::Unordered,
11619                         /*IsVolatile=*/true, /*isInit=*/false);
11620     return;
11621   }
11622 
11623   // Private address of the lastprivate conditional in the current context.
11624   // priv_a
11625   LValue LVal = CGF.EmitLValue(FoundE);
11626   emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11627                                    FoundE->getExprLoc());
11628 }
11629 
11630 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11631     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11632     const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11633   if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11634     return;
11635   auto Range = llvm::reverse(LastprivateConditionalStack);
11636   auto It = llvm::find_if(
11637       Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11638   if (It == Range.end() || It->Fn != CGF.CurFn)
11639     return;
11640   auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11641   assert(LPCI != LastprivateConditionalToTypes.end() &&
11642          "Lastprivates must be registered already.");
11643   SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11644   getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11645   const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11646   for (const auto &Pair : It->DeclToUniqueName) {
11647     const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11648     if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11649       continue;
11650     auto I = LPCI->getSecond().find(Pair.first);
11651     assert(I != LPCI->getSecond().end() &&
11652            "Lastprivate must be registered already.");
11653     // bool Cmp = priv_a.Fired != 0;
11654     LValue BaseLVal = std::get<3>(I->getSecond());
11655     LValue FiredLVal =
11656         CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11657     llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11658     llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11659     llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11660     llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11661     // if (Cmp) {
11662     CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11663     CGF.EmitBlock(ThenBB);
11664     Address Addr = CGF.GetAddrOfLocalVar(VD);
11665     LValue LVal;
11666     if (VD->getType()->isReferenceType())
11667       LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11668                                            AlignmentSource::Decl);
11669     else
11670       LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11671                                 AlignmentSource::Decl);
11672     emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11673                                      D.getBeginLoc());
11674     auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11675     CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
11676     // }
11677   }
11678 }
11679 
11680 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11681     CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11682     SourceLocation Loc) {
11683   if (CGF.getLangOpts().OpenMP < 50)
11684     return;
11685   auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11686   assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11687          "Unknown lastprivate conditional variable.");
11688   StringRef UniqueName = It->second;
11689   llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11690   // The variable was not updated in the region - exit.
11691   if (!GV)
11692     return;
11693   LValue LPLVal = CGF.MakeRawAddrLValue(
11694       GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11695   llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11696   CGF.EmitStoreOfScalar(Res, PrivLVal);
11697 }
11698 
11699 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
11700     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11701     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11702     const RegionCodeGenTy &CodeGen) {
11703   llvm_unreachable("Not supported in SIMD-only mode");
11704 }
11705 
11706 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
11707     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11708     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11709     const RegionCodeGenTy &CodeGen) {
11710   llvm_unreachable("Not supported in SIMD-only mode");
11711 }
11712 
11713 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
11714     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11715     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11716     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11717     bool Tied, unsigned &NumberOfParts) {
11718   llvm_unreachable("Not supported in SIMD-only mode");
11719 }
11720 
11721 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
11722                                            SourceLocation Loc,
11723                                            llvm::Function *OutlinedFn,
11724                                            ArrayRef<llvm::Value *> CapturedVars,
11725                                            const Expr *IfCond,
11726                                            llvm::Value *NumThreads) {
11727   llvm_unreachable("Not supported in SIMD-only mode");
11728 }
11729 
11730 void CGOpenMPSIMDRuntime::emitCriticalRegion(
11731     CodeGenFunction &CGF, StringRef CriticalName,
11732     const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11733     const Expr *Hint) {
11734   llvm_unreachable("Not supported in SIMD-only mode");
11735 }
11736 
11737 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
11738                                            const RegionCodeGenTy &MasterOpGen,
11739                                            SourceLocation Loc) {
11740   llvm_unreachable("Not supported in SIMD-only mode");
11741 }
11742 
11743 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
11744                                            const RegionCodeGenTy &MasterOpGen,
11745                                            SourceLocation Loc,
11746                                            const Expr *Filter) {
11747   llvm_unreachable("Not supported in SIMD-only mode");
11748 }
11749 
11750 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
11751                                             SourceLocation Loc) {
11752   llvm_unreachable("Not supported in SIMD-only mode");
11753 }
11754 
11755 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
11756     CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
11757     SourceLocation Loc) {
11758   llvm_unreachable("Not supported in SIMD-only mode");
11759 }
11760 
11761 void CGOpenMPSIMDRuntime::emitSingleRegion(
11762     CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
11763     SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
11764     ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
11765     ArrayRef<const Expr *> AssignmentOps) {
11766   llvm_unreachable("Not supported in SIMD-only mode");
11767 }
11768 
11769 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
11770                                             const RegionCodeGenTy &OrderedOpGen,
11771                                             SourceLocation Loc,
11772                                             bool IsThreads) {
11773   llvm_unreachable("Not supported in SIMD-only mode");
11774 }
11775 
11776 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
11777                                           SourceLocation Loc,
11778                                           OpenMPDirectiveKind Kind,
11779                                           bool EmitChecks,
11780                                           bool ForceSimpleCall) {
11781   llvm_unreachable("Not supported in SIMD-only mode");
11782 }
11783 
11784 void CGOpenMPSIMDRuntime::emitForDispatchInit(
11785     CodeGenFunction &CGF, SourceLocation Loc,
11786     const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
11787     bool Ordered, const DispatchRTInput &DispatchValues) {
11788   llvm_unreachable("Not supported in SIMD-only mode");
11789 }
11790 
11791 void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
11792                                                 SourceLocation Loc) {
11793   llvm_unreachable("Not supported in SIMD-only mode");
11794 }
11795 
11796 void CGOpenMPSIMDRuntime::emitForStaticInit(
11797     CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
11798     const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
11799   llvm_unreachable("Not supported in SIMD-only mode");
11800 }
11801 
11802 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
11803     CodeGenFunction &CGF, SourceLocation Loc,
11804     OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
11805   llvm_unreachable("Not supported in SIMD-only mode");
11806 }
11807 
11808 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
11809                                                      SourceLocation Loc,
11810                                                      unsigned IVSize,
11811                                                      bool IVSigned) {
11812   llvm_unreachable("Not supported in SIMD-only mode");
11813 }
11814 
11815 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
11816                                               SourceLocation Loc,
11817                                               OpenMPDirectiveKind DKind) {
11818   llvm_unreachable("Not supported in SIMD-only mode");
11819 }
11820 
11821 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
11822                                               SourceLocation Loc,
11823                                               unsigned IVSize, bool IVSigned,
11824                                               Address IL, Address LB,
11825                                               Address UB, Address ST) {
11826   llvm_unreachable("Not supported in SIMD-only mode");
11827 }
11828 
11829 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
11830                                                llvm::Value *NumThreads,
11831                                                SourceLocation Loc) {
11832   llvm_unreachable("Not supported in SIMD-only mode");
11833 }
11834 
11835 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
11836                                              ProcBindKind ProcBind,
11837                                              SourceLocation Loc) {
11838   llvm_unreachable("Not supported in SIMD-only mode");
11839 }
11840 
11841 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
11842                                                     const VarDecl *VD,
11843                                                     Address VDAddr,
11844                                                     SourceLocation Loc) {
11845   llvm_unreachable("Not supported in SIMD-only mode");
11846 }
11847 
11848 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
11849     const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
11850     CodeGenFunction *CGF) {
11851   llvm_unreachable("Not supported in SIMD-only mode");
11852 }
11853 
11854 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
11855     CodeGenFunction &CGF, QualType VarType, StringRef Name) {
11856   llvm_unreachable("Not supported in SIMD-only mode");
11857 }
11858 
11859 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
11860                                     ArrayRef<const Expr *> Vars,
11861                                     SourceLocation Loc,
11862                                     llvm::AtomicOrdering AO) {
11863   llvm_unreachable("Not supported in SIMD-only mode");
11864 }
11865 
11866 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
11867                                        const OMPExecutableDirective &D,
11868                                        llvm::Function *TaskFunction,
11869                                        QualType SharedsTy, Address Shareds,
11870                                        const Expr *IfCond,
11871                                        const OMPTaskDataTy &Data) {
11872   llvm_unreachable("Not supported in SIMD-only mode");
11873 }
11874 
11875 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
11876     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
11877     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
11878     const Expr *IfCond, const OMPTaskDataTy &Data) {
11879   llvm_unreachable("Not supported in SIMD-only mode");
11880 }
11881 
11882 void CGOpenMPSIMDRuntime::emitReduction(
11883     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
11884     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
11885     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
11886   assert(Options.SimpleReduction && "Only simple reduction is expected.");
11887   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
11888                                  ReductionOps, Options);
11889 }
11890 
11891 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
11892     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
11893     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
11894   llvm_unreachable("Not supported in SIMD-only mode");
11895 }
11896 
11897 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
11898                                                 SourceLocation Loc,
11899                                                 bool IsWorksharingReduction) {
11900   llvm_unreachable("Not supported in SIMD-only mode");
11901 }
11902 
11903 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
11904                                                   SourceLocation Loc,
11905                                                   ReductionCodeGen &RCG,
11906                                                   unsigned N) {
11907   llvm_unreachable("Not supported in SIMD-only mode");
11908 }
11909 
11910 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
11911                                                   SourceLocation Loc,
11912                                                   llvm::Value *ReductionsPtr,
11913                                                   LValue SharedLVal) {
11914   llvm_unreachable("Not supported in SIMD-only mode");
11915 }
11916 
11917 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
11918                                            SourceLocation Loc,
11919                                            const OMPTaskDataTy &Data) {
11920   llvm_unreachable("Not supported in SIMD-only mode");
11921 }
11922 
11923 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
11924     CodeGenFunction &CGF, SourceLocation Loc,
11925     OpenMPDirectiveKind CancelRegion) {
11926   llvm_unreachable("Not supported in SIMD-only mode");
11927 }
11928 
11929 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
11930                                          SourceLocation Loc, const Expr *IfCond,
11931                                          OpenMPDirectiveKind CancelRegion) {
11932   llvm_unreachable("Not supported in SIMD-only mode");
11933 }
11934 
11935 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
11936     const OMPExecutableDirective &D, StringRef ParentName,
11937     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
11938     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
11939   llvm_unreachable("Not supported in SIMD-only mode");
11940 }
11941 
11942 void CGOpenMPSIMDRuntime::emitTargetCall(
11943     CodeGenFunction &CGF, const OMPExecutableDirective &D,
11944     llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
11945     llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
11946     llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
11947                                      const OMPLoopDirective &D)>
11948         SizeEmitter) {
11949   llvm_unreachable("Not supported in SIMD-only mode");
11950 }
11951 
11952 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
11953   llvm_unreachable("Not supported in SIMD-only mode");
11954 }
11955 
11956 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
11957   llvm_unreachable("Not supported in SIMD-only mode");
11958 }
11959 
11960 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
11961   return false;
11962 }
11963 
11964 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
11965                                         const OMPExecutableDirective &D,
11966                                         SourceLocation Loc,
11967                                         llvm::Function *OutlinedFn,
11968                                         ArrayRef<llvm::Value *> CapturedVars) {
11969   llvm_unreachable("Not supported in SIMD-only mode");
11970 }
11971 
11972 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11973                                              const Expr *NumTeams,
11974                                              const Expr *ThreadLimit,
11975                                              SourceLocation Loc) {
11976   llvm_unreachable("Not supported in SIMD-only mode");
11977 }
11978 
11979 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
11980     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11981     const Expr *Device, const RegionCodeGenTy &CodeGen,
11982     CGOpenMPRuntime::TargetDataInfo &Info) {
11983   llvm_unreachable("Not supported in SIMD-only mode");
11984 }
11985 
11986 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
11987     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11988     const Expr *Device) {
11989   llvm_unreachable("Not supported in SIMD-only mode");
11990 }
11991 
11992 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11993                                            const OMPLoopDirective &D,
11994                                            ArrayRef<Expr *> NumIterations) {
11995   llvm_unreachable("Not supported in SIMD-only mode");
11996 }
11997 
11998 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11999                                               const OMPDependClause *C) {
12000   llvm_unreachable("Not supported in SIMD-only mode");
12001 }
12002 
12003 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12004                                               const OMPDoacrossClause *C) {
12005   llvm_unreachable("Not supported in SIMD-only mode");
12006 }
12007 
12008 const VarDecl *
12009 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12010                                         const VarDecl *NativeParam) const {
12011   llvm_unreachable("Not supported in SIMD-only mode");
12012 }
12013 
12014 Address
12015 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12016                                          const VarDecl *NativeParam,
12017                                          const VarDecl *TargetParam) const {
12018   llvm_unreachable("Not supported in SIMD-only mode");
12019 }
12020