xref: /llvm-project/flang/lib/Lower/OpenMP/OpenMP.cpp (revision 15ab7be2e049bc0f4ea6744ca037395686a923bc)
1 //===-- OpenMP.cpp -- Open MP directive lowering --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "flang/Lower/OpenMP.h"
14 
15 #include "ClauseProcessor.h"
16 #include "Clauses.h"
17 #include "DataSharingProcessor.h"
18 #include "Decomposer.h"
19 #include "ReductionProcessor.h"
20 #include "Utils.h"
21 #include "flang/Common/OpenMP-utils.h"
22 #include "flang/Common/idioms.h"
23 #include "flang/Lower/Bridge.h"
24 #include "flang/Lower/ConvertExpr.h"
25 #include "flang/Lower/ConvertVariable.h"
26 #include "flang/Lower/DirectivesCommon.h"
27 #include "flang/Lower/StatementContext.h"
28 #include "flang/Lower/SymbolMap.h"
29 #include "flang/Optimizer/Builder/BoxValue.h"
30 #include "flang/Optimizer/Builder/FIRBuilder.h"
31 #include "flang/Optimizer/Builder/Todo.h"
32 #include "flang/Optimizer/Dialect/FIRType.h"
33 #include "flang/Optimizer/HLFIR/HLFIROps.h"
34 #include "flang/Parser/characters.h"
35 #include "flang/Parser/parse-tree.h"
36 #include "flang/Semantics/openmp-directive-sets.h"
37 #include "flang/Semantics/tools.h"
38 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
39 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
40 #include "mlir/Transforms/RegionUtils.h"
41 #include "llvm/ADT/STLExtras.h"
42 #include "llvm/Frontend/OpenMP/OMPConstants.h"
43 
44 using namespace Fortran::lower::omp;
45 using namespace Fortran::common::openmp;
46 
47 //===----------------------------------------------------------------------===//
48 // Code generation helper functions
49 //===----------------------------------------------------------------------===//
50 
51 static void genOMPDispatch(lower::AbstractConverter &converter,
52                            lower::SymMap &symTable,
53                            semantics::SemanticsContext &semaCtx,
54                            lower::pft::Evaluation &eval, mlir::Location loc,
55                            const ConstructQueue &queue,
56                            ConstructQueue::const_iterator item);
57 
58 static void processHostEvalClauses(lower::AbstractConverter &converter,
59                                    semantics::SemanticsContext &semaCtx,
60                                    lower::StatementContext &stmtCtx,
61                                    lower::pft::Evaluation &eval,
62                                    mlir::Location loc);
63 
64 namespace {
65 /// Structure holding information that is needed to pass host-evaluated
66 /// information to later lowering stages.
67 class HostEvalInfo {
68 public:
69   // Allow this function access to private members in order to initialize them.
70   friend void ::processHostEvalClauses(lower::AbstractConverter &,
71                                        semantics::SemanticsContext &,
72                                        lower::StatementContext &,
73                                        lower::pft::Evaluation &,
74                                        mlir::Location);
75 
76   /// Fill \c vars with values stored in \c ops.
77   ///
78   /// The order in which values are stored matches the one expected by \see
79   /// bindOperands().
80   void collectValues(llvm::SmallVectorImpl<mlir::Value> &vars) const {
81     vars.append(ops.loopLowerBounds);
82     vars.append(ops.loopUpperBounds);
83     vars.append(ops.loopSteps);
84 
85     if (ops.numTeamsLower)
86       vars.push_back(ops.numTeamsLower);
87 
88     if (ops.numTeamsUpper)
89       vars.push_back(ops.numTeamsUpper);
90 
91     if (ops.numThreads)
92       vars.push_back(ops.numThreads);
93 
94     if (ops.threadLimit)
95       vars.push_back(ops.threadLimit);
96   }
97 
98   /// Update \c ops, replacing all values with the corresponding block argument
99   /// in \c args.
100   ///
101   /// The order in which values are stored in \c args is the same as the one
102   /// used by \see collectValues().
103   void bindOperands(llvm::ArrayRef<mlir::BlockArgument> args) {
104     assert(args.size() ==
105                ops.loopLowerBounds.size() + ops.loopUpperBounds.size() +
106                    ops.loopSteps.size() + (ops.numTeamsLower ? 1 : 0) +
107                    (ops.numTeamsUpper ? 1 : 0) + (ops.numThreads ? 1 : 0) +
108                    (ops.threadLimit ? 1 : 0) &&
109            "invalid block argument list");
110     int argIndex = 0;
111     for (size_t i = 0; i < ops.loopLowerBounds.size(); ++i)
112       ops.loopLowerBounds[i] = args[argIndex++];
113 
114     for (size_t i = 0; i < ops.loopUpperBounds.size(); ++i)
115       ops.loopUpperBounds[i] = args[argIndex++];
116 
117     for (size_t i = 0; i < ops.loopSteps.size(); ++i)
118       ops.loopSteps[i] = args[argIndex++];
119 
120     if (ops.numTeamsLower)
121       ops.numTeamsLower = args[argIndex++];
122 
123     if (ops.numTeamsUpper)
124       ops.numTeamsUpper = args[argIndex++];
125 
126     if (ops.numThreads)
127       ops.numThreads = args[argIndex++];
128 
129     if (ops.threadLimit)
130       ops.threadLimit = args[argIndex++];
131   }
132 
133   /// Update \p clauseOps and \p ivOut with the corresponding host-evaluated
134   /// values and Fortran symbols, respectively, if they have already been
135   /// initialized but not yet applied.
136   ///
137   /// \returns whether an update was performed. If not, these clauses were not
138   ///          evaluated in the host device.
139   bool apply(mlir::omp::LoopNestOperands &clauseOps,
140              llvm::SmallVectorImpl<const semantics::Symbol *> &ivOut) {
141     if (iv.empty() || loopNestApplied) {
142       loopNestApplied = true;
143       return false;
144     }
145 
146     loopNestApplied = true;
147     clauseOps.loopLowerBounds = ops.loopLowerBounds;
148     clauseOps.loopUpperBounds = ops.loopUpperBounds;
149     clauseOps.loopSteps = ops.loopSteps;
150     ivOut.append(iv);
151     return true;
152   }
153 
154   /// Update \p clauseOps with the corresponding host-evaluated values if they
155   /// have already been initialized but not yet applied.
156   ///
157   /// \returns whether an update was performed. If not, these clauses were not
158   ///          evaluated in the host device.
159   bool apply(mlir::omp::ParallelOperands &clauseOps) {
160     if (!ops.numThreads || parallelApplied) {
161       parallelApplied = true;
162       return false;
163     }
164 
165     parallelApplied = true;
166     clauseOps.numThreads = ops.numThreads;
167     return true;
168   }
169 
170   /// Update \p clauseOps with the corresponding host-evaluated values if they
171   /// have already been initialized.
172   ///
173   /// \returns whether an update was performed. If not, these clauses were not
174   ///          evaluated in the host device.
175   bool apply(mlir::omp::TeamsOperands &clauseOps) {
176     if (!ops.numTeamsLower && !ops.numTeamsUpper && !ops.threadLimit)
177       return false;
178 
179     clauseOps.numTeamsLower = ops.numTeamsLower;
180     clauseOps.numTeamsUpper = ops.numTeamsUpper;
181     clauseOps.threadLimit = ops.threadLimit;
182     return true;
183   }
184 
185 private:
186   mlir::omp::HostEvaluatedOperands ops;
187   llvm::SmallVector<const semantics::Symbol *> iv;
188   bool loopNestApplied = false, parallelApplied = false;
189 };
190 } // namespace
191 
192 /// Stack of \see HostEvalInfo to represent the current nest of \c omp.target
193 /// operations being created.
194 ///
195 /// The current implementation prevents nested 'target' regions from breaking
196 /// the handling of the outer region by keeping a stack of information
197 /// structures, but it will probably still require some further work to support
198 /// reverse offloading.
199 static llvm::SmallVector<HostEvalInfo, 0> hostEvalInfo;
200 
201 /// Bind symbols to their corresponding entry block arguments.
202 ///
203 /// The binding will be performed inside of the current block, which does not
204 /// necessarily have to be part of the operation for which the binding is done.
205 /// However, block arguments must be accessible. This enables controlling the
206 /// insertion point of any new MLIR operations related to the binding of
207 /// arguments of a loop wrapper operation.
208 ///
209 /// \param [in] converter - PFT to MLIR conversion interface.
210 /// \param [in]        op - owner operation of the block arguments to bind.
211 /// \param [in]      args - entry block arguments information for the given
212 ///                         operation.
213 static void bindEntryBlockArgs(lower::AbstractConverter &converter,
214                                mlir::omp::BlockArgOpenMPOpInterface op,
215                                const EntryBlockArgs &args) {
216   assert(op != nullptr && "invalid block argument-defining operation");
217   assert(args.isValid() && "invalid args");
218   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
219 
220   auto bindSingleMapLike = [&converter,
221                             &firOpBuilder](const semantics::Symbol &sym,
222                                            const mlir::BlockArgument &arg) {
223     // Clones the `bounds` placing them inside the entry block and returns
224     // them.
225     auto cloneBound = [&](mlir::Value bound) {
226       if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
227         mlir::Operation *clonedOp = firOpBuilder.clone(*bound.getDefiningOp());
228         return clonedOp->getResult(0);
229       }
230       TODO(converter.getCurrentLocation(),
231            "target map-like clause operand unsupported bound type");
232     };
233 
234     auto cloneBounds = [cloneBound](llvm::ArrayRef<mlir::Value> bounds) {
235       llvm::SmallVector<mlir::Value> clonedBounds;
236       llvm::transform(bounds, std::back_inserter(clonedBounds),
237                       [&](mlir::Value bound) { return cloneBound(bound); });
238       return clonedBounds;
239     };
240 
241     fir::ExtendedValue extVal = converter.getSymbolExtendedValue(sym);
242     auto refType = mlir::dyn_cast<fir::ReferenceType>(arg.getType());
243     if (refType && fir::isa_builtin_cptr_type(refType.getElementType())) {
244       converter.bindSymbol(sym, arg);
245     } else {
246       extVal.match(
247           [&](const fir::BoxValue &v) {
248             converter.bindSymbol(sym,
249                                  fir::BoxValue(arg, cloneBounds(v.getLBounds()),
250                                                v.getExplicitParameters(),
251                                                v.getExplicitExtents()));
252           },
253           [&](const fir::MutableBoxValue &v) {
254             converter.bindSymbol(
255                 sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()),
256                                           v.getMutableProperties()));
257           },
258           [&](const fir::ArrayBoxValue &v) {
259             converter.bindSymbol(
260                 sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()),
261                                         cloneBounds(v.getLBounds()),
262                                         v.getSourceBox()));
263           },
264           [&](const fir::CharArrayBoxValue &v) {
265             converter.bindSymbol(
266                 sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()),
267                                             cloneBounds(v.getExtents()),
268                                             cloneBounds(v.getLBounds())));
269           },
270           [&](const fir::CharBoxValue &v) {
271             converter.bindSymbol(
272                 sym, fir::CharBoxValue(arg, cloneBound(v.getLen())));
273           },
274           [&](const fir::UnboxedValue &v) { converter.bindSymbol(sym, arg); },
275           [&](const auto &) {
276             TODO(converter.getCurrentLocation(),
277                  "target map clause operand unsupported type");
278           });
279     }
280   };
281 
282   auto bindMapLike =
283       [&bindSingleMapLike](llvm::ArrayRef<const semantics::Symbol *> syms,
284                            llvm::ArrayRef<mlir::BlockArgument> args) {
285         // Structure component symbols don't have bindings, and can only be
286         // explicitly mapped individually. If a member is captured implicitly
287         // we map the entirety of the derived type when we find its symbol.
288         llvm::SmallVector<const semantics::Symbol *> processedSyms;
289         llvm::copy_if(syms, std::back_inserter(processedSyms),
290                       [](auto *sym) { return !sym->owner().IsDerivedType(); });
291 
292         for (auto [sym, arg] : llvm::zip_equal(processedSyms, args))
293           bindSingleMapLike(*sym, arg);
294       };
295 
296   auto bindPrivateLike = [&converter, &firOpBuilder](
297                              llvm::ArrayRef<const semantics::Symbol *> syms,
298                              llvm::ArrayRef<mlir::Value> vars,
299                              llvm::ArrayRef<mlir::BlockArgument> args) {
300     llvm::SmallVector<const semantics::Symbol *> processedSyms;
301     for (auto *sym : syms) {
302       if (const auto *commonDet =
303               sym->detailsIf<semantics::CommonBlockDetails>()) {
304         llvm::transform(commonDet->objects(), std::back_inserter(processedSyms),
305                         [&](const auto &mem) { return &*mem; });
306       } else {
307         processedSyms.push_back(sym);
308       }
309     }
310 
311     for (auto [sym, var, arg] : llvm::zip_equal(processedSyms, vars, args))
312       converter.bindSymbol(
313           *sym,
314           hlfir::translateToExtendedValue(
315               var.getLoc(), firOpBuilder, hlfir::Entity{arg},
316               /*contiguousHint=*/
317               evaluate::IsSimplyContiguous(*sym, converter.getFoldingContext()))
318               .first);
319   };
320 
321   // Process in clause name alphabetical order to match block arguments order.
322   // Do not bind host_eval variables because they cannot be used inside of the
323   // corresponding region, except for very specific cases handled separately.
324   bindPrivateLike(args.inReduction.syms, args.inReduction.vars,
325                   op.getInReductionBlockArgs());
326   bindMapLike(args.map.syms, op.getMapBlockArgs());
327   bindPrivateLike(args.priv.syms, args.priv.vars, op.getPrivateBlockArgs());
328   bindPrivateLike(args.reduction.syms, args.reduction.vars,
329                   op.getReductionBlockArgs());
330   bindPrivateLike(args.taskReduction.syms, args.taskReduction.vars,
331                   op.getTaskReductionBlockArgs());
332   bindMapLike(args.useDeviceAddr.syms, op.getUseDeviceAddrBlockArgs());
333   bindMapLike(args.useDevicePtr.syms, op.getUseDevicePtrBlockArgs());
334 }
335 
336 /// Get the list of base values that the specified map-like variables point to.
337 ///
338 /// This function must be kept in sync with changes to the `createMapInfoOp`
339 /// utility function, since it must take into account the potential introduction
340 /// of levels of indirection (i.e. intermediate ops).
341 ///
342 /// \param [in]     vars - list of values passed to map-like clauses, returned
343 ///                        by an `omp.map.info` operation.
344 /// \param [out] baseOps - populated with the `var_ptr` values of the
345 ///                        corresponding defining operations.
346 static void
347 extractMappedBaseValues(llvm::ArrayRef<mlir::Value> vars,
348                         llvm::SmallVectorImpl<mlir::Value> &baseOps) {
349   llvm::transform(vars, std::back_inserter(baseOps), [](mlir::Value map) {
350     auto mapInfo = map.getDefiningOp<mlir::omp::MapInfoOp>();
351     assert(mapInfo && "expected all map vars to be defined by omp.map.info");
352 
353     mlir::Value varPtr = mapInfo.getVarPtr();
354     if (auto boxAddr = varPtr.getDefiningOp<fir::BoxAddrOp>())
355       return boxAddr.getVal();
356 
357     return varPtr;
358   });
359 }
360 
361 /// Get the directive enumeration value corresponding to the given OpenMP
362 /// construct PFT node.
363 llvm::omp::Directive
364 extractOmpDirective(const parser::OpenMPConstruct &ompConstruct) {
365   return common::visit(
366       common::visitors{
367           [](const parser::OpenMPAllocatorsConstruct &c) {
368             return llvm::omp::OMPD_allocators;
369           },
370           [](const parser::OpenMPAtomicConstruct &c) {
371             return llvm::omp::OMPD_atomic;
372           },
373           [](const parser::OpenMPBlockConstruct &c) {
374             return std::get<parser::OmpBlockDirective>(
375                        std::get<parser::OmpBeginBlockDirective>(c.t).t)
376                 .v;
377           },
378           [](const parser::OpenMPCriticalConstruct &c) {
379             return llvm::omp::OMPD_critical;
380           },
381           [](const parser::OpenMPDeclarativeAllocate &c) {
382             return llvm::omp::OMPD_allocate;
383           },
384           [](const parser::OpenMPDispatchConstruct &c) {
385             return llvm::omp::OMPD_dispatch;
386           },
387           [](const parser::OpenMPExecutableAllocate &c) {
388             return llvm::omp::OMPD_allocate;
389           },
390           [](const parser::OpenMPLoopConstruct &c) {
391             return std::get<parser::OmpLoopDirective>(
392                        std::get<parser::OmpBeginLoopDirective>(c.t).t)
393                 .v;
394           },
395           [](const parser::OpenMPSectionConstruct &c) {
396             return llvm::omp::OMPD_section;
397           },
398           [](const parser::OpenMPSectionsConstruct &c) {
399             return std::get<parser::OmpSectionsDirective>(
400                        std::get<parser::OmpBeginSectionsDirective>(c.t).t)
401                 .v;
402           },
403           [](const parser::OpenMPStandaloneConstruct &c) {
404             return common::visit(
405                 common::visitors{
406                     [](const parser::OpenMPSimpleStandaloneConstruct &c) {
407                       return std::get<parser::OmpSimpleStandaloneDirective>(c.t)
408                           .v;
409                     },
410                     [](const parser::OpenMPFlushConstruct &c) {
411                       return llvm::omp::OMPD_flush;
412                     },
413                     [](const parser::OpenMPCancelConstruct &c) {
414                       return llvm::omp::OMPD_cancel;
415                     },
416                     [](const parser::OpenMPCancellationPointConstruct &c) {
417                       return llvm::omp::OMPD_cancellation_point;
418                     },
419                     [](const parser::OmpMetadirectiveDirective &c) {
420                       return llvm::omp::OMPD_metadirective;
421                     },
422                     [](const parser::OpenMPDepobjConstruct &c) {
423                       return llvm::omp::OMPD_depobj;
424                     }},
425                 c.u);
426           },
427           [](const parser::OpenMPUtilityConstruct &c) {
428             return common::visit(
429                 common::visitors{[](const parser::OmpErrorDirective &c) {
430                                    return llvm::omp::OMPD_error;
431                                  },
432                                  [](const parser::OmpNothingDirective &c) {
433                                    return llvm::omp::OMPD_nothing;
434                                  }},
435                 c.u);
436           }},
437       ompConstruct.u);
438 }
439 
440 /// Populate the global \see hostEvalInfo after processing clauses for the given
441 /// \p eval OpenMP target construct, or nested constructs, if these must be
442 /// evaluated outside of the target region per the spec.
443 ///
444 /// In particular, this will ensure that in 'target teams' and equivalent nested
445 /// constructs, the \c thread_limit and \c num_teams clauses will be evaluated
446 /// in the host. Additionally, loop bounds, steps and the \c num_threads clause
447 /// will also be evaluated in the host if a target SPMD construct is detected
448 /// (i.e. 'target teams distribute parallel do [simd]' or equivalent nesting).
449 ///
450 /// The result, stored as a global, is intended to be used to populate the \c
451 /// host_eval operands of the associated \c omp.target operation, and also to be
452 /// checked and used by later lowering steps to populate the corresponding
453 /// operands of the \c omp.teams, \c omp.parallel or \c omp.loop_nest
454 /// operations.
455 static void processHostEvalClauses(lower::AbstractConverter &converter,
456                                    semantics::SemanticsContext &semaCtx,
457                                    lower::StatementContext &stmtCtx,
458                                    lower::pft::Evaluation &eval,
459                                    mlir::Location loc) {
460   // Obtain the list of clauses of the given OpenMP block or loop construct
461   // evaluation. Other evaluations passed to this lambda keep `clauses`
462   // unchanged.
463   auto extractClauses = [&semaCtx](lower::pft::Evaluation &eval,
464                                    List<Clause> &clauses) {
465     const auto *ompEval = eval.getIf<parser::OpenMPConstruct>();
466     if (!ompEval)
467       return;
468 
469     const parser::OmpClauseList *beginClauseList = nullptr;
470     const parser::OmpClauseList *endClauseList = nullptr;
471     common::visit(
472         common::visitors{
473             [&](const parser::OpenMPBlockConstruct &ompConstruct) {
474               const auto &beginDirective =
475                   std::get<parser::OmpBeginBlockDirective>(ompConstruct.t);
476               beginClauseList =
477                   &std::get<parser::OmpClauseList>(beginDirective.t);
478               endClauseList = &std::get<parser::OmpClauseList>(
479                   std::get<parser::OmpEndBlockDirective>(ompConstruct.t).t);
480             },
481             [&](const parser::OpenMPLoopConstruct &ompConstruct) {
482               const auto &beginDirective =
483                   std::get<parser::OmpBeginLoopDirective>(ompConstruct.t);
484               beginClauseList =
485                   &std::get<parser::OmpClauseList>(beginDirective.t);
486 
487               if (auto &endDirective =
488                       std::get<std::optional<parser::OmpEndLoopDirective>>(
489                           ompConstruct.t))
490                 endClauseList =
491                     &std::get<parser::OmpClauseList>(endDirective->t);
492             },
493             [&](const auto &) {}},
494         ompEval->u);
495 
496     assert(beginClauseList && "expected begin directive");
497     clauses.append(makeClauses(*beginClauseList, semaCtx));
498 
499     if (endClauseList)
500       clauses.append(makeClauses(*endClauseList, semaCtx));
501   };
502 
503   // Return the directive that is immediately nested inside of the given
504   // `parent` evaluation, if it is its only non-end-statement nested evaluation
505   // and it represents an OpenMP construct.
506   auto extractOnlyOmpNestedDir = [](lower::pft::Evaluation &parent)
507       -> std::optional<llvm::omp::Directive> {
508     if (!parent.hasNestedEvaluations())
509       return std::nullopt;
510 
511     llvm::omp::Directive dir;
512     auto &nested = parent.getFirstNestedEvaluation();
513     if (const auto *ompEval = nested.getIf<parser::OpenMPConstruct>())
514       dir = extractOmpDirective(*ompEval);
515     else
516       return std::nullopt;
517 
518     for (auto &sibling : parent.getNestedEvaluations())
519       if (&sibling != &nested && !sibling.isEndStmt())
520         return std::nullopt;
521 
522     return dir;
523   };
524 
525   // Process the given evaluation assuming it's part of a 'target' construct or
526   // captured by one, and store results in the global `hostEvalInfo`.
527   std::function<void(lower::pft::Evaluation &, const List<Clause> &)>
528       processEval;
529   processEval = [&](lower::pft::Evaluation &eval, const List<Clause> &clauses) {
530     using namespace llvm::omp;
531     ClauseProcessor cp(converter, semaCtx, clauses);
532 
533     // Call `processEval` recursively with the immediately nested evaluation and
534     // its corresponding clauses if there is a single nested evaluation
535     // representing an OpenMP directive that passes the given test.
536     auto processSingleNestedIf = [&](llvm::function_ref<bool(Directive)> test) {
537       std::optional<Directive> nestedDir = extractOnlyOmpNestedDir(eval);
538       if (!nestedDir || !test(*nestedDir))
539         return;
540 
541       lower::pft::Evaluation &nestedEval = eval.getFirstNestedEvaluation();
542       List<lower::omp::Clause> nestedClauses;
543       extractClauses(nestedEval, nestedClauses);
544       processEval(nestedEval, nestedClauses);
545     };
546 
547     const auto *ompEval = eval.getIf<parser::OpenMPConstruct>();
548     if (!ompEval)
549       return;
550 
551     HostEvalInfo &hostInfo = hostEvalInfo.back();
552 
553     switch (extractOmpDirective(*ompEval)) {
554     // Cases where 'teams' and target SPMD clauses might be present.
555     case OMPD_teams_distribute_parallel_do:
556     case OMPD_teams_distribute_parallel_do_simd:
557       cp.processThreadLimit(stmtCtx, hostInfo.ops);
558       [[fallthrough]];
559     case OMPD_target_teams_distribute_parallel_do:
560     case OMPD_target_teams_distribute_parallel_do_simd:
561       cp.processNumTeams(stmtCtx, hostInfo.ops);
562       [[fallthrough]];
563     case OMPD_distribute_parallel_do:
564     case OMPD_distribute_parallel_do_simd:
565       cp.processCollapse(loc, eval, hostInfo.ops, hostInfo.iv);
566       cp.processNumThreads(stmtCtx, hostInfo.ops);
567       break;
568 
569     // Cases where 'teams' clauses might be present, and target SPMD is
570     // possible by looking at nested evaluations.
571     case OMPD_teams:
572       cp.processThreadLimit(stmtCtx, hostInfo.ops);
573       [[fallthrough]];
574     case OMPD_target_teams:
575       cp.processNumTeams(stmtCtx, hostInfo.ops);
576       processSingleNestedIf([](Directive nestedDir) {
577         return nestedDir == OMPD_distribute_parallel_do ||
578                nestedDir == OMPD_distribute_parallel_do_simd;
579       });
580       break;
581 
582     // Cases where only 'teams' host-evaluated clauses might be present.
583     case OMPD_teams_distribute:
584     case OMPD_teams_distribute_simd:
585       cp.processThreadLimit(stmtCtx, hostInfo.ops);
586       [[fallthrough]];
587     case OMPD_target_teams_distribute:
588     case OMPD_target_teams_distribute_simd:
589       cp.processNumTeams(stmtCtx, hostInfo.ops);
590       break;
591 
592     // Standalone 'target' case.
593     case OMPD_target: {
594       processSingleNestedIf(
595           [](Directive nestedDir) { return topTeamsSet.test(nestedDir); });
596       break;
597     }
598     default:
599       break;
600     }
601   };
602 
603   assert(!hostEvalInfo.empty() && "expected HOST_EVAL info structure");
604 
605   const auto *ompEval = eval.getIf<parser::OpenMPConstruct>();
606   assert(ompEval &&
607          llvm::omp::allTargetSet.test(extractOmpDirective(*ompEval)) &&
608          "expected TARGET construct evaluation");
609   (void)ompEval;
610 
611   // Use the whole list of clauses passed to the construct here, rather than the
612   // ones only applied to omp.target.
613   List<lower::omp::Clause> clauses;
614   extractClauses(eval, clauses);
615   processEval(eval, clauses);
616 }
617 
618 static lower::pft::Evaluation *
619 getCollapsedLoopEval(lower::pft::Evaluation &eval, int collapseValue) {
620   // Return the Evaluation of the innermost collapsed loop, or the current one
621   // if there was no COLLAPSE.
622   if (collapseValue == 0)
623     return &eval;
624 
625   lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation();
626   for (int i = 1; i < collapseValue; i++) {
627     // The nested evaluations should be DoConstructs (i.e. they should form
628     // a loop nest). Each DoConstruct is a tuple <NonLabelDoStmt, Block,
629     // EndDoStmt>.
630     assert(curEval->isA<parser::DoConstruct>());
631     curEval = &*std::next(curEval->getNestedEvaluations().begin());
632   }
633   return curEval;
634 }
635 
636 static void genNestedEvaluations(lower::AbstractConverter &converter,
637                                  lower::pft::Evaluation &eval,
638                                  int collapseValue = 0) {
639   lower::pft::Evaluation *curEval = getCollapsedLoopEval(eval, collapseValue);
640 
641   for (lower::pft::Evaluation &e : curEval->getNestedEvaluations())
642     converter.genEval(e);
643 }
644 
645 static fir::GlobalOp globalInitialization(lower::AbstractConverter &converter,
646                                           fir::FirOpBuilder &firOpBuilder,
647                                           const semantics::Symbol &sym,
648                                           const lower::pft::Variable &var,
649                                           mlir::Location currentLocation) {
650   mlir::Type ty = converter.genType(sym);
651   std::string globalName = converter.mangleName(sym);
652   mlir::StringAttr linkage = firOpBuilder.createInternalLinkage();
653   fir::GlobalOp global =
654       firOpBuilder.createGlobal(currentLocation, ty, globalName, linkage);
655 
656   // Create default initialization for non-character scalar.
657   if (semantics::IsAllocatableOrObjectPointer(&sym)) {
658     mlir::Type baseAddrType = mlir::dyn_cast<fir::BoxType>(ty).getEleTy();
659     lower::createGlobalInitialization(
660         firOpBuilder, global, [&](fir::FirOpBuilder &b) {
661           mlir::Value nullAddr =
662               b.createNullConstant(currentLocation, baseAddrType);
663           mlir::Value box =
664               b.create<fir::EmboxOp>(currentLocation, ty, nullAddr);
665           b.create<fir::HasValueOp>(currentLocation, box);
666         });
667   } else {
668     lower::createGlobalInitialization(
669         firOpBuilder, global, [&](fir::FirOpBuilder &b) {
670           mlir::Value undef = b.create<fir::UndefOp>(currentLocation, ty);
671           b.create<fir::HasValueOp>(currentLocation, undef);
672         });
673   }
674 
675   return global;
676 }
677 
678 // Get the extended value for \p val by extracting additional variable
679 // information from \p base.
680 static fir::ExtendedValue getExtendedValue(fir::ExtendedValue base,
681                                            mlir::Value val) {
682   return base.match(
683       [&](const fir::MutableBoxValue &box) -> fir::ExtendedValue {
684         return fir::MutableBoxValue(val, box.nonDeferredLenParams(), {});
685       },
686       [&](const auto &) -> fir::ExtendedValue {
687         return fir::substBase(base, val);
688       });
689 }
690 
691 #ifndef NDEBUG
692 static bool isThreadPrivate(lower::SymbolRef sym) {
693   if (const auto *details = sym->detailsIf<semantics::CommonBlockDetails>()) {
694     for (const auto &obj : details->objects())
695       if (!obj->test(semantics::Symbol::Flag::OmpThreadprivate))
696         return false;
697     return true;
698   }
699   return sym->test(semantics::Symbol::Flag::OmpThreadprivate);
700 }
701 #endif
702 
703 static void threadPrivatizeVars(lower::AbstractConverter &converter,
704                                 lower::pft::Evaluation &eval) {
705   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
706   mlir::Location currentLocation = converter.getCurrentLocation();
707   mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
708   firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
709 
710   // If the symbol corresponds to the original ThreadprivateOp, use the symbol
711   // value from that operation to create one ThreadprivateOp copy operation
712   // inside the parallel region.
713   // In some cases, however, the symbol will correspond to the original,
714   // non-threadprivate variable. This can happen, for instance, with a common
715   // block, declared in a separate module, used by a parent procedure and
716   // privatized in its child procedure.
717   auto genThreadprivateOp = [&](lower::SymbolRef sym) -> mlir::Value {
718     assert(isThreadPrivate(sym));
719     mlir::Value symValue = converter.getSymbolAddress(sym);
720     mlir::Operation *op = symValue.getDefiningOp();
721     if (auto declOp = mlir::dyn_cast<hlfir::DeclareOp>(op))
722       op = declOp.getMemref().getDefiningOp();
723     if (mlir::isa<mlir::omp::ThreadprivateOp>(op))
724       symValue = mlir::dyn_cast<mlir::omp::ThreadprivateOp>(op).getSymAddr();
725     return firOpBuilder.create<mlir::omp::ThreadprivateOp>(
726         currentLocation, symValue.getType(), symValue);
727   };
728 
729   llvm::SetVector<const semantics::Symbol *> threadprivateSyms;
730   converter.collectSymbolSet(eval, threadprivateSyms,
731                              semantics::Symbol::Flag::OmpThreadprivate,
732                              /*collectSymbols=*/true,
733                              /*collectHostAssociatedSymbols=*/true);
734   std::set<semantics::SourceName> threadprivateSymNames;
735 
736   // For a COMMON block, the ThreadprivateOp is generated for itself instead of
737   // its members, so only bind the value of the new copied ThreadprivateOp
738   // inside the parallel region to the common block symbol only once for
739   // multiple members in one COMMON block.
740   llvm::SetVector<const semantics::Symbol *> commonSyms;
741   for (std::size_t i = 0; i < threadprivateSyms.size(); i++) {
742     const semantics::Symbol *sym = threadprivateSyms[i];
743     mlir::Value symThreadprivateValue;
744     // The variable may be used more than once, and each reference has one
745     // symbol with the same name. Only do once for references of one variable.
746     if (threadprivateSymNames.find(sym->name()) != threadprivateSymNames.end())
747       continue;
748     threadprivateSymNames.insert(sym->name());
749     if (const semantics::Symbol *common =
750             semantics::FindCommonBlockContaining(sym->GetUltimate())) {
751       mlir::Value commonThreadprivateValue;
752       if (commonSyms.contains(common)) {
753         commonThreadprivateValue = converter.getSymbolAddress(*common);
754       } else {
755         commonThreadprivateValue = genThreadprivateOp(*common);
756         converter.bindSymbol(*common, commonThreadprivateValue);
757         commonSyms.insert(common);
758       }
759       symThreadprivateValue = lower::genCommonBlockMember(
760           converter, currentLocation, *sym, commonThreadprivateValue);
761     } else {
762       symThreadprivateValue = genThreadprivateOp(*sym);
763     }
764 
765     fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*sym);
766     fir::ExtendedValue symThreadprivateExv =
767         getExtendedValue(sexv, symThreadprivateValue);
768     converter.bindSymbol(*sym, symThreadprivateExv);
769   }
770 }
771 
772 static mlir::Operation *
773 createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter,
774                               mlir::Location loc, mlir::Value indexVal,
775                               const semantics::Symbol *sym) {
776   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
777   mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint();
778   firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
779 
780   mlir::Type tempTy = converter.genType(*sym);
781 
782   assert(converter.isPresentShallowLookup(*sym) &&
783          "Expected symbol to be in symbol table.");
784 
785   firOpBuilder.restoreInsertionPoint(insPt);
786   mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal);
787   mlir::Operation *storeOp = firOpBuilder.create<fir::StoreOp>(
788       loc, cvtVal, converter.getSymbolAddress(*sym));
789   return storeOp;
790 }
791 
792 // This helper function implements the functionality of "promoting" non-CPTR
793 // arguments of use_device_ptr to use_device_addr arguments (automagic
794 // conversion of use_device_ptr -> use_device_addr in these cases). The way we
795 // do so currently is through the shuffling of operands from the
796 // devicePtrOperands to deviceAddrOperands, as well as the types, locations and
797 // symbols.
798 //
799 // This effectively implements some deprecated OpenMP functionality that some
800 // legacy applications unfortunately depend on (deprecated in specification
801 // version 5.2):
802 //
803 // "If a list item in a use_device_ptr clause is not of type C_PTR, the behavior
804 //  is as if the list item appeared in a use_device_addr clause. Support for
805 //  such list items in a use_device_ptr clause is deprecated."
806 static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(
807     llvm::SmallVectorImpl<mlir::Value> &useDeviceAddrVars,
808     llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceAddrSyms,
809     llvm::SmallVectorImpl<mlir::Value> &useDevicePtrVars,
810     llvm::SmallVectorImpl<const semantics::Symbol *> &useDevicePtrSyms) {
811   // Iterate over our use_device_ptr list and shift all non-cptr arguments into
812   // use_device_addr.
813   auto *varIt = useDevicePtrVars.begin();
814   auto *symIt = useDevicePtrSyms.begin();
815   while (varIt != useDevicePtrVars.end()) {
816     if (fir::isa_builtin_cptr_type(fir::unwrapRefType(varIt->getType()))) {
817       ++varIt;
818       ++symIt;
819       continue;
820     }
821 
822     useDeviceAddrVars.push_back(*varIt);
823     useDeviceAddrSyms.push_back(*symIt);
824 
825     varIt = useDevicePtrVars.erase(varIt);
826     symIt = useDevicePtrSyms.erase(symIt);
827   }
828 }
829 
830 /// Extract the list of function and variable symbols affected by the given
831 /// 'declare target' directive and return the intended device type for them.
832 static void getDeclareTargetInfo(
833     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
834     lower::pft::Evaluation &eval,
835     const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct,
836     mlir::omp::DeclareTargetOperands &clauseOps,
837     llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause) {
838   const auto &spec =
839       std::get<parser::OmpDeclareTargetSpecifier>(declareTargetConstruct.t);
840   if (const auto *objectList{parser::Unwrap<parser::OmpObjectList>(spec.u)}) {
841     ObjectList objects{makeObjects(*objectList, semaCtx)};
842     // Case: declare target(func, var1, var2)
843     gatherFuncAndVarSyms(objects, mlir::omp::DeclareTargetCaptureClause::to,
844                          symbolAndClause);
845   } else if (const auto *clauseList{
846                  parser::Unwrap<parser::OmpClauseList>(spec.u)}) {
847     List<Clause> clauses = makeClauses(*clauseList, semaCtx);
848     if (clauses.empty()) {
849       Fortran::lower::pft::FunctionLikeUnit *owningProc =
850           eval.getOwningProcedure();
851       if (owningProc && (!owningProc->isMainProgram() ||
852                          owningProc->getMainProgramSymbol())) {
853         // Case: declare target, implicit capture of function
854         symbolAndClause.emplace_back(mlir::omp::DeclareTargetCaptureClause::to,
855                                      owningProc->getSubprogramSymbol());
856       }
857     }
858 
859     ClauseProcessor cp(converter, semaCtx, clauses);
860     cp.processDeviceType(clauseOps);
861     cp.processEnter(symbolAndClause);
862     cp.processLink(symbolAndClause);
863     cp.processTo(symbolAndClause);
864 
865     cp.processTODO<clause::Indirect>(converter.getCurrentLocation(),
866                                      llvm::omp::Directive::OMPD_declare_target);
867   }
868 }
869 
870 static void collectDeferredDeclareTargets(
871     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
872     lower::pft::Evaluation &eval,
873     const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct,
874     llvm::SmallVectorImpl<lower::OMPDeferredDeclareTargetInfo>
875         &deferredDeclareTarget) {
876   mlir::omp::DeclareTargetOperands clauseOps;
877   llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
878   getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
879                        clauseOps, symbolAndClause);
880   // Return the device type only if at least one of the targets for the
881   // directive is a function or subroutine
882   mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
883 
884   for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
885     mlir::Operation *op = mod.lookupSymbol(
886         converter.mangleName(std::get<const semantics::Symbol &>(symClause)));
887 
888     if (!op) {
889       deferredDeclareTarget.push_back({std::get<0>(symClause),
890                                        clauseOps.deviceType,
891                                        std::get<1>(symClause)});
892     }
893   }
894 }
895 
896 static std::optional<mlir::omp::DeclareTargetDeviceType>
897 getDeclareTargetFunctionDevice(
898     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
899     lower::pft::Evaluation &eval,
900     const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) {
901   mlir::omp::DeclareTargetOperands clauseOps;
902   llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
903   getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
904                        clauseOps, symbolAndClause);
905 
906   // Return the device type only if at least one of the targets for the
907   // directive is a function or subroutine
908   mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
909   for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
910     mlir::Operation *op = mod.lookupSymbol(
911         converter.mangleName(std::get<const semantics::Symbol &>(symClause)));
912 
913     if (mlir::isa_and_nonnull<mlir::func::FuncOp>(op))
914       return clauseOps.deviceType;
915   }
916 
917   return std::nullopt;
918 }
919 
920 /// Set up the entry block of the given `omp.loop_nest` operation, adding a
921 /// block argument for each loop induction variable and allocating and
922 /// initializing a private value to hold each of them.
923 ///
924 /// This function can also bind the symbols of any variables that should match
925 /// block arguments on parent loop wrapper operations attached to the same
926 /// loop. This allows the introduction of any necessary `hlfir.declare`
927 /// operations inside of the entry block of the `omp.loop_nest` operation and
928 /// not directly under any of the wrappers, which would invalidate them.
929 ///
930 /// \param [in]          op - the loop nest operation.
931 /// \param [in]   converter - PFT to MLIR conversion interface.
932 /// \param [in]         loc - location.
933 /// \param [in]        args - symbols of induction variables.
934 /// \param [in] wrapperArgs - list of parent loop wrappers and their associated
935 ///                           entry block arguments.
936 static void genLoopVars(
937     mlir::Operation *op, lower::AbstractConverter &converter,
938     mlir::Location &loc, llvm::ArrayRef<const semantics::Symbol *> args,
939     llvm::ArrayRef<
940         std::pair<mlir::omp::BlockArgOpenMPOpInterface, const EntryBlockArgs &>>
941         wrapperArgs = {}) {
942   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
943   auto &region = op->getRegion(0);
944 
945   std::size_t loopVarTypeSize = 0;
946   for (const semantics::Symbol *arg : args)
947     loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size());
948   mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
949   llvm::SmallVector<mlir::Type> tiv(args.size(), loopVarType);
950   llvm::SmallVector<mlir::Location> locs(args.size(), loc);
951   firOpBuilder.createBlock(&region, {}, tiv, locs);
952 
953   // Update nested wrapper operands if parent wrappers have mapped these values
954   // to block arguments.
955   //
956   // Binding these values earlier would take care of this, but we cannot rely on
957   // that approach because binding in between the creation of a wrapper and the
958   // next one would result in 'hlfir.declare' operations being introduced inside
959   // of a wrapper, which is illegal.
960   mlir::IRMapping mapper;
961   for (auto [argGeneratingOp, blockArgs] : wrapperArgs) {
962     for (mlir::OpOperand &operand : argGeneratingOp->getOpOperands())
963       operand.set(mapper.lookupOrDefault(operand.get()));
964 
965     for (const auto [arg, var] : llvm::zip_equal(
966              argGeneratingOp->getRegion(0).getArguments(), blockArgs.getVars()))
967       mapper.map(var, arg);
968   }
969 
970   // Bind the entry block arguments of parent wrappers to the corresponding
971   // symbols.
972   for (auto [argGeneratingOp, blockArgs] : wrapperArgs)
973     bindEntryBlockArgs(converter, argGeneratingOp, blockArgs);
974 
975   // The argument is not currently in memory, so make a temporary for the
976   // argument, and store it there, then bind that location to the argument.
977   mlir::Operation *storeOp = nullptr;
978   for (auto [argIndex, argSymbol] : llvm::enumerate(args)) {
979     mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex));
980     storeOp =
981         createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol);
982   }
983   firOpBuilder.setInsertionPointAfter(storeOp);
984 }
985 
986 static void
987 markDeclareTarget(mlir::Operation *op, lower::AbstractConverter &converter,
988                   mlir::omp::DeclareTargetCaptureClause captureClause,
989                   mlir::omp::DeclareTargetDeviceType deviceType) {
990   // TODO: Add support for program local variables with declare target applied
991   auto declareTargetOp = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(op);
992   if (!declareTargetOp)
993     fir::emitFatalError(
994         converter.getCurrentLocation(),
995         "Attempt to apply declare target on unsupported operation");
996 
997   // The function or global already has a declare target applied to it, very
998   // likely through implicit capture (usage in another declare target
999   // function/subroutine). It should be marked as any if it has been assigned
1000   // both host and nohost, else we skip, as there is no change
1001   if (declareTargetOp.isDeclareTarget()) {
1002     if (declareTargetOp.getDeclareTargetDeviceType() != deviceType)
1003       declareTargetOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::any,
1004                                        captureClause);
1005     return;
1006   }
1007 
1008   declareTargetOp.setDeclareTarget(deviceType, captureClause);
1009 }
1010 
1011 //===----------------------------------------------------------------------===//
1012 // Op body generation helper structures and functions
1013 //===----------------------------------------------------------------------===//
1014 
1015 struct OpWithBodyGenInfo {
1016   /// A type for a code-gen callback function. This takes as argument the op for
1017   /// which the code is being generated and returns the arguments of the op's
1018   /// region.
1019   using GenOMPRegionEntryCBFn =
1020       std::function<llvm::SmallVector<const semantics::Symbol *>(
1021           mlir::Operation *)>;
1022 
1023   OpWithBodyGenInfo(lower::AbstractConverter &converter,
1024                     lower::SymMap &symTable,
1025                     semantics::SemanticsContext &semaCtx, mlir::Location loc,
1026                     lower::pft::Evaluation &eval, llvm::omp::Directive dir)
1027       : converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc),
1028         eval(eval), dir(dir) {}
1029 
1030   OpWithBodyGenInfo &setClauses(const List<Clause> *value) {
1031     clauses = value;
1032     return *this;
1033   }
1034 
1035   OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) {
1036     dsp = value;
1037     return *this;
1038   }
1039 
1040   OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) {
1041     genRegionEntryCB = value;
1042     return *this;
1043   }
1044 
1045   OpWithBodyGenInfo &setGenSkeletonOnly(bool value) {
1046     genSkeletonOnly = value;
1047     return *this;
1048   }
1049 
1050   /// [inout] converter to use for the clauses.
1051   lower::AbstractConverter &converter;
1052   /// [in] Symbol table
1053   lower::SymMap &symTable;
1054   /// [in] Semantics context
1055   semantics::SemanticsContext &semaCtx;
1056   /// [in] location in source code.
1057   mlir::Location loc;
1058   /// [in] current PFT node/evaluation.
1059   lower::pft::Evaluation &eval;
1060   /// [in] leaf directive for which to generate the op body.
1061   llvm::omp::Directive dir;
1062   /// [in] list of clauses to process.
1063   const List<Clause> *clauses = nullptr;
1064   /// [in] if provided, processes the construct's data-sharing attributes.
1065   DataSharingProcessor *dsp = nullptr;
1066   /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block
1067   /// is created in the region.
1068   GenOMPRegionEntryCBFn genRegionEntryCB = nullptr;
1069   /// [in] if set to `true`, skip generating nested evaluations and dispatching
1070   /// any further leaf constructs.
1071   bool genSkeletonOnly = false;
1072 };
1073 
1074 /// Create the body (block) for an OpenMP Operation.
1075 ///
1076 /// \param [in]   op  - the operation the body belongs to.
1077 /// \param [in] info  - options controlling code-gen for the construction.
1078 /// \param [in] queue - work queue with nested constructs.
1079 /// \param [in] item  - item in the queue to generate body for.
1080 static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
1081                            const ConstructQueue &queue,
1082                            ConstructQueue::const_iterator item) {
1083   fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder();
1084 
1085   auto insertMarker = [](fir::FirOpBuilder &builder) {
1086     mlir::Value undef = builder.create<fir::UndefOp>(builder.getUnknownLoc(),
1087                                                      builder.getIndexType());
1088     return undef.getDefiningOp();
1089   };
1090 
1091   // If an argument for the region is provided then create the block with that
1092   // argument. Also update the symbol's address with the mlir argument value.
1093   // e.g. For loops the argument is the induction variable. And all further
1094   // uses of the induction variable should use this mlir value.
1095   auto regionArgs = [&]() -> llvm::SmallVector<const semantics::Symbol *> {
1096     if (info.genRegionEntryCB != nullptr) {
1097       return info.genRegionEntryCB(&op);
1098     }
1099 
1100     firOpBuilder.createBlock(&op.getRegion(0));
1101     return {};
1102   }();
1103   // Mark the earliest insertion point.
1104   mlir::Operation *marker = insertMarker(firOpBuilder);
1105 
1106   // If it is an unstructured region, create empty blocks for all evaluations.
1107   if (lower::omp::isLastItemInQueue(item, queue) &&
1108       info.eval.lowerAsUnstructured()) {
1109     lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, mlir::omp::YieldOp>(
1110         firOpBuilder, info.eval.getNestedEvaluations());
1111   }
1112 
1113   // Start with privatization, so that the lowering of the nested
1114   // code will use the right symbols.
1115   bool isLoop = llvm::omp::getDirectiveAssociation(info.dir) ==
1116                 llvm::omp::Association::Loop;
1117   bool privatize = info.clauses;
1118 
1119   firOpBuilder.setInsertionPoint(marker);
1120   std::optional<DataSharingProcessor> tempDsp;
1121   if (privatize && !info.dsp) {
1122     tempDsp.emplace(info.converter, info.semaCtx, *info.clauses, info.eval,
1123                     Fortran::lower::omp::isLastItemInQueue(item, queue),
1124                     /*useDelayedPrivatization=*/false, info.symTable);
1125     tempDsp->processStep1();
1126   }
1127 
1128   if (info.dir == llvm::omp::Directive::OMPD_parallel) {
1129     threadPrivatizeVars(info.converter, info.eval);
1130     if (info.clauses) {
1131       firOpBuilder.setInsertionPoint(marker);
1132       ClauseProcessor(info.converter, info.semaCtx, *info.clauses)
1133           .processCopyin();
1134     }
1135   }
1136 
1137   if (!info.genSkeletonOnly) {
1138     if (ConstructQueue::const_iterator next = std::next(item);
1139         next != queue.end()) {
1140       genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval,
1141                      info.loc, queue, next);
1142     } else {
1143       // genFIR(Evaluation&) tries to patch up unterminated blocks, causing
1144       // a lot of complications for our approach if the terminator generation
1145       // is delayed past this point. Insert a temporary terminator here, then
1146       // delete it.
1147       firOpBuilder.setInsertionPointToEnd(&op.getRegion(0).back());
1148       auto *temp = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc);
1149       firOpBuilder.setInsertionPointAfter(marker);
1150       genNestedEvaluations(info.converter, info.eval);
1151       temp->erase();
1152     }
1153   }
1154 
1155   // Get or create a unique exiting block from the given region, or
1156   // return nullptr if there is no exiting block.
1157   auto getUniqueExit = [&](mlir::Region &region) -> mlir::Block * {
1158     // Find the blocks where the OMP terminator should go. In simple cases
1159     // it is the single block in the operation's region. When the region
1160     // is more complicated, especially with unstructured control flow, there
1161     // may be multiple blocks, and some of them may have non-OMP terminators
1162     // resulting from lowering of the code contained within the operation.
1163     // All the remaining blocks are potential exit points from the op's region.
1164     //
1165     // Explicit control flow cannot exit any OpenMP region (other than via
1166     // STOP), and that is enforced by semantic checks prior to lowering. STOP
1167     // statements are lowered to a function call.
1168 
1169     // Collect unterminated blocks.
1170     llvm::SmallVector<mlir::Block *> exits;
1171     for (mlir::Block &b : region) {
1172       if (b.empty() || !b.back().hasTrait<mlir::OpTrait::IsTerminator>())
1173         exits.push_back(&b);
1174     }
1175 
1176     if (exits.empty())
1177       return nullptr;
1178     // If there already is a unique exiting block, do not create another one.
1179     // Additionally, some ops (e.g. omp.sections) require only 1 block in
1180     // its region.
1181     if (exits.size() == 1)
1182       return exits[0];
1183     mlir::Block *exit = firOpBuilder.createBlock(&region);
1184     for (mlir::Block *b : exits) {
1185       firOpBuilder.setInsertionPointToEnd(b);
1186       firOpBuilder.create<mlir::cf::BranchOp>(info.loc, exit);
1187     }
1188     return exit;
1189   };
1190 
1191   if (auto *exitBlock = getUniqueExit(op.getRegion(0))) {
1192     firOpBuilder.setInsertionPointToEnd(exitBlock);
1193     auto *term = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc);
1194     // Only insert lastprivate code when there actually is an exit block.
1195     // Such a block may not exist if the nested code produced an infinite
1196     // loop (this may not make sense in production code, but a user could
1197     // write that and we should handle it).
1198     firOpBuilder.setInsertionPoint(term);
1199     if (privatize) {
1200       // DataSharingProcessor::processStep2() may create operations before/after
1201       // the one passed as argument. We need to treat loop wrappers and their
1202       // nested loop as a unit, so we need to pass the top level wrapper (if
1203       // present). Otherwise, these operations will be inserted within a
1204       // wrapper region.
1205       mlir::Operation *privatizationTopLevelOp = &op;
1206       if (auto loopNest = llvm::dyn_cast<mlir::omp::LoopNestOp>(op)) {
1207         llvm::SmallVector<mlir::omp::LoopWrapperInterface> wrappers;
1208         loopNest.gatherWrappers(wrappers);
1209         if (!wrappers.empty())
1210           privatizationTopLevelOp = &*wrappers.back();
1211       }
1212 
1213       if (!info.dsp) {
1214         assert(tempDsp.has_value());
1215         tempDsp->processStep2(privatizationTopLevelOp, isLoop);
1216       } else {
1217         if (isLoop && regionArgs.size() > 0) {
1218           for (const auto &regionArg : regionArgs) {
1219             info.dsp->pushLoopIV(info.converter.getSymbolAddress(*regionArg));
1220           }
1221         }
1222         info.dsp->processStep2(privatizationTopLevelOp, isLoop);
1223       }
1224     }
1225   }
1226 
1227   firOpBuilder.setInsertionPointAfter(marker);
1228   marker->erase();
1229 }
1230 
1231 static void genBodyOfTargetDataOp(
1232     lower::AbstractConverter &converter, lower::SymMap &symTable,
1233     semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
1234     mlir::omp::TargetDataOp &dataOp, const EntryBlockArgs &args,
1235     const mlir::Location &currentLocation, const ConstructQueue &queue,
1236     ConstructQueue::const_iterator item) {
1237   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
1238 
1239   genEntryBlock(firOpBuilder, args, dataOp.getRegion());
1240   bindEntryBlockArgs(converter, dataOp, args);
1241 
1242   // Insert dummy instruction to remember the insertion position. The
1243   // marker will be deleted by clean up passes since there are no uses.
1244   // Remembering the position for further insertion is important since
1245   // there are hlfir.declares inserted above while setting block arguments
1246   // and new code from the body should be inserted after that.
1247   mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
1248       dataOp.getLoc(), firOpBuilder.getIndexType());
1249 
1250   // Create blocks for unstructured regions. This has to be done since
1251   // blocks are initially allocated with the function as the parent region.
1252   if (eval.lowerAsUnstructured()) {
1253     lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, mlir::omp::YieldOp>(
1254         firOpBuilder, eval.getNestedEvaluations());
1255   }
1256 
1257   firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation);
1258 
1259   // Set the insertion point after the marker.
1260   firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
1261 
1262   if (ConstructQueue::const_iterator next = std::next(item);
1263       next != queue.end()) {
1264     genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
1265                    next);
1266   } else {
1267     genNestedEvaluations(converter, eval);
1268   }
1269 }
1270 
1271 // This generates intermediate common block member accesses within a region
1272 // and then rebinds the members symbol to the intermediate accessors we have
1273 // generated so that subsequent code generation will utilise these instead.
1274 //
1275 // When the scope changes, the bindings to the intermediate accessors should
1276 // be dropped in place of the original symbol bindings.
1277 //
1278 // This is for utilisation with TargetOp.
1279 static void genIntermediateCommonBlockAccessors(
1280     Fortran::lower::AbstractConverter &converter,
1281     const mlir::Location &currentLocation,
1282     llvm::ArrayRef<mlir::BlockArgument> mapBlockArgs,
1283     llvm::ArrayRef<const Fortran::semantics::Symbol *> mapSyms) {
1284   // Iterate over the symbol list, which will be shorter than the list of
1285   // arguments if new entry block arguments were introduced to implicitly map
1286   // outside values used by the bounds cloned into the target region. In that
1287   // case, the additional block arguments do not need processing here.
1288   for (auto [mapSym, mapArg] : llvm::zip_first(mapSyms, mapBlockArgs)) {
1289     auto *details = mapSym->detailsIf<Fortran::semantics::CommonBlockDetails>();
1290     if (!details)
1291       continue;
1292 
1293     for (auto obj : details->objects()) {
1294       auto targetCBMemberBind = Fortran::lower::genCommonBlockMember(
1295           converter, currentLocation, *obj, mapArg);
1296       fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*obj);
1297       fir::ExtendedValue targetCBExv =
1298           getExtendedValue(sexv, targetCBMemberBind);
1299       converter.bindSymbol(*obj, targetCBExv);
1300     }
1301   }
1302 }
1303 
1304 // This functions creates a block for the body of the targetOp's region. It adds
1305 // all the symbols present in mapSymbols as block arguments to this block.
1306 static void genBodyOfTargetOp(
1307     lower::AbstractConverter &converter, lower::SymMap &symTable,
1308     semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
1309     mlir::omp::TargetOp &targetOp, const EntryBlockArgs &args,
1310     const mlir::Location &currentLocation, const ConstructQueue &queue,
1311     ConstructQueue::const_iterator item, DataSharingProcessor &dsp) {
1312   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
1313   auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp);
1314 
1315   mlir::Region &region = targetOp.getRegion();
1316   mlir::Block *entryBlock = genEntryBlock(firOpBuilder, args, region);
1317   bindEntryBlockArgs(converter, targetOp, args);
1318   if (!hostEvalInfo.empty())
1319     hostEvalInfo.back().bindOperands(argIface.getHostEvalBlockArgs());
1320 
1321   // Check if cloning the bounds introduced any dependency on the outer region.
1322   // If so, then either clone them as well if they are MemoryEffectFree, or else
1323   // copy them to a new temporary and add them to the map and block_argument
1324   // lists and replace their uses with the new temporary.
1325   llvm::SetVector<mlir::Value> valuesDefinedAbove;
1326   mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
1327   while (!valuesDefinedAbove.empty()) {
1328     for (mlir::Value val : valuesDefinedAbove) {
1329       mlir::Operation *valOp = val.getDefiningOp();
1330       assert(valOp != nullptr);
1331 
1332       // NOTE: We skip BoxDimsOp's as the lesser of two evils is to map the
1333       // indices separately, as the alternative is to eventually map the Box,
1334       // which comes with a fairly large overhead comparatively. We could be
1335       // more robust about this and check using a BackwardsSlice to see if we
1336       // run the risk of mapping a box.
1337       if (mlir::isMemoryEffectFree(valOp) &&
1338           !mlir::isa<fir::BoxDimsOp>(valOp)) {
1339         mlir::Operation *clonedOp = valOp->clone();
1340         entryBlock->push_front(clonedOp);
1341 
1342         auto replace = [entryBlock](mlir::OpOperand &use) {
1343           return use.getOwner()->getBlock() == entryBlock;
1344         };
1345 
1346         valOp->getResults().replaceUsesWithIf(clonedOp->getResults(), replace);
1347         valOp->replaceUsesWithIf(clonedOp, replace);
1348       } else {
1349         auto savedIP = firOpBuilder.getInsertionPoint();
1350         firOpBuilder.setInsertionPointAfter(valOp);
1351         auto copyVal =
1352             firOpBuilder.createTemporary(val.getLoc(), val.getType());
1353         firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);
1354 
1355         fir::factory::AddrAndBoundsInfo info =
1356             fir::factory::getDataOperandBaseAddr(
1357                 firOpBuilder, val, /*isOptional=*/false, val.getLoc());
1358         llvm::SmallVector<mlir::Value> bounds =
1359             fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
1360                                                mlir::omp::MapBoundsType>(
1361                 firOpBuilder, info,
1362                 hlfir::translateToExtendedValue(val.getLoc(), firOpBuilder,
1363                                                 hlfir::Entity{val})
1364                     .first,
1365                 /*dataExvIsAssumedSize=*/false, val.getLoc());
1366 
1367         std::stringstream name;
1368         firOpBuilder.setInsertionPoint(targetOp);
1369 
1370         llvm::omp::OpenMPOffloadMappingFlags mapFlag =
1371             llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
1372         mlir::omp::VariableCaptureKind captureKind =
1373             mlir::omp::VariableCaptureKind::ByRef;
1374 
1375         mlir::Type eleType = copyVal.getType();
1376         if (auto refType =
1377                 mlir::dyn_cast<fir::ReferenceType>(copyVal.getType()))
1378           eleType = refType.getElementType();
1379 
1380         if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
1381           captureKind = mlir::omp::VariableCaptureKind::ByCopy;
1382         } else if (!fir::isa_builtin_cptr_type(eleType)) {
1383           mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
1384         }
1385 
1386         mlir::Value mapOp = createMapInfoOp(
1387             firOpBuilder, copyVal.getLoc(), copyVal,
1388             /*varPtrPtr=*/mlir::Value{}, name.str(), bounds,
1389             /*members=*/llvm::SmallVector<mlir::Value>{},
1390             /*membersIndex=*/mlir::ArrayAttr{},
1391             static_cast<
1392                 std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
1393                 mapFlag),
1394             captureKind, copyVal.getType());
1395 
1396         // Get the index of the first non-map argument before modifying mapVars,
1397         // then append an element to mapVars and an associated entry block
1398         // argument at that index.
1399         unsigned insertIndex =
1400             argIface.getMapBlockArgsStart() + argIface.numMapBlockArgs();
1401         targetOp.getMapVarsMutable().append(mapOp);
1402         mlir::Value clonedValArg = region.insertArgument(
1403             insertIndex, copyVal.getType(), copyVal.getLoc());
1404 
1405         firOpBuilder.setInsertionPointToStart(entryBlock);
1406         auto loadOp = firOpBuilder.create<fir::LoadOp>(clonedValArg.getLoc(),
1407                                                        clonedValArg);
1408         val.replaceUsesWithIf(loadOp->getResult(0),
1409                               [entryBlock](mlir::OpOperand &use) {
1410                                 return use.getOwner()->getBlock() == entryBlock;
1411                               });
1412         firOpBuilder.setInsertionPoint(entryBlock, savedIP);
1413       }
1414     }
1415     valuesDefinedAbove.clear();
1416     mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove);
1417   }
1418 
  // Insert dummy instruction to remember the insertion position. The
  // marker will be deleted since there are no uses.
  // In the HLFIR flow there are hlfir.declares inserted above while
  // setting block arguments.
1423   mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>(
1424       targetOp.getLoc(), firOpBuilder.getIndexType());
1425 
1426   // Create blocks for unstructured regions. This has to be done since
1427   // blocks are initially allocated with the function as the parent region.
1428   if (lower::omp::isLastItemInQueue(item, queue) &&
1429       eval.lowerAsUnstructured()) {
1430     lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, mlir::omp::YieldOp>(
1431         firOpBuilder, eval.getNestedEvaluations());
1432   }
1433 
1434   firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation);
1435 
1436   // Create the insertion point after the marker.
1437   firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp());
1438 
  // If we map a common block using its symbol, e.g.
  // map(tofrom: /common_block/), and access its members within the target
  // region, there is a large chance we will end up with uses external to the
  // region accessing the common block. We resolve these by generating new
  // common block member accesses within the region, binding them to the
  // member symbol for the scope of the region, so that subsequent code
  // generation within the region will utilise the new member accesses we
  // have created.
1446   genIntermediateCommonBlockAccessors(
1447       converter, currentLocation, argIface.getMapBlockArgs(), args.map.syms);
1448 
1449   if (ConstructQueue::const_iterator next = std::next(item);
1450       next != queue.end()) {
1451     genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
1452                    next);
1453   } else {
1454     genNestedEvaluations(converter, eval);
1455   }
1456 
1457   dsp.processStep2(targetOp, /*isLoop=*/false);
1458 }
1459 
1460 template <typename OpTy, typename... Args>
1461 static OpTy genOpWithBody(const OpWithBodyGenInfo &info,
1462                           const ConstructQueue &queue,
1463                           ConstructQueue::const_iterator item, Args &&...args) {
1464   auto op = info.converter.getFirOpBuilder().create<OpTy>(
1465       info.loc, std::forward<Args>(args)...);
1466   createBodyOfOp(*op, info, queue, item);
1467   return op;
1468 }
1469 
1470 template <typename OpTy, typename ClauseOpsTy>
1471 static OpTy genWrapperOp(lower::AbstractConverter &converter,
1472                          mlir::Location loc, const ClauseOpsTy &clauseOps,
1473                          const EntryBlockArgs &args) {
1474   static_assert(
1475       OpTy::template hasTrait<mlir::omp::LoopWrapperInterface::Trait>(),
1476       "expected a loop wrapper");
1477   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
1478 
1479   // Create wrapper.
1480   auto op = firOpBuilder.create<OpTy>(loc, clauseOps);
1481 
1482   // Create entry block with arguments.
1483   genEntryBlock(firOpBuilder, args, op.getRegion());
1484 
1485   return op;
1486 }
1487 
1488 //===----------------------------------------------------------------------===//
1489 // Code generation functions for clauses
1490 //===----------------------------------------------------------------------===//
1491 
1492 static void genCriticalDeclareClauses(
1493     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
1494     const List<Clause> &clauses, mlir::Location loc,
1495     mlir::omp::CriticalDeclareOperands &clauseOps, llvm::StringRef name) {
1496   ClauseProcessor cp(converter, semaCtx, clauses);
1497   cp.processHint(clauseOps);
1498   clauseOps.symName =
1499       mlir::StringAttr::get(converter.getFirOpBuilder().getContext(), name);
1500 }
1501 
1502 static void genDistributeClauses(lower::AbstractConverter &converter,
1503                                  semantics::SemanticsContext &semaCtx,
1504                                  lower::StatementContext &stmtCtx,
1505                                  const List<Clause> &clauses,
1506                                  mlir::Location loc,
1507                                  mlir::omp::DistributeOperands &clauseOps) {
1508   ClauseProcessor cp(converter, semaCtx, clauses);
1509   cp.processAllocate(clauseOps);
1510   cp.processDistSchedule(stmtCtx, clauseOps);
1511   cp.processOrder(clauseOps);
1512 }
1513 
1514 static void genFlushClauses(lower::AbstractConverter &converter,
1515                             semantics::SemanticsContext &semaCtx,
1516                             const ObjectList &objects,
1517                             const List<Clause> &clauses, mlir::Location loc,
1518                             llvm::SmallVectorImpl<mlir::Value> &operandRange) {
1519   if (!objects.empty())
1520     genObjectList(objects, converter, operandRange);
1521 
1522   ClauseProcessor cp(converter, semaCtx, clauses);
1523   cp.processTODO<clause::AcqRel, clause::Acquire, clause::Release,
1524                  clause::SeqCst>(loc, llvm::omp::OMPD_flush);
1525 }
1526 
1527 static void
1528 genLoopNestClauses(lower::AbstractConverter &converter,
1529                    semantics::SemanticsContext &semaCtx,
1530                    lower::pft::Evaluation &eval, const List<Clause> &clauses,
1531                    mlir::Location loc, mlir::omp::LoopNestOperands &clauseOps,
1532                    llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
1533   ClauseProcessor cp(converter, semaCtx, clauses);
1534 
1535   if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps, iv))
1536     cp.processCollapse(loc, eval, clauseOps, iv);
1537 
1538   clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr();
1539 }
1540 
1541 static void genLoopClauses(
1542     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
1543     const List<Clause> &clauses, mlir::Location loc,
1544     mlir::omp::LoopOperands &clauseOps,
1545     llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
1546   ClauseProcessor cp(converter, semaCtx, clauses);
1547   cp.processBind(clauseOps);
1548   cp.processOrder(clauseOps);
1549   cp.processReduction(loc, clauseOps, reductionSyms);
1550   cp.processTODO<clause::Lastprivate>(loc, llvm::omp::Directive::OMPD_loop);
1551 }
1552 
1553 static void genMaskedClauses(lower::AbstractConverter &converter,
1554                              semantics::SemanticsContext &semaCtx,
1555                              lower::StatementContext &stmtCtx,
1556                              const List<Clause> &clauses, mlir::Location loc,
1557                              mlir::omp::MaskedOperands &clauseOps) {
1558   ClauseProcessor cp(converter, semaCtx, clauses);
1559   cp.processFilter(stmtCtx, clauseOps);
1560 }
1561 
1562 static void
1563 genOrderedRegionClauses(lower::AbstractConverter &converter,
1564                         semantics::SemanticsContext &semaCtx,
1565                         const List<Clause> &clauses, mlir::Location loc,
1566                         mlir::omp::OrderedRegionOperands &clauseOps) {
1567   ClauseProcessor cp(converter, semaCtx, clauses);
1568   cp.processTODO<clause::Simd>(loc, llvm::omp::Directive::OMPD_ordered);
1569 }
1570 
1571 static void genParallelClauses(
1572     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
1573     lower::StatementContext &stmtCtx, const List<Clause> &clauses,
1574     mlir::Location loc, mlir::omp::ParallelOperands &clauseOps,
1575     llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
1576   ClauseProcessor cp(converter, semaCtx, clauses);
1577   cp.processAllocate(clauseOps);
1578   cp.processIf(llvm::omp::Directive::OMPD_parallel, clauseOps);
1579 
1580   if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps))
1581     cp.processNumThreads(stmtCtx, clauseOps);
1582 
1583   cp.processProcBind(clauseOps);
1584   cp.processReduction(loc, clauseOps, reductionSyms);
1585 }
1586 
1587 static void genSectionsClauses(
1588     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
1589     const List<Clause> &clauses, mlir::Location loc,
1590     mlir::omp::SectionsOperands &clauseOps,
1591     llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
1592   ClauseProcessor cp(converter, semaCtx, clauses);
1593   cp.processAllocate(clauseOps);
1594   cp.processNowait(clauseOps);
1595   cp.processReduction(loc, clauseOps, reductionSyms);
1596   // TODO Support delayed privatization.
1597 }
1598 
1599 static void genSimdClauses(
1600     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
1601     const List<Clause> &clauses, mlir::Location loc,
1602     mlir::omp::SimdOperands &clauseOps,
1603     llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
1604   ClauseProcessor cp(converter, semaCtx, clauses);
1605   cp.processAligned(clauseOps);
1606   cp.processIf(llvm::omp::Directive::OMPD_simd, clauseOps);
1607   cp.processNontemporal(clauseOps);
1608   cp.processOrder(clauseOps);
1609   cp.processReduction(loc, clauseOps, reductionSyms);
1610   cp.processSafelen(clauseOps);
1611   cp.processSimdlen(clauseOps);
1612 
1613   cp.processTODO<clause::Linear>(loc, llvm::omp::Directive::OMPD_simd);
1614 }
1615 
1616 static void genSingleClauses(lower::AbstractConverter &converter,
1617                              semantics::SemanticsContext &semaCtx,
1618                              const List<Clause> &clauses, mlir::Location loc,
1619                              mlir::omp::SingleOperands &clauseOps) {
1620   ClauseProcessor cp(converter, semaCtx, clauses);
1621   cp.processAllocate(clauseOps);
1622   cp.processCopyprivate(loc, clauseOps);
1623   cp.processNowait(clauseOps);
1624   // TODO Support delayed privatization.
1625 }
1626 
1627 static void genTargetClauses(
1628     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
1629     lower::StatementContext &stmtCtx, lower::pft::Evaluation &eval,
1630     const List<Clause> &clauses, mlir::Location loc,
1631     mlir::omp::TargetOperands &clauseOps,
1632     llvm::SmallVectorImpl<const semantics::Symbol *> &hasDeviceAddrSyms,
1633     llvm::SmallVectorImpl<const semantics::Symbol *> &isDevicePtrSyms,
1634     llvm::SmallVectorImpl<const semantics::Symbol *> &mapSyms) {
1635   ClauseProcessor cp(converter, semaCtx, clauses);
1636   cp.processBare(clauseOps);
1637   cp.processDepend(clauseOps);
1638   cp.processDevice(stmtCtx, clauseOps);
1639   cp.processHasDeviceAddr(clauseOps, hasDeviceAddrSyms);
1640   if (!hostEvalInfo.empty()) {
1641     // Only process host_eval if compiling for the host device.
1642     processHostEvalClauses(converter, semaCtx, stmtCtx, eval, loc);
1643     hostEvalInfo.back().collectValues(clauseOps.hostEvalVars);
1644   }
1645   cp.processIf(llvm::omp::Directive::OMPD_target, clauseOps);
1646   cp.processIsDevicePtr(clauseOps, isDevicePtrSyms);
1647   cp.processMap(loc, stmtCtx, clauseOps, &mapSyms);
1648   cp.processNowait(clauseOps);
1649   cp.processThreadLimit(stmtCtx, clauseOps);
1650 
1651   cp.processTODO<clause::Allocate, clause::Defaultmap, clause::Firstprivate,
1652                  clause::InReduction, clause::UsesAllocators>(
1653       loc, llvm::omp::Directive::OMPD_target);
1654 
1655   // `target private(..)` is only supported in delayed privatization mode.
1656   if (!enableDelayedPrivatizationStaging)
1657     cp.processTODO<clause::Private>(loc, llvm::omp::Directive::OMPD_target);
1658 }
1659 
1660 static void genTargetDataClauses(
1661     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
1662     lower::StatementContext &stmtCtx, const List<Clause> &clauses,
1663     mlir::Location loc, mlir::omp::TargetDataOperands &clauseOps,
1664     llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceAddrSyms,
1665     llvm::SmallVectorImpl<const semantics::Symbol *> &useDevicePtrSyms) {
1666   ClauseProcessor cp(converter, semaCtx, clauses);
1667   cp.processDevice(stmtCtx, clauseOps);
1668   cp.processIf(llvm::omp::Directive::OMPD_target_data, clauseOps);
1669   cp.processMap(loc, stmtCtx, clauseOps);
1670   cp.processUseDeviceAddr(stmtCtx, clauseOps, useDeviceAddrSyms);
1671   cp.processUseDevicePtr(stmtCtx, clauseOps, useDevicePtrSyms);
1672 
1673   // This function implements the deprecated functionality of use_device_ptr
1674   // that allows users to provide non-CPTR arguments to it with the caveat
1675   // that the compiler will treat them as use_device_addr. A lot of legacy
1676   // code may still depend on this functionality, so we should support it
1677   // in some manner. We do so currently by simply shifting non-cptr operands
1678   // from the use_device_ptr lists into the use_device_addr lists.
1679   // TODO: Perhaps create a user provideable compiler option that will
1680   // re-introduce a hard-error rather than a warning in these cases.
1681   promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(
1682       clauseOps.useDeviceAddrVars, useDeviceAddrSyms,
1683       clauseOps.useDevicePtrVars, useDevicePtrSyms);
1684 }
1685 
1686 static void genTargetEnterExitUpdateDataClauses(
1687     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
1688     lower::StatementContext &stmtCtx, const List<Clause> &clauses,
1689     mlir::Location loc, llvm::omp::Directive directive,
1690     mlir::omp::TargetEnterExitUpdateDataOperands &clauseOps) {
1691   ClauseProcessor cp(converter, semaCtx, clauses);
1692   cp.processDepend(clauseOps);
1693   cp.processDevice(stmtCtx, clauseOps);
1694   cp.processIf(directive, clauseOps);
1695 
1696   if (directive == llvm::omp::Directive::OMPD_target_update)
1697     cp.processMotionClauses(stmtCtx, clauseOps);
1698   else
1699     cp.processMap(loc, stmtCtx, clauseOps);
1700 
1701   cp.processNowait(clauseOps);
1702 }
1703 
1704 static void genTaskClauses(lower::AbstractConverter &converter,
1705                            semantics::SemanticsContext &semaCtx,
1706                            lower::StatementContext &stmtCtx,
1707                            const List<Clause> &clauses, mlir::Location loc,
1708                            mlir::omp::TaskOperands &clauseOps) {
1709   ClauseProcessor cp(converter, semaCtx, clauses);
1710   cp.processAllocate(clauseOps);
1711   cp.processDepend(clauseOps);
1712   cp.processFinal(stmtCtx, clauseOps);
1713   cp.processIf(llvm::omp::Directive::OMPD_task, clauseOps);
1714   cp.processMergeable(clauseOps);
1715   cp.processPriority(stmtCtx, clauseOps);
1716   cp.processUntied(clauseOps);
1717   cp.processDetach(clauseOps);
1718   // TODO Support delayed privatization.
1719 
1720   cp.processTODO<clause::Affinity, clause::InReduction>(
1721       loc, llvm::omp::Directive::OMPD_task);
1722 }
1723 
1724 static void genTaskgroupClauses(lower::AbstractConverter &converter,
1725                                 semantics::SemanticsContext &semaCtx,
1726                                 const List<Clause> &clauses, mlir::Location loc,
1727                                 mlir::omp::TaskgroupOperands &clauseOps) {
1728   ClauseProcessor cp(converter, semaCtx, clauses);
1729   cp.processAllocate(clauseOps);
1730   cp.processTODO<clause::TaskReduction>(loc,
1731                                         llvm::omp::Directive::OMPD_taskgroup);
1732 }
1733 
1734 static void genTaskwaitClauses(lower::AbstractConverter &converter,
1735                                semantics::SemanticsContext &semaCtx,
1736                                const List<Clause> &clauses, mlir::Location loc,
1737                                mlir::omp::TaskwaitOperands &clauseOps) {
1738   ClauseProcessor cp(converter, semaCtx, clauses);
1739   cp.processTODO<clause::Depend, clause::Nowait>(
1740       loc, llvm::omp::Directive::OMPD_taskwait);
1741 }
1742 
1743 static void genWorkshareClauses(lower::AbstractConverter &converter,
1744                                 semantics::SemanticsContext &semaCtx,
1745                                 lower::StatementContext &stmtCtx,
1746                                 const List<Clause> &clauses, mlir::Location loc,
1747                                 mlir::omp::WorkshareOperands &clauseOps) {
1748   ClauseProcessor cp(converter, semaCtx, clauses);
1749   cp.processNowait(clauseOps);
1750 }
1751 
1752 static void genTeamsClauses(
1753     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
1754     lower::StatementContext &stmtCtx, const List<Clause> &clauses,
1755     mlir::Location loc, mlir::omp::TeamsOperands &clauseOps,
1756     llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
1757   ClauseProcessor cp(converter, semaCtx, clauses);
1758   cp.processAllocate(clauseOps);
1759   cp.processIf(llvm::omp::Directive::OMPD_teams, clauseOps);
1760 
1761   if (hostEvalInfo.empty() || !hostEvalInfo.back().apply(clauseOps)) {
1762     cp.processNumTeams(stmtCtx, clauseOps);
1763     cp.processThreadLimit(stmtCtx, clauseOps);
1764   }
1765 
1766   cp.processReduction(loc, clauseOps, reductionSyms);
1767   // TODO Support delayed privatization.
1768 }
1769 
1770 static void genWsloopClauses(
1771     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
1772     lower::StatementContext &stmtCtx, const List<Clause> &clauses,
1773     mlir::Location loc, mlir::omp::WsloopOperands &clauseOps,
1774     llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSyms) {
1775   ClauseProcessor cp(converter, semaCtx, clauses);
1776   cp.processNowait(clauseOps);
1777   cp.processOrder(clauseOps);
1778   cp.processOrdered(clauseOps);
1779   cp.processReduction(loc, clauseOps, reductionSyms);
1780   cp.processSchedule(stmtCtx, clauseOps);
1781 
1782   cp.processTODO<clause::Allocate, clause::Linear>(
1783       loc, llvm::omp::Directive::OMPD_do);
1784 }
1785 
1786 //===----------------------------------------------------------------------===//
1787 // Code generation functions for leaf constructs
1788 //===----------------------------------------------------------------------===//
1789 
1790 static mlir::omp::BarrierOp
1791 genBarrierOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
1792              semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
1793              mlir::Location loc, const ConstructQueue &queue,
1794              ConstructQueue::const_iterator item) {
1795   return converter.getFirOpBuilder().create<mlir::omp::BarrierOp>(loc);
1796 }
1797 
1798 static mlir::omp::CriticalOp
1799 genCriticalOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
1800               semantics::SemanticsContext &semaCtx,
1801               lower::pft::Evaluation &eval, mlir::Location loc,
1802               const ConstructQueue &queue, ConstructQueue::const_iterator item,
1803               const std::optional<parser::Name> &name) {
1804   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
1805   mlir::FlatSymbolRefAttr nameAttr;
1806 
1807   if (name) {
1808     std::string nameStr = name->ToString();
1809     mlir::ModuleOp mod = firOpBuilder.getModule();
1810     auto global = mod.lookupSymbol<mlir::omp::CriticalDeclareOp>(nameStr);
1811     if (!global) {
1812       mlir::omp::CriticalDeclareOperands clauseOps;
1813       genCriticalDeclareClauses(converter, semaCtx, item->clauses, loc,
1814                                 clauseOps, nameStr);
1815 
1816       mlir::OpBuilder modBuilder(mod.getBodyRegion());
1817       global = modBuilder.create<mlir::omp::CriticalDeclareOp>(loc, clauseOps);
1818     }
1819     nameAttr = mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(),
1820                                             global.getSymName());
1821   }
1822 
1823   return genOpWithBody<mlir::omp::CriticalOp>(
1824       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
1825                         llvm::omp::Directive::OMPD_critical),
1826       queue, item, nameAttr);
1827 }
1828 
1829 static mlir::omp::FlushOp
1830 genFlushOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
1831            semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
1832            mlir::Location loc, const ObjectList &objects,
1833            const ConstructQueue &queue, ConstructQueue::const_iterator item) {
1834   llvm::SmallVector<mlir::Value> operandRange;
1835   genFlushClauses(converter, semaCtx, objects, item->clauses, loc,
1836                   operandRange);
1837 
1838   return converter.getFirOpBuilder().create<mlir::omp::FlushOp>(
1839       converter.getCurrentLocation(), operandRange);
1840 }
1841 
1842 static mlir::omp::LoopNestOp genLoopNestOp(
1843     lower::AbstractConverter &converter, lower::SymMap &symTable,
1844     semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
1845     mlir::Location loc, const ConstructQueue &queue,
1846     ConstructQueue::const_iterator item, mlir::omp::LoopNestOperands &clauseOps,
1847     llvm::ArrayRef<const semantics::Symbol *> iv,
1848     llvm::ArrayRef<
1849         std::pair<mlir::omp::BlockArgOpenMPOpInterface, const EntryBlockArgs &>>
1850         wrapperArgs,
1851     llvm::omp::Directive directive, DataSharingProcessor &dsp) {
1852   auto ivCallback = [&](mlir::Operation *op) {
1853     genLoopVars(op, converter, loc, iv, wrapperArgs);
1854     return llvm::SmallVector<const semantics::Symbol *>(iv);
1855   };
1856 
1857   auto *nestedEval =
1858       getCollapsedLoopEval(eval, getCollapseValue(item->clauses));
1859 
1860   return genOpWithBody<mlir::omp::LoopNestOp>(
1861       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval,
1862                         directive)
1863           .setClauses(&item->clauses)
1864           .setDataSharingProcessor(&dsp)
1865           .setGenRegionEntryCb(ivCallback),
1866       queue, item, clauseOps);
1867 }
1868 
1869 static void genLoopOp(lower::AbstractConverter &converter,
1870                       lower::SymMap &symTable,
1871                       semantics::SemanticsContext &semaCtx,
1872                       lower::pft::Evaluation &eval, mlir::Location loc,
1873                       const ConstructQueue &queue,
1874                       ConstructQueue::const_iterator item) {
1875   mlir::omp::LoopOperands loopClauseOps;
1876   llvm::SmallVector<const semantics::Symbol *> loopReductionSyms;
1877   genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps,
1878                  loopReductionSyms);
1879 
1880   DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
1881                            /*shouldCollectPreDeterminedSymbols=*/true,
1882                            /*useDelayedPrivatization=*/true, symTable);
1883   dsp.processStep1(&loopClauseOps);
1884 
1885   mlir::omp::LoopNestOperands loopNestClauseOps;
1886   llvm::SmallVector<const semantics::Symbol *> iv;
1887   genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
1888                      loopNestClauseOps, iv);
1889 
1890   EntryBlockArgs loopArgs;
1891   loopArgs.priv.syms = dsp.getDelayedPrivSymbols();
1892   loopArgs.priv.vars = loopClauseOps.privateVars;
1893   loopArgs.reduction.syms = loopReductionSyms;
1894   loopArgs.reduction.vars = loopClauseOps.reductionVars;
1895 
1896   auto loopOp =
1897       genWrapperOp<mlir::omp::LoopOp>(converter, loc, loopClauseOps, loopArgs);
1898   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
1899                 loopNestClauseOps, iv, {{loopOp, loopArgs}},
1900                 llvm::omp::Directive::OMPD_loop, dsp);
1901 }
1902 
1903 static mlir::omp::MaskedOp
1904 genMaskedOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
1905             semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
1906             mlir::Location loc, const ConstructQueue &queue,
1907             ConstructQueue::const_iterator item) {
1908   lower::StatementContext stmtCtx;
1909   mlir::omp::MaskedOperands clauseOps;
1910   genMaskedClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps);
1911 
1912   return genOpWithBody<mlir::omp::MaskedOp>(
1913       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
1914                         llvm::omp::Directive::OMPD_masked),
1915       queue, item, clauseOps);
1916 }
1917 
1918 static mlir::omp::MasterOp
1919 genMasterOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
1920             semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
1921             mlir::Location loc, const ConstructQueue &queue,
1922             ConstructQueue::const_iterator item) {
1923   return genOpWithBody<mlir::omp::MasterOp>(
1924       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
1925                         llvm::omp::Directive::OMPD_master),
1926       queue, item);
1927 }
1928 
1929 static mlir::omp::OrderedOp
1930 genOrderedOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
1931              semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
1932              mlir::Location loc, const ConstructQueue &queue,
1933              ConstructQueue::const_iterator item) {
1934   TODO(loc, "OMPD_ordered");
1935   return nullptr;
1936 }
1937 
1938 static mlir::omp::OrderedRegionOp
1939 genOrderedRegionOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
1940                    semantics::SemanticsContext &semaCtx,
1941                    lower::pft::Evaluation &eval, mlir::Location loc,
1942                    const ConstructQueue &queue,
1943                    ConstructQueue::const_iterator item) {
1944   mlir::omp::OrderedRegionOperands clauseOps;
1945   genOrderedRegionClauses(converter, semaCtx, item->clauses, loc, clauseOps);
1946 
1947   return genOpWithBody<mlir::omp::OrderedRegionOp>(
1948       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
1949                         llvm::omp::Directive::OMPD_ordered),
1950       queue, item, clauseOps);
1951 }
1952 
1953 static mlir::omp::ParallelOp
1954 genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
1955               semantics::SemanticsContext &semaCtx,
1956               lower::pft::Evaluation &eval, mlir::Location loc,
1957               const ConstructQueue &queue, ConstructQueue::const_iterator item,
1958               mlir::omp::ParallelOperands &clauseOps,
1959               const EntryBlockArgs &args, DataSharingProcessor *dsp,
1960               bool isComposite = false) {
1961   auto genRegionEntryCB = [&](mlir::Operation *op) {
1962     genEntryBlock(converter.getFirOpBuilder(), args, op->getRegion(0));
1963     bindEntryBlockArgs(
1964         converter, llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op), args);
1965     return llvm::to_vector(args.getSyms());
1966   };
1967 
1968   assert((!enableDelayedPrivatization || dsp) &&
1969          "expected valid DataSharingProcessor");
1970   OpWithBodyGenInfo genInfo =
1971       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
1972                         llvm::omp::Directive::OMPD_parallel)
1973           .setClauses(&item->clauses)
1974           .setGenRegionEntryCb(genRegionEntryCB)
1975           .setGenSkeletonOnly(isComposite)
1976           .setDataSharingProcessor(dsp);
1977 
1978   auto parallelOp =
1979       genOpWithBody<mlir::omp::ParallelOp>(genInfo, queue, item, clauseOps);
1980   parallelOp.setComposite(isComposite);
1981   return parallelOp;
1982 }
1983 
1984 /// This breaks the normal prototype of the gen*Op functions: adding the
1985 /// sectionBlocks argument so that the enclosed section constructs can be
1986 /// lowered here with correct reduction symbol remapping.
1987 static mlir::omp::SectionsOp
1988 genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
1989               semantics::SemanticsContext &semaCtx,
1990               lower::pft::Evaluation &eval, mlir::Location loc,
1991               const ConstructQueue &queue, ConstructQueue::const_iterator item,
1992               const parser::OmpSectionBlocks &sectionBlocks) {
1993   mlir::omp::SectionsOperands clauseOps;
1994   llvm::SmallVector<const semantics::Symbol *> reductionSyms;
1995   genSectionsClauses(converter, semaCtx, item->clauses, loc, clauseOps,
1996                      reductionSyms);
1997 
1998   auto &builder = converter.getFirOpBuilder();
1999 
2000   // Insert privatizations before SECTIONS
2001   lower::SymMapScope scope(symTable);
2002   DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
2003                            lower::omp::isLastItemInQueue(item, queue),
2004                            /*useDelayedPrivatization=*/false, symTable);
2005   dsp.processStep1();
2006 
2007   List<Clause> nonDsaClauses;
2008   List<const clause::Lastprivate *> lastprivates;
2009 
2010   for (const Clause &clause : item->clauses) {
2011     if (clause.id == llvm::omp::Clause::OMPC_lastprivate) {
2012       auto &lastp = std::get<clause::Lastprivate>(clause.u);
2013       lastprivateModifierNotSupported(lastp, converter.getCurrentLocation());
2014       lastprivates.push_back(&lastp);
2015     } else {
2016       switch (clause.id) {
2017       case llvm::omp::Clause::OMPC_firstprivate:
2018       case llvm::omp::Clause::OMPC_private:
2019       case llvm::omp::Clause::OMPC_shared:
2020         break;
2021       default:
2022         nonDsaClauses.push_back(clause);
2023       }
2024     }
2025   }
2026 
2027   // SECTIONS construct.
2028   auto sectionsOp = builder.create<mlir::omp::SectionsOp>(loc, clauseOps);
2029 
2030   // Create entry block with reduction variables as arguments.
2031   EntryBlockArgs args;
2032   // TODO: Add private syms and vars.
2033   args.reduction.syms = reductionSyms;
2034   args.reduction.vars = clauseOps.reductionVars;
2035 
2036   genEntryBlock(builder, args, sectionsOp.getRegion());
2037   mlir::Operation *terminator =
2038       lower::genOpenMPTerminator(builder, sectionsOp, loc);
2039 
2040   auto genRegionEntryCB = [&](mlir::Operation *op) {
2041     genEntryBlock(builder, args, op->getRegion(0));
2042     bindEntryBlockArgs(
2043         converter, llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op), args);
2044     return llvm::to_vector(args.getSyms());
2045   };
2046 
2047   // Generate nested SECTION constructs.
2048   // This is done here rather than in genOMP([...], OpenMPSectionConstruct )
2049   // because we need to run genReductionVars on each omp.section so that the
2050   // reduction variable gets mapped to the private version
2051   for (auto [construct, nestedEval] :
2052        llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) {
2053     const auto *sectionConstruct =
2054         std::get_if<parser::OpenMPSectionConstruct>(&construct.u);
2055     if (!sectionConstruct) {
2056       assert(false &&
2057              "unexpected construct nested inside of SECTIONS construct");
2058       continue;
2059     }
2060 
2061     ConstructQueue sectionQueue{buildConstructQueue(
2062         converter.getFirOpBuilder().getModule(), semaCtx, nestedEval,
2063         sectionConstruct->source, llvm::omp::Directive::OMPD_section, {})};
2064 
2065     builder.setInsertionPoint(terminator);
2066     genOpWithBody<mlir::omp::SectionOp>(
2067         OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval,
2068                           llvm::omp::Directive::OMPD_section)
2069             .setClauses(&sectionQueue.begin()->clauses)
2070             .setGenRegionEntryCb(genRegionEntryCB),
2071         sectionQueue, sectionQueue.begin());
2072   }
2073 
2074   if (!lastprivates.empty()) {
2075     mlir::Region &sectionsBody = sectionsOp.getRegion();
2076     assert(sectionsBody.hasOneBlock());
2077     mlir::Block &body = sectionsBody.front();
2078 
2079     auto lastSectionOp = llvm::find_if(
2080         llvm::reverse(body.getOperations()), [](const mlir::Operation &op) {
2081           return llvm::isa<mlir::omp::SectionOp>(op);
2082         });
2083     assert(lastSectionOp != body.rend());
2084 
2085     for (const clause::Lastprivate *lastp : lastprivates) {
2086       builder.setInsertionPoint(
2087           lastSectionOp->getRegion(0).back().getTerminator());
2088       mlir::OpBuilder::InsertPoint insp = builder.saveInsertionPoint();
2089       const auto &objList = std::get<ObjectList>(lastp->t);
2090       for (const Object &object : objList) {
2091         semantics::Symbol *sym = object.sym();
2092         converter.copyHostAssociateVar(*sym, &insp, /*hostIsSource=*/false);
2093       }
2094     }
2095   }
2096 
2097   // Perform DataSharingProcessor's step2 out of SECTIONS
2098   builder.setInsertionPointAfter(sectionsOp.getOperation());
2099   dsp.processStep2(sectionsOp, false);
2100   // Emit implicit barrier to synchronize threads and avoid data
2101   // races on post-update of lastprivate variables when `nowait`
2102   // clause is present.
2103   if (clauseOps.nowait && !lastprivates.empty())
2104     builder.create<mlir::omp::BarrierOp>(loc);
2105 
2106   return sectionsOp;
2107 }
2108 
2109 static void genScopeOp(lower::AbstractConverter &converter,
2110                        lower::SymMap &symTable,
2111                        semantics::SemanticsContext &semaCtx,
2112                        lower::pft::Evaluation &eval, mlir::Location loc,
2113                        const ConstructQueue &queue,
2114                        ConstructQueue::const_iterator item) {
2115   TODO(loc, "Scope construct");
2116 }
2117 
2118 static mlir::omp::SingleOp
2119 genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
2120             semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
2121             mlir::Location loc, const ConstructQueue &queue,
2122             ConstructQueue::const_iterator item) {
2123   mlir::omp::SingleOperands clauseOps;
2124   genSingleClauses(converter, semaCtx, item->clauses, loc, clauseOps);
2125 
2126   return genOpWithBody<mlir::omp::SingleOp>(
2127       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
2128                         llvm::omp::Directive::OMPD_single)
2129           .setClauses(&item->clauses),
2130       queue, item, clauseOps);
2131 }
2132 
2133 static mlir::omp::TargetOp
2134 genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
2135             semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
2136             mlir::Location loc, const ConstructQueue &queue,
2137             ConstructQueue::const_iterator item) {
2138   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
2139   lower::StatementContext stmtCtx;
2140   bool isTargetDevice =
2141       llvm::cast<mlir::omp::OffloadModuleInterface>(*converter.getModuleOp())
2142           .getIsTargetDevice();
2143 
2144   // Introduce a new host_eval information structure for this target region.
2145   if (!isTargetDevice)
2146     hostEvalInfo.emplace_back();
2147 
2148   mlir::omp::TargetOperands clauseOps;
2149   llvm::SmallVector<const semantics::Symbol *> mapSyms, isDevicePtrSyms,
2150       hasDeviceAddrSyms;
2151   genTargetClauses(converter, semaCtx, stmtCtx, eval, item->clauses, loc,
2152                    clauseOps, hasDeviceAddrSyms, isDevicePtrSyms, mapSyms);
2153 
2154   DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
2155                            /*shouldCollectPreDeterminedSymbols=*/
2156                            lower::omp::isLastItemInQueue(item, queue),
2157                            /*useDelayedPrivatization=*/true, symTable);
2158   dsp.processStep1(&clauseOps);
2159 
2160   // 5.8.1 Implicit Data-Mapping Attribute Rules
2161   // The following code follows the implicit data-mapping rules to map all the
2162   // symbols used inside the region that do not have explicit data-environment
2163   // attribute clauses (neither data-sharing; e.g. `private`, nor `map`
2164   // clauses).
2165   auto captureImplicitMap = [&](const semantics::Symbol &sym) {
2166     if (dsp.getAllSymbolsToPrivatize().contains(&sym))
2167       return;
2168 
2169     // Structure component symbols don't have bindings, and can only be
2170     // explicitly mapped individually. If a member is captured implicitly
2171     // we map the entirety of the derived type when we find its symbol.
2172     if (sym.owner().IsDerivedType())
2173       return;
2174 
2175     // if the symbol is part of an already mapped common block, do not make a
2176     // map for it.
2177     if (const Fortran::semantics::Symbol *common =
2178             Fortran::semantics::FindCommonBlockContaining(sym.GetUltimate()))
2179       if (llvm::is_contained(mapSyms, common))
2180         return;
2181 
2182     // If we come across a symbol without a symbol address, we
2183     // return as we cannot process it, this is intended as a
2184     // catch all early exit for symbols that do not have a
2185     // corresponding extended value. Such as subroutines,
2186     // interfaces and named blocks.
2187     if (!converter.getSymbolAddress(sym))
2188       return;
2189 
2190     if (!llvm::is_contained(mapSyms, &sym)) {
2191       if (const auto *details =
2192               sym.template detailsIf<semantics::HostAssocDetails>())
2193         converter.copySymbolBinding(details->symbol(), sym);
2194       std::stringstream name;
2195       fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym);
2196       name << sym.name().ToString();
2197 
2198       fir::factory::AddrAndBoundsInfo info =
2199           Fortran::lower::getDataOperandBaseAddr(
2200               converter, firOpBuilder, sym, converter.getCurrentLocation());
2201       llvm::SmallVector<mlir::Value> bounds =
2202           fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
2203                                              mlir::omp::MapBoundsType>(
2204               firOpBuilder, info, dataExv,
2205               semantics::IsAssumedSizeArray(sym.GetUltimate()),
2206               converter.getCurrentLocation());
2207 
2208       llvm::omp::OpenMPOffloadMappingFlags mapFlag =
2209           llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
2210       mlir::omp::VariableCaptureKind captureKind =
2211           mlir::omp::VariableCaptureKind::ByRef;
2212 
2213       mlir::Value baseOp = info.rawInput;
2214       mlir::Type eleType = baseOp.getType();
2215       if (auto refType = mlir::dyn_cast<fir::ReferenceType>(baseOp.getType()))
2216         eleType = refType.getElementType();
2217 
2218       // If a variable is specified in declare target link and if device
2219       // type is not specified as `nohost`, it needs to be mapped tofrom
2220       mlir::ModuleOp mod = firOpBuilder.getModule();
2221       mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym));
2222       auto declareTargetOp =
2223           llvm::dyn_cast_if_present<mlir::omp::DeclareTargetInterface>(op);
2224       if (declareTargetOp && declareTargetOp.isDeclareTarget()) {
2225         if (declareTargetOp.getDeclareTargetCaptureClause() ==
2226                 mlir::omp::DeclareTargetCaptureClause::link &&
2227             declareTargetOp.getDeclareTargetDeviceType() !=
2228                 mlir::omp::DeclareTargetDeviceType::nohost) {
2229           mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
2230           mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
2231         }
2232       } else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
2233         captureKind = mlir::omp::VariableCaptureKind::ByCopy;
2234       } else if (!fir::isa_builtin_cptr_type(eleType)) {
2235         mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
2236         mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
2237       }
2238       auto location =
2239           mlir::NameLoc::get(mlir::StringAttr::get(firOpBuilder.getContext(),
2240                                                    sym.name().ToString()),
2241                              baseOp.getLoc());
2242       mlir::Value mapOp = createMapInfoOp(
2243           firOpBuilder, location, baseOp, /*varPtrPtr=*/mlir::Value{},
2244           name.str(), bounds, /*members=*/{},
2245           /*membersIndex=*/mlir::ArrayAttr{},
2246           static_cast<
2247               std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
2248               mapFlag),
2249           captureKind, baseOp.getType());
2250 
2251       clauseOps.mapVars.push_back(mapOp);
2252       mapSyms.push_back(&sym);
2253     }
2254   };
2255   lower::pft::visitAllSymbols(eval, captureImplicitMap);
2256 
2257   auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);
2258 
2259   llvm::SmallVector<mlir::Value> mapBaseValues;
2260   extractMappedBaseValues(clauseOps.mapVars, mapBaseValues);
2261 
2262   EntryBlockArgs args;
2263   args.hostEvalVars = clauseOps.hostEvalVars;
2264   // TODO: Add in_reduction syms and vars.
2265   args.map.syms = mapSyms;
2266   args.map.vars = mapBaseValues;
2267   args.priv.syms = dsp.getDelayedPrivSymbols();
2268   args.priv.vars = clauseOps.privateVars;
2269 
2270   genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, args, loc,
2271                     queue, item, dsp);
2272 
2273   // Remove the host_eval information structure created for this target region.
2274   if (!isTargetDevice)
2275     hostEvalInfo.pop_back();
2276   return targetOp;
2277 }
2278 
2279 static mlir::omp::TargetDataOp
2280 genTargetDataOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
2281                 semantics::SemanticsContext &semaCtx,
2282                 lower::pft::Evaluation &eval, mlir::Location loc,
2283                 const ConstructQueue &queue,
2284                 ConstructQueue::const_iterator item) {
2285   lower::StatementContext stmtCtx;
2286   mlir::omp::TargetDataOperands clauseOps;
2287   llvm::SmallVector<const semantics::Symbol *> useDeviceAddrSyms,
2288       useDevicePtrSyms;
2289   genTargetDataClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
2290                        clauseOps, useDeviceAddrSyms, useDevicePtrSyms);
2291 
2292   auto targetDataOp =
2293       converter.getFirOpBuilder().create<mlir::omp::TargetDataOp>(loc,
2294                                                                   clauseOps);
2295 
2296   llvm::SmallVector<mlir::Value> useDeviceAddrBaseValues,
2297       useDevicePtrBaseValues;
2298   extractMappedBaseValues(clauseOps.useDeviceAddrVars, useDeviceAddrBaseValues);
2299   extractMappedBaseValues(clauseOps.useDevicePtrVars, useDevicePtrBaseValues);
2300 
2301   EntryBlockArgs args;
2302   args.useDeviceAddr.syms = useDeviceAddrSyms;
2303   args.useDeviceAddr.vars = useDeviceAddrBaseValues;
2304   args.useDevicePtr.syms = useDevicePtrSyms;
2305   args.useDevicePtr.vars = useDevicePtrBaseValues;
2306 
2307   genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, targetDataOp, args,
2308                         loc, queue, item);
2309   return targetDataOp;
2310 }
2311 
2312 template <typename OpTy>
2313 static OpTy genTargetEnterExitUpdateDataOp(
2314     lower::AbstractConverter &converter, lower::SymMap &symTable,
2315     semantics::SemanticsContext &semaCtx, mlir::Location loc,
2316     const ConstructQueue &queue, ConstructQueue::const_iterator item) {
2317   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
2318   lower::StatementContext stmtCtx;
2319 
2320   // GCC 9.3.0 emits a (probably) bogus warning about an unused variable.
2321   [[maybe_unused]] llvm::omp::Directive directive;
2322   if constexpr (std::is_same_v<OpTy, mlir::omp::TargetEnterDataOp>) {
2323     directive = llvm::omp::Directive::OMPD_target_enter_data;
2324   } else if constexpr (std::is_same_v<OpTy, mlir::omp::TargetExitDataOp>) {
2325     directive = llvm::omp::Directive::OMPD_target_exit_data;
2326   } else if constexpr (std::is_same_v<OpTy, mlir::omp::TargetUpdateOp>) {
2327     directive = llvm::omp::Directive::OMPD_target_update;
2328   } else {
2329     llvm_unreachable("Unexpected TARGET DATA construct");
2330   }
2331 
2332   mlir::omp::TargetEnterExitUpdateDataOperands clauseOps;
2333   genTargetEnterExitUpdateDataClauses(converter, semaCtx, stmtCtx,
2334                                       item->clauses, loc, directive, clauseOps);
2335 
2336   return firOpBuilder.create<OpTy>(loc, clauseOps);
2337 }
2338 
2339 static mlir::omp::TaskOp
2340 genTaskOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
2341           semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
2342           mlir::Location loc, const ConstructQueue &queue,
2343           ConstructQueue::const_iterator item) {
2344   lower::StatementContext stmtCtx;
2345   mlir::omp::TaskOperands clauseOps;
2346   genTaskClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps);
2347 
2348   if (!enableDelayedPrivatization)
2349     return genOpWithBody<mlir::omp::TaskOp>(
2350         OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
2351                           llvm::omp::Directive::OMPD_task)
2352             .setClauses(&item->clauses),
2353         queue, item, clauseOps);
2354 
2355   DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
2356                            lower::omp::isLastItemInQueue(item, queue),
2357                            /*useDelayedPrivatization=*/true, symTable);
2358   dsp.processStep1(&clauseOps);
2359 
2360   EntryBlockArgs taskArgs;
2361   taskArgs.priv.syms = dsp.getDelayedPrivSymbols();
2362   taskArgs.priv.vars = clauseOps.privateVars;
2363 
2364   auto genRegionEntryCB = [&](mlir::Operation *op) {
2365     genEntryBlock(converter.getFirOpBuilder(), taskArgs, op->getRegion(0));
2366     bindEntryBlockArgs(converter,
2367                        llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op),
2368                        taskArgs);
2369     return llvm::to_vector(taskArgs.priv.syms);
2370   };
2371 
2372   return genOpWithBody<mlir::omp::TaskOp>(
2373       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
2374                         llvm::omp::Directive::OMPD_task)
2375           .setClauses(&item->clauses)
2376           .setDataSharingProcessor(&dsp)
2377           .setGenRegionEntryCb(genRegionEntryCB),
2378       queue, item, clauseOps);
2379 }
2380 
2381 static mlir::omp::TaskgroupOp
2382 genTaskgroupOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
2383                semantics::SemanticsContext &semaCtx,
2384                lower::pft::Evaluation &eval, mlir::Location loc,
2385                const ConstructQueue &queue,
2386                ConstructQueue::const_iterator item) {
2387   mlir::omp::TaskgroupOperands clauseOps;
2388   genTaskgroupClauses(converter, semaCtx, item->clauses, loc, clauseOps);
2389 
2390   return genOpWithBody<mlir::omp::TaskgroupOp>(
2391       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
2392                         llvm::omp::Directive::OMPD_taskgroup)
2393           .setClauses(&item->clauses),
2394       queue, item, clauseOps);
2395 }
2396 
2397 static mlir::omp::TaskwaitOp
2398 genTaskwaitOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
2399               semantics::SemanticsContext &semaCtx,
2400               lower::pft::Evaluation &eval, mlir::Location loc,
2401               const ConstructQueue &queue,
2402               ConstructQueue::const_iterator item) {
2403   mlir::omp::TaskwaitOperands clauseOps;
2404   genTaskwaitClauses(converter, semaCtx, item->clauses, loc, clauseOps);
2405   return converter.getFirOpBuilder().create<mlir::omp::TaskwaitOp>(loc,
2406                                                                    clauseOps);
2407 }
2408 
2409 static mlir::omp::TaskyieldOp
2410 genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
2411                semantics::SemanticsContext &semaCtx,
2412                lower::pft::Evaluation &eval, mlir::Location loc,
2413                const ConstructQueue &queue,
2414                ConstructQueue::const_iterator item) {
2415   return converter.getFirOpBuilder().create<mlir::omp::TaskyieldOp>(loc);
2416 }
2417 
2418 static mlir::omp::WorkshareOp
2419 genWorkshareOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
2420                semantics::SemanticsContext &semaCtx,
2421                lower::pft::Evaluation &eval, mlir::Location loc,
2422                const ConstructQueue &queue,
2423                ConstructQueue::const_iterator item) {
2424   lower::StatementContext stmtCtx;
2425   mlir::omp::WorkshareOperands clauseOps;
2426   genWorkshareClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
2427                       clauseOps);
2428 
2429   return genOpWithBody<mlir::omp::WorkshareOp>(
2430       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
2431                         llvm::omp::Directive::OMPD_workshare)
2432           .setClauses(&item->clauses),
2433       queue, item, clauseOps);
2434 }
2435 
2436 static mlir::omp::TeamsOp
2437 genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
2438            semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
2439            mlir::Location loc, const ConstructQueue &queue,
2440            ConstructQueue::const_iterator item) {
2441   lower::StatementContext stmtCtx;
2442 
2443   mlir::omp::TeamsOperands clauseOps;
2444   llvm::SmallVector<const semantics::Symbol *> reductionSyms;
2445   genTeamsClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps,
2446                   reductionSyms);
2447 
2448   EntryBlockArgs args;
2449   // TODO: Add private syms and vars.
2450   args.reduction.syms = reductionSyms;
2451   args.reduction.vars = clauseOps.reductionVars;
2452 
2453   auto genRegionEntryCB = [&](mlir::Operation *op) {
2454     genEntryBlock(converter.getFirOpBuilder(), args, op->getRegion(0));
2455     bindEntryBlockArgs(
2456         converter, llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op), args);
2457     return llvm::to_vector(args.getSyms());
2458   };
2459 
2460   return genOpWithBody<mlir::omp::TeamsOp>(
2461       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
2462                         llvm::omp::Directive::OMPD_teams)
2463           .setClauses(&item->clauses)
2464           .setGenRegionEntryCb(genRegionEntryCB),
2465       queue, item, clauseOps);
2466 }
2467 
2468 //===----------------------------------------------------------------------===//
2469 // Code generation functions for the standalone version of constructs that can
2470 // also be a leaf of a composite construct
2471 //===----------------------------------------------------------------------===//
2472 
2473 static void genStandaloneDistribute(lower::AbstractConverter &converter,
2474                                     lower::SymMap &symTable,
2475                                     semantics::SemanticsContext &semaCtx,
2476                                     lower::pft::Evaluation &eval,
2477                                     mlir::Location loc,
2478                                     const ConstructQueue &queue,
2479                                     ConstructQueue::const_iterator item) {
2480   lower::StatementContext stmtCtx;
2481 
2482   mlir::omp::DistributeOperands distributeClauseOps;
2483   genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
2484                        distributeClauseOps);
2485 
2486   DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
2487                            /*shouldCollectPreDeterminedSymbols=*/true,
2488                            enableDelayedPrivatizationStaging, symTable);
2489   dsp.processStep1(&distributeClauseOps);
2490 
2491   mlir::omp::LoopNestOperands loopNestClauseOps;
2492   llvm::SmallVector<const semantics::Symbol *> iv;
2493   genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
2494                      loopNestClauseOps, iv);
2495 
2496   EntryBlockArgs distributeArgs;
2497   distributeArgs.priv.syms = dsp.getDelayedPrivSymbols();
2498   distributeArgs.priv.vars = distributeClauseOps.privateVars;
2499   auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
2500       converter, loc, distributeClauseOps, distributeArgs);
2501 
2502   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
2503                 loopNestClauseOps, iv, {{distributeOp, distributeArgs}},
2504                 llvm::omp::Directive::OMPD_distribute, dsp);
2505 }
2506 
2507 static void genStandaloneDo(lower::AbstractConverter &converter,
2508                             lower::SymMap &symTable,
2509                             semantics::SemanticsContext &semaCtx,
2510                             lower::pft::Evaluation &eval, mlir::Location loc,
2511                             const ConstructQueue &queue,
2512                             ConstructQueue::const_iterator item) {
2513   lower::StatementContext stmtCtx;
2514 
2515   mlir::omp::WsloopOperands wsloopClauseOps;
2516   llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
2517   genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
2518                    wsloopClauseOps, wsloopReductionSyms);
2519 
2520   DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
2521                            /*shouldCollectPreDeterminedSymbols=*/true,
2522                            enableDelayedPrivatizationStaging, symTable);
2523   dsp.processStep1(&wsloopClauseOps);
2524 
2525   mlir::omp::LoopNestOperands loopNestClauseOps;
2526   llvm::SmallVector<const semantics::Symbol *> iv;
2527   genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
2528                      loopNestClauseOps, iv);
2529 
2530   EntryBlockArgs wsloopArgs;
2531   wsloopArgs.priv.syms = dsp.getDelayedPrivSymbols();
2532   wsloopArgs.priv.vars = wsloopClauseOps.privateVars;
2533   wsloopArgs.reduction.syms = wsloopReductionSyms;
2534   wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
2535   auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
2536       converter, loc, wsloopClauseOps, wsloopArgs);
2537 
2538   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
2539                 loopNestClauseOps, iv, {{wsloopOp, wsloopArgs}},
2540                 llvm::omp::Directive::OMPD_do, dsp);
2541 }
2542 
2543 static void genStandaloneParallel(lower::AbstractConverter &converter,
2544                                   lower::SymMap &symTable,
2545                                   semantics::SemanticsContext &semaCtx,
2546                                   lower::pft::Evaluation &eval,
2547                                   mlir::Location loc,
2548                                   const ConstructQueue &queue,
2549                                   ConstructQueue::const_iterator item) {
2550   lower::StatementContext stmtCtx;
2551 
2552   mlir::omp::ParallelOperands parallelClauseOps;
2553   llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
2554   genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
2555                      parallelClauseOps, parallelReductionSyms);
2556 
2557   std::optional<DataSharingProcessor> dsp;
2558   if (enableDelayedPrivatization) {
2559     dsp.emplace(converter, semaCtx, item->clauses, eval,
2560                 lower::omp::isLastItemInQueue(item, queue),
2561                 /*useDelayedPrivatization=*/true, symTable);
2562     dsp->processStep1(&parallelClauseOps);
2563   }
2564 
2565   EntryBlockArgs parallelArgs;
2566   if (dsp)
2567     parallelArgs.priv.syms = dsp->getDelayedPrivSymbols();
2568   parallelArgs.priv.vars = parallelClauseOps.privateVars;
2569   parallelArgs.reduction.syms = parallelReductionSyms;
2570   parallelArgs.reduction.vars = parallelClauseOps.reductionVars;
2571   genParallelOp(converter, symTable, semaCtx, eval, loc, queue, item,
2572                 parallelClauseOps, parallelArgs,
2573                 enableDelayedPrivatization ? &dsp.value() : nullptr);
2574 }
2575 
2576 static void genStandaloneSimd(lower::AbstractConverter &converter,
2577                               lower::SymMap &symTable,
2578                               semantics::SemanticsContext &semaCtx,
2579                               lower::pft::Evaluation &eval, mlir::Location loc,
2580                               const ConstructQueue &queue,
2581                               ConstructQueue::const_iterator item) {
2582   mlir::omp::SimdOperands simdClauseOps;
2583   llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
2584   genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps,
2585                  simdReductionSyms);
2586 
2587   DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
2588                            /*shouldCollectPreDeterminedSymbols=*/true,
2589                            enableDelayedPrivatization, symTable);
2590   dsp.processStep1(&simdClauseOps);
2591 
2592   mlir::omp::LoopNestOperands loopNestClauseOps;
2593   llvm::SmallVector<const semantics::Symbol *> iv;
2594   genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
2595                      loopNestClauseOps, iv);
2596 
2597   EntryBlockArgs simdArgs;
2598   simdArgs.priv.syms = dsp.getDelayedPrivSymbols();
2599   simdArgs.priv.vars = simdClauseOps.privateVars;
2600   simdArgs.reduction.syms = simdReductionSyms;
2601   simdArgs.reduction.vars = simdClauseOps.reductionVars;
2602   auto simdOp =
2603       genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
2604 
2605   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
2606                 loopNestClauseOps, iv, {{simdOp, simdArgs}},
2607                 llvm::omp::Directive::OMPD_simd, dsp);
2608 }
2609 
2610 static void genStandaloneTaskloop(lower::AbstractConverter &converter,
2611                                   lower::SymMap &symTable,
2612                                   semantics::SemanticsContext &semaCtx,
2613                                   lower::pft::Evaluation &eval,
2614                                   mlir::Location loc,
2615                                   const ConstructQueue &queue,
2616                                   ConstructQueue::const_iterator item) {
2617   TODO(loc, "Taskloop construct");
2618 }
2619 
2620 //===----------------------------------------------------------------------===//
2621 // Code generation functions for composite constructs
2622 //===----------------------------------------------------------------------===//
2623 
2624 static void genCompositeDistributeParallelDo(
2625     lower::AbstractConverter &converter, lower::SymMap &symTable,
2626     semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
2627     mlir::Location loc, const ConstructQueue &queue,
2628     ConstructQueue::const_iterator item) {
2629   lower::StatementContext stmtCtx;
2630 
2631   assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs");
2632   ConstructQueue::const_iterator distributeItem = item;
2633   ConstructQueue::const_iterator parallelItem = std::next(distributeItem);
2634   ConstructQueue::const_iterator doItem = std::next(parallelItem);
2635 
2636   // Create parent omp.parallel first.
2637   mlir::omp::ParallelOperands parallelClauseOps;
2638   llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
2639   genParallelClauses(converter, semaCtx, stmtCtx, parallelItem->clauses, loc,
2640                      parallelClauseOps, parallelReductionSyms);
2641 
2642   DataSharingProcessor dsp(converter, semaCtx, doItem->clauses, eval,
2643                            /*shouldCollectPreDeterminedSymbols=*/true,
2644                            /*useDelayedPrivatization=*/true, symTable);
2645   dsp.processStep1(&parallelClauseOps);
2646 
2647   EntryBlockArgs parallelArgs;
2648   parallelArgs.priv.syms = dsp.getDelayedPrivSymbols();
2649   parallelArgs.priv.vars = parallelClauseOps.privateVars;
2650   parallelArgs.reduction.syms = parallelReductionSyms;
2651   parallelArgs.reduction.vars = parallelClauseOps.reductionVars;
2652   genParallelOp(converter, symTable, semaCtx, eval, loc, queue, parallelItem,
2653                 parallelClauseOps, parallelArgs, &dsp, /*isComposite=*/true);
2654 
2655   // Clause processing.
2656   mlir::omp::DistributeOperands distributeClauseOps;
2657   genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
2658                        loc, distributeClauseOps);
2659 
2660   mlir::omp::WsloopOperands wsloopClauseOps;
2661   llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
2662   genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
2663                    wsloopClauseOps, wsloopReductionSyms);
2664 
2665   mlir::omp::LoopNestOperands loopNestClauseOps;
2666   llvm::SmallVector<const semantics::Symbol *> iv;
2667   genLoopNestClauses(converter, semaCtx, eval, doItem->clauses, loc,
2668                      loopNestClauseOps, iv);
2669 
2670   // Operation creation.
2671   EntryBlockArgs distributeArgs;
2672   // TODO: Add private syms and vars.
2673   auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
2674       converter, loc, distributeClauseOps, distributeArgs);
2675   distributeOp.setComposite(/*val=*/true);
2676 
2677   EntryBlockArgs wsloopArgs;
2678   // TODO: Add private syms and vars.
2679   wsloopArgs.reduction.syms = wsloopReductionSyms;
2680   wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
2681   auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
2682       converter, loc, wsloopClauseOps, wsloopArgs);
2683   wsloopOp.setComposite(/*val=*/true);
2684 
2685   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, doItem,
2686                 loopNestClauseOps, iv,
2687                 {{distributeOp, distributeArgs}, {wsloopOp, wsloopArgs}},
2688                 llvm::omp::Directive::OMPD_distribute_parallel_do, dsp);
2689 }
2690 
2691 static void genCompositeDistributeParallelDoSimd(
2692     lower::AbstractConverter &converter, lower::SymMap &symTable,
2693     semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
2694     mlir::Location loc, const ConstructQueue &queue,
2695     ConstructQueue::const_iterator item) {
2696   lower::StatementContext stmtCtx;
2697 
2698   assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs");
2699   ConstructQueue::const_iterator distributeItem = item;
2700   ConstructQueue::const_iterator parallelItem = std::next(distributeItem);
2701   ConstructQueue::const_iterator doItem = std::next(parallelItem);
2702   ConstructQueue::const_iterator simdItem = std::next(doItem);
2703 
2704   // Create parent omp.parallel first.
2705   mlir::omp::ParallelOperands parallelClauseOps;
2706   llvm::SmallVector<const semantics::Symbol *> parallelReductionSyms;
2707   genParallelClauses(converter, semaCtx, stmtCtx, parallelItem->clauses, loc,
2708                      parallelClauseOps, parallelReductionSyms);
2709 
2710   DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
2711                            /*shouldCollectPreDeterminedSymbols=*/true,
2712                            /*useDelayedPrivatization=*/true, symTable);
2713   dsp.processStep1(&parallelClauseOps);
2714 
2715   EntryBlockArgs parallelArgs;
2716   parallelArgs.priv.syms = dsp.getDelayedPrivSymbols();
2717   parallelArgs.priv.vars = parallelClauseOps.privateVars;
2718   parallelArgs.reduction.syms = parallelReductionSyms;
2719   parallelArgs.reduction.vars = parallelClauseOps.reductionVars;
2720   genParallelOp(converter, symTable, semaCtx, eval, loc, queue, parallelItem,
2721                 parallelClauseOps, parallelArgs, &dsp, /*isComposite=*/true);
2722 
2723   // Clause processing.
2724   mlir::omp::DistributeOperands distributeClauseOps;
2725   genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
2726                        loc, distributeClauseOps);
2727 
2728   mlir::omp::WsloopOperands wsloopClauseOps;
2729   llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
2730   genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
2731                    wsloopClauseOps, wsloopReductionSyms);
2732 
2733   mlir::omp::SimdOperands simdClauseOps;
2734   llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
2735   genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps,
2736                  simdReductionSyms);
2737 
2738   mlir::omp::LoopNestOperands loopNestClauseOps;
2739   llvm::SmallVector<const semantics::Symbol *> iv;
2740   genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
2741                      loopNestClauseOps, iv);
2742 
2743   // Operation creation.
2744   EntryBlockArgs distributeArgs;
2745   // TODO: Add private syms and vars.
2746   auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
2747       converter, loc, distributeClauseOps, distributeArgs);
2748   distributeOp.setComposite(/*val=*/true);
2749 
2750   EntryBlockArgs wsloopArgs;
2751   // TODO: Add private syms and vars.
2752   wsloopArgs.reduction.syms = wsloopReductionSyms;
2753   wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
2754   auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
2755       converter, loc, wsloopClauseOps, wsloopArgs);
2756   wsloopOp.setComposite(/*val=*/true);
2757 
2758   EntryBlockArgs simdArgs;
2759   // TODO: Add private syms and vars.
2760   simdArgs.reduction.syms = simdReductionSyms;
2761   simdArgs.reduction.vars = simdClauseOps.reductionVars;
2762   auto simdOp =
2763       genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
2764   simdOp.setComposite(/*val=*/true);
2765 
2766   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
2767                 loopNestClauseOps, iv,
2768                 {{distributeOp, distributeArgs},
2769                  {wsloopOp, wsloopArgs},
2770                  {simdOp, simdArgs}},
2771                 llvm::omp::Directive::OMPD_distribute_parallel_do_simd, dsp);
2772 }
2773 
2774 static void genCompositeDistributeSimd(lower::AbstractConverter &converter,
2775                                        lower::SymMap &symTable,
2776                                        semantics::SemanticsContext &semaCtx,
2777                                        lower::pft::Evaluation &eval,
2778                                        mlir::Location loc,
2779                                        const ConstructQueue &queue,
2780                                        ConstructQueue::const_iterator item) {
2781   lower::StatementContext stmtCtx;
2782 
2783   assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
2784   ConstructQueue::const_iterator distributeItem = item;
2785   ConstructQueue::const_iterator simdItem = std::next(distributeItem);
2786 
2787   // Clause processing.
2788   mlir::omp::DistributeOperands distributeClauseOps;
2789   genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses,
2790                        loc, distributeClauseOps);
2791 
2792   mlir::omp::SimdOperands simdClauseOps;
2793   llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
2794   genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps,
2795                  simdReductionSyms);
2796 
2797   // TODO: Support delayed privatization.
2798   DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
2799                            /*shouldCollectPreDeterminedSymbols=*/true,
2800                            /*useDelayedPrivatization=*/false, symTable);
2801   dsp.processStep1();
2802 
2803   // Pass the innermost leaf construct's clauses because that's where COLLAPSE
2804   // is placed by construct decomposition.
2805   mlir::omp::LoopNestOperands loopNestClauseOps;
2806   llvm::SmallVector<const semantics::Symbol *> iv;
2807   genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
2808                      loopNestClauseOps, iv);
2809 
2810   // Operation creation.
2811   EntryBlockArgs distributeArgs;
2812   // TODO: Add private syms and vars.
2813   auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>(
2814       converter, loc, distributeClauseOps, distributeArgs);
2815   distributeOp.setComposite(/*val=*/true);
2816 
2817   EntryBlockArgs simdArgs;
2818   // TODO: Add private syms and vars.
2819   simdArgs.reduction.syms = simdReductionSyms;
2820   simdArgs.reduction.vars = simdClauseOps.reductionVars;
2821   auto simdOp =
2822       genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
2823   simdOp.setComposite(/*val=*/true);
2824 
2825   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
2826                 loopNestClauseOps, iv,
2827                 {{distributeOp, distributeArgs}, {simdOp, simdArgs}},
2828                 llvm::omp::Directive::OMPD_distribute_simd, dsp);
2829 }
2830 
2831 static void genCompositeDoSimd(lower::AbstractConverter &converter,
2832                                lower::SymMap &symTable,
2833                                semantics::SemanticsContext &semaCtx,
2834                                lower::pft::Evaluation &eval, mlir::Location loc,
2835                                const ConstructQueue &queue,
2836                                ConstructQueue::const_iterator item) {
2837   lower::StatementContext stmtCtx;
2838 
2839   assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
2840   ConstructQueue::const_iterator doItem = item;
2841   ConstructQueue::const_iterator simdItem = std::next(doItem);
2842 
2843   // Clause processing.
2844   mlir::omp::WsloopOperands wsloopClauseOps;
2845   llvm::SmallVector<const semantics::Symbol *> wsloopReductionSyms;
2846   genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc,
2847                    wsloopClauseOps, wsloopReductionSyms);
2848 
2849   mlir::omp::SimdOperands simdClauseOps;
2850   llvm::SmallVector<const semantics::Symbol *> simdReductionSyms;
2851   genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps,
2852                  simdReductionSyms);
2853 
2854   // TODO: Support delayed privatization.
2855   DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval,
2856                            /*shouldCollectPreDeterminedSymbols=*/true,
2857                            /*useDelayedPrivatization=*/false, symTable);
2858   dsp.processStep1();
2859 
2860   // Pass the innermost leaf construct's clauses because that's where COLLAPSE
2861   // is placed by construct decomposition.
2862   mlir::omp::LoopNestOperands loopNestClauseOps;
2863   llvm::SmallVector<const semantics::Symbol *> iv;
2864   genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc,
2865                      loopNestClauseOps, iv);
2866 
2867   // Operation creation.
2868   EntryBlockArgs wsloopArgs;
2869   // TODO: Add private syms and vars.
2870   wsloopArgs.reduction.syms = wsloopReductionSyms;
2871   wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;
2872   auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>(
2873       converter, loc, wsloopClauseOps, wsloopArgs);
2874   wsloopOp.setComposite(/*val=*/true);
2875 
2876   EntryBlockArgs simdArgs;
2877   // TODO: Add private syms and vars.
2878   simdArgs.reduction.syms = simdReductionSyms;
2879   simdArgs.reduction.vars = simdClauseOps.reductionVars;
2880   auto simdOp =
2881       genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);
2882   simdOp.setComposite(/*val=*/true);
2883 
2884   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem,
2885                 loopNestClauseOps, iv,
2886                 {{wsloopOp, wsloopArgs}, {simdOp, simdArgs}},
2887                 llvm::omp::Directive::OMPD_do_simd, dsp);
2888 }
2889 
2890 static void genCompositeTaskloopSimd(lower::AbstractConverter &converter,
2891                                      lower::SymMap &symTable,
2892                                      semantics::SemanticsContext &semaCtx,
2893                                      lower::pft::Evaluation &eval,
2894                                      mlir::Location loc,
2895                                      const ConstructQueue &queue,
2896                                      ConstructQueue::const_iterator item) {
2897   assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs");
2898   TODO(loc, "Composite TASKLOOP SIMD");
2899 }
2900 
2901 //===----------------------------------------------------------------------===//
2902 // Dispatch
2903 //===----------------------------------------------------------------------===//
2904 
2905 static bool genOMPCompositeDispatch(lower::AbstractConverter &converter,
2906                                     lower::SymMap &symTable,
2907                                     semantics::SemanticsContext &semaCtx,
2908                                     lower::pft::Evaluation &eval,
2909                                     mlir::Location loc,
2910                                     const ConstructQueue &queue,
2911                                     ConstructQueue::const_iterator item) {
2912   using llvm::omp::Directive;
2913   using lower::omp::matchLeafSequence;
2914 
2915   // TODO: Privatization for composite constructs is currently only done based
2916   // on the clauses for their last leaf construct, which may not always be
2917   // correct. Consider per-leaf privatization of composite constructs once
2918   // delayed privatization is supported by all participating ops.
2919   if (matchLeafSequence(item, queue, Directive::OMPD_distribute_parallel_do))
2920     genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc,
2921                                      queue, item);
2922   else if (matchLeafSequence(item, queue,
2923                              Directive::OMPD_distribute_parallel_do_simd))
2924     genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval,
2925                                          loc, queue, item);
2926   else if (matchLeafSequence(item, queue, Directive::OMPD_distribute_simd))
2927     genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, queue,
2928                                item);
2929   else if (matchLeafSequence(item, queue, Directive::OMPD_do_simd))
2930     genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item);
2931   else if (matchLeafSequence(item, queue, Directive::OMPD_taskloop_simd))
2932     genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, queue,
2933                              item);
2934   else
2935     return false;
2936 
2937   return true;
2938 }
2939 
2940 static void genOMPDispatch(lower::AbstractConverter &converter,
2941                            lower::SymMap &symTable,
2942                            semantics::SemanticsContext &semaCtx,
2943                            lower::pft::Evaluation &eval, mlir::Location loc,
2944                            const ConstructQueue &queue,
2945                            ConstructQueue::const_iterator item) {
2946   assert(item != queue.end());
2947 
2948   bool loopLeaf = llvm::omp::getDirectiveAssociation(item->id) ==
2949                   llvm::omp::Association::Loop;
2950   if (loopLeaf) {
2951     symTable.pushScope();
2952     if (genOMPCompositeDispatch(converter, symTable, semaCtx, eval, loc, queue,
2953                                 item)) {
2954       symTable.popScope();
2955       return;
2956     }
2957   }
2958 
2959   switch (llvm::omp::Directive dir = item->id) {
2960   case llvm::omp::Directive::OMPD_barrier:
2961     genBarrierOp(converter, symTable, semaCtx, eval, loc, queue, item);
2962     break;
2963   case llvm::omp::Directive::OMPD_distribute:
2964     genStandaloneDistribute(converter, symTable, semaCtx, eval, loc, queue,
2965                             item);
2966     break;
2967   case llvm::omp::Directive::OMPD_do:
2968     genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item);
2969     break;
2970   case llvm::omp::Directive::OMPD_loop:
2971     genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item);
2972     break;
2973   case llvm::omp::Directive::OMPD_masked:
2974     genMaskedOp(converter, symTable, semaCtx, eval, loc, queue, item);
2975     break;
2976   case llvm::omp::Directive::OMPD_master:
2977     genMasterOp(converter, symTable, semaCtx, eval, loc, queue, item);
2978     break;
2979   case llvm::omp::Directive::OMPD_ordered:
2980     // Block-associated "ordered" construct.
2981     genOrderedRegionOp(converter, symTable, semaCtx, eval, loc, queue, item);
2982     break;
2983   case llvm::omp::Directive::OMPD_parallel:
2984     genStandaloneParallel(converter, symTable, semaCtx, eval, loc, queue, item);
2985     break;
2986   case llvm::omp::Directive::OMPD_scan:
2987     TODO(loc, "Unhandled directive " + llvm::omp::getOpenMPDirectiveName(dir));
2988     break;
2989   case llvm::omp::Directive::OMPD_section:
2990     llvm_unreachable("genOMPDispatch: OMPD_section");
2991     // Lowered in the enclosing genSectionsOp.
2992     break;
2993   case llvm::omp::Directive::OMPD_sections:
2994     // Called directly from genOMP([...], OpenMPSectionsConstruct) because it
2995     // has a different prototype.
2996     // This code path is still taken when iterating through the construct queue
2997     // in genBodyOfOp
2998     break;
2999   case llvm::omp::Directive::OMPD_simd:
3000     genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item);
3001     break;
3002   case llvm::omp::Directive::OMPD_scope:
3003     genScopeOp(converter, symTable, semaCtx, eval, loc, queue, item);
3004     break;
3005   case llvm::omp::Directive::OMPD_single:
3006     genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
3007     break;
3008   case llvm::omp::Directive::OMPD_target:
3009     genTargetOp(converter, symTable, semaCtx, eval, loc, queue, item);
3010     break;
3011   case llvm::omp::Directive::OMPD_target_data:
3012     genTargetDataOp(converter, symTable, semaCtx, eval, loc, queue, item);
3013     break;
3014   case llvm::omp::Directive::OMPD_target_enter_data:
3015     genTargetEnterExitUpdateDataOp<mlir::omp::TargetEnterDataOp>(
3016         converter, symTable, semaCtx, loc, queue, item);
3017     break;
3018   case llvm::omp::Directive::OMPD_target_exit_data:
3019     genTargetEnterExitUpdateDataOp<mlir::omp::TargetExitDataOp>(
3020         converter, symTable, semaCtx, loc, queue, item);
3021     break;
3022   case llvm::omp::Directive::OMPD_target_update:
3023     genTargetEnterExitUpdateDataOp<mlir::omp::TargetUpdateOp>(
3024         converter, symTable, semaCtx, loc, queue, item);
3025     break;
3026   case llvm::omp::Directive::OMPD_task:
3027     genTaskOp(converter, symTable, semaCtx, eval, loc, queue, item);
3028     break;
3029   case llvm::omp::Directive::OMPD_taskgroup:
3030     genTaskgroupOp(converter, symTable, semaCtx, eval, loc, queue, item);
3031     break;
3032   case llvm::omp::Directive::OMPD_taskloop:
3033     genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc, queue, item);
3034     break;
3035   case llvm::omp::Directive::OMPD_taskwait:
3036     genTaskwaitOp(converter, symTable, semaCtx, eval, loc, queue, item);
3037     break;
3038   case llvm::omp::Directive::OMPD_taskyield:
3039     genTaskyieldOp(converter, symTable, semaCtx, eval, loc, queue, item);
3040     break;
3041   case llvm::omp::Directive::OMPD_teams:
3042     genTeamsOp(converter, symTable, semaCtx, eval, loc, queue, item);
3043     break;
3044   case llvm::omp::Directive::OMPD_tile:
3045   case llvm::omp::Directive::OMPD_unroll:
3046     TODO(loc, "Unhandled loop directive (" +
3047                   llvm::omp::getOpenMPDirectiveName(dir) + ")");
3048   // case llvm::omp::Directive::OMPD_workdistribute:
3049   case llvm::omp::Directive::OMPD_workshare:
3050     genWorkshareOp(converter, symTable, semaCtx, eval, loc, queue, item);
3051     break;
3052   default:
3053     // Combined and composite constructs should have been split into a sequence
3054     // of leaf constructs when building the construct queue.
3055     assert(!llvm::omp::isLeafConstruct(dir) &&
3056            "Unexpected compound construct.");
3057     break;
3058   }
3059 
3060   if (loopLeaf)
3061     symTable.popScope();
3062 }
3063 
3064 //===----------------------------------------------------------------------===//
3065 // OpenMPDeclarativeConstruct visitors
3066 //===----------------------------------------------------------------------===//
3067 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3068                    semantics::SemanticsContext &semaCtx,
3069                    lower::pft::Evaluation &eval,
3070                    const parser::OpenMPUtilityConstruct &);
3071 
3072 static void
3073 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3074        semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
3075        const parser::OpenMPDeclarativeAllocate &declarativeAllocate) {
3076   TODO(converter.getCurrentLocation(), "OpenMPDeclarativeAllocate");
3077 }
3078 
3079 static void genOMP(
3080     lower::AbstractConverter &converter, lower::SymMap &symTable,
3081     semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
3082     const parser::OpenMPDeclareReductionConstruct &declareReductionConstruct) {
3083   TODO(converter.getCurrentLocation(), "OpenMPDeclareReductionConstruct");
3084 }
3085 
3086 static void
3087 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3088        semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
3089        const parser::OpenMPDeclareSimdConstruct &declareSimdConstruct) {
3090   TODO(converter.getCurrentLocation(), "OpenMPDeclareSimdConstruct");
3091 }
3092 
3093 static void
3094 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3095        semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
3096        const parser::OpenMPDeclareMapperConstruct &declareMapperConstruct) {
3097   TODO(converter.getCurrentLocation(), "OpenMPDeclareMapperConstruct");
3098 }
3099 
3100 static void
3101 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3102        semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
3103        const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) {
3104   mlir::omp::DeclareTargetOperands clauseOps;
3105   llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause;
3106   mlir::ModuleOp mod = converter.getFirOpBuilder().getModule();
3107   getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct,
3108                        clauseOps, symbolAndClause);
3109 
3110   for (const DeclareTargetCapturePair &symClause : symbolAndClause) {
3111     mlir::Operation *op = mod.lookupSymbol(
3112         converter.mangleName(std::get<const semantics::Symbol &>(symClause)));
3113 
3114     // Some symbols are deferred until later in the module, these are handled
3115     // upon finalization of the module for OpenMP inside of Bridge, so we simply
3116     // skip for now.
3117     if (!op)
3118       continue;
3119 
3120     markDeclareTarget(
3121         op, converter,
3122         std::get<mlir::omp::DeclareTargetCaptureClause>(symClause),
3123         clauseOps.deviceType);
3124   }
3125 }
3126 
3127 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3128                    semantics::SemanticsContext &semaCtx,
3129                    lower::pft::Evaluation &eval,
3130                    const parser::OpenMPRequiresConstruct &requiresConstruct) {
3131   // Requires directives are gathered and processed in semantics and
3132   // then combined in the lowering bridge before triggering codegen
3133   // just once. Hence, there is no need to lower each individual
3134   // occurrence here.
3135 }
3136 
3137 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3138                    semantics::SemanticsContext &semaCtx,
3139                    lower::pft::Evaluation &eval,
3140                    const parser::OpenMPThreadprivate &threadprivate) {
3141   // The directive is lowered when instantiating the variable to
3142   // support the case of threadprivate variable declared in module.
3143 }
3144 
3145 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3146                    semantics::SemanticsContext &semaCtx,
3147                    lower::pft::Evaluation &eval,
3148                    const parser::OpenMPDeclarativeConstruct &ompDeclConstruct) {
3149   Fortran::common::visit(
3150       [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); },
3151       ompDeclConstruct.u);
3152 }
3153 
3154 //===----------------------------------------------------------------------===//
3155 // OpenMPStandaloneConstruct visitors
3156 //===----------------------------------------------------------------------===//
3157 
3158 static void genOMP(
3159     lower::AbstractConverter &converter, lower::SymMap &symTable,
3160     semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
3161     const parser::OpenMPSimpleStandaloneConstruct &simpleStandaloneConstruct) {
3162   const auto &directive = std::get<parser::OmpSimpleStandaloneDirective>(
3163       simpleStandaloneConstruct.t);
3164   List<Clause> clauses = makeClauses(
3165       std::get<parser::OmpClauseList>(simpleStandaloneConstruct.t), semaCtx);
3166   mlir::Location currentLocation = converter.genLocation(directive.source);
3167 
3168   ConstructQueue queue{
3169       buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
3170                           eval, directive.source, directive.v, clauses)};
3171   if (directive.v == llvm::omp::Directive::OMPD_ordered) {
3172     // Standalone "ordered" directive.
3173     genOrderedOp(converter, symTable, semaCtx, eval, currentLocation, queue,
3174                  queue.begin());
3175   } else {
3176     // Dispatch handles the "block-associated" variant of "ordered".
3177     genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
3178                    queue.begin());
3179   }
3180 }
3181 
3182 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3183                    semantics::SemanticsContext &semaCtx,
3184                    lower::pft::Evaluation &eval,
3185                    const parser::OpenMPFlushConstruct &flushConstruct) {
3186   const auto &verbatim = std::get<parser::Verbatim>(flushConstruct.t);
3187   const auto &objectList =
3188       std::get<std::optional<parser::OmpObjectList>>(flushConstruct.t);
3189   const auto &clauseList =
3190       std::get<std::optional<std::list<parser::OmpMemoryOrderClause>>>(
3191           flushConstruct.t);
3192   ObjectList objects =
3193       objectList ? makeObjects(*objectList, semaCtx) : ObjectList{};
3194   List<Clause> clauses =
3195       clauseList ? makeList(*clauseList,
3196                             [&](auto &&s) { return makeClause(s.v, semaCtx); })
3197                  : List<Clause>{};
3198   mlir::Location currentLocation = converter.genLocation(verbatim.source);
3199 
3200   ConstructQueue queue{buildConstructQueue(
3201       converter.getFirOpBuilder().getModule(), semaCtx, eval, verbatim.source,
3202       llvm::omp::Directive::OMPD_flush, clauses)};
3203   genFlushOp(converter, symTable, semaCtx, eval, currentLocation, objects,
3204              queue, queue.begin());
3205 }
3206 
3207 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3208                    semantics::SemanticsContext &semaCtx,
3209                    lower::pft::Evaluation &eval,
3210                    const parser::OpenMPCancelConstruct &cancelConstruct) {
3211   TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct");
3212 }
3213 
3214 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3215                    semantics::SemanticsContext &semaCtx,
3216                    lower::pft::Evaluation &eval,
3217                    const parser::OpenMPCancellationPointConstruct
3218                        &cancellationPointConstruct) {
3219   TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct");
3220 }
3221 
3222 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3223                    semantics::SemanticsContext &semaCtx,
3224                    lower::pft::Evaluation &eval,
3225                    const parser::OpenMPDepobjConstruct &construct) {
3226   // These values will be ignored until the construct itself is implemented,
3227   // but run them anyway for the sake of testing (via a Todo test).
3228   auto &ompObj = std::get<parser::OmpObject>(construct.t);
3229   const Object &depObj = makeObject(ompObj, semaCtx);
3230   Clause clause = makeClause(std::get<parser::OmpClause>(construct.t), semaCtx);
3231   (void)depObj;
3232   (void)clause;
3233 
3234   TODO(converter.getCurrentLocation(), "OpenMPDepobjConstruct");
3235 }
3236 
3237 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3238                    semantics::SemanticsContext &semaCtx,
3239                    lower::pft::Evaluation &eval,
3240                    const parser::OmpMetadirectiveDirective &construct) {}
3241 
3242 static void
3243 genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3244        semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
3245        const parser::OpenMPStandaloneConstruct &standaloneConstruct) {
3246   Fortran::common::visit(
3247       [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); },
3248       standaloneConstruct.u);
3249 }
3250 
3251 //===----------------------------------------------------------------------===//
3252 // OpenMPConstruct visitors
3253 //===----------------------------------------------------------------------===//
3254 
3255 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3256                    semantics::SemanticsContext &semaCtx,
3257                    lower::pft::Evaluation &eval,
3258                    const parser::OpenMPAllocatorsConstruct &allocsConstruct) {
3259   TODO(converter.getCurrentLocation(), "OpenMPAllocatorsConstruct");
3260 }
3261 
3262 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3263                    semantics::SemanticsContext &semaCtx,
3264                    lower::pft::Evaluation &eval,
3265                    const parser::OpenMPAtomicConstruct &atomicConstruct) {
3266   Fortran::common::visit(
3267       common::visitors{
3268           [&](const parser::OmpAtomicRead &atomicRead) {
3269             mlir::Location loc = converter.genLocation(atomicRead.source);
3270             lower::genOmpAccAtomicRead<parser::OmpAtomicRead,
3271                                        parser::OmpAtomicClauseList>(
3272                 converter, atomicRead, loc);
3273           },
3274           [&](const parser::OmpAtomicWrite &atomicWrite) {
3275             mlir::Location loc = converter.genLocation(atomicWrite.source);
3276             lower::genOmpAccAtomicWrite<parser::OmpAtomicWrite,
3277                                         parser::OmpAtomicClauseList>(
3278                 converter, atomicWrite, loc);
3279           },
3280           [&](const parser::OmpAtomic &atomicConstruct) {
3281             mlir::Location loc = converter.genLocation(atomicConstruct.source);
3282             lower::genOmpAtomic<parser::OmpAtomic, parser::OmpAtomicClauseList>(
3283                 converter, atomicConstruct, loc);
3284           },
3285           [&](const parser::OmpAtomicUpdate &atomicUpdate) {
3286             mlir::Location loc = converter.genLocation(atomicUpdate.source);
3287             lower::genOmpAccAtomicUpdate<parser::OmpAtomicUpdate,
3288                                          parser::OmpAtomicClauseList>(
3289                 converter, atomicUpdate, loc);
3290           },
3291           [&](const parser::OmpAtomicCapture &atomicCapture) {
3292             mlir::Location loc = converter.genLocation(atomicCapture.source);
3293             lower::genOmpAccAtomicCapture<parser::OmpAtomicCapture,
3294                                           parser::OmpAtomicClauseList>(
3295                 converter, atomicCapture, loc);
3296           },
3297           [&](const parser::OmpAtomicCompare &atomicCompare) {
3298             mlir::Location loc = converter.genLocation(atomicCompare.source);
3299             TODO(loc, "OpenMP atomic compare");
3300           },
3301       },
3302       atomicConstruct.u);
3303 }
3304 
3305 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3306                    semantics::SemanticsContext &semaCtx,
3307                    lower::pft::Evaluation &eval,
3308                    const parser::OpenMPBlockConstruct &blockConstruct) {
3309   const auto &beginBlockDirective =
3310       std::get<parser::OmpBeginBlockDirective>(blockConstruct.t);
3311   const auto &endBlockDirective =
3312       std::get<parser::OmpEndBlockDirective>(blockConstruct.t);
3313   mlir::Location currentLocation =
3314       converter.genLocation(beginBlockDirective.source);
3315   const auto origDirective =
3316       std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v;
3317   List<Clause> clauses = makeClauses(
3318       std::get<parser::OmpClauseList>(beginBlockDirective.t), semaCtx);
3319   clauses.append(makeClauses(
3320       std::get<parser::OmpClauseList>(endBlockDirective.t), semaCtx));
3321 
3322   assert(llvm::omp::blockConstructSet.test(origDirective) &&
3323          "Expected block construct");
3324   (void)origDirective;
3325 
3326   for (const Clause &clause : clauses) {
3327     mlir::Location clauseLocation = converter.genLocation(clause.source);
3328     if (!std::holds_alternative<clause::Affinity>(clause.u) &&
3329         !std::holds_alternative<clause::Allocate>(clause.u) &&
3330         !std::holds_alternative<clause::Copyin>(clause.u) &&
3331         !std::holds_alternative<clause::Copyprivate>(clause.u) &&
3332         !std::holds_alternative<clause::Default>(clause.u) &&
3333         !std::holds_alternative<clause::Depend>(clause.u) &&
3334         !std::holds_alternative<clause::Filter>(clause.u) &&
3335         !std::holds_alternative<clause::Final>(clause.u) &&
3336         !std::holds_alternative<clause::Firstprivate>(clause.u) &&
3337         !std::holds_alternative<clause::HasDeviceAddr>(clause.u) &&
3338         !std::holds_alternative<clause::If>(clause.u) &&
3339         !std::holds_alternative<clause::IsDevicePtr>(clause.u) &&
3340         !std::holds_alternative<clause::Map>(clause.u) &&
3341         !std::holds_alternative<clause::Nowait>(clause.u) &&
3342         !std::holds_alternative<clause::NumTeams>(clause.u) &&
3343         !std::holds_alternative<clause::NumThreads>(clause.u) &&
3344         !std::holds_alternative<clause::OmpxBare>(clause.u) &&
3345         !std::holds_alternative<clause::Priority>(clause.u) &&
3346         !std::holds_alternative<clause::Private>(clause.u) &&
3347         !std::holds_alternative<clause::ProcBind>(clause.u) &&
3348         !std::holds_alternative<clause::Reduction>(clause.u) &&
3349         !std::holds_alternative<clause::Shared>(clause.u) &&
3350         !std::holds_alternative<clause::Simd>(clause.u) &&
3351         !std::holds_alternative<clause::ThreadLimit>(clause.u) &&
3352         !std::holds_alternative<clause::Threads>(clause.u) &&
3353         !std::holds_alternative<clause::UseDeviceAddr>(clause.u) &&
3354         !std::holds_alternative<clause::UseDevicePtr>(clause.u) &&
3355         !std::holds_alternative<clause::InReduction>(clause.u) &&
3356         !std::holds_alternative<clause::Mergeable>(clause.u) &&
3357         !std::holds_alternative<clause::Untied>(clause.u) &&
3358         !std::holds_alternative<clause::TaskReduction>(clause.u) &&
3359         !std::holds_alternative<clause::Detach>(clause.u)) {
3360       std::string name =
3361           parser::ToUpperCaseLetters(llvm::omp::getOpenMPClauseName(clause.id));
3362       TODO(clauseLocation, name + " clause is not implemented yet");
3363     }
3364   }
3365 
3366   llvm::omp::Directive directive =
3367       std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v;
3368   const parser::CharBlock &source =
3369       std::get<parser::OmpBlockDirective>(beginBlockDirective.t).source;
3370   ConstructQueue queue{
3371       buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
3372                           eval, source, directive, clauses)};
3373   genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
3374                  queue.begin());
3375 }
3376 
3377 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3378                    semantics::SemanticsContext &semaCtx,
3379                    lower::pft::Evaluation &eval,
3380                    const parser::OpenMPCriticalConstruct &criticalConstruct) {
3381   const auto &cd = std::get<parser::OmpCriticalDirective>(criticalConstruct.t);
3382   List<Clause> clauses =
3383       makeClauses(std::get<parser::OmpClauseList>(cd.t), semaCtx);
3384 
3385   ConstructQueue queue{buildConstructQueue(
3386       converter.getFirOpBuilder().getModule(), semaCtx, eval, cd.source,
3387       llvm::omp::Directive::OMPD_critical, clauses)};
3388 
3389   const auto &name = std::get<std::optional<parser::Name>>(cd.t);
3390   mlir::Location currentLocation = converter.getCurrentLocation();
3391   genCriticalOp(converter, symTable, semaCtx, eval, currentLocation, queue,
3392                 queue.begin(), name);
3393 }
3394 
3395 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3396                    semantics::SemanticsContext &semaCtx,
3397                    lower::pft::Evaluation &eval,
3398                    const parser::OpenMPUtilityConstruct &) {
3399   TODO(converter.getCurrentLocation(), "OpenMPUtilityConstruct");
3400 }
3401 
3402 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3403                    semantics::SemanticsContext &semaCtx,
3404                    lower::pft::Evaluation &eval,
3405                    const parser::OpenMPDispatchConstruct &) {
3406   TODO(converter.getCurrentLocation(), "OpenMPDispatchConstruct");
3407 }
3408 
3409 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3410                    semantics::SemanticsContext &semaCtx,
3411                    lower::pft::Evaluation &eval,
3412                    const parser::OpenMPExecutableAllocate &execAllocConstruct) {
3413   TODO(converter.getCurrentLocation(), "OpenMPExecutableAllocate");
3414 }
3415 
3416 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3417                    semantics::SemanticsContext &semaCtx,
3418                    lower::pft::Evaluation &eval,
3419                    const parser::OpenMPLoopConstruct &loopConstruct) {
3420   const auto &beginLoopDirective =
3421       std::get<parser::OmpBeginLoopDirective>(loopConstruct.t);
3422   List<Clause> clauses = makeClauses(
3423       std::get<parser::OmpClauseList>(beginLoopDirective.t), semaCtx);
3424   if (auto &endLoopDirective =
3425           std::get<std::optional<parser::OmpEndLoopDirective>>(
3426               loopConstruct.t)) {
3427     clauses.append(makeClauses(
3428         std::get<parser::OmpClauseList>(endLoopDirective->t), semaCtx));
3429   }
3430 
3431   mlir::Location currentLocation =
3432       converter.genLocation(beginLoopDirective.source);
3433 
3434   llvm::omp::Directive directive =
3435       std::get<parser::OmpLoopDirective>(beginLoopDirective.t).v;
3436   const parser::CharBlock &source =
3437       std::get<parser::OmpLoopDirective>(beginLoopDirective.t).source;
3438   ConstructQueue queue{
3439       buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
3440                           eval, source, directive, clauses)};
3441   genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
3442                  queue.begin());
3443 }
3444 
// Overload for a SECTION construct. It deliberately generates nothing and
// ignores all of its arguments; it exists so that the generic
// parser::OpenMPConstruct visitor has an overload to call for this
// alternative.
static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
                   semantics::SemanticsContext &semaCtx,
                   lower::pft::Evaluation &eval,
                   const parser::OpenMPSectionConstruct &sectionConstruct) {
  // Do nothing here. SECTION is lowered inside of the lowering for Sections
}
3451 
3452 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3453                    semantics::SemanticsContext &semaCtx,
3454                    lower::pft::Evaluation &eval,
3455                    const parser::OpenMPSectionsConstruct &sectionsConstruct) {
3456   const auto &beginSectionsDirective =
3457       std::get<parser::OmpBeginSectionsDirective>(sectionsConstruct.t);
3458   List<Clause> clauses = makeClauses(
3459       std::get<parser::OmpClauseList>(beginSectionsDirective.t), semaCtx);
3460   const auto &endSectionsDirective =
3461       std::get<parser::OmpEndSectionsDirective>(sectionsConstruct.t);
3462   const auto &sectionBlocks =
3463       std::get<parser::OmpSectionBlocks>(sectionsConstruct.t);
3464   clauses.append(makeClauses(
3465       std::get<parser::OmpClauseList>(endSectionsDirective.t), semaCtx));
3466   mlir::Location currentLocation = converter.getCurrentLocation();
3467 
3468   llvm::omp::Directive directive =
3469       std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).v;
3470   const parser::CharBlock &source =
3471       std::get<parser::OmpSectionsDirective>(beginSectionsDirective.t).source;
3472   ConstructQueue queue{
3473       buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx,
3474                           eval, source, directive, clauses)};
3475   ConstructQueue::iterator next = queue.begin();
3476   // Generate constructs that come first e.g. Parallel
3477   while (next != queue.end() &&
3478          next->id != llvm::omp::Directive::OMPD_sections) {
3479     genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue,
3480                    next);
3481     next = std::next(next);
3482   }
3483 
3484   // call genSectionsOp directly (not via genOMPDispatch) so that we can add the
3485   // sectionBlocks argument
3486   assert(next != queue.end());
3487   assert(next->id == llvm::omp::Directive::OMPD_sections);
3488   genSectionsOp(converter, symTable, semaCtx, eval, currentLocation, queue,
3489                 next, sectionBlocks);
3490   assert(std::next(next) == queue.end());
3491 }
3492 
3493 static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
3494                    semantics::SemanticsContext &semaCtx,
3495                    lower::pft::Evaluation &eval,
3496                    const parser::OpenMPConstruct &ompConstruct) {
3497   Fortran::common::visit(
3498       [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); },
3499       ompConstruct.u);
3500 }
3501 
3502 //===----------------------------------------------------------------------===//
3503 // Public functions
3504 //===----------------------------------------------------------------------===//
3505 
3506 mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder,
3507                                                      mlir::Operation *op,
3508                                                      mlir::Location loc) {
3509   if (mlir::isa<mlir::omp::AtomicUpdateOp, mlir::omp::DeclareReductionOp,
3510                 mlir::omp::LoopNestOp>(op))
3511     return builder.create<mlir::omp::YieldOp>(loc);
3512   return builder.create<mlir::omp::TerminatorOp>(loc);
3513 }
3514 
3515 void Fortran::lower::genOpenMPConstruct(lower::AbstractConverter &converter,
3516                                         lower::SymMap &symTable,
3517                                         semantics::SemanticsContext &semaCtx,
3518                                         lower::pft::Evaluation &eval,
3519                                         const parser::OpenMPConstruct &omp) {
3520   lower::SymMapScope scope(symTable);
3521   genOMP(converter, symTable, semaCtx, eval, omp);
3522 }
3523 
// Public entry point for lowering a declarative OpenMP construct. The
// construct is first dispatched to the genOMP overload taking a
// parser::OpenMPDeclarativeConstruct; afterwards the evaluations nested in
// `eval` are lowered through genNestedEvaluations.
void Fortran::lower::genOpenMPDeclarativeConstruct(
    lower::AbstractConverter &converter, lower::SymMap &symTable,
    semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
    const parser::OpenMPDeclarativeConstruct &omp) {
  genOMP(converter, symTable, semaCtx, eval, omp);
  genNestedEvaluations(converter, eval);
}
3531 
3532 void Fortran::lower::genOpenMPSymbolProperties(
3533     lower::AbstractConverter &converter, const lower::pft::Variable &var) {
3534   assert(var.hasSymbol() && "Expecting Symbol");
3535   const semantics::Symbol &sym = var.getSymbol();
3536 
3537   if (sym.test(semantics::Symbol::Flag::OmpThreadprivate))
3538     lower::genThreadprivateOp(converter, var);
3539 
3540   if (sym.test(semantics::Symbol::Flag::OmpDeclareTarget))
3541     lower::genDeclareTargetIntGlobal(converter, var);
3542 }
3543 
3544 int64_t
3545 Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) {
3546   for (const parser::OmpClause &clause : clauseList.v) {
3547     if (const auto &collapseClause =
3548             std::get_if<parser::OmpClause::Collapse>(&clause.u)) {
3549       const auto *expr = semantics::GetExpr(collapseClause->v);
3550       return evaluate::ToInt64(*expr).value();
3551     }
3552   }
3553   return 1;
3554 }
3555 
3556 void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter,
3557                                         const lower::pft::Variable &var) {
3558   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
3559   mlir::Location currentLocation = converter.getCurrentLocation();
3560 
3561   const semantics::Symbol &sym = var.getSymbol();
3562   mlir::Value symThreadprivateValue;
3563   if (const semantics::Symbol *common =
3564           semantics::FindCommonBlockContaining(sym.GetUltimate())) {
3565     mlir::Value commonValue = converter.getSymbolAddress(*common);
3566     if (mlir::isa<mlir::omp::ThreadprivateOp>(commonValue.getDefiningOp())) {
3567       // Generate ThreadprivateOp for a common block instead of its members and
3568       // only do it once for a common block.
3569       return;
3570     }
3571     // Generate ThreadprivateOp and rebind the common block.
3572     mlir::Value commonThreadprivateValue =
3573         firOpBuilder.create<mlir::omp::ThreadprivateOp>(
3574             currentLocation, commonValue.getType(), commonValue);
3575     converter.bindSymbol(*common, commonThreadprivateValue);
3576     // Generate the threadprivate value for the common block member.
3577     symThreadprivateValue = genCommonBlockMember(converter, currentLocation,
3578                                                  sym, commonThreadprivateValue);
3579   } else if (!var.isGlobal()) {
3580     // Non-global variable which can be in threadprivate directive must be one
3581     // variable in main program, and it has implicit SAVE attribute. Take it as
3582     // with SAVE attribute, so to create GlobalOp for it to simplify the
3583     // translation to LLVM IR.
3584     // Avoids performing multiple globalInitializations.
3585     fir::GlobalOp global;
3586     auto module = converter.getModuleOp();
3587     std::string globalName = converter.mangleName(sym);
3588     if (module.lookupSymbol<fir::GlobalOp>(globalName))
3589       global = module.lookupSymbol<fir::GlobalOp>(globalName);
3590     else
3591       global = globalInitialization(converter, firOpBuilder, sym, var,
3592                                     currentLocation);
3593 
3594     mlir::Value symValue = firOpBuilder.create<fir::AddrOfOp>(
3595         currentLocation, global.resultType(), global.getSymbol());
3596     symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>(
3597         currentLocation, symValue.getType(), symValue);
3598   } else {
3599     mlir::Value symValue = converter.getSymbolAddress(sym);
3600 
3601     // The symbol may be use-associated multiple times, and nothing needs to be
3602     // done after the original symbol is mapped to the threadprivatized value
3603     // for the first time. Use the threadprivatized value directly.
3604     mlir::Operation *op;
3605     if (auto declOp = symValue.getDefiningOp<hlfir::DeclareOp>())
3606       op = declOp.getMemref().getDefiningOp();
3607     else
3608       op = symValue.getDefiningOp();
3609     if (mlir::isa<mlir::omp::ThreadprivateOp>(op))
3610       return;
3611 
3612     symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>(
3613         currentLocation, symValue.getType(), symValue);
3614   }
3615 
3616   fir::ExtendedValue sexv = converter.getSymbolExtendedValue(sym);
3617   fir::ExtendedValue symThreadprivateExv =
3618       getExtendedValue(sexv, symThreadprivateValue);
3619   converter.bindSymbol(sym, symThreadprivateExv);
3620 }
3621 
3622 // This function replicates threadprivate's behaviour of generating
3623 // an internal fir.GlobalOp for non-global variables in the main program
3624 // that have the implicit SAVE attribute, to simplifiy LLVM-IR and MLIR
3625 // generation.
3626 void Fortran::lower::genDeclareTargetIntGlobal(
3627     lower::AbstractConverter &converter, const lower::pft::Variable &var) {
3628   if (!var.isGlobal()) {
3629     // A non-global variable which can be in a declare target directive must
3630     // be a variable in the main program, and it has the implicit SAVE
3631     // attribute. We create a GlobalOp for it to simplify the translation to
3632     // LLVM IR.
3633     globalInitialization(converter, converter.getFirOpBuilder(),
3634                          var.getSymbol(), var, converter.getCurrentLocation());
3635   }
3636 }
3637 
3638 bool Fortran::lower::isOpenMPTargetConstruct(
3639     const parser::OpenMPConstruct &omp) {
3640   llvm::omp::Directive dir = llvm::omp::Directive::OMPD_unknown;
3641   if (const auto *block = std::get_if<parser::OpenMPBlockConstruct>(&omp.u)) {
3642     const auto &begin = std::get<parser::OmpBeginBlockDirective>(block->t);
3643     dir = std::get<parser::OmpBlockDirective>(begin.t).v;
3644   } else if (const auto *loop =
3645                  std::get_if<parser::OpenMPLoopConstruct>(&omp.u)) {
3646     const auto &begin = std::get<parser::OmpBeginLoopDirective>(loop->t);
3647     dir = std::get<parser::OmpLoopDirective>(begin.t).v;
3648   }
3649   return llvm::omp::allTargetSet.test(dir);
3650 }
3651 
3652 void Fortran::lower::gatherOpenMPDeferredDeclareTargets(
3653     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
3654     lower::pft::Evaluation &eval,
3655     const parser::OpenMPDeclarativeConstruct &ompDecl,
3656     llvm::SmallVectorImpl<OMPDeferredDeclareTargetInfo>
3657         &deferredDeclareTarget) {
3658   Fortran::common::visit(
3659       common::visitors{
3660           [&](const parser::OpenMPDeclareTargetConstruct &ompReq) {
3661             collectDeferredDeclareTargets(converter, semaCtx, eval, ompReq,
3662                                           deferredDeclareTarget);
3663           },
3664           [&](const auto &) {},
3665       },
3666       ompDecl.u);
3667 }
3668 
3669 bool Fortran::lower::isOpenMPDeviceDeclareTarget(
3670     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
3671     lower::pft::Evaluation &eval,
3672     const parser::OpenMPDeclarativeConstruct &ompDecl) {
3673   return Fortran::common::visit(
3674       common::visitors{
3675           [&](const parser::OpenMPDeclareTargetConstruct &ompReq) {
3676             mlir::omp::DeclareTargetDeviceType targetType =
3677                 getDeclareTargetFunctionDevice(converter, semaCtx, eval, ompReq)
3678                     .value_or(mlir::omp::DeclareTargetDeviceType::host);
3679             return targetType != mlir::omp::DeclareTargetDeviceType::host;
3680           },
3681           [&](const auto &) { return false; },
3682       },
3683       ompDecl.u);
3684 }
3685 
3686 // In certain cases such as subroutine or function interfaces which declare
3687 // but do not define or directly call the subroutine or function in the same
3688 // module, their lowering is delayed until after the declare target construct
3689 // itself is processed, so there symbol is not within the table.
3690 //
3691 // This function will also return true if we encounter any device declare
3692 // target cases, to satisfy checking if we require the requires attributes
3693 // on the module.
3694 bool Fortran::lower::markOpenMPDeferredDeclareTargetFunctions(
3695     mlir::Operation *mod,
3696     llvm::SmallVectorImpl<OMPDeferredDeclareTargetInfo> &deferredDeclareTargets,
3697     AbstractConverter &converter) {
3698   bool deviceCodeFound = false;
3699   auto modOp = llvm::cast<mlir::ModuleOp>(mod);
3700   for (auto declTar : deferredDeclareTargets) {
3701     mlir::Operation *op = modOp.lookupSymbol(converter.mangleName(declTar.sym));
3702 
3703     // Due to interfaces being optionally emitted on usage in a module,
3704     // not finding an operation at this point cannot be a hard error, we
3705     // simply ignore it for now.
3706     // TODO: Add semantic checks for detecting cases where an erronous
3707     // (undefined) symbol has been supplied to a declare target clause
3708     if (!op)
3709       continue;
3710 
3711     auto devType = declTar.declareTargetDeviceType;
3712     if (!deviceCodeFound && devType != mlir::omp::DeclareTargetDeviceType::host)
3713       deviceCodeFound = true;
3714 
3715     markDeclareTarget(op, converter, declTar.declareTargetCaptureClause,
3716                       devType);
3717   }
3718 
3719   return deviceCodeFound;
3720 }
3721 
3722 void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
3723                                        const semantics::Symbol *symbol) {
3724   using MlirRequires = mlir::omp::ClauseRequires;
3725   using SemaRequires = semantics::WithOmpDeclarative::RequiresFlag;
3726 
3727   if (auto offloadMod =
3728           llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(mod)) {
3729     semantics::WithOmpDeclarative::RequiresFlags semaFlags;
3730     if (symbol) {
3731       common::visit(
3732           [&](const auto &details) {
3733             if constexpr (std::is_base_of_v<semantics::WithOmpDeclarative,
3734                                             std::decay_t<decltype(details)>>) {
3735               if (details.has_ompRequires())
3736                 semaFlags = *details.ompRequires();
3737             }
3738           },
3739           symbol->details());
3740     }
3741 
3742     // Use pre-populated omp.requires module attribute if it was set, so that
3743     // the "-fopenmp-force-usm" compiler option is honored.
3744     MlirRequires mlirFlags = offloadMod.getRequires();
3745     if (semaFlags.test(SemaRequires::ReverseOffload))
3746       mlirFlags = mlirFlags | MlirRequires::reverse_offload;
3747     if (semaFlags.test(SemaRequires::UnifiedAddress))
3748       mlirFlags = mlirFlags | MlirRequires::unified_address;
3749     if (semaFlags.test(SemaRequires::UnifiedSharedMemory))
3750       mlirFlags = mlirFlags | MlirRequires::unified_shared_memory;
3751     if (semaFlags.test(SemaRequires::DynamicAllocators))
3752       mlirFlags = mlirFlags | MlirRequires::dynamic_allocators;
3753 
3754     offloadMod.setRequires(mlirFlags);
3755   }
3756 }
3757