xref: /netbsd-src/external/apache2/llvm/dist/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the OpenMPIRBuilder class and helpers used as a convenient
10 // way to create LLVM instructions for OpenMP directives.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15 #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16 
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Allocator.h"
#include <cassert>
#include <forward_list>
#include <utility>
22 
23 namespace llvm {
24 class CanonicalLoopInfo;
25 
26 /// An interface to create LLVM-IR for OpenMP directives.
27 ///
28 /// Each OpenMP directive has a corresponding public generator method.
29 class OpenMPIRBuilder {
30 public:
31   /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
32   /// not have an effect on \p M (see initialize).
OpenMPIRBuilder(Module & M)33   OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {}
34   ~OpenMPIRBuilder();
35 
36   /// Initialize the internal state, this will put structures types and
37   /// potentially other helpers into the underlying module. Must be called
38   /// before any other method and only once!
39   void initialize();
40 
41   /// Finalize the underlying module, e.g., by outlining regions.
42   /// \param Fn                    The function to be finalized. If not used,
43   ///                              all functions are finalized.
44   /// \param AllowExtractorSinking Flag to include sinking instructions,
45   ///                              emitted by CodeExtractor, in the
46   ///                              outlined region. Default is false.
47   void finalize(Function *Fn = nullptr, bool AllowExtractorSinking = false);
48 
49   /// Add attributes known for \p FnID to \p Fn.
50   void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
51 
52   /// Type used throughout for insertion points.
53   using InsertPointTy = IRBuilder<>::InsertPoint;
54 
55   /// Callback type for variable finalization (think destructors).
56   ///
57   /// \param CodeGenIP is the insertion point at which the finalization code
58   ///                  should be placed.
59   ///
60   /// A finalize callback knows about all objects that need finalization, e.g.
61   /// destruction, when the scope of the currently generated construct is left
62   /// at the time, and location, the callback is invoked.
63   using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
64 
65   struct FinalizationInfo {
66     /// The finalization callback provided by the last in-flight invocation of
67     /// createXXXX for the directive of kind DK.
68     FinalizeCallbackTy FiniCB;
69 
70     /// The directive kind of the innermost directive that has an associated
71     /// region which might require finalization when it is left.
72     omp::Directive DK;
73 
74     /// Flag to indicate if the directive is cancellable.
75     bool IsCancellable;
76   };
77 
78   /// Push a finalization callback on the finalization stack.
79   ///
80   /// NOTE: Temporary solution until Clang CG is gone.
pushFinalizationCB(const FinalizationInfo & FI)81   void pushFinalizationCB(const FinalizationInfo &FI) {
82     FinalizationStack.push_back(FI);
83   }
84 
85   /// Pop the last finalization callback from the finalization stack.
86   ///
87   /// NOTE: Temporary solution until Clang CG is gone.
popFinalizationCB()88   void popFinalizationCB() { FinalizationStack.pop_back(); }
89 
90   /// Callback type for body (=inner region) code generation
91   ///
92   /// The callback takes code locations as arguments, each describing a
93   /// location at which code might need to be generated or a location that is
94   /// the target of control transfer.
95   ///
96   /// \param AllocaIP is the insertion point at which new alloca instructions
97   ///                 should be placed.
98   /// \param CodeGenIP is the insertion point at which the body code should be
99   ///                  placed.
100   /// \param ContinuationBB is the basic block target to leave the body.
101   ///
102   /// Note that all blocks pointed to by the arguments have terminators.
103   using BodyGenCallbackTy =
104       function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
105                         BasicBlock &ContinuationBB)>;
106 
107   // This is created primarily for sections construct as llvm::function_ref
108   // (BodyGenCallbackTy) is not storable (as described in the comments of
109   // function_ref class - function_ref contains non-ownable reference
110   // to the callable).
111   using StorableBodyGenCallbackTy =
112       std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
113                          BasicBlock &ContinuationBB)>;
114 
115   /// Callback type for loop body code generation.
116   ///
117   /// \param CodeGenIP is the insertion point where the loop's body code must be
118   ///                  placed. This will be a dedicated BasicBlock with a
119   ///                  conditional branch from the loop condition check and
120   ///                  terminated with an unconditional branch to the loop
121   ///                  latch.
122   /// \param IndVar    is the induction variable usable at the insertion point.
123   using LoopBodyGenCallbackTy =
124       function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
125 
126   /// Callback type for variable privatization (think copy & default
127   /// constructor).
128   ///
129   /// \param AllocaIP is the insertion point at which new alloca instructions
130   ///                 should be placed.
131   /// \param CodeGenIP is the insertion point at which the privatization code
132   ///                  should be placed.
133   /// \param Original The value being copied/created, should not be used in the
134   ///                 generated IR.
135   /// \param Inner The equivalent of \p Original that should be used in the
136   ///              generated IR; this is equal to \p Original if the value is
137   ///              a pointer and can thus be passed directly, otherwise it is
138   ///              an equivalent but different value.
139   /// \param ReplVal The replacement value, thus a copy or new created version
140   ///                of \p Inner.
141   ///
142   /// \returns The new insertion point where code generation continues and
143   ///          \p ReplVal the replacement value.
144   using PrivatizeCallbackTy = function_ref<InsertPointTy(
145       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
146       Value &Inner, Value *&ReplVal)>;
147 
148   /// Description of a LLVM-IR insertion point (IP) and a debug/source location
149   /// (filename, line, column, ...).
150   struct LocationDescription {
151     template <typename T, typename U>
LocationDescriptionLocationDescription152     LocationDescription(const IRBuilder<T, U> &IRB)
153         : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
LocationDescriptionLocationDescription154     LocationDescription(const InsertPointTy &IP) : IP(IP) {}
LocationDescriptionLocationDescription155     LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
156         : IP(IP), DL(DL) {}
157     InsertPointTy IP;
158     DebugLoc DL;
159   };
160 
161   /// Emitter methods for OpenMP directives.
162   ///
163   ///{
164 
165   /// Generator for '#omp barrier'
166   ///
167   /// \param Loc The location where the barrier directive was encountered.
168   /// \param DK The kind of directive that caused the barrier.
169   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
170   /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
171   ///                        should be checked and acted upon.
172   ///
173   /// \returns The insertion point after the barrier.
174   InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
175                               bool ForceSimpleCall = false,
176                               bool CheckCancelFlag = true);
177 
178   /// Generator for '#omp cancel'
179   ///
180   /// \param Loc The location where the directive was encountered.
181   /// \param IfCondition The evaluated 'if' clause expression, if any.
182   /// \param CanceledDirective The kind of directive that is canceled.
183   ///
184   /// \returns The insertion point after the barrier.
185   InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
186                              omp::Directive CanceledDirective);
187 
188   /// Generator for '#omp parallel'
189   ///
190   /// \param Loc The insert and source location description.
191   /// \param AllocaIP The insertion points to be used for alloca instructions.
192   /// \param BodyGenCB Callback that will generate the region code.
193   /// \param PrivCB Callback to copy a given variable (think copy constructor).
194   /// \param FiniCB Callback to finalize variable copies.
195   /// \param IfCondition The evaluated 'if' clause expression, if any.
196   /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
197   /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
198   /// \param IsCancellable Flag to indicate a cancellable parallel region.
199   ///
200   /// \returns The insertion position *after* the parallel.
201   IRBuilder<>::InsertPoint
202   createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
203                  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
204                  FinalizeCallbackTy FiniCB, Value *IfCondition,
205                  Value *NumThreads, omp::ProcBindKind ProcBind,
206                  bool IsCancellable);
207 
208   /// Generator for the control flow structure of an OpenMP canonical loop.
209   ///
210   /// This generator operates on the logical iteration space of the loop, i.e.
211   /// the caller only has to provide a loop trip count of the loop as defined by
212   /// base language semantics. The trip count is interpreted as an unsigned
213   /// integer. The induction variable passed to \p BodyGenCB will be of the same
214   /// type and run from 0 to \p TripCount - 1. It is up to the callback to
215   /// convert the logical iteration variable to the loop counter variable in the
216   /// loop body.
217   ///
218   /// \param Loc       The insert and source location description. The insert
219   ///                  location can be between two instructions or the end of a
220   ///                  degenerate block (e.g. a BB under construction).
221   /// \param BodyGenCB Callback that will generate the loop body code.
222   /// \param TripCount Number of iterations the loop body is executed.
223   /// \param Name      Base name used to derive BB and instruction names.
224   ///
225   /// \returns An object representing the created control flow structure which
226   ///          can be used for loop-associated directives.
227   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
228                                          LoopBodyGenCallbackTy BodyGenCB,
229                                          Value *TripCount,
230                                          const Twine &Name = "loop");
231 
232   /// Generator for the control flow structure of an OpenMP canonical loop.
233   ///
234   /// Instead of a logical iteration space, this allows specifying user-defined
235   /// loop counter values using increment, upper- and lower bounds. To
236   /// disambiguate the terminology when counting downwards, instead of lower
237   /// bounds we use \p Start for the loop counter value in the first body
238   /// iteration.
239   ///
240   /// Consider the following limitations:
241   ///
242   ///  * A loop counter space over all integer values of its bit-width cannot be
243   ///    represented. E.g. using uint8_t, its loop trip count of 256 cannot be
244   ///    stored into an 8 bit integer:
245   ///
246   ///      DO I = 0, 255, 1
247   ///
248   ///  * Unsigned wrapping is only supported when wrapping only "once"; E.g.
249   ///    effectively counting downwards:
250   ///
251   ///      for (uint8_t i = 100u; i > 0; i += 127u)
252   ///
253   ///
254   /// TODO: May need to add additional parameters to represent:
255   ///
256   ///  * Allow representing downcounting with unsigned integers.
257   ///
258   ///  * Sign of the step and the comparison operator might disagree:
259   ///
260   ///      for (int i = 0; i < 42; --i)
261   ///
262   ///
263   /// \param Loc       The insert and source location description.
264   /// \param BodyGenCB Callback that will generate the loop body code.
265   /// \param Start     Value of the loop counter for the first iteration.
266   /// \param Stop      Loop counter values past this will stop the
267   ///                  iterations.
268   /// \param Step      Loop counter increment after each iteration; negative
269   ///                  means counting down.
270   /// \param IsSigned  Whether Start, Stop and Step are signed integers.
271   /// \param InclusiveStop Whether  \p Stop itself is a valid value for the loop
272   ///                      counter.
273   /// \param ComputeIP Insertion point for instructions computing the trip
274   ///                  count. Can be used to ensure the trip count is available
275   ///                  at the outermost loop of a loop nest. If not set,
276   ///                  defaults to the preheader of the generated loop.
277   /// \param Name      Base name used to derive BB and instruction names.
278   ///
279   /// \returns An object representing the created control flow structure which
280   ///          can be used for loop-associated directives.
281   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
282                                          LoopBodyGenCallbackTy BodyGenCB,
283                                          Value *Start, Value *Stop, Value *Step,
284                                          bool IsSigned, bool InclusiveStop,
285                                          InsertPointTy ComputeIP = {},
286                                          const Twine &Name = "loop");
287 
288   /// Collapse a loop nest into a single loop.
289   ///
290   /// Merges loops of a loop nest into a single CanonicalLoopNest representation
291   /// that has the same number of innermost loop iterations as the origin loop
292   /// nest. The induction variables of the input loops are derived from the
293   /// collapsed loop's induction variable. This is intended to be used to
294   /// implement OpenMP's collapse clause. Before applying a directive,
295   /// collapseLoops normalizes a loop nest to contain only a single loop and the
296   /// directive's implementation does not need to handle multiple loops itself.
297   /// This does not remove the need to handle all loop nest handling by
298   /// directives, such as the ordered(<n>) clause or the simd schedule-clause
299   /// modifier of the worksharing-loop directive.
300   ///
301   /// Example:
302   /// \code
303   ///   for (int i = 0; i < 7; ++i) // Canonical loop "i"
304   ///     for (int j = 0; j < 9; ++j) // Canonical loop "j"
305   ///       body(i, j);
306   /// \endcode
307   ///
308   /// After collapsing with Loops={i,j}, the loop is changed to
309   /// \code
310   ///   for (int ij = 0; ij < 63; ++ij) {
311   ///     int i = ij / 9;
312   ///     int j = ij % 9;
313   ///     body(i, j);
314   ///   }
315   /// \endcode
316   ///
317   /// In the current implementation, the following limitations apply:
318   ///
319   ///  * All input loops have an induction variable of the same type.
320   ///
321   ///  * The collapsed loop will have the same trip count integer type as the
322   ///    input loops. Therefore it is possible that the collapsed loop cannot
323   ///    represent all iterations of the input loops. For instance, assuming a
324   ///    32 bit integer type, and two input loops both iterating 2^16 times, the
325   ///    theoretical trip count of the collapsed loop would be 2^32 iteration,
326   ///    which cannot be represented in a 32-bit integer. Behavior is undefined
327   ///    in this case.
328   ///
329   ///  * The trip counts of every input loop must be available at \p ComputeIP.
330   ///    Non-rectangular loops are not yet supported.
331   ///
332   ///  * At each nest level, code between a surrounding loop and its nested loop
333   ///    is hoisted into the loop body, and such code will be executed more
334   ///    often than before collapsing (or not at all if any inner loop iteration
335   ///    has a trip count of 0). This is permitted by the OpenMP specification.
336   ///
337   /// \param DL        Debug location for instructions added for collapsing,
338   ///                  such as instructions to derive the input loops'
339   ///                  induction variables.
340   /// \param Loops     Loops in the loop nest to collapse. Loops are specified
341   ///                  from outermost-to-innermost and every control flow of a
342   ///                  loop's body must pass through its directly nested loop.
343   /// \param ComputeIP Where additional instructions that compute the collapsed
344   ///                  trip count are inserted. If not set, defaults to before
345   ///                  the generated loop.
346   ///
347   /// \returns The CanonicalLoopInfo object representing the collapsed loop.
348   CanonicalLoopInfo *collapseLoops(DebugLoc DL,
349                                    ArrayRef<CanonicalLoopInfo *> Loops,
350                                    InsertPointTy ComputeIP);
351 
352   /// Modifies the canonical loop to be a statically-scheduled workshare loop.
353   ///
354   /// This takes a \p CLI representing a canonical loop, such as the one
355   /// created by \p createCanonicalLoop and emits additional instructions to
356   /// turn it into a workshare loop. In particular, it calls to an OpenMP
357   /// runtime function in the preheader to obtain the loop bounds to be used in
358   /// the current thread, updates the relevant instructions in the canonical
359   /// loop and calls to an OpenMP runtime finalization function after the loop.
360   ///
361   /// \param Loc      The source location description, the insertion location
362   ///                 is not used.
363   /// \param CLI      A descriptor of the canonical loop to workshare.
364   /// \param AllocaIP An insertion point for Alloca instructions usable in the
365   ///                 preheader of the loop.
366   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
367   ///                     the loop.
368   /// \param Chunk    The size of loop chunk considered as a unit when
369   ///                 scheduling. If \p nullptr, defaults to 1.
370   ///
371   /// \returns Updated CanonicalLoopInfo.
372   CanonicalLoopInfo *createStaticWorkshareLoop(const LocationDescription &Loc,
373                                                CanonicalLoopInfo *CLI,
374                                                InsertPointTy AllocaIP,
375                                                bool NeedsBarrier,
376                                                Value *Chunk = nullptr);
377 
378   /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
379   ///
380   /// This takes a \p CLI representing a canonical loop, such as the one
381   /// created by \p createCanonicalLoop and emits additional instructions to
382   /// turn it into a workshare loop. In particular, it calls to an OpenMP
383   /// runtime function in the preheader to obtain, and then in each iteration
384   /// to update the loop counter.
385   /// \param Loc      The source location description, the insertion location
386   ///                 is not used.
387   /// \param CLI      A descriptor of the canonical loop to workshare.
388   /// \param AllocaIP An insertion point for Alloca instructions usable in the
389   ///                 preheader of the loop.
390   /// \param SchedType Type of scheduling to be passed to the init function.
391   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
392   ///                     the loop.
393   /// \param Chunk    The size of loop chunk considered as a unit when
394   ///                 scheduling. If \p nullptr, defaults to 1.
395   ///
396   /// \returns Point where to insert code after the loop.
397   InsertPointTy createDynamicWorkshareLoop(const LocationDescription &Loc,
398                                            CanonicalLoopInfo *CLI,
399                                            InsertPointTy AllocaIP,
400                                            omp::OMPScheduleType SchedType,
401                                            bool NeedsBarrier,
402                                            Value *Chunk = nullptr);
403 
404   /// Modifies the canonical loop to be a workshare loop.
405   ///
406   /// This takes a \p CLI representing a canonical loop, such as the one
407   /// created by \p createCanonicalLoop and emits additional instructions to
408   /// turn it into a workshare loop. In particular, it calls to an OpenMP
409   /// runtime function in the preheader to obtain the loop bounds to be used in
410   /// the current thread, updates the relevant instructions in the canonical
411   /// loop and calls to an OpenMP runtime finalization function after the loop.
412   ///
413   /// \param Loc      The source location description, the insertion location
414   ///                 is not used.
415   /// \param CLI      A descriptor of the canonical loop to workshare.
416   /// \param AllocaIP An insertion point for Alloca instructions usable in the
417   ///                 preheader of the loop.
418   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
419   ///                     the loop.
420   ///
421   /// \returns Updated CanonicalLoopInfo.
422   CanonicalLoopInfo *createWorkshareLoop(const LocationDescription &Loc,
423                                          CanonicalLoopInfo *CLI,
424                                          InsertPointTy AllocaIP,
425                                          bool NeedsBarrier);
426 
427   /// Tile a loop nest.
428   ///
429   /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
430   /// \p Loops must be perfectly nested, from outermost to innermost loop
431   /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
432   /// of every loop and every tile size must be usable in the outermost
433   /// loop's preheader. This implies that the loop nest is rectangular.
434   ///
435   /// Example:
436   /// \code
437   ///   for (int i = 0; i < 15; ++i) // Canonical loop "i"
438   ///     for (int j = 0; j < 14; ++j) // Canonical loop "j"
439   ///         body(i, j);
440   /// \endcode
441   ///
442   /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
443   /// \code
444   ///   for (int i1 = 0; i1 < 3; ++i1)
445   ///     for (int j1 = 0; j1 < 2; ++j1)
446   ///       for (int i2 = 0; i2 < 5; ++i2)
447   ///         for (int j2 = 0; j2 < 7; ++j2)
448   ///           body(i1*5+i2, j1*7+j2);
449   /// \endcode
450   ///
451   /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
452   /// are referred to as the floor, and the loops i2 and j2 are the tiles. Tiling also
453   /// handles non-constant trip counts, non-constant tile sizes and trip counts
454   /// that are not multiples of the tile size. In the latter case the tile loop
455   /// of the last floor-loop iteration will have fewer iterations than specified
456   /// as its tile size.
457   ///
458   ///
459   /// \param DL        Debug location for instructions added by tiling, for
460   ///                  instance the floor- and tile trip count computation.
461   /// \param Loops     Loops to tile. The CanonicalLoopInfo objects are
462   ///                  invalidated by this method, i.e. should not be used
463   ///                  after tiling.
464   /// \param TileSizes For each loop in \p Loops, the tile size for that
465   ///                  dimension.
466   ///
467   /// \returns A list of generated loops. Contains twice as many loops as the
468   ///          input loop nest; the first half are the floor loops and the
469   ///          second half are the tile loops.
470   std::vector<CanonicalLoopInfo *>
471   tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
472             ArrayRef<Value *> TileSizes);
473 
474   /// Generator for '#omp flush'
475   ///
476   /// \param Loc The location where the flush directive was encountered
477   void createFlush(const LocationDescription &Loc);
478 
479   /// Generator for '#omp taskwait'
480   ///
481   /// \param Loc The location where the taskwait directive was encountered.
482   void createTaskwait(const LocationDescription &Loc);
483 
484   /// Generator for '#omp taskyield'
485   ///
486   /// \param Loc The location where the taskyield directive was encountered.
487   void createTaskyield(const LocationDescription &Loc);
488 
489   ///}
490 
491   /// Return the insertion point used by the underlying IRBuilder.
getInsertionPoint()492   InsertPointTy getInsertionPoint() { return Builder.saveIP(); }
493 
494   /// Update the internal location to \p Loc.
updateToLocation(const LocationDescription & Loc)495   bool updateToLocation(const LocationDescription &Loc) {
496     Builder.restoreIP(Loc.IP);
497     Builder.SetCurrentDebugLocation(Loc.DL);
498     return Loc.IP.getBlock() != nullptr;
499   }
500 
501   /// Return the function declaration for the runtime function with \p FnID.
502   FunctionCallee getOrCreateRuntimeFunction(Module &M,
503                                             omp::RuntimeFunction FnID);
504 
505   Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
506 
507   /// Return the (LLVM-IR) string describing the source location \p LocStr.
508   Constant *getOrCreateSrcLocStr(StringRef LocStr);
509 
510   /// Return the (LLVM-IR) string describing the default source location.
511   Constant *getOrCreateDefaultSrcLocStr();
512 
513   /// Return the (LLVM-IR) string describing the source location identified by
514   /// the arguments.
515   Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
516                                  unsigned Line, unsigned Column);
517 
518   /// Return the (LLVM-IR) string describing the source location \p Loc.
519   Constant *getOrCreateSrcLocStr(const LocationDescription &Loc);
520 
521   /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
522   /// TODO: Create an enum class for the Reserve2Flags
523   Value *getOrCreateIdent(Constant *SrcLocStr,
524                           omp::IdentFlag Flags = omp::IdentFlag(0),
525                           unsigned Reserve2Flags = 0);
526 
527   // Get the type corresponding to __kmpc_impl_lanemask_t from the deviceRTL
528   Type *getLanemaskType();
529 
530   /// Generate control flow and cleanup for cancellation.
531   ///
532   /// \param CancelFlag Flag indicating if the cancellation is performed.
533   /// \param CanceledDirective The kind of directive that is canceled.
534   void emitCancelationCheckImpl(Value *CancelFlag,
535                                 omp::Directive CanceledDirective);
536 
537   /// Generate a barrier runtime call.
538   ///
539   /// \param Loc The location at which the request originated and is fulfilled.
540   /// \param DK The directive which caused the barrier
541   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
542   /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
543   ///                        should be checked and acted upon.
544   ///
545   /// \returns The insertion point after the barrier.
546   InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
547                                 omp::Directive DK, bool ForceSimpleCall,
548                                 bool CheckCancelFlag);
549 
550   /// Generate a flush runtime call.
551   ///
552   /// \param Loc The location at which the request originated and is fulfilled.
553   void emitFlush(const LocationDescription &Loc);
554 
555   /// The finalization stack made up of finalize callbacks currently in-flight,
556   /// wrapped into FinalizationInfo objects that reference also the finalization
557   /// target block and the kind of cancellable directive.
558   SmallVector<FinalizationInfo, 8> FinalizationStack;
559 
560   /// Return true if the last entry in the finalization stack is of kind \p DK
561   /// and cancellable.
isLastFinalizationInfoCancellable(omp::Directive DK)562   bool isLastFinalizationInfoCancellable(omp::Directive DK) {
563     return !FinalizationStack.empty() &&
564            FinalizationStack.back().IsCancellable &&
565            FinalizationStack.back().DK == DK;
566   }
567 
568   /// Generate a taskwait runtime call.
569   ///
570   /// \param Loc The location at which the request originated and is fulfilled.
571   void emitTaskwaitImpl(const LocationDescription &Loc);
572 
573   /// Generate a taskyield runtime call.
574   ///
575   /// \param Loc The location at which the request originated and is fulfilled.
576   void emitTaskyieldImpl(const LocationDescription &Loc);
577 
578   /// Return the current thread ID.
579   ///
580   /// \param Ident The ident (ident_t*) describing the query origin.
581   Value *getOrCreateThreadID(Value *Ident);
582 
583   /// The underlying LLVM-IR module
584   Module &M;
585 
586   /// The LLVM-IR Builder used to create IR.
587   IRBuilder<> Builder;
588 
589   /// Map to remember source location strings
590   StringMap<Constant *> SrcLocStrMap;
591 
592   /// Map to remember existing ident_t*.
593   DenseMap<std::pair<Constant *, uint64_t>, Value *> IdentMap;
594 
595   /// Helper that contains information about regions we need to outline
596   /// during finalization.
597   struct OutlineInfo {
598     using PostOutlineCBTy = std::function<void(Function &)>;
599     PostOutlineCBTy PostOutlineCB;
600     BasicBlock *EntryBB, *ExitBB;
601 
602     /// Collect all blocks in between EntryBB and ExitBB in both the given
603     /// vector and set.
604     void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
605                        SmallVectorImpl<BasicBlock *> &BlockVector);
606 
607     /// Return the function that contains the region to be outlined.
getFunctionOutlineInfo608     Function *getFunction() const { return EntryBB->getParent(); }
609   };
610 
611   /// Collection of regions that need to be outlined during finalization.
612   SmallVector<OutlineInfo, 16> OutlineInfos;
613 
614   /// Collection of owned canonical loop objects that eventually need to be
615   /// free'd.
616   std::forward_list<CanonicalLoopInfo> LoopInfos;
617 
618   /// Add a new region that will be outlined later.
addOutlineInfo(OutlineInfo && OI)619   void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
620 
621   /// An ordered map of auto-generated variables to their unique names.
622   /// It stores variables with the following names: 1) ".gomp_critical_user_" +
623   /// <critical_section_name> + ".var" for "omp critical" directives; 2)
624   /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
625   /// variables.
626   StringMap<AssertingVH<Constant>, BumpPtrAllocator> InternalVars;
627 
628   /// Create the global variable holding the offload mappings information.
629   GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
630                                         std::string VarName);
631 
632   /// Create the global variable holding the offload names information.
633   GlobalVariable *
634   createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
635                         std::string VarName);
636 
637 public:
638   /// Generator for __kmpc_copyprivate
639   ///
640   /// \param Loc The source location description.
641   /// \param BufSize Number of elements in the buffer.
642   /// \param CpyBuf List of pointers to data to be copied.
643   /// \param CpyFn function to call for copying data.
644   /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
645   ///
646   /// \return The insertion position *after* the CopyPrivate call.
647 
648   InsertPointTy createCopyPrivate(const LocationDescription &Loc,
649                                   llvm::Value *BufSize, llvm::Value *CpyBuf,
650                                   llvm::Value *CpyFn, llvm::Value *DidIt);
651 
652   /// Generator for '#omp single'
653   ///
654   /// \param Loc The source location description.
655   /// \param BodyGenCB Callback that will generate the region code.
656   /// \param FiniCB Callback to finalize variable copies.
657   /// \param DidIt Local variable used as a flag to indicate 'single' thread
658   ///
659   /// \returns The insertion position *after* the single call.
660   InsertPointTy createSingle(const LocationDescription &Loc,
661                              BodyGenCallbackTy BodyGenCB,
662                              FinalizeCallbackTy FiniCB, llvm::Value *DidIt);
663 
664   /// Generator for '#omp master'
665   ///
666   /// \param Loc The insert and source location description.
667   /// \param BodyGenCB Callback that will generate the region code.
668   /// \param FiniCB Callback to finalize variable copies.
669   ///
670   /// \returns The insertion position *after* the master.
671   InsertPointTy createMaster(const LocationDescription &Loc,
672                              BodyGenCallbackTy BodyGenCB,
673                              FinalizeCallbackTy FiniCB);
674 
675   /// Generator for '#omp masked'
676   ///
677   /// \param Loc The insert and source location description.
678   /// \param BodyGenCB Callback that will generate the region code.
679   /// \param FiniCB Callback to finalize variable copies.
680   ///
681   /// \returns The insertion position *after* the masked region.
682   InsertPointTy createMasked(const LocationDescription &Loc,
683                              BodyGenCallbackTy BodyGenCB,
684                              FinalizeCallbackTy FiniCB, Value *Filter);
685 
686   /// Generator for '#omp critical'
687   ///
688   /// \param Loc The insert and source location description.
689   /// \param BodyGenCB Callback that will generate the region body code.
690   /// \param FiniCB Callback to finalize variable copies.
691   /// \param CriticalName name of the lock used by the critical directive
692   /// \param HintInst Hint Instruction for hint clause associated with critical
693   ///
694   /// \returns The insertion position *after* the critical region.
695   InsertPointTy createCritical(const LocationDescription &Loc,
696                                BodyGenCallbackTy BodyGenCB,
697                                FinalizeCallbackTy FiniCB,
698                                StringRef CriticalName, Value *HintInst);
699 
700   /// Generator for '#omp sections'
701   ///
702   /// \param Loc The insert and source location description.
703   /// \param AllocaIP The insertion points to be used for alloca instructions.
704   /// \param SectionCBs Callbacks that will generate body of each section.
705   /// \param PrivCB Callback to copy a given variable (think copy constructor).
706   /// \param FiniCB Callback to finalize variable copies.
707   /// \param IsCancellable Flag to indicate a cancellable parallel region.
708   /// \param IsNowait If true, the barrier that ensures all sections are
709   /// executed before moving forward will not be generated.
710   /// \returns The insertion position *after* the sections.
711   InsertPointTy createSections(const LocationDescription &Loc,
712                                InsertPointTy AllocaIP,
713                                ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
714                                PrivatizeCallbackTy PrivCB,
715                                FinalizeCallbackTy FiniCB, bool IsCancellable,
716                                bool IsNowait);
717 
718   /// Generator for '#omp section'
719   ///
720   /// \param Loc The insert and source location description.
721   /// \param BodyGenCB Callback that will generate the region body code.
722   /// \param FiniCB Callback to finalize variable copies.
723   /// \returns The insertion position *after* the section.
724   InsertPointTy createSection(const LocationDescription &Loc,
725                               BodyGenCallbackTy BodyGenCB,
726                               FinalizeCallbackTy FiniCB);
727 
728   /// Generate conditional branch and relevant BasicBlocks through which private
729   /// threads copy the 'copyin' variables from Master copy to threadprivate
730   /// copies.
731   ///
732   /// \param IP insertion block for copyin conditional
733   /// \param MasterAddr a pointer to the master variable
734   /// \param PrivateAddr a pointer to the threadprivate variable
735   /// \param IntPtrTy Pointer size type
736   /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
737   ///        and copy.in.end block
738   ///
739   /// \returns The insertion point where copying operation to be emitted.
740   InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
741                                          Value *PrivateAddr,
742                                          llvm::IntegerType *IntPtrTy,
743                                          bool BranchtoEnd = true);
744 
745   /// Create a runtime call for kmpc_Alloc
746   ///
747   /// \param Loc The insert and source location description.
748   /// \param Size Size of allocated memory space
749   /// \param Allocator Allocator information instruction
750   /// \param Name Name of call Instruction for OMP_alloc
751   ///
752   /// \returns CallInst to the OMP_Alloc call
753   CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
754                            Value *Allocator, std::string Name = "");
755 
756   /// Create a runtime call for kmpc_free
757   ///
758   /// \param Loc The insert and source location description.
759   /// \param Addr Address of memory space to be freed
760   /// \param Allocator Allocator information instruction
761   /// \param Name Name of call Instruction for OMP_Free
762   ///
763   /// \returns CallInst to the OMP_Free call
764   CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
765                           Value *Allocator, std::string Name = "");
766 
767   /// Create a runtime call for kmpc_threadprivate_cached
768   ///
769   /// \param Loc The insert and source location description.
770   /// \param Pointer pointer to data to be cached
771   /// \param Size size of data to be cached
772   /// \param Name Name of call Instruction for callinst
773   ///
774   /// \returns CallInst to the thread private cache call.
775   CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
776                                       llvm::Value *Pointer,
777                                       llvm::ConstantInt *Size,
778                                       const llvm::Twine &Name = Twine(""));
779 
780   /// Declarations for LLVM-IR types (simple, array, function and structure) are
781   /// generated below. Their names are defined and used in OpenMPKinds.def. Here
782   /// we provide the declarations, the initializeTypes function will provide the
783   /// values.
784   ///
785   ///{
786 #define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
787 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
788   ArrayType *VarName##Ty = nullptr;                                            \
789   PointerType *VarName##PtrTy = nullptr;
790 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
791   FunctionType *VarName = nullptr;                                             \
792   PointerType *VarName##Ptr = nullptr;
793 #define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 \
794   StructType *VarName = nullptr;                                               \
795   PointerType *VarName##Ptr = nullptr;
796 #include "llvm/Frontend/OpenMP/OMPKinds.def"
797 
798   ///}
799 
800 private:
801   /// Create all simple and struct types exposed by the runtime and remember
802   /// the llvm::PointerTypes of them for easy access later.
803   void initializeTypes(Module &M);
804 
805   /// Common interface for generating entry calls for OMP Directives.
806   /// If the directive has a region/body, it will set the insertion
807   /// point to the body.
808   ///
809   /// \param OMPD Directive to generate entry blocks for
810   /// \param EntryCall Call to the entry OMP Runtime Function
811   /// \param ExitBB block where the region ends.
812   /// \param Conditional indicate if the entry call result will be used
813   ///        to evaluate a conditional of whether a thread will execute
814   ///        body code or not.
815   ///
816   /// \return The insertion position in exit block
817   InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
818                                          BasicBlock *ExitBB,
819                                          bool Conditional = false);
820 
821   /// Common interface to finalize the region
822   ///
823   /// \param OMPD Directive to generate exiting code for
824   /// \param FinIP Insertion point for emitting Finalization code and exit call
825   /// \param ExitCall Call to the ending OMP Runtime Function
826   /// \param HasFinalize indicate if the directive will require finalization
827   ///         and has a finalization callback in the stack that
828   ///        should be called.
829   ///
830   /// \return The insertion position in exit block
831   InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
832                                         InsertPointTy FinIP,
833                                         Instruction *ExitCall,
834                                         bool HasFinalize = true);
835 
836   /// Common Interface to generate OMP inlined regions
837   ///
838   /// \param OMPD Directive to generate inlined region for
839   /// \param EntryCall Call to the entry OMP Runtime Function
840   /// \param ExitCall Call to the ending OMP Runtime Function
841   /// \param BodyGenCB Body code generation callback.
842   /// \param FiniCB Finalization Callback. Will be called when finalizing region
843   /// \param Conditional indicate if the entry call result will be used
844   ///        to evaluate a conditional of whether a thread will execute
845   ///        body code or not.
846   /// \param HasFinalize indicate if the directive will require finalization
847   ///        and has a finalization callback in the stack that
848   ///        should be called.
849   /// \param IsCancellable if HasFinalize is set to true, indicate if the
850   ///        directive should be cancellable.
851   /// \return The insertion point after the region
852 
853   InsertPointTy
854   EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
855                        Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
856                        FinalizeCallbackTy FiniCB, bool Conditional = false,
857                        bool HasFinalize = true, bool IsCancellable = false);
858 
859   /// Get the platform-specific name separator.
860   /// \param Parts different parts of the final name that needs separation
861   /// \param FirstSeparator First separator used between the initial two
862   ///        parts of the name.
863   /// \param Separator separator used between all of the rest consecutive
864   ///        parts of the name
865   static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
866                                            StringRef FirstSeparator,
867                                            StringRef Separator);
868 
869   /// Gets (if a variable with the given name already exists) or creates an
870   /// internal global variable with the specified Name. The created variable has
871   /// linkage CommonLinkage by default and is initialized by null value.
872   /// \param Ty Type of the global variable. If it already exists, the type
873   /// must be the same.
874   /// \param Name Name of the variable.
875   Constant *getOrCreateOMPInternalVariable(Type *Ty, const Twine &Name,
876                                            unsigned AddressSpace = 0);
877 
878   /// Returns corresponding lock object for the specified critical region
879   /// name. If the lock object does not exist it is created, otherwise the
880   /// reference to the existing copy is returned.
881   /// \param CriticalName Name of the critical region.
882   ///
883   Value *getOMPCriticalRegionLock(StringRef CriticalName);
884 
885   /// Callback type for Atomic Expression update
886   /// ex:
887   /// \code{.cpp}
888   /// unsigned x = 0;
889   /// #pragma omp atomic update
890   /// x = Expr(x_old);  //Expr() is any legal operation
891   /// \endcode
892   ///
893   /// \param XOld the value of the atomic memory address to use for update
894   /// \param IRB reference to the IRBuilder to use
895   ///
896   /// \returns Value to update X to.
897   using AtomicUpdateCallbackTy =
898       const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
899 
900 private:
901   enum AtomicKind { Read, Write, Update, Capture };
902 
903   /// Determine whether to emit flush or not
904   ///
905   /// \param Loc    The insert and source location description.
906   /// \param AO     The required atomic ordering
907   /// \param AK     The OpenMP atomic operation kind used.
908   ///
909   /// \returns whether a flush was emitted or not
910   bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
911                                     AtomicOrdering AO, AtomicKind AK);
912 
913   /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
914   /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
915   /// Only Scalar data types.
916   ///
917   /// \param AllocIP	  Instruction to create AllocaInst before.
918   /// \param X			    The target atomic pointer to be updated
919   /// \param Expr		    The value to update X with.
920   /// \param AO			    Atomic ordering of the generated atomic
921   ///                   instructions.
922   /// \param RMWOp		  The binary operation used for update. If
923   ///                   operation is not supported by atomicRMW,
924   ///                   or belong to {FADD, FSUB, BAD_BINOP}.
925   ///                   Then a `cmpExch` based	atomic will be generated.
926   /// \param UpdateOp 	Code generator for complex expressions that cannot be
927   ///                   expressed through atomicrmw instruction.
928   /// \param VolatileX       true if \a X is volatile.
929   /// \param IsXLHSInRHSPart true if \a X is Left H.S. in Right H.S. part of
930   ///                        the update expression, false otherwise.
931   ///                        (e.g. true for X = X BinOp Expr)
932   ///
933   /// \returns A pair of the old value of X before the update, and the value
934   ///          used for the update.
935   std::pair<Value *, Value *> emitAtomicUpdate(Instruction *AllocIP, Value *X,
936                                                Value *Expr, AtomicOrdering AO,
937                                                AtomicRMWInst::BinOp RMWOp,
938                                                AtomicUpdateCallbackTy &UpdateOp,
939                                                bool VolatileX,
940                                                bool IsXLHSInRHSPart);
941 
942   /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
943   ///
944   /// \returns The instruction
945   Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
946                                 AtomicRMWInst::BinOp RMWOp);
947 
948 public:
949   /// a struct to pack relevant information while generating atomic Ops
950   struct AtomicOpValue {
951     Value *Var = nullptr;
952     bool IsSigned = false;
953     bool IsVolatile = false;
954   };
955 
956   /// Emit atomic Read for : V = X --- Only Scalar data types.
957   ///
958   /// \param Loc    The insert and source location description.
959   /// \param X			The target pointer to be atomically read
960   /// \param V			Memory address where to store atomically read
961   /// 					    value
962   /// \param AO			Atomic ordering of the generated atomic
963   /// 					    instructions.
964   ///
965   /// \return Insertion point after generated atomic read IR.
966   InsertPointTy createAtomicRead(const LocationDescription &Loc,
967                                  AtomicOpValue &X, AtomicOpValue &V,
968                                  AtomicOrdering AO);
969 
970   /// Emit atomic write for : X = Expr --- Only Scalar data types.
971   ///
972   /// \param Loc    The insert and source location description.
973   /// \param X			The target pointer to be atomically written to
974   /// \param Expr		The value to store.
975   /// \param AO			Atomic ordering of the generated atomic
976   ///               instructions.
977   ///
978   /// \return Insertion point after generated atomic Write IR.
979   InsertPointTy createAtomicWrite(const LocationDescription &Loc,
980                                   AtomicOpValue &X, Value *Expr,
981                                   AtomicOrdering AO);
982 
983   /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
984   /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
985   /// Only Scalar data types.
986   ///
987   /// \param Loc      The insert and source location description.
988   /// \param AllocIP  Instruction to create AllocaInst before.
989   /// \param X        The target atomic pointer to be updated
990   /// \param Expr     The value to update X with.
991   /// \param AO       Atomic ordering of the generated atomic instructions.
992   /// \param RMWOp    The binary operation used for update. If operation
993   ///                 is	not supported by atomicRMW, or belong to
994   ///	                {FADD, FSUB, BAD_BINOP}. Then a `cmpExch` based
995   ///                 atomic will be generated.
996   /// \param UpdateOp 	Code generator for complex expressions that cannot be
997   ///                   expressed through atomicrmw instruction.
998   /// \param IsXLHSInRHSPart true if \a X is Left H.S. in Right H.S. part of
999   ///                        the update expression, false otherwise.
1000   ///	                       (e.g. true for X = X BinOp Expr)
1001   ///
1002   /// \return Insertion point after generated atomic update IR.
1003   InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
1004                                    Instruction *AllocIP, AtomicOpValue &X,
1005                                    Value *Expr, AtomicOrdering AO,
1006                                    AtomicRMWInst::BinOp RMWOp,
1007                                    AtomicUpdateCallbackTy &UpdateOp,
1008                                    bool IsXLHSInRHSPart);
1009 
1010   /// Emit atomic update for constructs: --- Only Scalar data types
1011   /// V = X; X = X BinOp Expr ,
1012   /// X = X BinOp Expr; V = X,
1013   /// V = X; X = Expr BinOp X,
1014   /// X = Expr BinOp X; V = X,
1015   /// V = X; X = UpdateOp(X),
1016   /// X = UpdateOp(X); V = X,
1017   ///
1018   /// \param Loc        The insert and source location description.
1019   /// \param AllocIP    Instruction to create AllocaInst before.
1020   /// \param X          The target atomic pointer to be updated
1021   /// \param V          Memory address where to store captured value
1022   /// \param Expr       The value to update X with.
1023   /// \param AO         Atomic ordering of the generated atomic instructions
1024   /// \param RMWOp      The binary operation used for update. If
1025   ///                   operation is not supported by atomicRMW, or belong to
1026   ///	                  {FADD, FSUB, BAD_BINOP}. Then a cmpExch based
1027   ///                   atomic will be generated.
1028   /// \param UpdateOp   Code generator for complex expressions that cannot be
1029   ///                   expressed through atomicrmw instruction.
1030   /// \param UpdateExpr true if X is an in place update of the form
1031   ///                   X = X BinOp Expr or X = Expr BinOp X
1032   /// \param IsXLHSInRHSPart true if X is Left H.S. in Right H.S. part of the
1033   ///                        update expression, false otherwise.
1034   ///                        (e.g. true for X = X BinOp Expr)
1035   /// \param IsPostfixUpdate true if original value of 'x' must be stored in
1036   ///                        'v', not an updated one.
1037   ///
1038   /// \return Insertion point after generated atomic capture IR.
1039   InsertPointTy
1040   createAtomicCapture(const LocationDescription &Loc, Instruction *AllocIP,
1041                       AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
1042                       AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
1043                       AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
1044                       bool IsPostfixUpdate, bool IsXLHSInRHSPart);
1045 
1046   /// Create the control flow structure of a canonical OpenMP loop.
1047   ///
1048   /// The emitted loop will be disconnected, i.e. no edge to the loop's
1049   /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
1050   /// IRBuilder location is not preserved.
1051   ///
1052   /// \param DL        DebugLoc used for the instructions in the skeleton.
1053   /// \param TripCount Value to be used for the trip count.
1054   /// \param F         Function in which to insert the BasicBlocks.
1055   /// \param PreInsertBefore  Where to insert BBs that execute before the body,
1056   ///                         typically the body itself.
1057   /// \param PostInsertBefore Where to insert BBs that execute after the body.
1058   /// \param Name      Base name used to derive BB
1059   ///                  and instruction names.
1060   ///
1061   /// \returns The CanonicalLoopInfo that represents the emitted loop.
1062   CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
1063                                         Function *F,
1064                                         BasicBlock *PreInsertBefore,
1065                                         BasicBlock *PostInsertBefore,
1066                                         const Twine &Name = {});
1067 };
1068 
1069 /// Class to represented the control flow structure of an OpenMP canonical loop.
1070 ///
1071 /// The control-flow structure is standardized for easy consumption by
1072 /// directives associated with loops. For instance, the worksharing-loop
1073 /// construct may change this control flow such that each loop iteration is
1074 /// executed on only one thread.
1075 ///
1076 /// The control flow can be described as follows:
1077 ///
1078 ///     Preheader
1079 ///        |
1080 ///  /-> Header
1081 ///  |     |
1082 ///  |    Cond---\
1083 ///  |     |     |
1084 ///  |    Body   |
1085 ///  |    | |    |
1086 ///  |   <...>   |
1087 ///  |    | |    |
1088 ///   \--Latch   |
1089 ///              |
1090 ///             Exit
1091 ///              |
1092 ///            After
1093 ///
1094 /// Code in the header, condition block, latch and exit block must not have any
1095 /// side-effect. The body block is the single entry point into the loop body,
1096 /// which may contain arbitrary control flow as long as all control paths
1097 /// eventually branch to the latch block.
1098 ///
1099 /// Defined outside OpenMPIRBuilder because one cannot forward-declare nested
1100 /// classes.
1101 class CanonicalLoopInfo {
1102   friend class OpenMPIRBuilder;
1103 
1104 private:
1105   /// Whether this object currently represents a loop.
1106   bool IsValid = false;
1107 
1108   BasicBlock *Preheader;
1109   BasicBlock *Header;
1110   BasicBlock *Cond;
1111   BasicBlock *Body;
1112   BasicBlock *Latch;
1113   BasicBlock *Exit;
1114   BasicBlock *After;
1115 
1116   /// Add the control blocks of this loop to \p BBs.
1117   ///
1118   /// This does not include any block from the body, including the one returned
1119   /// by getBody().
1120   void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
1121 
1122 public:
1123   /// The preheader ensures that there is only a single edge entering the loop.
1124   /// Code that must be execute before any loop iteration can be emitted here,
1125   /// such as computing the loop trip count and begin lifetime markers. Code in
1126   /// the preheader is not considered part of the canonical loop.
getPreheader()1127   BasicBlock *getPreheader() const { return Preheader; }
1128 
1129   /// The header is the entry for each iteration. In the canonical control flow,
1130   /// it only contains the PHINode for the induction variable.
getHeader()1131   BasicBlock *getHeader() const { return Header; }
1132 
1133   /// The condition block computes whether there is another loop iteration. If
1134   /// yes, branches to the body; otherwise to the exit block.
getCond()1135   BasicBlock *getCond() const { return Cond; }
1136 
1137   /// The body block is the single entry for a loop iteration and not controlled
1138   /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
1139   /// eventually branch to the \p Latch block.
getBody()1140   BasicBlock *getBody() const { return Body; }
1141 
1142   /// Reaching the latch indicates the end of the loop body code. In the
1143   /// canonical control flow, it only contains the increment of the induction
1144   /// variable.
getLatch()1145   BasicBlock *getLatch() const { return Latch; }
1146 
1147   /// Reaching the exit indicates no more iterations are being executed.
getExit()1148   BasicBlock *getExit() const { return Exit; }
1149 
1150   /// The after block is intended for clean-up code such as lifetime end
1151   /// markers. It is separate from the exit block to ensure, analogous to the
1152   /// preheader, it having just a single entry edge and being free from PHI
1153   /// nodes should there be multiple loop exits (such as from break
1154   /// statements/cancellations).
getAfter()1155   BasicBlock *getAfter() const { return After; }
1156 
1157   /// Returns the llvm::Value containing the number of loop iterations. It must
1158   /// be valid in the preheader and always interpreted as an unsigned integer of
1159   /// any bit-width.
getTripCount()1160   Value *getTripCount() const {
1161     Instruction *CmpI = &Cond->front();
1162     assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
1163     return CmpI->getOperand(1);
1164   }
1165 
1166   /// Returns the instruction representing the current logical induction
1167   /// variable. Always unsigned, always starting at 0 with an increment of one.
getIndVar()1168   Instruction *getIndVar() const {
1169     Instruction *IndVarPHI = &Header->front();
1170     assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
1171     return IndVarPHI;
1172   }
1173 
1174   /// Return the type of the induction variable (and the trip count).
getIndVarType()1175   Type *getIndVarType() const { return getIndVar()->getType(); }
1176 
1177   /// Return the insertion point for user code before the loop.
getPreheaderIP()1178   OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
1179     return {Preheader, std::prev(Preheader->end())};
1180   };
1181 
1182   /// Return the insertion point for user code in the body.
getBodyIP()1183   OpenMPIRBuilder::InsertPointTy getBodyIP() const {
1184     return {Body, Body->begin()};
1185   };
1186 
1187   /// Return the insertion point for user code after the loop.
getAfterIP()1188   OpenMPIRBuilder::InsertPointTy getAfterIP() const {
1189     return {After, After->begin()};
1190   };
1191 
getFunction()1192   Function *getFunction() const { return Header->getParent(); }
1193 
1194   /// Consistency self-check.
1195   void assertOK() const;
1196 };
1197 
1198 } // end namespace llvm
1199 
1200 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
1201