xref: /llvm-project/llvm/include/llvm/Analysis/MustExecute.h (revision 0da2ba811ac8a01509bc533428941fb9519c0715)
1 //===- MustExecute.h - Is an instruction known to execute--------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// Contains a collection of routines for determining if a given instruction is
10 /// guaranteed to execute if a given point in control flow is reached. The most
11 /// common example is an instruction within a loop being provably executed if we
12 /// branch to the header of it's containing loop.
13 ///
14 /// There are two interfaces available to determine if an instruction is
15 /// executed once a given point in the control flow is reached:
16 /// 1) A loop-centric one derived from LoopSafetyInfo.
17 /// 2) A "must be executed context"-based one implemented in the
18 ///    MustBeExecutedContextExplorer.
19 /// Please refer to the class comments for more information.
20 ///
21 //===----------------------------------------------------------------------===//
22 
23 #ifndef LLVM_ANALYSIS_MUSTEXECUTE_H
24 #define LLVM_ANALYSIS_MUSTEXECUTE_H
25 
26 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/ADT/DenseSet.h"
28 #include "llvm/Analysis/InstructionPrecedenceTracking.h"
29 #include "llvm/IR/EHPersonalities.h"
30 #include "llvm/IR/PassManager.h"
31 
32 namespace llvm {
33 
34 namespace {
35 template <typename T> using GetterTy = std::function<T *(const Function &F)>;
36 }
37 
38 class BasicBlock;
39 class DominatorTree;
40 class Loop;
41 class LoopInfo;
42 class PostDominatorTree;
43 class raw_ostream;
44 
45 /// Captures loop safety information.
46 /// It keep information for loop blocks may throw exception or otherwise
47 /// exit abnormally on any iteration of the loop which might actually execute
48 /// at runtime.  The primary way to consume this information is via
49 /// isGuaranteedToExecute below, but some callers bailout or fallback to
50 /// alternate reasoning if a loop contains any implicit control flow.
51 /// NOTE: LoopSafetyInfo contains cached information regarding loops and their
52 /// particular blocks. This information is only dropped on invocation of
53 /// computeLoopSafetyInfo. If the loop or any of its block is deleted, or if
54 /// any thrower instructions have been added or removed from them, or if the
55 /// control flow has changed, or in case of other meaningful modifications, the
56 /// LoopSafetyInfo needs to be recomputed. If a meaningful modifications to the
57 /// loop were made and the info wasn't recomputed properly, the behavior of all
58 /// methods except for computeLoopSafetyInfo is undefined.
59 class LoopSafetyInfo {
60   // Used to update funclet bundle operands.
61   DenseMap<BasicBlock *, ColorVector> BlockColors;
62 
63 protected:
64   /// Computes block colors.
65   void computeBlockColors(const Loop *CurLoop);
66 
67 public:
68   /// Returns block colors map that is used to update funclet operand bundles.
69   const DenseMap<BasicBlock *, ColorVector> &getBlockColors() const;
70 
71   /// Copy colors of block \p Old into the block \p New.
72   void copyColors(BasicBlock *New, BasicBlock *Old);
73 
74   /// Returns true iff the block \p BB potentially may throw exception. It can
75   /// be false-positive in cases when we want to avoid complex analysis.
76   virtual bool blockMayThrow(const BasicBlock *BB) const = 0;
77 
78   /// Returns true iff any block of the loop for which this info is contains an
79   /// instruction that may throw or otherwise exit abnormally.
80   virtual bool anyBlockMayThrow() const = 0;
81 
82   /// Return true if we must reach the block \p BB under assumption that the
83   /// loop \p CurLoop is entered.
84   bool allLoopPathsLeadToBlock(const Loop *CurLoop, const BasicBlock *BB,
85                                const DominatorTree *DT) const;
86 
87   /// Computes safety information for a loop checks loop body & header for
88   /// the possibility of may throw exception, it takes LoopSafetyInfo and loop
89   /// as argument. Updates safety information in LoopSafetyInfo argument.
90   /// Note: This is defined to clear and reinitialize an already initialized
91   /// LoopSafetyInfo.  Some callers rely on this fact.
92   virtual void computeLoopSafetyInfo(const Loop *CurLoop) = 0;
93 
94   /// Returns true if the instruction in a loop is guaranteed to execute at
95   /// least once (under the assumption that the loop is entered).
96   virtual bool isGuaranteedToExecute(const Instruction &Inst,
97                                      const DominatorTree *DT,
98                                      const Loop *CurLoop) const = 0;
99 
100   LoopSafetyInfo() = default;
101 
102   virtual ~LoopSafetyInfo() = default;
103 };
104 
105 
106 /// Simple and conservative implementation of LoopSafetyInfo that can give
107 /// false-positive answers to its queries in order to avoid complicated
108 /// analysis.
109 class SimpleLoopSafetyInfo: public LoopSafetyInfo {
110   bool MayThrow = false;       // The current loop contains an instruction which
111                                // may throw.
112   bool HeaderMayThrow = false; // Same as previous, but specific to loop header
113 
114 public:
115   bool blockMayThrow(const BasicBlock *BB) const override;
116 
117   bool anyBlockMayThrow() const override;
118 
119   void computeLoopSafetyInfo(const Loop *CurLoop) override;
120 
121   bool isGuaranteedToExecute(const Instruction &Inst,
122                              const DominatorTree *DT,
123                              const Loop *CurLoop) const override;
124 };
125 
126 /// This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to
127 /// give precise answers on "may throw" queries. This implementation uses cache
128 /// that should be invalidated by calling the methods insertInstructionTo and
129 /// removeInstruction whenever we modify a basic block's contents by adding or
130 /// removing instructions.
131 class ICFLoopSafetyInfo: public LoopSafetyInfo {
132   bool MayThrow = false;       // The current loop contains an instruction which
133                                // may throw.
134   // Contains information about implicit control flow in this loop's blocks.
135   mutable ImplicitControlFlowTracking ICF;
136   // Contains information about instruction that may possibly write memory.
137   mutable MemoryWriteTracking MW;
138 
139 public:
140   bool blockMayThrow(const BasicBlock *BB) const override;
141 
142   bool anyBlockMayThrow() const override;
143 
144   void computeLoopSafetyInfo(const Loop *CurLoop) override;
145 
146   bool isGuaranteedToExecute(const Instruction &Inst,
147                              const DominatorTree *DT,
148                              const Loop *CurLoop) const override;
149 
150   /// Returns true if we could not execute a memory-modifying instruction before
151   /// we enter \p BB under assumption that \p CurLoop is entered.
152   bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop)
153       const;
154 
155   /// Returns true if we could not execute a memory-modifying instruction before
156   /// we execute \p I under assumption that \p CurLoop is entered.
157   bool doesNotWriteMemoryBefore(const Instruction &I, const Loop *CurLoop)
158       const;
159 
160   /// Inform the safety info that we are planning to insert a new instruction
161   /// \p Inst into the basic block \p BB. It will make all cache updates to keep
162   /// it correct after this insertion.
163   void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB);
164 
165   /// Inform safety info that we are planning to remove the instruction \p Inst
166   /// from its block. It will make all cache updates to keep it correct after
167   /// this removal.
168   void removeInstruction(const Instruction *Inst);
169 };
170 
171 bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI);
172 
173 struct MustBeExecutedContextExplorer;
174 
175 /// Enum that allows us to spell out the direction.
176 enum class ExplorationDirection {
177   BACKWARD = 0,
178   FORWARD = 1,
179 };
180 
181 /// Must be executed iterators visit stretches of instructions that are
182 /// guaranteed to be executed together, potentially with other instruction
183 /// executed in-between.
184 ///
185 /// Given the following code, and assuming all statements are single
186 /// instructions which transfer execution to the successor (see
187 /// isGuaranteedToTransferExecutionToSuccessor), there are two possible
188 /// outcomes. If we start the iterator at A, B, or E, we will visit only A, B,
189 /// and E. If we start at C or D, we will visit all instructions A-E.
190 ///
191 /// \code
192 ///   A;
193 ///   B;
194 ///   if (...) {
195 ///     C;
196 ///     D;
197 ///   }
198 ///   E;
199 /// \endcode
200 ///
201 ///
202 /// Below is the example extneded with instructions F and G. Now we assume F
203 /// might not transfer execution to it's successor G. As a result we get the
204 /// following visit sets:
205 ///
206 /// Start Instruction   | Visit Set
207 /// A                   | A, B,       E, F
208 ///    B                | A, B,       E, F
209 ///       C             | A, B, C, D, E, F
210 ///          D          | A, B, C, D, E, F
211 ///             E       | A, B,       E, F
212 ///                F    | A, B,       E, F
213 ///                   G | A, B,       E, F, G
214 ///
215 ///
216 /// \code
217 ///   A;
218 ///   B;
219 ///   if (...) {
220 ///     C;
221 ///     D;
222 ///   }
223 ///   E;
224 ///   F;  // Might not transfer execution to its successor G.
225 ///   G;
226 /// \endcode
227 ///
228 ///
229 /// A more complex example involving conditionals, loops, break, and continue
230 /// is shown below. We again assume all instructions will transmit control to
231 /// the successor and we assume we can prove the inner loop to be finite. We
232 /// omit non-trivial branch conditions as the exploration is oblivious to them.
233 /// Constant branches are assumed to be unconditional in the CFG. The resulting
234 /// visist sets are shown in the table below.
235 ///
236 /// \code
237 ///   A;
238 ///   while (true) {
239 ///     B;
240 ///     if (...)
241 ///       C;
242 ///     if (...)
243 ///       continue;
244 ///     D;
245 ///     if (...)
246 ///       break;
247 ///     do {
248 ///       if (...)
249 ///         continue;
250 ///       E;
251 ///     } while (...);
252 ///     F;
253 ///   }
254 ///   G;
255 /// \endcode
256 ///
257 /// Start Instruction    | Visit Set
258 /// A                    | A, B
259 ///    B                 | A, B
260 ///       C              | A, B, C
261 ///          D           | A, B,    D
262 ///             E        | A, B,    D, E, F
263 ///                F     | A, B,    D,    F
264 ///                   G  | A, B,    D,       G
265 ///
266 ///
267 /// Note that the examples show optimal visist sets but not necessarily the ones
268 /// derived by the explorer depending on the available CFG analyses (see
269 /// MustBeExecutedContextExplorer). Also note that we, depending on the options,
270 /// the visit set can contain instructions from other functions.
271 struct MustBeExecutedIterator {
272   /// Type declarations that make his class an input iterator.
273   ///{
274   typedef const Instruction *value_type;
275   typedef std::ptrdiff_t difference_type;
276   typedef const Instruction **pointer;
277   typedef const Instruction *&reference;
278   typedef std::input_iterator_tag iterator_category;
279   ///}
280 
281   using ExplorerTy = MustBeExecutedContextExplorer;
282 
283   MustBeExecutedIterator(const MustBeExecutedIterator &Other) = default;
284 
285   MustBeExecutedIterator(MustBeExecutedIterator &&Other)
286       : Visited(std::move(Other.Visited)), Explorer(Other.Explorer),
287         CurInst(Other.CurInst), Head(Other.Head), Tail(Other.Tail) {}
288 
289   MustBeExecutedIterator &operator=(MustBeExecutedIterator &&Other) {
290     if (this != &Other) {
291       std::swap(Visited, Other.Visited);
292       std::swap(CurInst, Other.CurInst);
293       std::swap(Head, Other.Head);
294       std::swap(Tail, Other.Tail);
295     }
296     return *this;
297   }
298 
299   ~MustBeExecutedIterator() = default;
300 
301   /// Pre- and post-increment operators.
302   ///{
303   MustBeExecutedIterator &operator++() {
304     CurInst = advance();
305     return *this;
306   }
307 
308   MustBeExecutedIterator operator++(int) {
309     MustBeExecutedIterator tmp(*this);
310     operator++();
311     return tmp;
312   }
313   ///}
314 
315   /// Equality and inequality operators. Note that we ignore the history here.
316   ///{
317   bool operator==(const MustBeExecutedIterator &Other) const {
318     return CurInst == Other.CurInst && Head == Other.Head && Tail == Other.Tail;
319   }
320 
321   bool operator!=(const MustBeExecutedIterator &Other) const {
322     return !(*this == Other);
323   }
324   ///}
325 
326   /// Return the underlying instruction.
327   const Instruction *&operator*() { return CurInst; }
328   const Instruction *getCurrentInst() const { return CurInst; }
329 
330   /// Return true if \p I was encountered by this iterator already.
331   bool count(const Instruction *I) const {
332     return Visited.count({I, ExplorationDirection::FORWARD}) ||
333            Visited.count({I, ExplorationDirection::BACKWARD});
334   }
335 
336 private:
337   using VisitedSetTy =
338       DenseSet<PointerIntPair<const Instruction *, 1, ExplorationDirection>>;
339 
340   /// Private constructors.
341   MustBeExecutedIterator(ExplorerTy &Explorer, const Instruction *I);
342 
343   /// Reset the iterator to its initial state pointing at \p I.
344   void reset(const Instruction *I);
345 
346   /// Reset the iterator to point at \p I, keep cached state.
347   void resetInstruction(const Instruction *I);
348 
349   /// Try to advance one of the underlying positions (Head or Tail).
350   ///
351   /// \return The next instruction in the must be executed context, or nullptr
352   ///         if none was found.
353   const Instruction *advance();
354 
355   /// A set to track the visited instructions in order to deal with endless
356   /// loops and recursion.
357   VisitedSetTy Visited;
358 
359   /// A reference to the explorer that created this iterator.
360   ExplorerTy &Explorer;
361 
362   /// The instruction we are currently exposing to the user. There is always an
363   /// instruction that we know is executed with the given program point,
364   /// initially the program point itself.
365   const Instruction *CurInst;
366 
367   /// Two positions that mark the program points where this iterator will look
368   /// for the next instruction. Note that the current instruction is either the
369   /// one pointed to by Head, Tail, or both.
370   const Instruction *Head, *Tail;
371 
372   friend struct MustBeExecutedContextExplorer;
373 };
374 
375 /// A "must be executed context" for a given program point PP is the set of
376 /// instructions, potentially before and after PP, that are executed always when
377 /// PP is reached. The MustBeExecutedContextExplorer an interface to explore
378 /// "must be executed contexts" in a module through the use of
379 /// MustBeExecutedIterator.
380 ///
381 /// The explorer exposes "must be executed iterators" that traverse the must be
382 /// executed context. There is little information sharing between iterators as
383 /// the expected use case involves few iterators for "far apart" instructions.
384 /// If that changes, we should consider caching more intermediate results.
385 struct MustBeExecutedContextExplorer {
386 
387   /// In the description of the parameters we use PP to denote a program point
388   /// for which the must be executed context is explored, or put differently,
389   /// for which the MustBeExecutedIterator is created.
390   ///
391   /// \param ExploreInterBlock    Flag to indicate if instructions in blocks
392   ///                             other than the parent of PP should be
393   ///                             explored.
394   /// \param ExploreCFGForward    Flag to indicate if instructions located after
395   ///                             PP in the CFG, e.g., post-dominating PP,
396   ///                             should be explored.
397   /// \param ExploreCFGBackward   Flag to indicate if instructions located
398   ///                             before PP in the CFG, e.g., dominating PP,
399   ///                             should be explored.
400   MustBeExecutedContextExplorer(
401       bool ExploreInterBlock, bool ExploreCFGForward, bool ExploreCFGBackward,
402       GetterTy<const LoopInfo> LIGetter =
403           [](const Function &) { return nullptr; },
404       GetterTy<const DominatorTree> DTGetter =
405           [](const Function &) { return nullptr; },
406       GetterTy<const PostDominatorTree> PDTGetter =
407           [](const Function &) { return nullptr; })
408       : ExploreInterBlock(ExploreInterBlock),
409         ExploreCFGForward(ExploreCFGForward),
410         ExploreCFGBackward(ExploreCFGBackward), LIGetter(LIGetter),
411         DTGetter(DTGetter), PDTGetter(PDTGetter), EndIterator(*this, nullptr) {}
412 
413   /// Iterator-based interface. \see MustBeExecutedIterator.
414   ///{
415   using iterator = MustBeExecutedIterator;
416   using const_iterator = const MustBeExecutedIterator;
417 
418   /// Return an iterator to explore the context around \p PP.
419   iterator &begin(const Instruction *PP) {
420     auto &It = InstructionIteratorMap[PP];
421     if (!It)
422       It.reset(new iterator(*this, PP));
423     return *It;
424   }
425 
426   /// Return an iterator to explore the cached context around \p PP.
427   const_iterator &begin(const Instruction *PP) const {
428     return *InstructionIteratorMap.find(PP)->second;
429   }
430 
431   /// Return an universal end iterator.
432   ///{
433   iterator &end() { return EndIterator; }
434   iterator &end(const Instruction *) { return EndIterator; }
435 
436   const_iterator &end() const { return EndIterator; }
437   const_iterator &end(const Instruction *) const { return EndIterator; }
438   ///}
439 
440   /// Return an iterator range to explore the context around \p PP.
441   llvm::iterator_range<iterator> range(const Instruction *PP) {
442     return llvm::make_range(begin(PP), end(PP));
443   }
444 
445   /// Return an iterator range to explore the cached context around \p PP.
446   llvm::iterator_range<const_iterator> range(const Instruction *PP) const {
447     return llvm::make_range(begin(PP), end(PP));
448   }
449   ///}
450 
451   /// Check \p Pred on all instructions in the context.
452   ///
453   /// This method will evaluate \p Pred and return
454   /// true if \p Pred holds in every instruction.
455   bool checkForAllContext(const Instruction *PP,
456                           function_ref<bool(const Instruction *)> Pred) {
457     for (auto EIt = begin(PP), EEnd = end(PP); EIt != EEnd; ++EIt)
458       if (!Pred(*EIt))
459         return false;
460     return true;
461   }
462 
463   /// Helper to look for \p I in the context of \p PP.
464   ///
465   /// The context is expanded until \p I was found or no more expansion is
466   /// possible.
467   ///
468   /// \returns True, iff \p I was found.
469   bool findInContextOf(const Instruction *I, const Instruction *PP) {
470     auto EIt = begin(PP), EEnd = end(PP);
471     return findInContextOf(I, EIt, EEnd);
472   }
473 
474   /// Helper to look for \p I in the context defined by \p EIt and \p EEnd.
475   ///
476   /// The context is expanded until \p I was found or no more expansion is
477   /// possible.
478   ///
479   /// \returns True, iff \p I was found.
480   bool findInContextOf(const Instruction *I, iterator &EIt, iterator &EEnd) {
481     bool Found = EIt.count(I);
482     while (!Found && EIt != EEnd)
483       Found = (++EIt).getCurrentInst() == I;
484     return Found;
485   }
486 
487   /// Return the next instruction that is guaranteed to be executed after \p PP.
488   ///
489   /// \param It              The iterator that is used to traverse the must be
490   ///                        executed context.
491   /// \param PP              The program point for which the next instruction
492   ///                        that is guaranteed to execute is determined.
493   const Instruction *
494   getMustBeExecutedNextInstruction(MustBeExecutedIterator &It,
495                                    const Instruction *PP);
496   /// Return the previous instr. that is guaranteed to be executed before \p PP.
497   ///
498   /// \param It              The iterator that is used to traverse the must be
499   ///                        executed context.
500   /// \param PP              The program point for which the previous instr.
501   ///                        that is guaranteed to execute is determined.
502   const Instruction *
503   getMustBeExecutedPrevInstruction(MustBeExecutedIterator &It,
504                                    const Instruction *PP);
505 
506   /// Find the next join point from \p InitBB in forward direction.
507   const BasicBlock *findForwardJoinPoint(const BasicBlock *InitBB);
508 
509   /// Find the next join point from \p InitBB in backward direction.
510   const BasicBlock *findBackwardJoinPoint(const BasicBlock *InitBB);
511 
512   /// Parameter that limit the performed exploration. See the constructor for
513   /// their meaning.
514   ///{
515   const bool ExploreInterBlock;
516   const bool ExploreCFGForward;
517   const bool ExploreCFGBackward;
518   ///}
519 
520 private:
521   /// Getters for common CFG analyses: LoopInfo, DominatorTree, and
522   /// PostDominatorTree.
523   ///{
524   GetterTy<const LoopInfo> LIGetter;
525   GetterTy<const DominatorTree> DTGetter;
526   GetterTy<const PostDominatorTree> PDTGetter;
527   ///}
528 
529   /// Map to cache isGuaranteedToTransferExecutionToSuccessor results.
530   DenseMap<const BasicBlock *, std::optional<bool>> BlockTransferMap;
531 
532   /// Map to cache containsIrreducibleCFG results.
533   DenseMap<const Function *, std::optional<bool>> IrreducibleControlMap;
534 
535   /// Map from instructions to associated must be executed iterators.
536   DenseMap<const Instruction *, std::unique_ptr<MustBeExecutedIterator>>
537       InstructionIteratorMap;
538 
539   /// A unique end iterator.
540   MustBeExecutedIterator EndIterator;
541 };
542 
543 class MustExecutePrinterPass : public PassInfoMixin<MustExecutePrinterPass> {
544   raw_ostream &OS;
545 
546 public:
547   MustExecutePrinterPass(raw_ostream &OS) : OS(OS) {}
548   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
549   static bool isRequired() { return true; }
550 };
551 
552 class MustBeExecutedContextPrinterPass
553     : public PassInfoMixin<MustBeExecutedContextPrinterPass> {
554   raw_ostream &OS;
555 
556 public:
557   MustBeExecutedContextPrinterPass(raw_ostream &OS) : OS(OS) {}
558   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
559   static bool isRequired() { return true; }
560 };
561 
562 } // namespace llvm
563 
564 #endif
565