xref: /llvm-project/bolt/include/bolt/Core/BinaryBasicBlock.h (revision 49ee6069db372ce326bc36678e745459868c3771)
1 //===- bolt/Core/BinaryBasicBlock.h - Low-level basic block -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Sequence of MC/MCPlus instructions. Call/invoke does not terminate the block.
10 // CFI instructions are part of the instruction list with the initial CFI state
11 // defined at the beginning of the block.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef BOLT_CORE_BINARY_BASIC_BLOCK_H
16 #define BOLT_CORE_BINARY_BASIC_BLOCK_H
17 
18 #include "bolt/Core/FunctionLayout.h"
19 #include "bolt/Core/MCPlus.h"
20 #include "llvm/ADT/GraphTraits.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCSymbol.h"
25 #include "llvm/Support/ErrorOr.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <limits>
28 #include <utility>
29 
30 namespace llvm {
31 class MCCodeEmitter;
32 
33 namespace bolt {
34 
35 class BinaryFunction;
36 class JumpTable;
37 
38 class BinaryBasicBlock {
39 public:
40   /// Profile execution information for a given edge in CFG.
41   ///
42   /// If MispredictedCount equals COUNT_INFERRED, then we have a profile
43   /// data for a fall-through edge with a Count representing an inferred
44   /// execution count, i.e. the count we calculated internally, not the one
45   /// coming from profile data.
46   ///
47   /// For all other values of MispredictedCount, Count represents the number of
48   /// branch executions from a profile, and MispredictedCount is the number
49   /// of times the branch was mispredicted according to this profile.
50   struct BinaryBranchInfo {
51     uint64_t Count;
52     uint64_t MispredictedCount; /// number of branches mispredicted
53 
54     bool operator<(const BinaryBranchInfo &Other) const {
55       return (Count < Other.Count) ||
56              (Count == Other.Count &&
57               MispredictedCount < Other.MispredictedCount);
58     }
59   };
60 
61   static constexpr uint32_t INVALID_OFFSET =
62       std::numeric_limits<uint32_t>::max();
63 
64   using BranchInfoType = SmallVector<BinaryBranchInfo, 0>;
65 
66 private:
67   /// Vector of all instructions in the block.
68   InstructionListType Instructions;
69 
70   /// CFG information.
71   using EdgeListType = SmallVector<BinaryBasicBlock *, 0>;
72   EdgeListType Predecessors;
73   EdgeListType Successors;
74 
75   /// Each successor has a corresponding BranchInfo entry in the list.
76   BranchInfoType BranchInfo;
77 
78   using ExceptionListType = SmallVector<BinaryBasicBlock *, 0>;
79 
80   /// List of blocks that this landing pad is handling.
81   ExceptionListType Throwers;
82 
83   /// List of blocks that can catch exceptions thrown by code in this block.
84   ExceptionListType LandingPads;
85 
86   /// Function that owns this basic block.
87   BinaryFunction *Function;
88 
89   /// Label associated with the block.
90   MCSymbol *Label{nullptr};
91 
92   /// [Begin, End) address range for this block in the output binary.
93   std::pair<uint32_t, uint32_t> OutputAddressRange = {0, 0};
94 
95   /// Original offset range of the basic block in the function.
96   std::pair<uint32_t, uint32_t> InputRange = {INVALID_OFFSET, INVALID_OFFSET};
97 
98   /// Map input offset (from function start) of an instruction to an output
99   /// symbol. Enables writing BOLT address translation tables used for mapping
100   /// control transfer in the output binary back to the original binary.
101   using LocSymsTy = std::vector<std::pair<uint32_t, const MCSymbol *>>;
102   std::unique_ptr<LocSymsTy> LocSyms;
103 
104   /// Alignment requirements for the block.
105   uint32_t Alignment{1};
106 
107   /// Maximum number of bytes to use for alignment of the block.
108   uint32_t AlignmentMaxBytes{0};
109 
110   /// Number of times this basic block was executed.
111   uint64_t ExecutionCount{COUNT_NO_PROFILE};
112 
113   static constexpr unsigned InvalidIndex = ~0u;
114 
115   /// Index to BasicBlocks vector in BinaryFunction.
116   unsigned Index{InvalidIndex};
117 
118   /// Index in the current layout.
119   unsigned LayoutIndex{InvalidIndex};
120 
121   /// Number of pseudo instructions in this block.
122   uint32_t NumPseudos{0};
123 
124   /// CFI state at the entry to this basic block.
125   int32_t CFIState{-1};
126 
127   /// In cases where the parent function has been split, FragmentNum > 0 means
128   /// this BB will be allocated in a fragment outside its parent function.
129   FragmentNum Fragment;
130 
131   /// Indicates if the block could be outlined.
132   bool CanOutline{true};
133 
134   /// Flag to indicate whether this block is valid or not.  Invalid
135   /// blocks may contain out of date or incorrect information.
136   bool IsValid{true};
137 
138   /// Last computed hash value.
139   mutable uint64_t Hash{0};
140 
141 private:
142   BinaryBasicBlock() = delete;
143   BinaryBasicBlock(const BinaryBasicBlock &) = delete;
144   BinaryBasicBlock(const BinaryBasicBlock &&) = delete;
145   BinaryBasicBlock &operator=(const BinaryBasicBlock &) = delete;
146   BinaryBasicBlock &operator=(const BinaryBasicBlock &&) = delete;
147 
148   explicit BinaryBasicBlock(BinaryFunction *Function, MCSymbol *Label)
149       : Function(Function), Label(Label) {
150     assert(Function && "Function must be non-null");
151   }
152 
153   // Exclusively managed by BinaryFunction.
154   friend class BinaryFunction;
155   friend bool operator<(const BinaryBasicBlock &LHS,
156                         const BinaryBasicBlock &RHS);
157 
158   /// Assign new label to the basic block.
159   void setLabel(MCSymbol *Symbol) { Label = Symbol; }
160 
161 public:
162   static constexpr uint64_t COUNT_INFERRED =
163       std::numeric_limits<uint64_t>::max();
164   static constexpr uint64_t COUNT_NO_PROFILE =
165       std::numeric_limits<uint64_t>::max();
166 
167   // Instructions iterators.
168   using iterator = InstructionListType::iterator;
169   using const_iterator = InstructionListType::const_iterator;
170   using reverse_iterator = std::reverse_iterator<iterator>;
171   using const_reverse_iterator = std::reverse_iterator<const_iterator>;
172 
173   bool         empty()            const { assert(hasInstructions());
174                                           return Instructions.empty(); }
175   size_t       size()             const { assert(hasInstructions());
176                                           return Instructions.size(); }
177   MCInst       &front()                 { assert(hasInstructions());
178                                           return Instructions.front();  }
179   MCInst       &back()                  { assert(hasInstructions());
180                                           return Instructions.back();   }
181   const MCInst &front()           const { assert(hasInstructions());
182                                           return Instructions.front();  }
183   const MCInst &back()            const { assert(hasInstructions());
184                                           return Instructions.back();   }
185 
186   iterator                begin()       { assert(hasInstructions());
187                                           return Instructions.begin();  }
188   const_iterator          begin() const { assert(hasInstructions());
189                                           return Instructions.begin();  }
190   iterator                end  ()       { assert(hasInstructions());
191                                           return Instructions.end();    }
192   const_iterator          end  () const { assert(hasInstructions());
193                                           return Instructions.end();    }
194   reverse_iterator       rbegin()       { assert(hasInstructions());
195                                           return Instructions.rbegin(); }
196   const_reverse_iterator rbegin() const { assert(hasInstructions());
197                                           return Instructions.rbegin(); }
198   reverse_iterator       rend  ()       { assert(hasInstructions());
199                                           return Instructions.rend();   }
200   const_reverse_iterator rend  () const { assert(hasInstructions());
201                                           return Instructions.rend();   }
202 
203   // CFG iterators.
204   using pred_iterator = EdgeListType::iterator;
205   using const_pred_iterator = EdgeListType::const_iterator;
206   using succ_iterator = EdgeListType::iterator;
207   using const_succ_iterator = EdgeListType::const_iterator;
208   using throw_iterator = decltype(Throwers)::iterator;
209   using const_throw_iterator = decltype(Throwers)::const_iterator;
210   using lp_iterator = decltype(LandingPads)::iterator;
211   using const_lp_iterator = decltype(LandingPads)::const_iterator;
212 
213   using pred_reverse_iterator = std::reverse_iterator<pred_iterator>;
214   using const_pred_reverse_iterator =
215     std::reverse_iterator<const_pred_iterator>;
216   using succ_reverse_iterator = std::reverse_iterator<succ_iterator>;
217   using const_succ_reverse_iterator =
218     std::reverse_iterator<const_succ_iterator>;
219 
220   pred_iterator        pred_begin()       { return Predecessors.begin(); }
221   const_pred_iterator  pred_begin() const { return Predecessors.begin(); }
222   pred_iterator        pred_end()         { return Predecessors.end();   }
223   const_pred_iterator  pred_end()   const { return Predecessors.end();   }
224   pred_reverse_iterator        pred_rbegin()
225                                           { return Predecessors.rbegin();}
226   const_pred_reverse_iterator  pred_rbegin() const
227                                           { return Predecessors.rbegin();}
228   pred_reverse_iterator        pred_rend()
229                                           { return Predecessors.rend();  }
230   const_pred_reverse_iterator  pred_rend()   const
231                                           { return Predecessors.rend();  }
232   size_t               pred_size()  const {
233     return Predecessors.size();
234   }
235   bool                 pred_empty() const { return Predecessors.empty(); }
236 
237   succ_iterator        succ_begin()       { return Successors.begin();   }
238   const_succ_iterator  succ_begin() const { return Successors.begin();   }
239   succ_iterator        succ_end()         { return Successors.end();     }
240   const_succ_iterator  succ_end()   const { return Successors.end();     }
241   succ_reverse_iterator        succ_rbegin()
242                                           { return Successors.rbegin();  }
243   const_succ_reverse_iterator  succ_rbegin() const
244                                           { return Successors.rbegin();  }
245   succ_reverse_iterator        succ_rend()
246                                           { return Successors.rend();    }
247   const_succ_reverse_iterator  succ_rend()   const
248                                           { return Successors.rend();    }
249   size_t               succ_size()  const {
250     return Successors.size();
251   }
252   bool                 succ_empty() const { return Successors.empty();   }
253 
254   throw_iterator        throw_begin()       { return Throwers.begin(); }
255   const_throw_iterator  throw_begin() const { return Throwers.begin(); }
256   throw_iterator        throw_end()         { return Throwers.end();   }
257   const_throw_iterator  throw_end()   const { return Throwers.end();   }
258   size_t                throw_size()  const {
259     return Throwers.size();
260   }
261   bool                  throw_empty() const { return Throwers.empty(); }
262   bool                  isLandingPad() const { return !Throwers.empty(); }
263 
264   lp_iterator        lp_begin()       { return LandingPads.begin();   }
265   const_lp_iterator  lp_begin() const { return LandingPads.begin();   }
266   lp_iterator        lp_end()         { return LandingPads.end();     }
267   const_lp_iterator  lp_end()   const { return LandingPads.end();     }
268   size_t             lp_size()  const {
269     return LandingPads.size();
270   }
271   bool               lp_empty() const { return LandingPads.empty();   }
272 
273   inline iterator_range<iterator> instructions() {
274     assert(hasInstructions());
275     return iterator_range<iterator>(begin(), end());
276   }
277   inline iterator_range<const_iterator> instructions() const {
278     assert(hasInstructions());
279     return iterator_range<const_iterator>(begin(), end());
280   }
281   inline iterator_range<pred_iterator> predecessors() {
282     assert(hasCFG());
283     return iterator_range<pred_iterator>(pred_begin(), pred_end());
284   }
285   inline iterator_range<const_pred_iterator> predecessors() const {
286     assert(hasCFG());
287     return iterator_range<const_pred_iterator>(pred_begin(), pred_end());
288   }
289   inline iterator_range<succ_iterator> successors() {
290     assert(hasCFG());
291     return iterator_range<succ_iterator>(succ_begin(), succ_end());
292   }
293   inline iterator_range<const_succ_iterator> successors() const {
294     assert(hasCFG());
295     return iterator_range<const_succ_iterator>(succ_begin(), succ_end());
296   }
297   inline iterator_range<throw_iterator> throwers() {
298     assert(hasCFG());
299     return iterator_range<throw_iterator>(throw_begin(), throw_end());
300   }
301   inline iterator_range<const_throw_iterator> throwers() const {
302     assert(hasCFG());
303     return iterator_range<const_throw_iterator>(throw_begin(), throw_end());
304   }
305   inline iterator_range<lp_iterator> landing_pads() {
306     assert(hasCFG());
307     return iterator_range<lp_iterator>(lp_begin(), lp_end());
308   }
309   inline iterator_range<const_lp_iterator> landing_pads() const {
310     assert(hasCFG());
311     return iterator_range<const_lp_iterator>(lp_begin(), lp_end());
312   }
313 
314   // BranchInfo iterators.
315   using branch_info_iterator = BranchInfoType::iterator;
316   using const_branch_info_iterator = BranchInfoType::const_iterator;
317   using branch_info_reverse_iterator =
318       std::reverse_iterator<branch_info_iterator>;
319   using const_branch_info_reverse_iterator =
320       std::reverse_iterator<const_branch_info_iterator>;
321 
322   branch_info_iterator branch_info_begin() { return BranchInfo.begin(); }
323   branch_info_iterator branch_info_end() { return BranchInfo.end(); }
324   const_branch_info_iterator branch_info_begin() const {
325     return BranchInfo.begin();
326   }
327   const_branch_info_iterator branch_info_end() const {
328     return BranchInfo.end();
329   }
330   branch_info_reverse_iterator branch_info_rbegin() {
331     return BranchInfo.rbegin();
332   }
333   branch_info_reverse_iterator branch_info_rend() { return BranchInfo.rend(); }
334   const_branch_info_reverse_iterator branch_info_rbegin() const {
335     return BranchInfo.rbegin();
336   }
337   const_branch_info_reverse_iterator branch_info_rend() const {
338     return BranchInfo.rend();
339   }
340 
341   size_t branch_info_size() const { return BranchInfo.size(); }
342   bool branch_info_empty() const { return BranchInfo.empty(); }
343 
344   inline iterator_range<branch_info_iterator> branch_info() {
345     return iterator_range<branch_info_iterator>(BranchInfo.begin(),
346                                                 BranchInfo.end());
347   }
348   inline iterator_range<const_branch_info_iterator> branch_info() const {
349     return iterator_range<const_branch_info_iterator>(BranchInfo.begin(),
350                                                       BranchInfo.end());
351   }
352 
353   /// Get instruction at given index.
354   MCInst &getInstructionAtIndex(unsigned Index) { return Instructions[Index]; }
355 
356   const MCInst &getInstructionAtIndex(unsigned Index) const {
357     return Instructions[Index];
358   }
359 
360   /// Return symbol marking the start of this basic block.
361   MCSymbol *getLabel() { return Label; }
362 
363   /// Return symbol marking the start of this basic block (const version).
364   const MCSymbol *getLabel() const { return Label; }
365 
366   /// Get successor with given \p Label if \p Label != nullptr.
367   /// Returns nullptr if no such successor is found.
368   /// If the \p Label == nullptr and the block has only one successor then
369   /// return the successor.
370   BinaryBasicBlock *getSuccessor(const MCSymbol *Label = nullptr) const;
371 
372   /// Return the related branch info as well as the successor.
373   BinaryBasicBlock *getSuccessor(const MCSymbol *Label,
374                                  BinaryBranchInfo &BI) const;
375 
376   /// If the basic block ends with a conditional branch (possibly followed by
377   /// an unconditional branch) and thus has 2 successors, return a successor
378   /// corresponding to a jump condition which could be true or false.
379   /// Return nullptr if the basic block does not have a conditional jump.
380   BinaryBasicBlock *getConditionalSuccessor(bool Condition) {
381     if (succ_size() != 2)
382       return nullptr;
383     return Successors[Condition == true ? 0 : 1];
384   }
385 
386   const BinaryBasicBlock *getConditionalSuccessor(bool Condition) const {
387     return const_cast<BinaryBasicBlock *>(this)->getConditionalSuccessor(
388         Condition);
389   }
390 
391   /// Find the fallthrough successor for a block, or nullptr if there is
392   /// none.
393   BinaryBasicBlock *getFallthrough() {
394     if (succ_size() == 2)
395       return getConditionalSuccessor(false);
396     else
397       return getSuccessor();
398   }
399 
400   const BinaryBasicBlock *getFallthrough() const {
401     return const_cast<BinaryBasicBlock *>(this)->getFallthrough();
402   }
403 
404   /// Return branch info corresponding to a taken branch.
405   const BinaryBranchInfo &getTakenBranchInfo() const {
406     assert(BranchInfo.size() == 2 &&
407            "could only be called for blocks with 2 successors");
408     return BranchInfo[0];
409   };
410 
411   /// Return branch info corresponding to a fall-through branch.
412   const BinaryBranchInfo &getFallthroughBranchInfo() const {
413     assert(BranchInfo.size() == 2 &&
414            "could only be called for blocks with 2 successors");
415     return BranchInfo[1];
416   };
417 
418   /// Return branch info corresponding to an edge going to \p Succ basic block.
419   BinaryBranchInfo &getBranchInfo(const BinaryBasicBlock &Succ);
420 
421   /// Return branch info corresponding to an edge going to \p Succ basic block.
422   const BinaryBranchInfo &getBranchInfo(const BinaryBasicBlock &Succ) const;
423 
424   /// Set branch information for the outgoing edge to block \p Succ.
425   void setSuccessorBranchInfo(const BinaryBasicBlock &Succ, uint64_t Count,
426                               uint64_t MispredictedCount) {
427     BinaryBranchInfo &BI = getBranchInfo(Succ);
428     BI.Count = Count;
429     BI.MispredictedCount = MispredictedCount;
430   }
431 
432   /// Try to compute the taken and misprediction frequencies for the given
433   /// successor.  The result is an error if no information can be found.
434   ErrorOr<std::pair<double, double>>
435   getBranchStats(const BinaryBasicBlock *Succ) const;
436 
437   /// If the basic block ends with a conditional branch (possibly followed by
438   /// an unconditional branch) and thus has 2 successor, reverse the order of
439   /// its successors in CFG, update branch info, and return true. If the basic
440   /// block does not have 2 successors return false.
441   bool swapConditionalSuccessors();
442 
443   /// Add an instruction with unconditional control transfer to \p Successor
444   /// basic block to the end of this basic block.
445   void addBranchInstruction(const BinaryBasicBlock *Successor);
446 
447   /// Add an instruction with tail call control transfer to \p Target
448   /// to the end of this basic block.
449   void addTailCallInstruction(const MCSymbol *Target);
450 
451   /// Return the number of call instructions in this basic block.
452   uint32_t getNumCalls() const;
453 
454   /// Get landing pad with given label. Returns nullptr if no such
455   /// landing pad is found.
456   BinaryBasicBlock *getLandingPad(const MCSymbol *Label) const;
457 
458   /// Return local name for the block.
459   StringRef getName() const { return Label->getName(); }
460 
461   /// Add instruction at the end of this basic block.
462   /// Returns iterator pointing to the inserted instruction.
463   iterator addInstruction(MCInst &&Inst) {
464     adjustNumPseudos(Inst, 1);
465     Instructions.emplace_back(Inst);
466     return std::prev(Instructions.end());
467   }
468 
469   /// Add instruction at the end of this basic block.
470   /// Returns iterator pointing to the inserted instruction.
471   iterator addInstruction(const MCInst &Inst) {
472     adjustNumPseudos(Inst, 1);
473     Instructions.push_back(Inst);
474     return std::prev(Instructions.end());
475   }
476 
477   /// Add a range of instructions to the end of this basic block.
478   template <typename Itr> void addInstructions(Itr Begin, Itr End) {
479     while (Begin != End)
480       addInstruction(*Begin++);
481   }
482 
483   /// Add a range of instructions to the end of this basic block.
484   template <typename RangeTy> void addInstructions(RangeTy R) {
485     for (auto &I : R)
486       addInstruction(I);
487   }
488 
489   /// Add instruction before Pos in this basic block.
490   template <typename Itr> Itr insertPseudoInstr(Itr Pos, MCInst &Instr) {
491     ++NumPseudos;
492     return Instructions.insert(Pos, Instr);
493   }
494 
495   /// Return the number of pseudo instructions in the basic block.
496   uint32_t getNumPseudos() const;
497 
498   /// Return the number of emitted instructions for this basic block.
499   uint32_t getNumNonPseudos() const { return size() - getNumPseudos(); }
500 
501   /// Return iterator to the first non-pseudo instruction or end()
502   /// if no such instruction was found.
503   iterator getFirstNonPseudo();
504 
505   /// Return a pointer to the first non-pseudo instruction in this basic
506   /// block.  Returns nullptr if none exists.
507   MCInst *getFirstNonPseudoInstr() {
508     auto II = getFirstNonPseudo();
509     return II == Instructions.end() ? nullptr : &*II;
510   }
511 
512   /// Return reverse iterator to the last non-pseudo instruction or rend()
513   /// if no such instruction was found.
514   reverse_iterator getLastNonPseudo();
515   const_reverse_iterator getLastNonPseudo() const {
516     return const_cast<BinaryBasicBlock *>(this)->getLastNonPseudo();
517   }
518 
519   /// Return a pointer to the last non-pseudo instruction in this basic
520   /// block.  Returns nullptr if none exists.
521   MCInst *getLastNonPseudoInstr() {
522     auto RII = getLastNonPseudo();
523     return RII == Instructions.rend() ? nullptr : &*RII;
524   }
525   const MCInst *getLastNonPseudoInstr() const {
526     auto RII = getLastNonPseudo();
527     return RII == Instructions.rend() ? nullptr : &*RII;
528   }
529 
530   /// Set CFI state at entry to this basic block.
531   void setCFIState(int32_t NewCFIState) {
532     assert((CFIState == -1 || NewCFIState == CFIState) &&
533            "unexpected change of CFI state for basic block");
534     CFIState = NewCFIState;
535   }
536 
537   /// Return CFI state (expected) at entry of this basic block.
538   int32_t getCFIState() const {
539     assert(CFIState >= 0 && "unknown CFI state");
540     return CFIState;
541   }
542 
543   /// Calculate and return CFI state right before instruction \p Instr in
544   /// this basic block. If \p Instr is nullptr then return the state at
545   /// the end of the basic block.
546   int32_t getCFIStateAtInstr(const MCInst *Instr) const;
547 
548   /// Calculate and return CFI state after execution of this basic block.
549   /// The state depends on CFI state at entry and CFI instructions inside the
550   /// basic block.
551   int32_t getCFIStateAtExit() const { return getCFIStateAtInstr(nullptr); }
552 
553   /// Set minimum alignment for the basic block.
554   void setAlignment(uint32_t Align) { Alignment = Align; }
555 
556   /// Set alignment of the block based on the alignment of its offset.
557   void setDerivedAlignment() {
558     const uint64_t DerivedAlignment = getOffset() & (1 + ~getOffset());
559     Alignment = std::min(DerivedAlignment, uint64_t(32));
560   }
561 
562   /// Return required alignment for the block.
563   Align getAlign() const { return Align(Alignment); }
564   uint32_t getAlignment() const { return Alignment; }
565 
566   /// Set the maximum number of bytes to use for the block alignment.
567   void setAlignmentMaxBytes(uint32_t Value) { AlignmentMaxBytes = Value; }
568 
569   /// Return the maximum number of bytes to use for the block alignment.
570   uint32_t getAlignmentMaxBytes() const { return AlignmentMaxBytes; }
571 
572   /// Adds block to successor list, and also updates predecessor list for
573   /// successor block.
574   /// Set branch info for this path.
575   void addSuccessor(BinaryBasicBlock *Succ, uint64_t Count = 0,
576                     uint64_t MispredictedCount = 0);
577 
578   void addSuccessor(BinaryBasicBlock *Succ, const BinaryBranchInfo &BI) {
579     addSuccessor(Succ, BI.Count, BI.MispredictedCount);
580   }
581 
582   /// Add a range of successors.
583   template <typename Itr> void addSuccessors(Itr Begin, Itr End) {
584     while (Begin != End)
585       addSuccessor(*Begin++);
586   }
587 
588   /// Add a range of successors with branch info.
589   template <typename Itr, typename BrItr>
590   void addSuccessors(Itr Begin, Itr End, BrItr BrBegin, BrItr BrEnd) {
591     assert(std::distance(Begin, End) == std::distance(BrBegin, BrEnd));
592     while (Begin != End)
593       addSuccessor(*Begin++, *BrBegin++);
594   }
595 
596   /// Replace Succ with NewSucc.  This routine is helpful for preserving
597   /// the order of conditional successors when editing the CFG.
598   void replaceSuccessor(BinaryBasicBlock *Succ, BinaryBasicBlock *NewSucc,
599                         uint64_t Count = 0, uint64_t MispredictedCount = 0);
600 
601   /// Move all of this block's successors to a new block, and set the
602   /// execution count of this new block with our execution count. This is
603   /// useful when splitting a block in two.
604   void moveAllSuccessorsTo(BinaryBasicBlock *New) {
605     New->addSuccessors(successors().begin(), successors().end(),
606                        branch_info_begin(), branch_info_end());
607     removeAllSuccessors();
608 
609     // Update the execution count on the new block.
610     New->setExecutionCount(getExecutionCount());
611   }
612 
613   /// Remove /p Succ basic block from the list of successors. Update the
614   /// list of predecessors of /p Succ and update branch info.
615   void removeSuccessor(BinaryBasicBlock *Succ);
616 
617   /// Remove all successors of the basic block, and remove the block
618   /// from respective lists of predecessors.
619   void removeAllSuccessors();
620 
621   /// Remove useless duplicate successors.  When the conditional
622   /// successor is the same as the unconditional successor, we can
623   /// remove the conditional successor and branch instruction.
624   void removeDuplicateConditionalSuccessor(MCInst *CondBranch);
625 
626   /// Update successors of the basic block based on the jump table instruction.
627   /// The block must end with a jump table instruction.
628   void updateJumpTableSuccessors();
629 
630   /// Test if BB is a predecessor of this block.
631   bool isPredecessor(const BinaryBasicBlock *BB) const {
632     return llvm::is_contained(Predecessors, BB);
633   }
634 
635   /// Test if BB is a successor of this block.
636   bool isSuccessor(const BinaryBasicBlock *BB) const {
637     return llvm::is_contained(Successors, BB);
638   }
639 
640   /// Test if this BB has a valid execution count.
641   bool hasProfile() const { return ExecutionCount != COUNT_NO_PROFILE; }
642 
643   /// Return the information about the number of times this basic block was
644   /// executed.
645   ///
646   /// Return COUNT_NO_PROFILE if there's no profile info.
647   uint64_t getExecutionCount() const { return ExecutionCount; }
648 
649   /// Return the execution count for blocks with known profile.
650   /// Return 0 if the block has no profile.
651   uint64_t getKnownExecutionCount() const {
652     return !hasProfile() ? 0 : ExecutionCount;
653   }
654 
655   /// Set the execution count for this block.
656   void setExecutionCount(uint64_t Count) { ExecutionCount = Count; }
657 
658   /// Apply a given \p Ratio to the profile information of this basic block.
659   void adjustExecutionCount(double Ratio);
660 
661   /// Return true if the basic block is an entry point into the function
662   /// (either primary or secondary).
663   bool isEntryPoint() const;
664 
665   bool isValid() const { return IsValid; }
666 
667   void markValid(const bool Valid) { IsValid = Valid; }
668 
669   FragmentNum getFragmentNum() const { return Fragment; }
670 
671   void setFragmentNum(const FragmentNum Value) { Fragment = Value; }
672 
673   bool isSplit() const { return Fragment != FragmentNum::main(); }
674 
675   bool isCold() const {
676     assert(Fragment.get() < 2 &&
677            "Function is split into more than two (hot/cold)-fragments");
678     return isSplit();
679   }
680 
681   void setIsCold(const bool Flag) {
682     Fragment = Flag ? FragmentNum::cold() : FragmentNum::main();
683   }
684 
685   /// Return true if the block can be outlined. At the moment we disallow
686   /// outlining of blocks that can potentially throw exceptions or are
687   /// the beginning of a landing pad. The entry basic block also can
688   /// never be outlined.
689   bool canOutline() const { return CanOutline; }
690 
691   void setCanOutline(const bool Flag) { CanOutline = Flag; }
692 
693   /// Erase pseudo instruction at a given iterator.
694   /// Return iterator following the removed instruction.
695   iterator erasePseudoInstruction(iterator II) {
696     --NumPseudos;
697     return Instructions.erase(II);
698   }
699 
700   /// Erase non-pseudo instruction at a given iterator \p II.
701   /// Return iterator following the removed instruction.
702   iterator eraseInstruction(iterator II) {
703     adjustNumPseudos(*II, -1);
704     return Instructions.erase(II);
705   }
706 
707   /// Erase non-pseudo instruction at a given \p Index
708   void eraseInstructionAtIndex(unsigned Index) {
709     eraseInstruction(Instructions.begin() + Index);
710   }
711 
712   /// Erase instructions in the specified range.
713   template <typename ItrType>
714   void eraseInstructions(ItrType Begin, ItrType End) {
715     while (End > Begin)
716       eraseInstruction(findInstruction(*--End));
717   }
718 
719   /// Erase all instructions.
720   void clear() {
721     Instructions.clear();
722     NumPseudos = 0;
723   }
724 
725   /// Retrieve iterator for \p Inst or return end iterator if instruction is not
726   /// from this basic block.
727   decltype(Instructions)::iterator findInstruction(const MCInst *Inst) {
728     if (Instructions.empty())
729       return Instructions.end();
730     size_t Index = Inst - &Instructions[0];
731     return Index >= Instructions.size() ? Instructions.end()
732                                         : Instructions.begin() + Index;
733   }
734 
735   /// Replace instruction referenced by iterator \II with a sequence of
736   /// instructions defined by [\p Begin, \p End] range.
737   ///
738   /// Return iterator pointing to the first inserted instruction.
739   template <typename Itr>
740   iterator replaceInstruction(iterator II, Itr Begin, Itr End) {
741     adjustNumPseudos(*II, -1);
742     adjustNumPseudos(Begin, End, 1);
743 
744     auto I = II - Instructions.begin();
745     Instructions.insert(Instructions.erase(II), Begin, End);
746     return I + Instructions.begin();
747   }
748 
749   iterator replaceInstruction(iterator II,
750                               const InstructionListType &Replacement) {
751     return replaceInstruction(II, Replacement.begin(), Replacement.end());
752   }
753 
754   /// Insert \p NewInst before \p At, which must be an existing instruction in
755   /// this BB. Return iterator pointing to the newly inserted instruction.
756   iterator insertInstruction(iterator At, MCInst &&NewInst) {
757     adjustNumPseudos(NewInst, 1);
758     return Instructions.emplace(At, std::move(NewInst));
759   }
760 
761   iterator insertInstruction(iterator At, MCInst &NewInst) {
762     adjustNumPseudos(NewInst, 1);
763     return Instructions.insert(At, NewInst);
764   }
765 
766   /// Helper to retrieve any terminators in \p BB before \p Pos. This is used
767   /// to skip CFI instructions and to retrieve the first terminator instruction
768   /// in basic blocks with two terminators (conditional jump and unconditional
769   /// jump).
770   MCInst *getTerminatorBefore(MCInst *Pos);
771 
772   /// Used to identify whether an instruction is before a terminator and whether
773   /// moving it to the end of the BB would render it dead code.
774   bool hasTerminatorAfter(MCInst *Pos);
775 
776   /// Split apart the instructions in this basic block starting at Inst.
777   /// The instructions following Inst are removed and returned in a vector.
778   InstructionListType splitInstructions(const MCInst *Inst) {
779     InstructionListType SplitInst;
780 
781     assert(!Instructions.empty());
782     while (&Instructions.back() != Inst) {
783       SplitInst.push_back(Instructions.back());
784       Instructions.pop_back();
785     }
786     std::reverse(SplitInst.begin(), SplitInst.end());
787     NumPseudos = 0;
788     adjustNumPseudos(Instructions.begin(), Instructions.end(), 1);
789     return SplitInst;
790   }
791 
792   /// Split basic block at the instruction pointed to by II.
793   /// All iterators pointing after II get invalidated.
794   ///
795   /// Return the new basic block that starts with the instruction
796   /// at the split point.
797   BinaryBasicBlock *splitAt(iterator II);
798 
799   /// Set start offset of this basic block in the input binary.
800   void setOffset(uint32_t Offset) { InputRange.first = Offset; };
801 
802   /// Sets address of the basic block in the output.
803   void setOutputStartAddress(uint64_t Address) {
804     OutputAddressRange.first = Address;
805   }
806 
807   /// Sets address past the end of the basic block in the output.
808   void setOutputEndAddress(uint64_t Address) {
809     OutputAddressRange.second = Address;
810   }
811 
812   /// Gets the memory address range of this BB in the input binary.
813   std::pair<uint64_t, uint64_t> getInputAddressRange() const {
814     return InputRange;
815   }
816 
817   /// Gets the memory address range of this BB in the output binary.
818   std::pair<uint64_t, uint64_t> getOutputAddressRange() const {
819     return OutputAddressRange;
820   }
821 
822   uint64_t getOutputStartAddress() const { return OutputAddressRange.first; }
823   uint64_t getOutputEndAddress() const { return OutputAddressRange.second; }
824 
825   bool hasLocSyms() const { return LocSyms != nullptr; }
826 
827   /// Return mapping of input offsets to symbols in the output.
828   LocSymsTy &getLocSyms() {
829     return LocSyms ? *LocSyms : *(LocSyms = std::make_unique<LocSymsTy>());
830   }
831 
832   /// Return mapping of input offsets to symbols in the output.
833   const LocSymsTy &getLocSyms() const {
834     return const_cast<BinaryBasicBlock *>(this)->getLocSyms();
835   }
836 
837   /// Return size of the basic block in the output binary.
838   uint64_t getOutputSize() const {
839     return OutputAddressRange.second - OutputAddressRange.first;
840   }
841 
842   BinaryFunction *getFunction() const { return Function; }
843 
844   /// Analyze and interpret the terminators of this basic block. TBB must be
845   /// initialized with the original fall-through for this BB.
846   bool analyzeBranch(const MCSymbol *&TBB, const MCSymbol *&FBB,
847                      MCInst *&CondBranch, MCInst *&UncondBranch);
848 
849   /// Printer required for printing dominator trees.
850   void printAsOperand(raw_ostream &OS, bool PrintType = true) {
851     if (PrintType)
852       OS << "basic block ";
853     OS << getName();
854   }
855 
856   /// A simple dump function for debugging.
857   void dump() const;
858 
859   /// Validate successor invariants for this BB.
860   bool validateSuccessorInvariants();
861 
862   /// Return offset of the basic block from the function start on input.
863   uint32_t getInputOffset() const { return InputRange.first; }
864 
865   /// Return offset from the function start to location immediately past
866   /// the end of the basic block.
867   uint32_t getEndOffset() const { return InputRange.second; }
868 
869   /// Return size of the basic block on input.
870   uint32_t getOriginalSize() const {
871     return InputRange.second - InputRange.first;
872   }
873 
874   /// Returns an estimate of size of basic block during run time optionally
875   /// using a user-supplied emitter for lock-free multi-thread work.
876   /// MCCodeEmitter is not thread safe and each thread should operate with its
877   /// own copy of it.
878   uint64_t estimateSize(const MCCodeEmitter *Emitter = nullptr) const;
879 
880   /// Return index in the current layout. The user is responsible for
881   /// making sure the indices are up to date,
882   /// e.g. by calling BinaryFunction::updateLayoutIndices();
883   unsigned getLayoutIndex() const {
884     assert(isValid());
885     return LayoutIndex;
886   }
887 
888   /// Set layout index. To be used by BinaryFunction.
889   void setLayoutIndex(unsigned Index) { LayoutIndex = Index; }
890 
891   /// Needed by graph traits.
892   BinaryFunction *getParent() const { return getFunction(); }
893 
894   /// Return true if the containing function is in CFG state.
895   bool hasCFG() const;
896 
897   /// Return true if the containing function is in a state with instructions.
898   bool hasInstructions() const;
899 
900   /// Return offset of the basic block from the function start.
901   uint32_t getOffset() const { return InputRange.first; }
902 
903   /// Get the index of this basic block.
904   unsigned getIndex() const {
905     assert(isValid());
906     return Index;
907   }
908 
909   /// Return jump table if the block contains a jump table instruction or
910   /// nullptr otherwise.
911   const JumpTable *getJumpTable() const;
912 
913   /// Check if the block has a jump table instruction.
914   bool hasJumpTable() const { return getJumpTable() != nullptr; }
915 
916   /// Returns the last computed hash value of the block.
917   uint64_t getHash() const { return Hash; }
918 
919 private:
920   void adjustNumPseudos(const MCInst &Inst, int Sign);
921 
922   template <typename Itr> void adjustNumPseudos(Itr Begin, Itr End, int Sign) {
923     while (Begin != End)
924       adjustNumPseudos(*Begin++, Sign);
925   }
926 
927   /// Adds predecessor to the BB. Most likely you don't need to call this.
928   void addPredecessor(BinaryBasicBlock *Pred);
929 
930   /// Remove predecessor of the basic block. Don't use directly, instead
931   /// use removeSuccessor() function.
932   /// If \p Multiple is set to true, it will remove all predecessors that
933   /// are equal to \p Pred. Otherwise, the first instance of \p Pred found
934   /// will be removed. This only matters in awkward, redundant CFGs.
935   void removePredecessor(BinaryBasicBlock *Pred, bool Multiple = true);
936 
937   /// Set end offset of this basic block.
938   void setEndOffset(uint32_t Offset) { InputRange.second = Offset; }
939 
940   /// Set the index of this basic block.
941   void setIndex(unsigned I) { Index = I; }
942 
943   /// Sets the hash value of the basic block.
944   void setHash(uint64_t Value) const { Hash = Value; }
945 
946   template <typename T> void clearList(T &List) {
947     T TempList;
948     TempList.swap(List);
949   }
950 
951   /// Release memory taken by CFG edges and instructions.
952   void releaseCFG() {
953     clearList(Predecessors);
954     clearList(Successors);
955     clearList(Throwers);
956     clearList(LandingPads);
957     clearList(BranchInfo);
958     clearList(Instructions);
959   }
960 };
961 
962 #if defined(LLVM_ON_UNIX)
963 /// Keep the size of the BinaryBasicBlock within a reasonable size class
964 /// (jemalloc bucket) on Linux
965 static_assert(sizeof(BinaryBasicBlock) <= 256);
966 #endif
967 
968 bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS);
969 
970 } // namespace bolt
971 
972 // GraphTraits specializations for basic block graphs (CFGs)
973 template <> struct GraphTraits<bolt::BinaryBasicBlock *> {
974   using NodeRef = bolt::BinaryBasicBlock *;
975   using ChildIteratorType = bolt::BinaryBasicBlock::succ_iterator;
976 
977   static NodeRef getEntryNode(bolt::BinaryBasicBlock *BB) { return BB; }
978   static inline ChildIteratorType child_begin(NodeRef N) {
979     return N->succ_begin();
980   }
981   static inline ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
982 };
983 
984 template <> struct GraphTraits<const bolt::BinaryBasicBlock *> {
985   using NodeRef = const bolt::BinaryBasicBlock *;
986   using ChildIteratorType = bolt::BinaryBasicBlock::const_succ_iterator;
987 
988   static NodeRef getEntryNode(const bolt::BinaryBasicBlock *BB) { return BB; }
989   static inline ChildIteratorType child_begin(NodeRef N) {
990     return N->succ_begin();
991   }
992   static inline ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
993 };
994 
995 template <> struct GraphTraits<Inverse<bolt::BinaryBasicBlock *>> {
996   using NodeRef = bolt::BinaryBasicBlock *;
997   using ChildIteratorType = bolt::BinaryBasicBlock::pred_iterator;
998   static NodeRef getEntryNode(Inverse<bolt::BinaryBasicBlock *> G) {
999     return G.Graph;
1000   }
1001   static inline ChildIteratorType child_begin(NodeRef N) {
1002     return N->pred_begin();
1003   }
1004   static inline ChildIteratorType child_end(NodeRef N) { return N->pred_end(); }
1005 };
1006 
1007 template <> struct GraphTraits<Inverse<const bolt::BinaryBasicBlock *>> {
1008   using NodeRef = const bolt::BinaryBasicBlock *;
1009   using ChildIteratorType = bolt::BinaryBasicBlock::const_pred_iterator;
1010   static NodeRef getEntryNode(Inverse<const bolt::BinaryBasicBlock *> G) {
1011     return G.Graph;
1012   }
1013   static inline ChildIteratorType child_begin(NodeRef N) {
1014     return N->pred_begin();
1015   }
1016   static inline ChildIteratorType child_end(NodeRef N) { return N->pred_end(); }
1017 };
1018 
1019 } // namespace llvm
1020 
1021 #endif
1022