1 //===- bolt/Core/BinaryFunction.h - Low-level function ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the declaration of the BinaryFunction class. It represents
10 // a function at the lowest IR level. Typically, a BinaryFunction represents a
11 // function object in a compiled and linked binary file. However, a
12 // BinaryFunction can also be constructed manually, e.g. for injecting into a
13 // binary file.
14 //
15 // A BinaryFunction could be in one of several states described in
16 // BinaryFunction::State. While in the disassembled state, it will contain a
17 // list of instructions with their offsets. In the CFG state, it will contain a
18 // list of BinaryBasicBlocks that form a control-flow graph. This state is best
19 // suited for binary analysis and optimizations. However, sometimes it's
20 // impossible to build the precise CFG due to the ambiguity of indirect
21 // branches.
22 //
23 //===----------------------------------------------------------------------===//
24 
25 #ifndef BOLT_CORE_BINARY_FUNCTION_H
26 #define BOLT_CORE_BINARY_FUNCTION_H
27 
28 #include "bolt/Core/BinaryBasicBlock.h"
29 #include "bolt/Core/BinaryContext.h"
30 #include "bolt/Core/BinaryDomTree.h"
31 #include "bolt/Core/BinaryLoop.h"
32 #include "bolt/Core/BinarySection.h"
33 #include "bolt/Core/DebugData.h"
34 #include "bolt/Core/FunctionLayout.h"
35 #include "bolt/Core/JumpTable.h"
36 #include "bolt/Core/MCPlus.h"
37 #include "bolt/Utils/NameResolver.h"
38 #include "llvm/ADT/STLExtras.h"
39 #include "llvm/ADT/SmallString.h"
40 #include "llvm/ADT/StringRef.h"
41 #include "llvm/ADT/iterator.h"
42 #include "llvm/ADT/iterator_range.h"
43 #include "llvm/BinaryFormat/Dwarf.h"
44 #include "llvm/MC/MCContext.h"
45 #include "llvm/MC/MCDwarf.h"
46 #include "llvm/MC/MCInst.h"
47 #include "llvm/MC/MCSymbol.h"
48 #include "llvm/Object/ObjectFile.h"
49 #include "llvm/Support/RWMutex.h"
50 #include "llvm/Support/raw_ostream.h"
51 #include <algorithm>
52 #include <iterator>
53 #include <limits>
54 #include <unordered_map>
55 #include <utility>
56 #include <vector>
57 
58 using namespace llvm::object;
59 
60 namespace llvm {
61 
62 class DWARFUnit;
63 
64 namespace bolt {
65 
66 using InputOffsetToAddressMapTy = std::unordered_multimap<uint64_t, uint64_t>;
67 
68 /// Types of macro-fusion alignment corrections.
69 enum MacroFusionType { MFT_NONE, MFT_HOT, MFT_ALL };
70 
71 enum IndirectCallPromotionType : char {
72   ICP_NONE,        /// Don't perform ICP.
73   ICP_CALLS,       /// Perform ICP on indirect calls.
74   ICP_JUMP_TABLES, /// Perform ICP on jump tables.
75   ICP_ALL          /// Perform ICP on calls and jump tables.
76 };
77 
78 /// Hash functions supported for BF/BB hashing.
79 enum class HashFunction : char {
80   StdHash, /// std::hash, implementation is platform-dependent. Provided for
81            /// backwards compatibility.
82   XXH3,    /// llvm::xxh3_64bits, the default.
83   Default = XXH3,
84 };
85 
86 /// Information on a single indirect call to a particular callee.
87 struct IndirectCallProfile {
88   MCSymbol *Symbol;
89   uint32_t Offset;
90   uint64_t Count;
91   uint64_t Mispreds;
92 
93   IndirectCallProfile(MCSymbol *Symbol, uint64_t Count, uint64_t Mispreds,
94                       uint32_t Offset = 0)
95       : Symbol(Symbol), Offset(Offset), Count(Count), Mispreds(Mispreds) {}
96 
97   bool operator==(const IndirectCallProfile &Other) const {
98     return Symbol == Other.Symbol && Offset == Other.Offset;
99   }
100 };
101 
102 /// Aggregated information for an indirect call site.
103 using IndirectCallSiteProfile = SmallVector<IndirectCallProfile, 4>;
104 
105 inline raw_ostream &operator<<(raw_ostream &OS,
106                                const bolt::IndirectCallSiteProfile &ICSP) {
107   std::string TempString;
108   raw_string_ostream SS(TempString);
109 
110   const char *Sep = "\n        ";
111   uint64_t TotalCount = 0;
112   uint64_t TotalMispreds = 0;
113   for (const IndirectCallProfile &CSP : ICSP) {
114     SS << Sep << "{ " << (CSP.Symbol ? CSP.Symbol->getName() : "<unknown>")
115        << ": " << CSP.Count << " (" << CSP.Mispreds << " misses) }";
116     Sep = ",\n        ";
117     TotalCount += CSP.Count;
118     TotalMispreds += CSP.Mispreds;
119   }
120 
121   OS << TotalCount << " (" << TotalMispreds << " misses) :" << TempString;
122   return OS;
123 }
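// Illustrative example (not from the original source): for a call site with
// two recorded targets, { foo: 100 (10 misses) } and { <unknown>: 50 (2 misses) },
// the printer above emits the aggregated totals followed by each target:
//
//   150 (12 misses) :
//           { foo: 100 (10 misses) },
//           { <unknown>: 50 (2 misses) }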
124 
125 /// BinaryFunction is a representation of machine-level function.
126 ///
127 /// In the input binary, an instance of BinaryFunction can represent a fragment
128 /// of a function if the higher-level function was split, e.g. into hot and cold
129 /// parts. The fragment containing the main entry point is called a parent
130 /// or the main fragment.
131 class BinaryFunction {
132 public:
133   enum class State : char {
134     Empty = 0,     /// Function body is empty.
135     Disassembled,  /// Function has been disassembled.
136     CFG,           /// Control flow graph has been built.
137     CFG_Finalized, /// CFG is finalized. No optimizations allowed.
138     EmittedCFG,    /// Instructions have been emitted to output.
139     Emitted,       /// Same as above plus CFG is destroyed.
140   };
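  // Typical lifecycle (illustrative): Empty -> Disassembled -> CFG ->
  // CFG_Finalized -> EmittedCFG -> Emitted. Processing may stop early for
  // functions that are ignored or could not be disassembled.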
141 
142   /// Types of profile the function can use. Could be a combination.
143   enum {
144     PF_NONE = 0,     /// No profile.
145     PF_LBR = 1,      /// Profile is based on last branch records.
146     PF_SAMPLE = 2,   /// Non-LBR sample-based profile.
147     PF_MEMEVENT = 4, /// Profile has mem events.
148   };
149 
150   /// Struct for tracking exception handling ranges.
151   struct CallSite {
152     const MCSymbol *Start;
153     const MCSymbol *End;
154     const MCSymbol *LP;
155     uint64_t Action;
156   };
157 
158   using CallSitesList = SmallVector<std::pair<FragmentNum, CallSite>, 0>;
159   using CallSitesRange = iterator_range<CallSitesList::const_iterator>;
160 
161   using IslandProxiesType =
162       std::map<BinaryFunction *, std::map<const MCSymbol *, MCSymbol *>>;
163 
164   struct IslandInfo {
165     /// Temporary holder of offsets that are data markers (used on AArch64).
166     /// It is possible to have data in code sections. To ease the identification
167     /// of data in code sections, the ABI requires the symbol table to have
168     /// symbols named "$d" identifying the start of data inside code and "$x"
169     /// identifying the end of a chunk of data inside code. DataOffsets contain
170     /// all offsets of $d symbols and CodeOffsets all offsets of $x symbols.
171     std::set<uint64_t> DataOffsets;
172     std::set<uint64_t> CodeOffsets;
173 
174     /// List of relocations associated with data in the constant island
175     std::map<uint64_t, Relocation> Relocations;
176 
177     /// Set true if constant island contains dynamic relocations, which may
178     /// happen if binary is linked with -z notext option.
179     bool HasDynamicRelocations{false};
180 
181     /// Offsets in the function that are data values in a constant island,
182     /// identified after disassembly.
183     std::map<uint64_t, MCSymbol *> Offsets;
184     SmallPtrSet<MCSymbol *, 4> Symbols;
185     DenseMap<const MCSymbol *, BinaryFunction *> ProxySymbols;
186     DenseMap<const MCSymbol *, MCSymbol *> ColdSymbols;
187     /// Keeps track of other functions we depend on because there is a reference
188     /// to the constant islands in them.
189     IslandProxiesType Proxies, ColdProxies;
190     SmallPtrSet<BinaryFunction *, 1> Dependency; // The other way around
191 
192     mutable MCSymbol *FunctionConstantIslandLabel{nullptr};
193     mutable MCSymbol *FunctionColdConstantIslandLabel{nullptr};
194 
195     /// Return the constant island alignment.
196     uint16_t getAlignment() const { return sizeof(uint64_t); }
197   };
198 
199   static constexpr uint64_t COUNT_NO_PROFILE =
200       BinaryBasicBlock::COUNT_NO_PROFILE;
201 
202   static const char TimerGroupName[];
203   static const char TimerGroupDesc[];
204 
205   using BasicBlockOrderType = SmallVector<BinaryBasicBlock *, 0>;
206 
207   /// Mark injected functions
208   bool IsInjected = false;
209 
210   using LSDATypeTableTy = SmallVector<uint64_t, 0>;
211 
212   /// List of DWARF CFI instructions. Original CFI from the binary must be
213   /// sorted w.r.t. the offset at which it appears. We rely on this to replay CFIs
214   /// if needed (to fix state after reordering BBs).
215   using CFIInstrMapType = SmallVector<MCCFIInstruction, 0>;
216   using cfi_iterator = CFIInstrMapType::iterator;
217   using const_cfi_iterator = CFIInstrMapType::const_iterator;
218 
219 private:
220   /// Current state of the function.
221   State CurrentState{State::Empty};
222 
223   /// A list of symbols associated with the function entry point.
224   ///
225   /// Multiple symbols would typically result from identical code-folding
226   /// optimization.
227   typedef SmallVector<MCSymbol *, 1> SymbolListTy;
228   SymbolListTy Symbols;
229 
230   /// The list of names this function is known under. Used for fuzzy-matching
231   /// the function to its name in a profile, command line, etc.
232   SmallVector<std::string, 0> Aliases;
233 
234   /// Containing section in the input file.
235   BinarySection *OriginSection = nullptr;
236 
237   /// Address of the function in memory. It could also be an offset from the
238   /// base address for position-independent binaries.
239   uint64_t Address;
240 
241   /// Original size of the function.
242   uint64_t Size;
243 
244   /// Address of the function in output.
245   uint64_t OutputAddress{0};
246 
247   /// Size of the function in the output file.
248   uint64_t OutputSize{0};
249 
250   /// Maximum size this function is allowed to have.
251   uint64_t MaxSize{std::numeric_limits<uint64_t>::max()};
252 
253   /// Alignment requirements for the function.
254   uint16_t Alignment{2};
255 
256   /// Maximum number of bytes used for alignment of hot part of the function.
257   uint16_t MaxAlignmentBytes{0};
258 
259   /// Maximum number of bytes used for alignment of cold part of the function.
260   uint16_t MaxColdAlignmentBytes{0};
261 
262   const MCSymbol *PersonalityFunction{nullptr};
263   uint8_t PersonalityEncoding{dwarf::DW_EH_PE_sdata4 | dwarf::DW_EH_PE_pcrel};
264 
265   BinaryContext &BC;
266 
267   std::unique_ptr<BinaryLoopInfo> BLI;
268   std::unique_ptr<BinaryDominatorTree> BDT;
269 
270   /// All labels in the function that are referenced via relocations from
271   /// data objects. Typically these are jump table destinations and computed
272   /// goto labels.
273   std::set<uint64_t> ExternallyReferencedOffsets;
274 
275   /// Offsets of indirect branches with unknown destinations.
276   std::set<uint64_t> UnknownIndirectBranchOffsets;
277 
278   /// A set of local and global symbols corresponding to secondary entry points.
279   /// Each additional function entry point has a corresponding entry in the map.
280   /// The key is a local symbol corresponding to a basic block and the value
281   /// is a global symbol corresponding to an external entry point.
282   DenseMap<const MCSymbol *, MCSymbol *> SecondaryEntryPoints;
283 
284   /// False if the function is too complex to reconstruct its control
285   /// flow graph.
286   /// In relocation mode we still disassemble and re-assemble such functions.
287   bool IsSimple{true};
288 
289   /// Indication that the function should be ignored for optimization purposes.
290   /// If we can skip emission of some functions, then ignored functions might
291   /// not be fully disassembled and will not be emitted.
292   bool IsIgnored{false};
293 
294   /// Pseudo functions should not be disassembled or emitted.
295   bool IsPseudo{false};
296 
297   /// True if the original function code has all necessary relocations to track
298   /// addresses of functions emitted to new locations. Typically set for
299   /// functions that we are not going to emit.
300   bool HasExternalRefRelocations{false};
301 
302   /// True if the function has an indirect branch with unknown destination.
303   bool HasUnknownControlFlow{false};
304 
305   /// The code from inside the function references one of the code locations
306   /// from the same function as a data, i.e. it's possible the label is used
307   /// inside an address calculation or could be referenced from outside.
308   bool HasInternalLabelReference{false};
309 
310   /// In AArch64, preserve nops to maintain code equal to input (assuming no
311   /// optimizations are done).
312   bool PreserveNops{false};
313 
314   /// Indicate if this function has associated exception handling metadata.
315   bool HasEHRanges{false};
316 
317   /// True if the function uses DW_CFA_GNU_args_size CFIs.
318   bool UsesGnuArgsSize{false};
319 
320   /// True if the function might have a profile available externally.
321   /// Used to check if processing of the function is required under certain
322   /// conditions.
323   bool HasProfileAvailable{false};
324 
325   bool HasMemoryProfile{false};
326 
327   /// Execution halts whenever this function is entered.
328   bool TrapsOnEntry{false};
329 
330   /// True if the function is a fragment of another function. This means that
331   /// this function could only be entered via its parent or one of its sibling
332   /// fragments. It could be entered at any basic block. It can also return
333   /// control to any basic block of its parent or its siblings.
334   bool IsFragment{false};
335 
336   /// Indicate that the function body has an SDT marker.
337   bool HasSDTMarker{false};
338 
339   /// Indicate that the function body has pseudo probes.
340   bool HasPseudoProbe{BC.getUniqueSectionByName(".pseudo_probe_desc") &&
341                       BC.getUniqueSectionByName(".pseudo_probe")};
342 
343   /// True if the function uses ORC format for stack unwinding.
344   bool HasORC{false};
345 
346   /// True if the original entry point was patched.
347   bool IsPatched{false};
348 
349   /// True if the function contains an explicit or implicit indirect branch to its
350   /// split fragments, e.g., a split jump table or a landing pad in a split fragment.
351   bool HasIndirectTargetToSplitFragment{false};
352 
353   /// True if there are no control-flow edges with successors in other functions
354   /// (i.e. if tail calls have edges to function-local basic blocks).
355   /// Set to false by SCTC. Dynostats can't be reliably computed for
356   /// functions with non-canonical CFG.
357   /// This attribute is only valid when hasCFG() == true.
358   bool HasCanonicalCFG{true};
359 
360   /// True if another function body was merged into this one.
361   bool HasFunctionsFoldedInto{false};
362 
363   /// Name for the section this function code should reside in.
364   std::string CodeSectionName;
365 
366   /// Name for the corresponding cold code section.
367   std::string ColdCodeSectionName;
368 
369   /// Parent function fragment for split function fragments.
370   using FragmentsSetTy = SmallPtrSet<BinaryFunction *, 1>;
371   FragmentsSetTy ParentFragments;
372 
373   /// Indicate if the function body was folded into another function.
374   /// Used by ICF optimization.
375   BinaryFunction *FoldedIntoFunction{nullptr};
376 
377   /// All fragments for a parent function.
378   FragmentsSetTy Fragments;
379 
380   /// The profile data for the number of times the function was executed.
381   uint64_t ExecutionCount{COUNT_NO_PROFILE};
382 
383   /// Profile match ratio.
384   float ProfileMatchRatio{0.0f};
385 
386   /// Raw branch count for this function in the profile.
387   uint64_t RawBranchCount{0};
388 
389   /// Dynamically executed function bytes, used for density computation.
390   uint64_t SampleCountInBytes{0};
391 
392   /// Indicates the type of profile the function is using.
393   uint16_t ProfileFlags{PF_NONE};
394 
395   /// True if the function's input profile data has been inaccurate but has
396   /// been adjusted by the profile inference algorithm.
397   bool HasInferredProfile{false};
398 
399   /// For functions with mismatched profile we store all call profile
400   /// information at a function level (as opposed to tying it to
401   /// specific call sites).
402   IndirectCallSiteProfile AllCallSites;
403 
404   /// Score of the function (estimated number of instructions executed,
405   /// according to profile data). -1 if the score has not been calculated yet.
406   mutable int64_t FunctionScore{-1};
407 
408   /// Original LSDA address for the function.
409   uint64_t LSDAAddress{0};
410 
411   /// Original LSDA type encoding
412   unsigned LSDATypeEncoding{dwarf::DW_EH_PE_omit};
413 
414   /// Containing compilation unit for the function.
415   DWARFUnit *DwarfUnit{nullptr};
416 
417   /// Last computed hash value. Note that the value could be recomputed using
418   /// different parameters by every pass.
419   mutable uint64_t Hash{0};
420 
421   /// Function GUID assigned externally.
422   uint64_t GUID{0};
423 
424   /// For PLT functions it contains a symbol associated with a function
425   /// reference. It is nullptr for non-PLT functions.
426   const MCSymbol *PLTSymbol{nullptr};
427 
428   /// Function order for streaming into the destination binary.
429   uint32_t Index{-1U};
430 
431   /// Function is referenced by a non-control flow instruction.
432   bool HasAddressTaken{false};
433 
434   /// Get basic block index assuming it belongs to this function.
435   unsigned getIndex(const BinaryBasicBlock *BB) const {
436     assert(BB->getIndex() < BasicBlocks.size());
437     return BB->getIndex();
438   }
439 
440   /// Release memory taken by the list.
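  /// Swapping with a default-constructed temporary releases the allocated
  /// storage once the temporary is destroyed, whereas clear() alone may keep
  /// the capacity around.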
441   template <typename T> BinaryFunction &clearList(T &List) {
442     T TempList;
443     TempList.swap(List);
444     return *this;
445   }
446 
447   /// Update the indices of all the basic blocks starting at StartIndex.
448   void updateBBIndices(const unsigned StartIndex);
449 
450   /// Annotate each basic block entry with its current CFI state. This is
451   /// run right after the construction of CFG while basic blocks are in their
452   /// original order.
453   void annotateCFIState();
454 
455   /// Associate DW_CFA_GNU_args_size info with invoke instructions
456   /// (call instructions with non-empty landing pad).
457   void propagateGnuArgsSizeInfo(MCPlusBuilder::AllocatorIdTy AllocId);
458 
459   /// Synchronize branch instructions with CFG.
460   void postProcessBranches();
461 
462   /// The address offset where we emitted the constant island, that is, the
463   /// chunk of data in the function code area (AArch64 only).
464   int64_t OutputDataOffset{0};
465   int64_t OutputColdDataOffset{0};
466 
467   /// Map labels to corresponding basic blocks.
468   DenseMap<const MCSymbol *, BinaryBasicBlock *> LabelToBB;
469 
470   using BranchListType = SmallVector<std::pair<uint32_t, uint32_t>, 0>;
471   BranchListType TakenBranches;   /// All local taken branches.
472   BranchListType IgnoredBranches; /// Branches ignored for CFG purposes.
473 
474   /// Map offset in the function to a label.
475   /// Labels are used for building the CFG for simple functions. For non-simple
476   /// functions in relocation mode, we need to emit them for relocations
477   /// referencing function internals to work (e.g. jump tables).
478   using LabelsMapType = std::map<uint32_t, MCSymbol *>;
479   LabelsMapType Labels;
480 
481   /// Temporary holder of instructions before CFG is constructed.
482   /// Map offset in the function to MCInst.
483   using InstrMapType = std::map<uint32_t, MCInst>;
484   InstrMapType Instructions;
485 
486   /// We don't decode Call Frame Info encoded in DWARF program state
487   /// machine. Instead we define a "CFI State" - a frame information that
488   /// is a result of executing FDE CFI program up to a given point. The
489   /// program consists of opaque Call Frame Instructions:
490   ///
491   ///   CFI #0
492   ///   CFI #1
493   ///   ....
494   ///   CFI #N
495   ///
496   /// When we refer to "CFI State K" - it corresponds to a row in an abstract
497   /// Call Frame Info table. This row is reached right before executing CFI #K.
498   ///
499   /// At any point of execution in a function we are in any one of (N + 2)
500   /// states described in the original FDE program. We can't have more states
501   /// without intelligent processing of CFIs.
502   ///
503   /// When the final layout of basic blocks is known, and we finalize CFG,
504   /// we modify the original program to make sure the same state could be
505   /// reached even when basic blocks containing CFI instructions are executed
506   /// in a different order.
507   CFIInstrMapType FrameInstructions;
508 
509   /// A map of restore state CFI instructions to their equivalent CFI
510   /// instructions that produce the same state, in order to eliminate
511   /// remember-restore CFI instructions when rewriting CFI.
512   DenseMap<int32_t, SmallVector<int32_t, 4>> FrameRestoreEquivalents;
513 
514   // For tracking exception handling ranges.
515   CallSitesList CallSites;
516 
517   /// Binary blobs representing action, type, and type index tables for this
518   /// function's LSDA (exception handling).
519   ArrayRef<uint8_t> LSDAActionTable;
520   ArrayRef<uint8_t> LSDATypeIndexTable;
521 
522   /// Vector of addresses of types referenced by LSDA.
523   LSDATypeTableTy LSDATypeTable;
524 
525   /// Vector of addresses of entries in LSDATypeTable used for indirect
526   /// addressing.
527   LSDATypeTableTy LSDATypeAddressTable;
528 
529   /// Marking for the beginnings of language-specific data areas for each
530   /// fragment of the function.
531   SmallVector<MCSymbol *, 0> LSDASymbols;
532 
533   /// Each function fragment may have another fragment containing all landing
534   /// pads for it. If that's the case, the LP fragment will be stored in the
535   /// vector below with indexing starting with the main fragment.
536   SmallVector<std::optional<FragmentNum>, 0> LPFragments;
537 
538   /// Map to discover which CFIs are attached to a given instruction offset.
539   /// Maps an instruction offset into a FrameInstructions offset.
540   /// This is only relevant to the buildCFG phase and is discarded afterwards.
541   std::multimap<uint32_t, uint32_t> OffsetToCFI;
542 
543   /// List of CFI instructions associated with the CIE (common to more than one
544   /// function and that apply before the entry basic block).
545   CFIInstrMapType CIEFrameInstructions;
546 
547   /// All compound jump tables for this function. This duplicates what's stored
548   /// in the BinaryContext, but additionally it gives quick access to all
549   /// jump tables used by this function.
550   ///
551   /// <OriginalAddress> -> <JumpTable *>
552   std::map<uint64_t, JumpTable *> JumpTables;
553 
554   /// All jump table sites in the function before CFG is built.
555   SmallVector<std::pair<uint64_t, uint64_t>, 0> JTSites;
556 
557   /// List of relocations in this function.
558   std::map<uint64_t, Relocation> Relocations;
559 
560   /// Information on function constant islands.
561   std::unique_ptr<IslandInfo> Islands;
562 
563   // Blocks are kept sorted in the layout order. If we need to change the
564   // layout (if BasicBlocksLayout stores a different order than BasicBlocks),
565   // the terminating instructions need to be modified.
566   using BasicBlockListType = SmallVector<BinaryBasicBlock *, 0>;
567   BasicBlockListType BasicBlocks;
568   BasicBlockListType DeletedBasicBlocks;
569 
570   FunctionLayout Layout;
571 
572   /// BasicBlockOffsets are used during CFG construction to map from code
573   /// offsets to BinaryBasicBlocks.  Any modifications made to the CFG
574   /// after initial construction are not reflected in this data structure.
575   using BasicBlockOffset = std::pair<uint64_t, BinaryBasicBlock *>;
576   struct CompareBasicBlockOffsets {
577     bool operator()(const BasicBlockOffset &A,
578                     const BasicBlockOffset &B) const {
579       return A.first < B.first;
580     }
581   };
582   SmallVector<BasicBlockOffset, 0> BasicBlockOffsets;
583 
584   SmallVector<MCSymbol *, 0> ColdSymbols;
585 
586   /// Symbol at the end of each fragment of a split function.
587   mutable SmallVector<MCSymbol *, 0> FunctionEndLabels;
588 
589   /// Unique number associated with the function.
590   uint64_t FunctionNumber;
591 
592   /// Count the number of functions created.
593   static uint64_t Count;
594 
595   /// Register alternative function name.
596   void addAlternativeName(std::string NewName) {
597     Aliases.push_back(std::move(NewName));
598   }
599 
600   /// Return a label at a given \p Address in the function. If the label does
601   /// not exist - create it. Assert if the \p Address does not belong to
602   /// the function. If \p CreatePastEnd is true, then return the function
603   /// end label when the \p Address points immediately past the last byte
604   /// of the function.
605   /// NOTE: the function always returns a local (temp) symbol, even if there's
606   ///       a global symbol that corresponds to an entry at this address.
607   MCSymbol *getOrCreateLocalLabel(uint64_t Address, bool CreatePastEnd = false);
608 
609   /// Register a data entry at a given \p Offset into the function.
610   void markDataAtOffset(uint64_t Offset) {
611     if (!Islands)
612       Islands = std::make_unique<IslandInfo>();
613     Islands->DataOffsets.emplace(Offset);
614   }
615 
616   /// Register a code marker at a given \p Offset into the function.
617   void markCodeAtOffset(uint64_t Offset) {
618     if (!Islands)
619       Islands = std::make_unique<IslandInfo>();
620     Islands->CodeOffsets.emplace(Offset);
621   }
622 
623   /// Register an internal offset in a function referenced from outside.
624   void registerReferencedOffset(uint64_t Offset) {
625     ExternallyReferencedOffsets.emplace(Offset);
626   }
627 
628   /// True if there are references to internals of this function from data,
629   /// e.g. from jump tables.
630   bool hasInternalReference() const {
631     return !ExternallyReferencedOffsets.empty();
632   }
633 
634   /// Return an entry ID corresponding to a symbol known to belong to
635   /// the function.
636   ///
637   /// Prefer to use BinaryContext::getFunctionForSymbol(EntrySymbol, &ID)
638   /// instead of calling this function directly.
639   uint64_t getEntryIDForSymbol(const MCSymbol *EntrySymbol) const;
640 
641   /// If the function represents a secondary split function fragment, set its
642   /// parent fragment to \p BF.
643   void addParentFragment(BinaryFunction &BF) {
644     assert(this != &BF);
645     assert(IsFragment && "function must be a fragment to have a parent");
646     ParentFragments.insert(&BF);
647   }
648 
649   /// Register a child fragment for the main fragment of a split function.
650   void addFragment(BinaryFunction &BF) {
651     assert(this != &BF);
652     Fragments.insert(&BF);
653   }
654 
655   void addInstruction(uint64_t Offset, MCInst &&Instruction) {
656     Instructions.emplace(Offset, std::forward<MCInst>(Instruction));
657   }
658 
659   /// Convert CFI instructions to a standard form (remove remember/restore).
660   void normalizeCFIState();
661 
662   /// Analyze and process indirect branch \p Instruction before it is
663   /// added to Instructions list.
664   IndirectBranchType processIndirectBranch(MCInst &Instruction, unsigned Size,
665                                            uint64_t Offset,
666                                            uint64_t &TargetAddress);
667 
668   BinaryFunction &operator=(const BinaryFunction &) = delete;
669   BinaryFunction(const BinaryFunction &) = delete;
670 
671   friend class MachORewriteInstance;
672   friend class RewriteInstance;
673   friend class BinaryContext;
674   friend class DataReader;
675   friend class DataAggregator;
676 
677   static std::string buildCodeSectionName(StringRef Name,
678                                           const BinaryContext &BC);
679   static std::string buildColdCodeSectionName(StringRef Name,
680                                               const BinaryContext &BC);
681 
682   /// Creation should be handled by RewriteInstance or BinaryContext
683   BinaryFunction(const std::string &Name, BinarySection &Section,
684                  uint64_t Address, uint64_t Size, BinaryContext &BC)
685       : OriginSection(&Section), Address(Address), Size(Size), BC(BC),
686         CodeSectionName(buildCodeSectionName(Name, BC)),
687         ColdCodeSectionName(buildColdCodeSectionName(Name, BC)),
688         FunctionNumber(++Count) {
689     Symbols.push_back(BC.Ctx->getOrCreateSymbol(Name));
690   }
691 
692   /// This constructor is used to create an injected function
693   BinaryFunction(const std::string &Name, BinaryContext &BC, bool IsSimple)
694       : Address(0), Size(0), BC(BC), IsSimple(IsSimple),
695         CodeSectionName(buildCodeSectionName(Name, BC)),
696         ColdCodeSectionName(buildColdCodeSectionName(Name, BC)),
697         FunctionNumber(++Count) {
698     Symbols.push_back(BC.Ctx->getOrCreateSymbol(Name));
699     IsInjected = true;
700   }
701 
702   /// Create a basic block at a given \p Offset in the function and append it
703   /// to the end of the list of blocks. Used during CFG construction only.
704   BinaryBasicBlock *addBasicBlockAt(uint64_t Offset, MCSymbol *Label) {
705     assert(CurrentState == State::Disassembled &&
706            "Cannot add block with an offset in non-disassembled state.");
707     assert(!getBasicBlockAtOffset(Offset) &&
708            "Basic block already exists at the offset.");
709 
710     BasicBlocks.emplace_back(createBasicBlock(Label).release());
711     BinaryBasicBlock *BB = BasicBlocks.back();
712 
713     BB->setIndex(BasicBlocks.size() - 1);
714     BB->setOffset(Offset);
715 
716     BasicBlockOffsets.emplace_back(Offset, BB);
717     assert(llvm::is_sorted(BasicBlockOffsets, CompareBasicBlockOffsets()) &&
718            llvm::is_sorted(blocks()));
719 
720     return BB;
721   }
722 
723   /// Clear the state of a function that could not be disassembled, or whose
724   /// disassembled state was later invalidated.
725   void clearDisasmState();
726 
727   /// Release memory allocated for CFG and instructions.
728   /// We still keep basic blocks for address translation/mapping purposes.
729   void releaseCFG() {
730     for (BinaryBasicBlock *BB : BasicBlocks)
731       BB->releaseCFG();
732     for (BinaryBasicBlock *BB : DeletedBasicBlocks)
733       BB->releaseCFG();
734 
735     clearList(CallSites);
736     clearList(LSDATypeTable);
737     clearList(LSDATypeAddressTable);
738 
739     clearList(LabelToBB);
740 
741     if (!isMultiEntry())
742       clearList(Labels);
743 
744     clearList(FrameInstructions);
745     clearList(FrameRestoreEquivalents);
746   }
747 
748 public:
749   BinaryFunction(BinaryFunction &&) = default;
750 
751   using iterator = pointee_iterator<BasicBlockListType::iterator>;
752   using const_iterator = pointee_iterator<BasicBlockListType::const_iterator>;
753   using reverse_iterator =
754       pointee_iterator<BasicBlockListType::reverse_iterator>;
755   using const_reverse_iterator =
756       pointee_iterator<BasicBlockListType::const_reverse_iterator>;
757 
758   // CFG iterators.
759   iterator                 begin()       { return BasicBlocks.begin(); }
760   const_iterator           begin() const { return BasicBlocks.begin(); }
761   iterator                 end  ()       { return BasicBlocks.end();   }
762   const_iterator           end  () const { return BasicBlocks.end();   }
763 
764   reverse_iterator        rbegin()       { return BasicBlocks.rbegin(); }
765   const_reverse_iterator  rbegin() const { return BasicBlocks.rbegin(); }
766   reverse_iterator        rend  ()       { return BasicBlocks.rend();   }
767   const_reverse_iterator  rend  () const { return BasicBlocks.rend();   }
768 
769   size_t                    size() const { return BasicBlocks.size();}
770   bool                     empty() const { return BasicBlocks.empty(); }
771   const BinaryBasicBlock &front() const  { return *BasicBlocks.front(); }
772         BinaryBasicBlock &front()        { return *BasicBlocks.front(); }
773   const BinaryBasicBlock & back() const  { return *BasicBlocks.back(); }
774         BinaryBasicBlock & back()        { return *BasicBlocks.back(); }
775   inline iterator_range<iterator> blocks() {
776     return iterator_range<iterator>(begin(), end());
777   }
778   inline iterator_range<const_iterator> blocks() const {
779     return iterator_range<const_iterator>(begin(), end());
780   }
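  // Illustrative only: once the CFG is built, the ranges above support
  // range-based iteration over basic blocks, e.g.
  //
  //   uint32_t TotalInsts = 0;
  //   for (const BinaryBasicBlock &BB : Function.blocks())
  //     TotalInsts += BB.getNumNonPseudos();
  //
  // where `Function` stands for an arbitrary BinaryFunction reference.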
781 
782   // Iterators by pointer.
783   BasicBlockListType::iterator pbegin()  { return BasicBlocks.begin(); }
784   BasicBlockListType::iterator pend()    { return BasicBlocks.end(); }
785 
786   cfi_iterator        cie_begin()       { return CIEFrameInstructions.begin(); }
787   const_cfi_iterator  cie_begin() const { return CIEFrameInstructions.begin(); }
788   cfi_iterator        cie_end()         { return CIEFrameInstructions.end(); }
789   const_cfi_iterator  cie_end()   const { return CIEFrameInstructions.end(); }
790   bool                cie_empty() const { return CIEFrameInstructions.empty(); }
791 
792   inline iterator_range<cfi_iterator> cie() {
793     return iterator_range<cfi_iterator>(cie_begin(), cie_end());
794   }
795   inline iterator_range<const_cfi_iterator> cie() const {
796     return iterator_range<const_cfi_iterator>(cie_begin(), cie_end());
797   }
798 
799   /// Iterate over all jump tables associated with this function.
800   iterator_range<std::map<uint64_t, JumpTable *>::const_iterator>
801   jumpTables() const {
802     return make_range(JumpTables.begin(), JumpTables.end());
803   }
804 
805   /// Return relocation associated with a given \p Offset in the function,
806   /// or nullptr if no such relocation exists.
807   const Relocation *getRelocationAt(uint64_t Offset) const {
808     assert(CurrentState == State::Empty &&
809            "Relocations unavailable in the current function state.");
810     auto RI = Relocations.find(Offset);
811     return (RI == Relocations.end()) ? nullptr : &RI->second;
812   }
813 
814   /// Return the first relocation in the function that starts at an address in
815   /// the [StartOffset, EndOffset) range. Return nullptr if no such relocation
816   /// exists.
817   const Relocation *getRelocationInRange(uint64_t StartOffset,
818                                          uint64_t EndOffset) const {
819     assert(CurrentState == State::Empty &&
820            "Relocations unavailable in the current function state.");
821     auto RI = Relocations.lower_bound(StartOffset);
822     if (RI != Relocations.end() && RI->first < EndOffset)
823       return &RI->second;
824 
825     return nullptr;
826   }
827 
828   /// Return true if function is referenced in a non-control flow instruction.
829   /// This flag is set when the code and relocation analyses are being
830   /// performed, which occurs when safe ICF (Identical Code Folding) is enabled.
831   bool hasAddressTaken() const { return HasAddressTaken; }
832 
833   /// Set whether function is referenced in a non-control flow instruction.
834   void setHasAddressTaken(bool AddressTaken) { HasAddressTaken = AddressTaken; }
835 
836   /// Returns the raw binary encoding of this function.
837   ErrorOr<ArrayRef<uint8_t>> getData() const;
838 
839   BinaryFunction &updateState(BinaryFunction::State State) {
840     CurrentState = State;
841     return *this;
842   }
843 
844   FunctionLayout &getLayout() { return Layout; }
845 
846   const FunctionLayout &getLayout() const { return Layout; }
847 
848   /// Recompute landing pad information for the function and all its blocks.
849   void recomputeLandingPads();
850 
851   /// Return a list of basic blocks sorted using DFS and update layout indices
852   /// using the same order. Does not modify the current layout.
853   BasicBlockListType dfs() const;
854 
855   /// Find the loops in the CFG of the function and store information about
856   /// them.
857   void calculateLoopInfo();
858 
859   /// Return true if the dominator tree has been constructed for this function.
860   bool hasDomTree() const { return BDT != nullptr; }
861 
862   BinaryDominatorTree &getDomTree() { return *BDT.get(); }
863 
864   /// Constructs DomTree for this function.
865   void constructDomTree();
866 
867   /// Return true if loop detection has been run for this function.
868   bool hasLoopInfo() const { return BLI != nullptr; }
869 
870   const BinaryLoopInfo &getLoopInfo() { return *BLI.get(); }
871 
872   bool isLoopFree() {
873     if (!hasLoopInfo())
874       calculateLoopInfo();
875     return BLI->empty();
876   }
877 
878   /// Print loop information about the function.
879   void printLoopInfo(raw_ostream &OS) const;
880 
881   /// View CFG in graphviz program
882   void viewGraph() const;
883 
884   /// Dump CFG in graphviz format
885   void dumpGraph(raw_ostream &OS) const;
886 
887   /// Dump CFG in graphviz format to file.
888   void dumpGraphToFile(std::string Filename) const;
889 
890   /// Dump CFG in graphviz format to a file with a filename that is derived
891   /// from the function name and Annotation strings.  Useful for dumping the
892   /// CFG after an optimization pass.
893   void dumpGraphForPass(std::string Annotation = "") const;
894 
895   /// Return BinaryContext for the function.
896   const BinaryContext &getBinaryContext() const { return BC; }
897 
898   /// Return BinaryContext for the function.
899   BinaryContext &getBinaryContext() { return BC; }
900 
901   /// Attempt to validate CFG invariants.
902   bool validateCFG() const;
903 
904   BinaryBasicBlock *getBasicBlockForLabel(const MCSymbol *Label) {
905     return LabelToBB.lookup(Label);
906   }
907 
908   const BinaryBasicBlock *getBasicBlockForLabel(const MCSymbol *Label) const {
909     return LabelToBB.lookup(Label);
910   }
911 
912   /// Return basic block that originally contained offset \p Offset
913   /// from the function start.
914   BinaryBasicBlock *getBasicBlockContainingOffset(uint64_t Offset);
915 
916   const BinaryBasicBlock *getBasicBlockContainingOffset(uint64_t Offset) const {
917     return const_cast<BinaryFunction *>(this)->getBasicBlockContainingOffset(
918         Offset);
919   }
920 
921   /// Return basic block that started at offset \p Offset.
922   BinaryBasicBlock *getBasicBlockAtOffset(uint64_t Offset) {
923     BinaryBasicBlock *BB = getBasicBlockContainingOffset(Offset);
924     return BB && BB->getOffset() == Offset ? BB : nullptr;
925   }
926 
927   const BinaryBasicBlock *getBasicBlockAtOffset(uint64_t Offset) const {
928     return const_cast<BinaryFunction *>(this)->getBasicBlockAtOffset(Offset);
929   }
930 
931   /// Retrieve the landing pad BB associated with invoke instruction \p Invoke
932   /// that is in \p BB. Return nullptr if none exists
933   BinaryBasicBlock *getLandingPadBBFor(const BinaryBasicBlock &BB,
934                                        const MCInst &InvokeInst) const {
935     assert(BC.MIB->isInvoke(InvokeInst) && "must be invoke instruction");
936     const std::optional<MCPlus::MCLandingPad> LP =
937         BC.MIB->getEHInfo(InvokeInst);
938     if (LP && LP->first) {
939       BinaryBasicBlock *LBB = BB.getLandingPad(LP->first);
940       assert(LBB && "Landing pad should be defined");
941       return LBB;
942     }
943     return nullptr;
944   }
945 
946   /// Return instruction at a given offset in the function. Valid before
947   /// CFG is constructed or while instruction offsets are available in CFG.
948   MCInst *getInstructionAtOffset(uint64_t Offset);
949 
950   const MCInst *getInstructionAtOffset(uint64_t Offset) const {
951     return const_cast<BinaryFunction *>(this)->getInstructionAtOffset(Offset);
952   }
953 
954   /// When the function is in disassembled state, return an instruction that
955   /// contains the \p Offset.
956   MCInst *getInstructionContainingOffset(uint64_t Offset);
957 
958   std::optional<MCInst> disassembleInstructionAtOffset(uint64_t Offset) const;
959 
960   /// Return the offset of the first instruction. If there is data at the
961   /// beginning of the function, then the offset of the first instruction could
962   /// be different from 0.
963   uint64_t getFirstInstructionOffset() const {
964     if (Instructions.empty())
965       return 0;
966     return Instructions.begin()->first;
967   }
968 
969   /// Return jump table that covers a given \p Address in memory.
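  /// The lookup picks the last jump table that starts at or before \p Address
  /// and returns it only if \p Address falls within its size; an empty table
  /// matches only when \p Address equals its start address.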
970   JumpTable *getJumpTableContainingAddress(uint64_t Address) {
971     auto JTI = JumpTables.upper_bound(Address);
972     if (JTI == JumpTables.begin())
973       return nullptr;
974     --JTI;
975     if (JTI->first + JTI->second->getSize() > Address)
976       return JTI->second;
977     if (JTI->second->getSize() == 0 && JTI->first == Address)
978       return JTI->second;
979     return nullptr;
980   }
981 
982   const JumpTable *getJumpTableContainingAddress(uint64_t Address) const {
983     return const_cast<BinaryFunction *>(this)->getJumpTableContainingAddress(
984         Address);
985   }
986 
987   /// Return the name of the function if the function has just one name.
988   /// If the function has multiple names - return one followed
989   /// by "(*<numnames>)".
990   ///
991   /// We should use getPrintName() for diagnostics and use
992   /// hasName() to match function name against a given string.
993   ///
994   /// NOTE: for disambiguating names of local symbols we use the following
995   ///       naming schemes:
996   ///           primary:     <function>/<id>
997   ///           alternative: <function>/<file>/<id2>
998   std::string getPrintName() const {
999     const size_t NumNames = Symbols.size() + Aliases.size();
1000     return NumNames == 1
1001                ? getOneName().str()
1002                : (getOneName().str() + "(*" + std::to_string(NumNames) + ")");
1003   }
1004 
1005   /// The function may have many names. For that reason, we avoid having
1006   /// getName() method as most of the time the user needs a different
1007   /// interface, such as forEachName(), hasName(), hasNameRegex(), etc.
1008   /// In some cases though, we need just a name uniquely identifying
1009   /// the function, and that's what this method is for.
1010   StringRef getOneName() const { return Symbols[0]->getName(); }
1011 
1012   /// Return the name of the function as getPrintName(), but also trying
1013   /// to demangle it.
1014   std::string getDemangledName() const;
1015 
1016   /// Call \p Callback for every name of this function as long as the Callback
1017   /// returns false. Stop if Callback returns true or all names have been used.
1018   /// Return the name for which the Callback returned true if any.
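  ///
  /// A minimal usage sketch (illustrative; \c BF stands for an arbitrary
  /// BinaryFunction reference):
  /// \code
  ///   std::optional<StringRef> Mangled =
  ///       BF.forEachName([](StringRef Name) { return Name.starts_with("_Z"); });
  /// \endcode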
1019   template <typename FType>
1020   std::optional<StringRef> forEachName(FType Callback) const {
1021     for (MCSymbol *Symbol : Symbols)
1022       if (Callback(Symbol->getName()))
1023         return Symbol->getName();
1024 
1025     for (const std::string &Name : Aliases)
1026       if (Callback(StringRef(Name)))
1027         return StringRef(Name);
1028 
1029     return std::nullopt;
1030   }
1031 
1032   /// Check if (possibly one out of many) function name matches the given
1033   /// string. Use this member function instead of direct name comparison.
1034   bool hasName(const std::string &FunctionName) const {
1035     auto Res =
1036         forEachName([&](StringRef Name) { return Name == FunctionName; });
1037     return Res.has_value();
1038   }
1039 
1040   /// Check if any of the function names matches the given regex.
1041   std::optional<StringRef> hasNameRegex(const StringRef NameRegex) const;
1042 
1043   /// Check if any of the restored function names matches the given regex.
1044   /// A restored name is the name with BOLT-added suffixes like "/1" stripped.
1045   std::optional<StringRef>
1046   hasRestoredNameRegex(const StringRef NameRegex) const;
1047 
1048   /// Return a vector of all possible names for the function.
1049   std::vector<StringRef> getNames() const {
1050     std::vector<StringRef> AllNames;
1051     forEachName([&AllNames](StringRef Name) {
1052       AllNames.push_back(Name);
1053       return false;
1054     });
1055 
1056     return AllNames;
1057   }
1058 
1059   /// Return a state the function is in (see BinaryFunction::State definition
1060   /// for description).
1061   State getState() const { return CurrentState; }
1062 
1063   /// Return true if function has a control flow graph available.
1064   bool hasCFG() const {
1065     return getState() == State::CFG || getState() == State::CFG_Finalized ||
1066            getState() == State::EmittedCFG;
1067   }
1068 
1069   /// Return true if the function state implies that it includes instructions.
1070   bool hasInstructions() const {
1071     return getState() == State::Disassembled || hasCFG();
1072   }
1073 
1074   bool isEmitted() const {
1075     return getState() == State::EmittedCFG || getState() == State::Emitted;
1076   }
1077 
1078   /// Return the section in the input binary this function originated from or
1079   /// nullptr if the function did not originate from the file.
1080   BinarySection *getOriginSection() const { return OriginSection; }
1081 
1082   void setOriginSection(BinarySection *Section) { OriginSection = Section; }
1083 
1084   /// Return true if the function did not originate from the primary input file.
1085   bool isInjected() const { return IsInjected; }
1086 
1087   /// Return original address of the function (or offset from base for PIC).
1088   uint64_t getAddress() const { return Address; }
1089 
1090   uint64_t getOutputAddress() const { return OutputAddress; }
1091 
1092   uint64_t getOutputSize() const { return OutputSize; }
1093 
1094   /// Does this function have a valid streaming order index?
1095   bool hasValidIndex() const { return Index != -1U; }
1096 
1097   /// Get the streaming order index for this function.
1098   uint32_t getIndex() const { return Index; }
1099 
1100   /// Set the streaming order index for this function.
1101   void setIndex(uint32_t Idx) {
1102     assert(!hasValidIndex());
1103     Index = Idx;
1104   }
1105 
1106   /// Return offset of the function body in the binary file.
1107   uint64_t getFileOffset() const {
1108     return getLayout().getMainFragment().getFileOffset();
1109   }
1110 
1111   /// Return (original) byte size of the function.
1112   uint64_t getSize() const { return Size; }
1113 
1114   /// Return the maximum size the body of the function could have.
1115   uint64_t getMaxSize() const { return MaxSize; }
1116 
1117   /// Return the number of emitted instructions for this function.
1118   uint32_t getNumNonPseudos() const {
1119     uint32_t N = 0;
1120     for (const BinaryBasicBlock &BB : blocks())
1121       N += BB.getNumNonPseudos();
1122     return N;
1123   }
1124 
1125   /// Return true if function has instructions to emit.
1126   bool hasNonPseudoInstructions() const {
1127     for (const BinaryBasicBlock &BB : blocks())
1128       if (BB.getNumNonPseudos() > 0)
1129         return true;
1130     return false;
1131   }
1132 
1133   /// Return MC symbol associated with the function.
1134   /// All references to the function should use this symbol.
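  ///
  /// For split fragments the symbol is created lazily; e.g. (illustrative
  /// sketch, \c BF standing for an arbitrary BinaryFunction reference):
  /// \code
  ///   MCSymbol *ColdSym = BF.getSymbol(FragmentNum::cold());
  /// \endcode
  /// produces a symbol derived from the main name with a ".cold.<N>" appendix
  /// (uniquified via NameResolver), as implemented below.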
1135   MCSymbol *getSymbol(const FragmentNum Fragment = FragmentNum::main()) {
1136     if (Fragment == FragmentNum::main())
1137       return Symbols[0];
1138 
1139     size_t ColdSymbolIndex = Fragment.get() - 1;
1140     if (ColdSymbolIndex >= ColdSymbols.size())
1141       ColdSymbols.resize(ColdSymbolIndex + 1);
1142 
1143     MCSymbol *&ColdSymbol = ColdSymbols[ColdSymbolIndex];
1144     if (ColdSymbol == nullptr) {
1145       SmallString<10> Appendix = formatv(".cold.{0}", ColdSymbolIndex);
1146       ColdSymbol = BC.Ctx->getOrCreateSymbol(
1147           NameResolver::append(Symbols[0]->getName(), Appendix));
1148     }
1149 
1150     return ColdSymbol;
1151   }
1152 
1153   /// Return MC symbol associated with the function (const version).
1154   /// All references to the function should use this symbol.
1155   const MCSymbol *getSymbol() const { return Symbols[0]; }
1156 
1157   /// Return a list of symbols associated with the main entry of the function.
1158   SymbolListTy &getSymbols() { return Symbols; }
1159   const SymbolListTy &getSymbols() const { return Symbols; }
1160 
1161   /// If a local symbol \p BBLabel corresponds to a basic block that is a
1162   /// secondary entry point into the function, then return a global symbol
1163   /// that represents the secondary entry point. Otherwise return nullptr.
1164   MCSymbol *getSecondaryEntryPointSymbol(const MCSymbol *BBLabel) const {
1165     return SecondaryEntryPoints.lookup(BBLabel);
1166   }
1167 
1168   /// If the basic block serves as a secondary entry point to the function,
1169   /// return a global symbol representing the entry. Otherwise return nullptr.
1170   MCSymbol *getSecondaryEntryPointSymbol(const BinaryBasicBlock &BB) const {
1171     return getSecondaryEntryPointSymbol(BB.getLabel());
1172   }
1173 
1174   /// Return true if the basic block is an entry point into the function
1175   /// (either primary or secondary).
1176   bool isEntryPoint(const BinaryBasicBlock &BB) const {
1177     if (&BB == BasicBlocks.front())
1178       return true;
1179     return getSecondaryEntryPointSymbol(BB);
1180   }
1181 
1182   /// Return MC symbol corresponding to an enumerated entry for multiple-entry
1183   /// functions.
1184   MCSymbol *getSymbolForEntryID(uint64_t EntryNum);
1185   const MCSymbol *getSymbolForEntryID(uint64_t EntryNum) const {
1186     return const_cast<BinaryFunction *>(this)->getSymbolForEntryID(EntryNum);
1187   }
1188 
1189   using EntryPointCallbackTy = function_ref<bool(uint64_t, const MCSymbol *)>;
1190 
1191   /// Invoke \p Callback function for every entry point in the function starting
1192   /// with the main entry and visiting entries in ascending address order.
1193   /// Stop calling the function after false is returned by the callback.
1194   ///
1195   /// Pass an offset of the entry point in the input binary and a corresponding
1196   /// global symbol to the callback function.
1197   ///
1198   /// Return true if all callbacks returned true, false otherwise.
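  ///
  /// A minimal usage sketch (illustrative; \c BF stands for an arbitrary
  /// BinaryFunction reference):
  /// \code
  ///   BF.forEachEntryPoint([](uint64_t Offset, const MCSymbol *Sym) {
  ///     outs() << Sym->getName() << " at input offset 0x"
  ///            << Twine::utohexstr(Offset) << "\n";
  ///     return true; // continue visiting remaining entry points
  ///   });
  /// \endcode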
1199   bool forEachEntryPoint(EntryPointCallbackTy Callback) const;
1200 
1201   /// Return MC symbol associated with the end of the function.
1202   MCSymbol *
1203   getFunctionEndLabel(const FragmentNum Fragment = FragmentNum::main()) const {
1204     assert(BC.Ctx && "cannot be called with empty context");
1205 
1206     size_t LabelIndex = Fragment.get();
1207     if (LabelIndex >= FunctionEndLabels.size()) {
1208       FunctionEndLabels.resize(LabelIndex + 1);
1209     }
1210 
1211     MCSymbol *&FunctionEndLabel = FunctionEndLabels[LabelIndex];
1212     if (!FunctionEndLabel) {
1213       std::unique_lock<llvm::sys::RWMutex> Lock(BC.CtxMutex);
1214       if (Fragment == FragmentNum::main())
1215         FunctionEndLabel = BC.Ctx->createNamedTempSymbol("func_end");
1216       else
1217         FunctionEndLabel = BC.Ctx->createNamedTempSymbol(
1218             formatv("func_cold_end.{0}", Fragment.get() - 1));
1219     }
1220     return FunctionEndLabel;
1221   }
1222 
1223   /// Return a label used to identify where the constant island was emitted
1224   /// (AArch64 only). This is used to update the symbol table accordingly,
1225   /// emitting data marker symbols as required by the ABI.
1226   MCSymbol *getFunctionConstantIslandLabel() const {
1227     assert(Islands && "function expected to have constant islands");
1228 
1229     if (!Islands->FunctionConstantIslandLabel) {
1230       Islands->FunctionConstantIslandLabel =
1231           BC.Ctx->getOrCreateSymbol("func_const_island@" + getOneName());
1232     }
1233     return Islands->FunctionConstantIslandLabel;
1234   }
1235 
1236   MCSymbol *getFunctionColdConstantIslandLabel() const {
1237     assert(Islands && "function expected to have constant islands");
1238 
1239     if (!Islands->FunctionColdConstantIslandLabel) {
1240       Islands->FunctionColdConstantIslandLabel =
1241           BC.Ctx->getOrCreateSymbol("func_cold_const_island@" + getOneName());
1242     }
1243     return Islands->FunctionColdConstantIslandLabel;
1244   }
1245 
1246   /// Return true if this is a function representing a PLT entry.
1247   bool isPLTFunction() const { return PLTSymbol != nullptr; }
1248 
1249   /// Return PLT function reference symbol for PLT functions and nullptr for
1250   /// non-PLT functions.
1251   const MCSymbol *getPLTSymbol() const { return PLTSymbol; }
1252 
1253   /// Set function PLT reference symbol for PLT functions.
1254   void setPLTSymbol(const MCSymbol *Symbol) {
1255     assert(Size == 0 && "function size should be 0 for PLT functions");
1256     PLTSymbol = Symbol;
1257     IsPseudo = true;
1258   }
1259 
1260   /// Update output values of the function based on the final layout.
1261   void updateOutputValues(const BOLTLinker &Linker);
1262 
1263   /// Register relocation type \p RelType at a given \p Address in the function
1264   /// against \p Symbol.
1265   /// Assert if the \p Address is not inside this function.
1266   void addRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t RelType,
1267                      uint64_t Addend, uint64_t Value);
1268 
1269   /// Return the name of the section this function originated from.
1270   std::optional<StringRef> getOriginSectionName() const {
1271     if (!OriginSection)
1272       return std::nullopt;
1273     return OriginSection->getName();
1274   }
1275 
1276   /// Return internal section name for this function.
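  /// Illustratively, the main fragment maps to CodeSectionName, the first cold
  /// fragment to ColdCodeSectionName, the warm fragment to the binary-wide
  /// warm section when one exists, and any further fragment N to
  /// "<ColdCodeSectionName>.<N - 1>".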
1277   SmallString<32>
1278   getCodeSectionName(const FragmentNum Fragment = FragmentNum::main()) const {
1279     if (Fragment == FragmentNum::main())
1280       return SmallString<32>(CodeSectionName);
1281     if (Fragment == FragmentNum::cold())
1282       return SmallString<32>(ColdCodeSectionName);
1283     if (BC.HasWarmSection && Fragment == FragmentNum::warm())
1284       return SmallString<32>(BC.getWarmCodeSectionName());
1285     return formatv("{0}.{1}", ColdCodeSectionName, Fragment.get() - 1);
1286   }
1287 
1288   /// Assign a code section name to the function.
1289   void setCodeSectionName(const StringRef Name) {
1290     CodeSectionName = Name.str();
1291   }
1292 
1293   /// Get output code section.
1294   ErrorOr<BinarySection &>
1295   getCodeSection(const FragmentNum Fragment = FragmentNum::main()) const {
1296     return BC.getUniqueSectionByName(getCodeSectionName(Fragment));
1297   }
1298 
1299   /// Assign a section name for the cold part of the function.
1300   void setColdCodeSectionName(const StringRef Name) {
1301     ColdCodeSectionName = Name.str();
1302   }
1303 
1304   /// Return true if the function will halt execution on entry.
1305   bool trapsOnEntry() const { return TrapsOnEntry; }
1306 
1307   /// Make the function always trap on entry. Other than the trap instruction,
1308   /// the function body will be empty.
1309   void setTrapOnEntry();
1310 
1311   /// Return true if the function could be correctly processed.
1312   bool isSimple() const { return IsSimple; }
1313 
1314   /// Return true if the function should be ignored for optimization purposes.
1315   bool isIgnored() const { return IsIgnored; }
1316 
1317   /// Return true if the function should not be disassembled, emitted, or
1318   /// otherwise processed.
1319   bool isPseudo() const { return IsPseudo; }
1320 
1321   /// Return true if the function contains an explicit or implicit indirect
1322   /// branch to its split fragments, e.g., a split jump table or a landing pad
1323   /// in a split fragment.
1324   bool hasIndirectTargetToSplitFragment() const {
1325     return HasIndirectTargetToSplitFragment;
1326   }
1327 
1328   /// Return true if all CFG edges have local successors.
1329   bool hasCanonicalCFG() const { return HasCanonicalCFG; }
1330 
1331   /// Return true if the original function code has all necessary relocations
1332   /// to track addresses of functions emitted to new locations.
1333   bool hasExternalRefRelocations() const { return HasExternalRefRelocations; }
1334 
1335   /// Return true if the function has instruction(s) with unknown control flow.
1336   bool hasUnknownControlFlow() const { return HasUnknownControlFlow; }
1337 
1338   /// Return true if the function body is non-contiguous.
1339   bool isSplit() const { return isSimple() && getLayout().isSplit(); }
1340 
1341   bool shouldPreserveNops() const { return PreserveNops; }
1342 
1343   /// Return true if the function has exception handling tables.
1344   bool hasEHRanges() const { return HasEHRanges; }
1345 
1346   /// Return true if the function uses DW_CFA_GNU_args_size CFIs.
1347   bool usesGnuArgsSize() const { return UsesGnuArgsSize; }
1348 
1349   /// Return true if the function has more than one entry point.
1350   bool isMultiEntry() const { return !SecondaryEntryPoints.empty(); }
1351 
1352   /// Return true if the function might have a profile available externally,
1353   /// but not yet populated into the function.
1354   bool hasProfileAvailable() const { return HasProfileAvailable; }
1355 
1356   bool hasMemoryProfile() const { return HasMemoryProfile; }
1357 
1358   /// Return true if the body of the function was merged into another function.
1359   bool isFolded() const { return FoldedIntoFunction != nullptr; }
1360 
1361   /// Return true if other functions were folded into this one.
1362   bool hasFunctionsFoldedInto() const { return HasFunctionsFoldedInto; }
1363 
1364   /// If this function was folded, return the function it was folded into.
1365   BinaryFunction *getFoldedIntoFunction() const { return FoldedIntoFunction; }
1366 
1367   /// Return true if the function uses jump tables.
1368   bool hasJumpTables() const { return !JumpTables.empty(); }
1369 
1370   /// Return true if the function has SDT marker
1371   bool hasSDTMarker() const { return HasSDTMarker; }
1372 
1373   /// Return true if the function has Pseudo Probe
1374   bool hasPseudoProbe() const { return HasPseudoProbe; }
1375 
1376   /// Return true if the function uses ORC format for stack unwinding.
1377   bool hasORC() const { return HasORC; }
1378 
1379   /// Return true if the original entry point was patched.
1380   bool isPatched() const { return IsPatched; }
1381 
1382   const JumpTable *getJumpTable(const MCInst &Inst) const {
1383     const uint64_t Address = BC.MIB->getJumpTable(Inst);
1384     return getJumpTableContainingAddress(Address);
1385   }
1386 
1387   JumpTable *getJumpTable(const MCInst &Inst) {
1388     const uint64_t Address = BC.MIB->getJumpTable(Inst);
1389     return getJumpTableContainingAddress(Address);
1390   }
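
  // Sketch: querying the jump table behind an indirect branch, assuming Inst
  // was annotated with a jump-table address during disassembly:
  //
  //   if (const JumpTable *JT = BF.getJumpTable(Inst)) {
  //     // Inst jumps through a jump table owned by this function.
  //   }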
1391 
1392   const MCSymbol *getPersonalityFunction() const { return PersonalityFunction; }
1393 
1394   uint8_t getPersonalityEncoding() const { return PersonalityEncoding; }
1395 
1396   CallSitesRange getCallSites(const FragmentNum F) const {
1397     return make_range(std::equal_range(CallSites.begin(), CallSites.end(),
1398                                        std::make_pair(F, CallSite()),
1399                                        llvm::less_first()));
1400   }
1401 
1402   void
1403   addCallSites(const ArrayRef<std::pair<FragmentNum, CallSite>> NewCallSites) {
1404     llvm::copy(NewCallSites, std::back_inserter(CallSites));
1405     llvm::stable_sort(CallSites, llvm::less_first());
1406   }
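
  // Sketch: call sites are kept sorted by fragment number, so a per-fragment
  // query is a binary search (NewSites is a hypothetical pre-built list):
  //
  //   BF.addCallSites(NewSites);
  //   for (const auto &[Frag, CS] : BF.getCallSites(FragmentNum::main())) {
  //     // Only call sites belonging to the main fragment are visited here.
  //   }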
1407 
1408   ArrayRef<uint8_t> getLSDAActionTable() const { return LSDAActionTable; }
1409 
1410   const LSDATypeTableTy &getLSDATypeTable() const { return LSDATypeTable; }
1411 
1412   unsigned getLSDATypeEncoding() const { return LSDATypeEncoding; }
1413 
1414   const LSDATypeTableTy &getLSDATypeAddressTable() const {
1415     return LSDATypeAddressTable;
1416   }
1417 
1418   ArrayRef<uint8_t> getLSDATypeIndexTable() const { return LSDATypeIndexTable; }
1419 
1420   IslandInfo &getIslandInfo() {
1421     assert(Islands && "function expected to have constant islands");
1422     return *Islands;
1423   }
1424 
1425   const IslandInfo &getIslandInfo() const {
1426     assert(Islands && "function expected to have constant islands");
1427     return *Islands;
1428   }
1429 
1430   /// Return true if the function has CFI instructions
1431   bool hasCFI() const {
1432     return !FrameInstructions.empty() || !CIEFrameInstructions.empty() ||
1433            IsInjected;
1434   }
1435 
1436   /// Return unique number associated with the function.
1437   uint64_t getFunctionNumber() const { return FunctionNumber; }
1438 
1439   /// Return true if the given address \p PC is inside the function body.
1440   bool containsAddress(uint64_t PC, bool UseMaxSize = false) const {
1441     if (UseMaxSize)
1442       return Address <= PC && PC < Address + MaxSize;
1443     return Address <= PC && PC < Address + Size;
1444   }
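
  // Sketch: distinguishing a reference into the emitted code from one landing
  // in the padding/island area past Size (PC is a hypothetical address):
  //
  //   if (BF.containsAddress(PC))
  //     ; // PC is within [Address, Address + Size)
  //   else if (BF.containsAddress(PC, /*UseMaxSize=*/true))
  //     ; // PC is within [Address, Address + MaxSize)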
1445 
1446   /// Create a basic block in the function. The new block is *NOT* inserted
1447   /// into the CFG. The caller must use insertBasicBlocks() to add any new
1448   /// blocks to the CFG.
1449   std::unique_ptr<BinaryBasicBlock>
1450   createBasicBlock(MCSymbol *Label = nullptr) {
1451     if (!Label) {
1452       std::unique_lock<llvm::sys::RWMutex> Lock(BC.CtxMutex);
1453       Label = BC.Ctx->createNamedTempSymbol("BB");
1454     }
1455     auto BB =
1456         std::unique_ptr<BinaryBasicBlock>(new BinaryBasicBlock(this, Label));
1457 
1458     LabelToBB[Label] = BB.get();
1459 
1460     return BB;
1461   }
1462 
1463   /// Create a new basic block with an optional \p Label and add it to the list
1464   /// of basic blocks of this function.
1465   BinaryBasicBlock *addBasicBlock(MCSymbol *Label = nullptr) {
1466     assert(CurrentState == State::CFG && "Can only add blocks in CFG state");
1467 
1468     BasicBlocks.emplace_back(createBasicBlock(Label).release());
1469     BinaryBasicBlock *BB = BasicBlocks.back();
1470 
1471     BB->setIndex(BasicBlocks.size() - 1);
1472     Layout.addBasicBlock(BB);
1473 
1474     return BB;
1475   }
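
  // Sketch of the two creation paths, assuming the function is in CFG state
  // (Start is a hypothetical existing block):
  //
  //   // (1) Create a detached block and wire it in explicitly:
  //   std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
  //   NewBBs.push_back(BF.createBasicBlock());
  //   BF.insertBasicBlocks(Start, std::move(NewBBs));
  //
  //   // (2) Append a block directly to the CFG and layout:
  //   BinaryBasicBlock *BB = BF.addBasicBlock();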
1476 
1477   /// Add basic block \p BB as an entry point to the function. Return global
1478   /// symbol associated with the entry.
1479   MCSymbol *addEntryPoint(const BinaryBasicBlock &BB);
1480 
1481   /// Register secondary entry point at a given \p Offset into the function.
1482   /// Return global symbol for use by extern function references.
1483   MCSymbol *addEntryPointAtOffset(uint64_t Offset);
1484 
1485   /// Mark all blocks that are unreachable from a root (entry point
1486   /// or landing pad) as invalid.
1487   void markUnreachableBlocks();
1488 
1489   /// Rebuilds the basic block layout, ignoring dead BBs. Returns the number
1490   /// of removed BBs and the number of removed bytes of code.
1491   std::pair<unsigned, uint64_t>
1492   eraseInvalidBBs(const MCCodeEmitter *Emitter = nullptr);
1493 
1494   /// Get the relative order between two basic blocks in the original
1495   /// layout.  The result is > 0 if B occurs before A and < 0 if B
1496   /// occurs after A.  If A and B are the same block, the result is 0.
1497   signed getOriginalLayoutRelativeOrder(const BinaryBasicBlock *A,
1498                                         const BinaryBasicBlock *B) const {
1499     return getIndex(A) - getIndex(B);
1500   }
1501 
1502   /// Insert the BBs contained in NewBBs into the basic blocks for this
1503   /// function. Update the associated state of all blocks as needed, i.e.
1504   /// BB offsets and BB indices. The new BBs are inserted after Start.
1505   /// This operation could affect fallthrough branches for Start.
1506   ///
1507   void
1508   insertBasicBlocks(BinaryBasicBlock *Start,
1509                     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
1510                     const bool UpdateLayout = true,
1511                     const bool UpdateCFIState = true,
1512                     const bool RecomputeLandingPads = true);
1513 
1514   iterator insertBasicBlocks(
1515       iterator StartBB, std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
1516       const bool UpdateLayout = true, const bool UpdateCFIState = true,
1517       const bool RecomputeLandingPads = true);
1518 
1519   /// Update the basic block layout for this function.  The BBs from
1520   /// [Start->Index, Start->Index + NumNewBlocks) are inserted into the
1521   /// layout after the BB indicated by Start.
1522   void updateLayout(BinaryBasicBlock *Start, const unsigned NumNewBlocks);
1523 
1524   /// Recompute the CFI state for NumNewBlocks following Start after inserting
1525   /// new blocks into the CFG.  This must be called after updateLayout.
1526   void updateCFIState(BinaryBasicBlock *Start, const unsigned NumNewBlocks);
1527 
1528   /// Return true if we detected ambiguous jump tables in this function, which
1529   /// happen when one JT is used in more than one indirect jump. This precludes
1530   /// us from splitting edges for this JT unless we duplicate the JT (see
1531   /// disambiguateJumpTables).
1532   bool checkForAmbiguousJumpTables();
1533 
1534   /// Detect when two distinct indirect jumps are using the same jump table and
1535   /// duplicate it, allocating a separate JT for each indirect branch. This is
1536   /// necessary for code transformations on the CFG that change an edge induced
1537   /// by an indirect branch, e.g.: instrumentation or shrink wrapping. However,
1538   /// this is only possible if we are not updating jump tables in place, but are
1539   /// writing it to a new location (moving them).
1540   void disambiguateJumpTables(MCPlusBuilder::AllocatorIdTy AllocId);
1541 
1542   /// Change \p OldDest to \p NewDest in the jump table used at the end of
1543   /// \p BB. Returns false if \p OldDest couldn't be found as a valid target
1544   /// and no replacement took place.
1545   bool replaceJumpTableEntryIn(BinaryBasicBlock *BB, BinaryBasicBlock *OldDest,
1546                                BinaryBasicBlock *NewDest);
1547 
1548   /// Split the CFG edge <From, To> by inserting an intermediate basic block.
1549   /// Returns a pointer to this new intermediate basic block. BB "From" will be
1550   /// updated to jump to the intermediate block, which in turn will have an
1551   /// unconditional branch to BB "To".
1552   /// The caller must call fixBranches() manually. This function only creates the
1553   /// correct CFG edges.
1554   BinaryBasicBlock *splitEdge(BinaryBasicBlock *From, BinaryBasicBlock *To);
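
  // Sketch: splitting an edge and then re-emitting terminators, since
  // splitEdge() only updates the CFG (From/To are hypothetical blocks):
  //
  //   BinaryBasicBlock *Mid = BF.splitEdge(From, To);
  //   BF.fixBranches(); // materialize the branch through Mid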
1555 
1556   /// We may have built an overly conservative CFG for functions with calls
1557   /// to functions that the compiler knows will never return. In this case,
1558   /// clear all successors from these blocks.
1559   void deleteConservativeEdges();
1560 
1561   /// Determine direction of the branch based on the current layout.
1562   /// The caller is responsible for updating basic block indices prior to using
1563   /// this function (e.g. by calling BinaryFunction::updateLayoutIndices()).
1564   static bool isForwardBranch(const BinaryBasicBlock *From,
1565                               const BinaryBasicBlock *To) {
1566     assert(From->getFunction() == To->getFunction() &&
1567            "basic blocks should be in the same function");
1568     return To->getLayoutIndex() > From->getLayoutIndex();
1569   }
1570 
1571   /// Determine direction of the call to callee symbol relative to the start
1572   /// of this function.
1573   /// Note: this doesn't take function splitting into account.
1574   bool isForwardCall(const MCSymbol *CalleeSymbol) const;
1575 
1576   /// Dump function information, including instruction disassembly, to debug
1577   /// output.
1578   void dump() const;
1579 
1580   /// Print function information to the \p OS stream.
1581   void print(raw_ostream &OS, std::string Annotation = "");
1582 
1583   /// Print all relocations between \p Offset and \p Offset + \p Size in
1584   /// this function.
1585   void printRelocations(raw_ostream &OS, uint64_t Offset, uint64_t Size) const;
1586 
1587   /// Return true if function has a profile, even if the profile does not
1588   /// match CFG 100%.
1589   bool hasProfile() const { return ExecutionCount != COUNT_NO_PROFILE; }
1590 
1591   /// Return true if function profile is present and accurate.
1592   bool hasValidProfile() const {
1593     return ExecutionCount != COUNT_NO_PROFILE && ProfileMatchRatio == 1.0f;
1594   }
1595 
1596   /// Mark this function as having a valid profile.
1597   void markProfiled(uint16_t Flags) {
1598     if (ExecutionCount == COUNT_NO_PROFILE)
1599       ExecutionCount = 0;
1600     ProfileFlags = Flags;
1601     ProfileMatchRatio = 1.0f;
1602   }
1603 
1604   /// Return flags describing a profile for this function.
1605   uint16_t getProfileFlags() const { return ProfileFlags; }
1606 
1607   /// Return true if the function's input profile data was inaccurate but has
1608   /// been corrected by the profile inference algorithm.
1609   bool hasInferredProfile() const { return HasInferredProfile; }
1610 
1611   void setHasInferredProfile(bool Inferred) { HasInferredProfile = Inferred; }
1612 
1613   void addCFIInstruction(uint64_t Offset, MCCFIInstruction &&Inst) {
1614     assert(!Instructions.empty());
1615 
1616     // Fix CFI instructions so that they skip NOPs. We need to fix this because
1617     // changing the CFI state after a NOP, besides being wrong and inaccurate,
1618     // makes it harder for us to recover this information, since we can create
1619     // empty BBs with NOPs and then reorder them away.
1620     // We fix this by moving the CFI instruction just before any NOPs.
1621     auto I = Instructions.lower_bound(Offset);
1622     if (Offset == getSize()) {
1623       assert(I == Instructions.end() && "unexpected iterator value");
1624       // Sometimes compiler issues restore_state after all instructions
1625       // in the function (even after nop).
1626       --I;
1627       Offset = I->first;
1628     }
1629     assert(I->first == Offset && "CFI pointing to unknown instruction");
1630     if (I == Instructions.begin()) {
1631       CIEFrameInstructions.emplace_back(std::forward<MCCFIInstruction>(Inst));
1632       return;
1633     }
1634 
1635     --I;
1636     while (I != Instructions.begin() && BC.MIB->isNoop(I->second)) {
1637       Offset = I->first;
1638       --I;
1639     }
1640     OffsetToCFI.emplace(Offset, FrameInstructions.size());
1641     FrameInstructions.emplace_back(std::forward<MCCFIInstruction>(Inst));
1642   }
1643 
1644   BinaryBasicBlock::iterator addCFIInstruction(BinaryBasicBlock *BB,
1645                                                BinaryBasicBlock::iterator Pos,
1646                                                MCCFIInstruction &&Inst) {
1647     size_t Idx = FrameInstructions.size();
1648     FrameInstructions.emplace_back(std::forward<MCCFIInstruction>(Inst));
1649     return addCFIPseudo(BB, Pos, Idx);
1650   }
1651 
1652   /// Insert a CFI pseudo instruction in a basic block. This pseudo instruction
1653   /// is a placeholder that refers to a real MCCFIInstruction object kept by
1654   /// this function that will be emitted at that position.
1655   BinaryBasicBlock::iterator addCFIPseudo(BinaryBasicBlock *BB,
1656                                           BinaryBasicBlock::iterator Pos,
1657                                           uint32_t Offset) {
1658     MCInst CFIPseudo;
1659     BC.MIB->createCFI(CFIPseudo, Offset);
1660     return BB->insertPseudoInstr(Pos, CFIPseudo);
1661   }
1662 
1663   /// Retrieve the MCCFIInstruction object associated with a CFI pseudo.
1664   const MCCFIInstruction *getCFIFor(const MCInst &Instr) const {
1665     if (!BC.MIB->isCFI(Instr))
1666       return nullptr;
1667     uint32_t Offset = Instr.getOperand(0).getImm();
1668     assert(Offset < FrameInstructions.size() && "Invalid CFI offset");
1669     return &FrameInstructions[Offset];
1670   }
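
  // Sketch: attaching a CFI to a block and reading it back (BB and Pos are
  // hypothetical; createRememberState() is one of the standard MCCFIInstruction
  // factories, but any CFI works):
  //
  //   auto It = BF.addCFIInstruction(
  //       BB, Pos, MCCFIInstruction::createRememberState(nullptr));
  //   const MCCFIInstruction *CFI = BF.getCFIFor(*It);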
1671 
1672   void setCFIFor(const MCInst &Instr, MCCFIInstruction &&CFIInst) {
1673     assert(BC.MIB->isCFI(Instr) &&
1674            "attempting to change CFI in a non-CFI inst");
1675     uint32_t Offset = Instr.getOperand(0).getImm();
1676     assert(Offset < FrameInstructions.size() && "Invalid CFI offset");
1677     FrameInstructions[Offset] = std::move(CFIInst);
1678   }
1679 
1680   void mutateCFIRegisterFor(const MCInst &Instr, MCPhysReg NewReg);
1681 
1682   const MCCFIInstruction *mutateCFIOffsetFor(const MCInst &Instr,
1683                                              int64_t NewOffset);
1684 
1685   BinaryFunction &setFileOffset(uint64_t Offset) {
1686     getLayout().getMainFragment().setFileOffset(Offset);
1687     return *this;
1688   }
1689 
1690   BinaryFunction &setSize(uint64_t S) {
1691     Size = S;
1692     return *this;
1693   }
1694 
1695   BinaryFunction &setMaxSize(uint64_t Size) {
1696     MaxSize = Size;
1697     return *this;
1698   }
1699 
1700   BinaryFunction &setOutputAddress(uint64_t Address) {
1701     OutputAddress = Address;
1702     return *this;
1703   }
1704 
1705   BinaryFunction &setOutputSize(uint64_t Size) {
1706     OutputSize = Size;
1707     return *this;
1708   }
1709 
1710   BinaryFunction &setSimple(bool Simple) {
1711     IsSimple = Simple;
1712     return *this;
1713   }
1714 
1715   void setPseudo(bool Pseudo) { IsPseudo = Pseudo; }
1716 
1717   void setPreserveNops(bool Value) { PreserveNops = Value; }
1718 
1719   BinaryFunction &setUsesGnuArgsSize(bool Uses = true) {
1720     UsesGnuArgsSize = Uses;
1721     return *this;
1722   }
1723 
1724   BinaryFunction &setHasProfileAvailable(bool V = true) {
1725     HasProfileAvailable = V;
1726     return *this;
1727   }
1728 
1729   /// Mark function that should not be emitted.
1730   void setIgnored();
1731 
1732   void setIsPatched(bool V) { IsPatched = V; }
1733 
1734   void setHasIndirectTargetToSplitFragment(bool V) {
1735     HasIndirectTargetToSplitFragment = V;
1736   }
1737 
1738   void setHasCanonicalCFG(bool V) { HasCanonicalCFG = V; }
1739 
1740   void setFolded(BinaryFunction *BF) { FoldedIntoFunction = BF; }
1741 
1742   /// Indicate that another function body was merged with this function.
1743   void setHasFunctionsFoldedInto() { HasFunctionsFoldedInto = true; }
1744 
1745   void setHasSDTMarker(bool V) { HasSDTMarker = V; }
1746 
1747   /// Mark the function as using ORC format for stack unwinding.
1748   void setHasORC(bool V) { HasORC = V; }
1749 
1750   BinaryFunction &setPersonalityFunction(uint64_t Addr) {
1751     assert(!PersonalityFunction && "can't set personality function twice");
1752     PersonalityFunction = BC.getOrCreateGlobalSymbol(Addr, "FUNCat");
1753     return *this;
1754   }
1755 
1756   BinaryFunction &setPersonalityEncoding(uint8_t Encoding) {
1757     PersonalityEncoding = Encoding;
1758     return *this;
1759   }
1760 
1761   BinaryFunction &setAlignment(uint16_t Align) {
1762     Alignment = Align;
1763     return *this;
1764   }
1765 
1766   uint16_t getMinAlignment() const {
1767     // Align functions that contain only constant islands (data in code) to at
1768     // least the island alignment.
1768     if (!size() && hasIslandsInfo())
1769       return getConstantIslandAlignment();
1770     return BC.MIB->getMinFunctionAlignment();
1771   }
1772 
1773   Align getMinAlign() const { return Align(getMinAlignment()); }
1774 
1775   uint16_t getAlignment() const { return Alignment; }
1776   Align getAlign() const { return Align(getAlignment()); }
1777 
1778   BinaryFunction &setMaxAlignmentBytes(uint16_t MaxAlignBytes) {
1779     MaxAlignmentBytes = MaxAlignBytes;
1780     return *this;
1781   }
1782 
1783   uint16_t getMaxAlignmentBytes() const { return MaxAlignmentBytes; }
1784 
1785   BinaryFunction &setMaxColdAlignmentBytes(uint16_t MaxAlignBytes) {
1786     MaxColdAlignmentBytes = MaxAlignBytes;
1787     return *this;
1788   }
1789 
1790   uint16_t getMaxColdAlignmentBytes() const { return MaxColdAlignmentBytes; }
1791 
1792   BinaryFunction &setImageAddress(uint64_t Address) {
1793     getLayout().getMainFragment().setImageAddress(Address);
1794     return *this;
1795   }
1796 
1797   /// Return the address of this function's image in memory.
1798   uint64_t getImageAddress() const {
1799     return getLayout().getMainFragment().getImageAddress();
1800   }
1801 
1802   BinaryFunction &setImageSize(uint64_t Size) {
1803     getLayout().getMainFragment().setImageSize(Size);
1804     return *this;
1805   }
1806 
1807   /// Return the size of this function's image in memory.
1808   uint64_t getImageSize() const {
1809     return getLayout().getMainFragment().getImageSize();
1810   }
1811 
1812   /// Return true if the function is a secondary fragment of another function.
1813   bool isFragment() const { return IsFragment; }
1814 
1815   /// Return true if this function is a child fragment of \p Other.
1816   bool isChildOf(const BinaryFunction &Other) const {
1817     return ParentFragments.contains(&Other);
1818   }
1819 
1820   /// Return the child fragments of this parent function.
1821   iterator_range<FragmentsSetTy::const_iterator> getFragments() const {
1822     return iterator_range<FragmentsSetTy::const_iterator>(Fragments.begin(),
1823                                                           Fragments.end());
1824   }
1825 
1826   /// Return the parent function for split function fragments.
1827   FragmentsSetTy *getParentFragments() { return &ParentFragments; }
1828 
1829   /// Set the profile data for the number of times the function was called.
1830   BinaryFunction &setExecutionCount(uint64_t Count) {
1831     ExecutionCount = Count;
1832     return *this;
1833   }
1834 
1835   /// Adjust execution count for the function by a given \p Count. The value
1836   /// \p Count will be subtracted from the current function count.
1837   ///
1838   /// The function will proportionally adjust execution count for all
1839   /// basic blocks and edges in the control flow graph.
1840   void adjustExecutionCount(uint64_t Count);
1841 
1842   /// Set LSDA address for the function.
1843   BinaryFunction &setLSDAAddress(uint64_t Address) {
1844     LSDAAddress = Address;
1845     return *this;
1846   }
1847 
1848   /// Set main LSDA symbol for the function.
1849   BinaryFunction &setLSDASymbol(MCSymbol *Symbol) {
1850     if (LSDASymbols.empty())
1851       LSDASymbols.resize(1);
1852     LSDASymbols.front() = Symbol;
1853     return *this;
1854   }
1855 
1856   /// Return the profile information about the number of times
1857   /// the function was executed.
1858   ///
1859   /// Return COUNT_NO_PROFILE if there's no profile info.
1860   uint64_t getExecutionCount() const { return ExecutionCount; }
1861 
1862   /// Return the raw profile information about the number of branch
1863   /// executions corresponding to this function.
1864   uint64_t getRawBranchCount() const { return RawBranchCount; }
1865 
1866   /// Set the profile data about the number of branch executions corresponding
1867   /// to this function.
1868   void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }
1869 
1870   /// Return the number of dynamically executed bytes, from raw perf data.
1871   uint64_t getSampleCountInBytes() const { return SampleCountInBytes; }
1872 
1873   /// Return the execution count for functions with known profile.
1874   /// Return 0 if the function has no profile.
1875   uint64_t getKnownExecutionCount() const {
1876     return ExecutionCount == COUNT_NO_PROFILE ? 0 : ExecutionCount;
1877   }
1878 
1879   /// Return original LSDA address for the function or NULL.
1880   uint64_t getLSDAAddress() const { return LSDAAddress; }
1881 
1882   /// Return symbol pointing to function's LSDA.
1883   MCSymbol *getLSDASymbol(const FragmentNum F) {
1884     if (F.get() < LSDASymbols.size() && LSDASymbols[F.get()] != nullptr)
1885       return LSDASymbols[F.get()];
1886     if (getCallSites(F).empty())
1887       return nullptr;
1888 
1889     if (F.get() >= LSDASymbols.size())
1890       LSDASymbols.resize(F.get() + 1);
1891 
1892     SmallString<256> SymbolName;
1893     if (F == FragmentNum::main())
1894       SymbolName = formatv("GCC_except_table{0:x-}", getFunctionNumber());
1895     else
1896       SymbolName = formatv("GCC_cold_except_table{0:x-}.{1}",
1897                            getFunctionNumber(), F.get());
1898 
1899     LSDASymbols[F.get()] = BC.Ctx->getOrCreateSymbol(SymbolName);
1900 
1901     return LSDASymbols[F.get()];
1902   }
1903 
1904   /// If all landing pads for the function fragment \p F are located in fragment
1905   /// \p LPF, designate \p LPF as a landing-pad fragment for \p F. Passing
1906   /// std::nullopt as \p LPF means that landing pads for \p F are located in more
1907   /// than one fragment.
1908   void setLPFragment(const FragmentNum F, std::optional<FragmentNum> LPF) {
1909     if (F.get() >= LPFragments.size())
1910       LPFragments.resize(F.get() + 1);
1911 
1912     LPFragments[F.get()] = LPF;
1913   }
1914 
1915   /// If function fragment \p F has a designated landing pad fragment, i.e. a
1916   /// fragment that contains all landing pads for throwers in \p F, then return
1917   /// that landing pad fragment number. If \p F does not need landing pads,
1918   /// return \p F. Return std::nullopt if landing pads for \p F are scattered among
1919   /// several function fragments.
1920   std::optional<FragmentNum> getLPFragment(const FragmentNum F) {
1921     if (!isSplit()) {
1922       assert(F == FragmentNum::main() && "Invalid fragment number");
1923       return FragmentNum::main();
1924     }
1925 
1926     if (F.get() >= LPFragments.size())
1927       return std::nullopt;
1928 
1929     return LPFragments[F.get()];
1930   }
1931 
1932   /// Return a symbol corresponding to a landing pad fragment for fragment \p F.
1933   /// See getLPFragment().
1934   MCSymbol *getLPStartSymbol(const FragmentNum F) {
1935     if (std::optional<FragmentNum> LPFragment = getLPFragment(F))
1936       return getSymbol(*LPFragment);
1937     return nullptr;
1938   }
1939 
1940   void setOutputDataAddress(uint64_t Address) { OutputDataOffset = Address; }
1941 
1942   uint64_t getOutputDataAddress() const { return OutputDataOffset; }
1943 
1944   void setOutputColdDataAddress(uint64_t Address) {
1945     OutputColdDataOffset = Address;
1946   }
1947 
1948   uint64_t getOutputColdDataAddress() const { return OutputColdDataOffset; }
1949 
1950   /// If \p Address represents an access to a constant island managed by this
1951   /// function, return a symbol so code can safely refer to it. Otherwise,
1952   /// return nullptr. Note that when the function is split, the islands are
1953   /// duplicated in the cold code area, and references from cold blocks are
1954   /// redirected to proxy symbols for the duplicated islands (see
1955   /// duplicateConstantIslands()).
1956   MCSymbol *getOrCreateIslandAccess(uint64_t Address) {
1957     if (!Islands)
1958       return nullptr;
1959 
1960     MCSymbol *Symbol;
1961     if (!isInConstantIsland(Address))
1962       return nullptr;
1963 
1964     // Register our island at global namespace
1965     Symbol = BC.getOrCreateGlobalSymbol(Address, "ISLANDat");
1966 
1967     // Internal bookkeeping
1968     const uint64_t Offset = Address - getAddress();
1969     assert((!Islands->Offsets.count(Offset) ||
1970             Islands->Offsets[Offset] == Symbol) &&
1971            "Inconsistent island symbol management");
1972     if (!Islands->Offsets.count(Offset)) {
1973       Islands->Offsets[Offset] = Symbol;
1974       Islands->Symbols.insert(Symbol);
1975     }
1976     return Symbol;
1977   }
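
  // Sketch: resolving a PC-relative data access into an island symbol
  // (TargetAddress is a hypothetical address computed during disassembly):
  //
  //   if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(TargetAddress)) {
  //     // Rewrite the referring operand to use IslandSym so the reference
  //     // remains valid after the function is moved in the output binary.
  //   }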
1978 
1979   /// Support dynamic relocations in constant islands, which may happen if
1980   /// the binary is linked with the -z notext option.
1981   Error markIslandDynamicRelocationAtAddress(uint64_t Address) {
1982     if (!isInConstantIsland(Address))
1983       return createFatalBOLTError(
1984           Twine("dynamic relocation found for text section at 0x") +
1985           Twine::utohexstr(Address) + Twine("\n"));
1986 
1987     // Mark island to have dynamic relocation
1988     Islands->HasDynamicRelocations = true;
1989 
1990     // Create an island access so that we emit the label and move the binary
1991     // data during updateOutputValues(), making us emit the dynamic
1992     // relocation with the right offset value.
1993     getOrCreateIslandAccess(Address);
1994     return Error::success();
1995   }
1996 
1997   bool hasDynamicRelocationAtIsland() const {
1998     return !!(Islands && Islands->HasDynamicRelocations);
1999   }
2000 
2001   /// Called by an external function which wishes to emit references to constant
2002   /// island symbols of this function. We create a proxy for it, so we emit
2003   /// separate symbols when emitting our constant island on behalf of this other
2004   /// function.
2005   MCSymbol *getOrCreateProxyIslandAccess(uint64_t Address,
2006                                          BinaryFunction &Referrer) {
2007     MCSymbol *Symbol = getOrCreateIslandAccess(Address);
2008     if (!Symbol)
2009       return nullptr;
2010 
2011     MCSymbol *Proxy;
2012     if (!Islands->Proxies[&Referrer].count(Symbol)) {
2013       Proxy = BC.Ctx->getOrCreateSymbol(Symbol->getName() + ".proxy.for." +
2014                                         Referrer.getPrintName());
2015       Islands->Proxies[&Referrer][Symbol] = Proxy;
2016       Islands->Proxies[&Referrer][Proxy] = Symbol;
2017     }
2018     Proxy = Islands->Proxies[&Referrer][Symbol];
2019     return Proxy;
2020   }
2021 
2022   /// Make this function depend on \p BF because we have a reference to its
2023   /// constant island. When emitting this function, we will also emit
2024   /// \p BF's constants. This only happens in custom AArch64 assembly code.
2025   void createIslandDependency(MCSymbol *Island, BinaryFunction *BF) {
2026     if (!Islands)
2027       Islands = std::make_unique<IslandInfo>();
2028 
2029     Islands->Dependency.insert(BF);
2030     Islands->ProxySymbols[Island] = BF;
2031   }
2032 
2033   /// Detects whether \p Address is inside a data region in this function
2034   /// (constant islands).
2035   bool isInConstantIsland(uint64_t Address) const {
2036     if (!Islands)
2037       return false;
2038 
2039     if (Address < getAddress())
2040       return false;
2041 
2042     uint64_t Offset = Address - getAddress();
2043 
2044     if (Offset >= getMaxSize())
2045       return false;
2046 
2047     auto DataIter = Islands->DataOffsets.upper_bound(Offset);
2048     if (DataIter == Islands->DataOffsets.begin())
2049       return false;
2050     DataIter = std::prev(DataIter);
2051 
2052     auto CodeIter = Islands->CodeOffsets.upper_bound(Offset);
2053     if (CodeIter == Islands->CodeOffsets.begin())
2054       return true;
2055 
2056     return *std::prev(CodeIter) <= *DataIter;
2057   }
2058 
2059   uint16_t getConstantIslandAlignment() const {
2060     return Islands ? Islands->getAlignment() : 1;
2061   }
2062 
2063   uint64_t
2064   estimateConstantIslandSize(const BinaryFunction *OnBehalfOf = nullptr) const {
2065     if (!Islands)
2066       return 0;
2067 
2068     uint64_t Size = 0;
2069     for (auto DataIter = Islands->DataOffsets.begin();
2070          DataIter != Islands->DataOffsets.end(); ++DataIter) {
2071       auto NextData = std::next(DataIter);
2072       auto CodeIter = Islands->CodeOffsets.lower_bound(*DataIter);
2073       if (CodeIter == Islands->CodeOffsets.end() &&
2074           NextData == Islands->DataOffsets.end()) {
2075         Size += getMaxSize() - *DataIter;
2076         continue;
2077       }
2078 
2079       uint64_t NextMarker;
2080       if (CodeIter == Islands->CodeOffsets.end())
2081         NextMarker = *NextData;
2082       else if (NextData == Islands->DataOffsets.end())
2083         NextMarker = *CodeIter;
2084       else
2085         NextMarker = (*CodeIter > *NextData) ? *NextData : *CodeIter;
2086 
2087       Size += NextMarker - *DataIter;
2088     }
2089 
2090     if (!OnBehalfOf) {
2091       for (BinaryFunction *ExternalFunc : Islands->Dependency) {
2092         Size = alignTo(Size, ExternalFunc->getConstantIslandAlignment());
2093         Size += ExternalFunc->estimateConstantIslandSize(this);
2094       }
2095     }
2096 
2097     return Size;
2098   }
2099 
2100   bool hasIslandsInfo() const {
2101     return Islands && (hasConstantIsland() || !Islands->Dependency.empty());
2102   }
2103 
2104   bool hasConstantIsland() const {
2105     return Islands && !Islands->DataOffsets.empty();
2106   }
2107 
2108   /// Return true iff the symbol could be seen inside this function; otherwise,
2109   /// it probably belongs to another function.
2110   bool isSymbolValidInScope(const SymbolRef &Symbol, uint64_t SymbolSize) const;
2111 
2112   /// Disassemble function from raw data.
2113   /// If successful, this function will populate the list of instructions
2114   /// for this function together with offsets from the function start
2115   /// in the input. It will also populate Labels with destinations for
2116   /// local branches, and TakenBranches with [from, to] info.
2117   ///
2118   /// The function should be properly initialized before this method is
2119   /// called, i.e., the function address and size should be set.
2120   ///
2121   /// Returns Error::success() on successful disassembly and updates the
2122   /// current state to State::Disassembled.
2123   ///
2124   /// Returns an Error if disassembly failed.
2125   Error disassemble();
2126 
2127   /// An external interface to register a branch while the function is in
2128   /// disassembled state. Allows making custom modifications to the
2129   /// disassembly. E.g., a pre-CFG pass can add an instruction and register
2130   /// a branch that will later be used during the CFG construction.
2131   ///
2132   /// Return a label at the branch destination.
2133   MCSymbol *registerBranch(uint64_t Src, uint64_t Dst);
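
  // Sketch: a pre-CFG pass injecting a synthetic jump and registering its edge
  // so that buildCFG() later turns it into a CFG edge (SrcOffset/DstOffset are
  // hypothetical offsets; createUncondBranch() is assumed to be available on MIB):
  //
  //   MCSymbol *Target = BF.registerBranch(SrcOffset, DstOffset);
  //   MCInst Jmp;
  //   BC.MIB->createUncondBranch(Jmp, Target, BC.Ctx.get());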
2134 
2135   Error handlePCRelOperand(MCInst &Instruction, uint64_t Address,
2136                            uint64_t Size);
2137 
2138   MCSymbol *handleExternalReference(MCInst &Instruction, uint64_t Size,
2139                                     uint64_t Offset, uint64_t TargetAddress,
2140                                     bool &IsCall);
2141 
2142   void handleIndirectBranch(MCInst &Instruction, uint64_t Size,
2143                             uint64_t Offset);
2144 
2145   /// Check for linker veneers, which lack relocations and need manual
2146   /// adjustments.
2147   void handleAArch64IndirectCall(MCInst &Instruction, const uint64_t Offset);
2148 
2149   /// Analyze instruction to identify a function reference.
2150   void analyzeInstructionForFuncReference(const MCInst &Inst);
2151 
2152   /// Scan function for references to other functions. In relocation mode,
2153   /// add relocations for external references. In non-relocation mode, detect
2154   /// and mark new entry points.
2155   ///
2156   /// Return true on success. False if the disassembly failed or relocations
2157   /// could not be created.
2158   bool scanExternalRefs();
2159 
2160   /// Return the size of a data object located at \p Offset in the function.
2161   /// Return 0 if there is no data object at the \p Offset.
2162   size_t getSizeOfDataInCodeAt(uint64_t Offset) const;
2163 
2164   /// Verify that starting at \p Offset function contents are filled with
2165   /// zero-value bytes.
2166   bool isZeroPaddingAt(uint64_t Offset) const;
2167 
2168   /// Check that entry points have an associated instruction at their
2169   /// offsets after disassembly.
2170   void postProcessEntryPoints();
2171 
2172   /// Post-processing for jump tables after disassembly. Since their
2173   /// boundaries are not known until all call sites are seen, we need this
2174   /// extra pass to perform any final adjustments.
2175   void postProcessJumpTables();
2176 
2177   /// Builds a list of basic blocks with successor and predecessor info.
2178   ///
2179   /// The function should be in the Disassembled state prior to the call.
2180   ///
2181   /// Returns Error::success() on success and updates the current function
2182   /// state to State::CFG. Returns an Error if the CFG cannot be built.
2183   Error buildCFG(MCPlusBuilder::AllocatorIdTy);
2184 
2185   /// Perform post-processing of the CFG.
2186   void postProcessCFG();
2187 
2188   /// Verify that any assumptions we've made about indirect branches were
2189   /// correct and also make any necessary changes to unknown indirect branches.
2190   ///
2191   /// Catch-22: we need to know indirect branch targets to build the CFG, and
2192   /// in order to determine the targets of indirect branches we need the CFG.
2193   ///
2194   /// As such, the process of decoding indirect branches is broken into 2 steps:
2195   /// first we make our best guess about a branch without knowing the CFG,
2196   /// and later after we have the CFG for the function, we verify our earlier
2197   /// assumptions and also do our best at processing unknown indirect branches.
2198   ///
2199   /// Return true upon successful processing, or false if the control flow
2200   /// cannot be statically evaluated for any given indirect branch.
2201   bool postProcessIndirectBranches(MCPlusBuilder::AllocatorIdTy AllocId);
2202 
2203   /// Validate that all data references to function offsets are claimed by
2204   /// recognized jump tables. Register externally referenced blocks as entry
2205   /// points. Returns true if there are no unclaimed externally referenced
2206   /// offsets.
2207   bool validateExternallyReferencedOffsets();
2208 
2209   /// Return all call site profile info for this function.
2210   IndirectCallSiteProfile &getAllCallSites() { return AllCallSites; }
2211 
2212   const IndirectCallSiteProfile &getAllCallSites() const {
2213     return AllCallSites;
2214   }
2215 
2216   /// Walks the list of basic blocks filling in missing information about
2217   /// edge frequency for fall-throughs.
2218   ///
2219   /// Assumes the CFG has been built and edge frequency for taken branches
2220   /// has been filled with LBR data.
2221   void inferFallThroughCounts();
2222 
2223   /// Clear execution profile of the function.
2224   void clearProfile();
2225 
2226   /// Converts conditional tail calls to unconditional tail calls. We do this to
2227   /// handle conditional tail calls correctly and to give the simplify
2228   /// conditional tail call pass a chance to decide whether to re-optimize them
2229   /// using profile information.
2230   void removeConditionalTailCalls();
2231 
2232   // Convert COUNT_NO_PROFILE to 0
2233   void removeTagsFromProfile();
2234 
2235   /// Computes a function hotness score: the sum of the products of BB frequency
2236   /// and size.
2237   uint64_t getFunctionScore() const;
2238 
2239   /// Get the number of instructions within this function.
2240   uint64_t getInstructionCount() const;
2241 
2242   const CFIInstrMapType &getFDEProgram() const { return FrameInstructions; }
2243 
2244   void moveRememberRestorePair(BinaryBasicBlock *BB);
2245 
2246   bool replayCFIInstrs(int32_t FromState, int32_t ToState,
2247                        BinaryBasicBlock *InBB,
2248                        BinaryBasicBlock::iterator InsertIt);
2249 
2250   /// unwindCFIState is used to unwind from a higher to a lower state number
2251   /// without using remember-restore instructions. We do that by keeping track
2252   /// of what values have been changed from state A to B and emitting
2253   /// instructions that undo this change.
2254   SmallVector<int32_t, 4> unwindCFIState(int32_t FromState, int32_t ToState,
2255                                          BinaryBasicBlock *InBB,
2256                                          BinaryBasicBlock::iterator &InsertIt);
2257 
2258   /// After reordering, this function checks the state of CFI and fixes it if it
2259   /// is corrupted. If it is unable to fix it, it returns false.
2260   bool finalizeCFIState();
2261 
2262   /// Return true if this function needs an address-translation table after
2263   /// its code emission.
2264   bool requiresAddressTranslation() const;
2265 
2266   /// Return true if the linker needs to generate an address map for this
2267   /// function. Used for keeping track of the mapping from input to output
2268   /// addresses of basic blocks.
2269   bool requiresAddressMap() const;
2270 
2271   /// Adjust branch instructions to match the CFG.
2272   ///
2273   /// When it comes to internal branches, the CFG represents "the ultimate source
2274   /// of truth". Transformations on functions and blocks have to update the CFG,
2275   /// and fixBranches() makes sure the correct branch instructions are
2276   /// inserted at the end of basic blocks.
2277   ///
2278   /// We do require a conditional branch at the end of a basic block if
2279   /// the block has 2 successors, as the CFG currently lacks support for
2280   /// conditional code (it will probably stay that way). We only use this
2281   /// branch instruction for its condition code; the destinations are
2282   /// determined by the CFG: the first successor represents the true/taken
2283   /// branch, while the second successor represents the false/fall-through branch.
2284   ///
2285   /// When we reverse the branch condition, the CFG is updated accordingly.
2286   void fixBranches();
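
  // Sketch of the invariant fixBranches() restores for two-successor blocks
  // (BB is a hypothetical conditional block; getConditionalSuccessor() is
  // assumed to be the BinaryBasicBlock accessor for the taken/fall-through edges):
  //
  //   BinaryBasicBlock *Taken = BB->getConditionalSuccessor(true);
  //   BinaryBasicBlock *FallThrough = BB->getConditionalSuccessor(false);
  //   BF.fixBranches(); // terminators now branch to Taken and fall through to FallThrough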
2287 
2288   /// Mark function as finalized. No further optimizations are permitted.
2289   void setFinalized() { CurrentState = State::CFG_Finalized; }
2290 
2291   void setEmitted(bool KeepCFG = false) {
2292     CurrentState = State::EmittedCFG;
2293     if (!KeepCFG) {
2294       releaseCFG();
2295       CurrentState = State::Emitted;
2296     }
2297   }
2298 
2299   /// Process LSDA information for the function.
2300   Error parseLSDA(ArrayRef<uint8_t> LSDAData, uint64_t LSDAAddress);
2301 
2302   /// Update exception handling ranges for the function.
2303   void updateEHRanges();
2304 
2305   /// Traverse cold basic blocks and replace references to constants in islands
2306   /// with a proxy symbol for the duplicated constant island that is going to be
2307   /// emitted in the cold region.
2308   void duplicateConstantIslands();
2309 
2310   /// Merge profile data of this function into those of the given
2311   /// function. The functions should have been proven identical with
2312   /// isIdenticalWith.
2313   void mergeProfileDataInto(BinaryFunction &BF) const;
2314 
2315   /// Returns the last computed hash value of the function.
2316   size_t getHash() const { return Hash; }
2317 
2318   /// Returns the function GUID.
2319   uint64_t getGUID() const { return GUID; }
2320 
2321   void setGUID(uint64_t Id) { GUID = Id; }
2322 
2323   using OperandHashFuncTy =
2324       function_ref<typename std::string(const MCOperand &)>;
2325 
2326   /// Compute the hash value of the function based on its contents.
2327   ///
2328   /// If \p UseDFS is set, process basic blocks in DFS order. Otherwise, use
2329   /// the existing layout order.
2330   /// \p HashFunction specifies which function is used for BF hashing.
2331   ///
2332   /// By default, instruction operands are ignored while calculating the hash.
2333   /// The caller can change this via passing \p OperandHashFunc function.
2334   /// The result of that function will be mixed with the internal hash.
2335   size_t computeHash(
2336       bool UseDFS = false, HashFunction HashFunction = HashFunction::Default,
2337       OperandHashFuncTy OperandHashFunc = [](const MCOperand &) {
2338         return std::string();
2339       }) const;
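
  // Sketch: hashing with operands taken into account, e.g. for stricter
  // identical-code detection (hashOperand is a hypothetical helper that
  // renders an MCOperand as a string):
  //
  //   size_t H = BF.computeHash(/*UseDFS=*/true, HashFunction::Default,
  //                             [](const MCOperand &Op) {
  //                               return hashOperand(Op);
  //                             });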
2340 
2341   /// Compute hash values for each block of the function.
2342   /// \p HashFunction specifies which function is used for BB hashing.
2343   void
2344   computeBlockHashes(HashFunction HashFunction = HashFunction::Default) const;
2345 
2346   void setDWARFUnit(DWARFUnit *Unit) { DwarfUnit = Unit; }
2347 
2348   /// Return DWARF compile unit for this function.
2349   DWARFUnit *getDWARFUnit() const { return DwarfUnit; }
2350 
2351   /// Return line info table for this function.
2352   const DWARFDebugLine::LineTable *getDWARFLineTable() const {
2353     return getDWARFUnit() ? BC.DwCtx->getLineTableForUnit(getDWARFUnit())
2354                           : nullptr;
2355   }
2356 
2357   /// Finalize profile for the function.
2358   void postProcessProfile();
2359 
2360   /// Returns an estimate of the size of the function's hot part after splitting.
2361   /// This is a very rough estimate, as with C++ exceptions there are
2362   /// blocks we don't move, and it makes no attempt at estimating the size
2363   /// of the added/removed branch instructions.
2364   /// Note that this size is optimistic and the actual size may increase
2365   /// after relaxation.
2366   size_t estimateHotSize(const bool UseSplitSize = true) const {
2367     size_t Estimate = 0;
2368     if (UseSplitSize && isSplit()) {
2369       for (const BinaryBasicBlock &BB : blocks())
2370         if (!BB.isCold())
2371           Estimate += BC.computeCodeSize(BB.begin(), BB.end());
2372     } else {
2373       for (const BinaryBasicBlock &BB : blocks())
2374         if (BB.getKnownExecutionCount() != 0)
2375           Estimate += BC.computeCodeSize(BB.begin(), BB.end());
2376     }
2377     return Estimate;
2378   }
2379 
2380   size_t estimateColdSize() const {
2381     if (!isSplit())
2382       return estimateSize();
2383     size_t Estimate = 0;
2384     for (const BinaryBasicBlock &BB : blocks())
2385       if (BB.isCold())
2386         Estimate += BC.computeCodeSize(BB.begin(), BB.end());
2387     return Estimate;
2388   }
2389 
2390   size_t estimateSize() const {
2391     size_t Estimate = 0;
2392     for (const BinaryBasicBlock &BB : blocks())
2393       Estimate += BC.computeCodeSize(BB.begin(), BB.end());
2394     return Estimate;
2395   }
2396 
2397   /// Return output address ranges for a function.
2398   DebugAddressRangesVector getOutputAddressRanges() const;
2399 
2400   /// Given an address corresponding to an instruction in the input binary,
2401   /// return an address of this instruction in output binary.
2402   ///
2403   /// Return 0 if no matching address could be found or the instruction was
2404   /// removed.
2405   uint64_t translateInputToOutputAddress(uint64_t Address) const;
2406 
2407   /// Translate a contiguous range of addresses in the input binary into a set
2408   /// of ranges in the output binary.
2409   DebugAddressRangesVector
2410   translateInputToOutputRange(DebugAddressRange InRange) const;
2411 
2412   /// Return true if the function is an AArch64 linker-inserted veneer.
2413   bool isAArch64Veneer() const;
2414 
2415   virtual ~BinaryFunction();
2416 };
2417 
2418 inline raw_ostream &operator<<(raw_ostream &OS,
2419                                const BinaryFunction &Function) {
2420   OS << Function.getPrintName();
2421   return OS;
2422 }
2423 
2424 /// Compare functions by index if valid; fall back to the original address
2425 /// otherwise.
2426 inline bool compareBinaryFunctionByIndex(const BinaryFunction *A,
2427                                          const BinaryFunction *B) {
2428   if (A->hasValidIndex() && B->hasValidIndex())
2429     return A->getIndex() < B->getIndex();
2430   if (A->hasValidIndex() && !B->hasValidIndex())
2431     return true;
2432   if (!A->hasValidIndex() && B->hasValidIndex())
2433     return false;
2434   return A->getAddress() < B->getAddress();
2435 }
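
// A typical use is ordering functions for emission (SortedFunctions is a
// hypothetical container of BinaryFunction pointers):
//
//   llvm::stable_sort(SortedFunctions, compareBinaryFunctionByIndex);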
2436 
2437 } // namespace bolt
2438 
2439 // GraphTraits specializations for function basic block graphs (CFGs)
2440 template <>
2441 struct GraphTraits<bolt::BinaryFunction *>
2442     : public GraphTraits<bolt::BinaryBasicBlock *> {
2443   static NodeRef getEntryNode(bolt::BinaryFunction *F) {
2444     return F->getLayout().block_front();
2445   }
2446 
2447   using nodes_iterator = pointer_iterator<bolt::BinaryFunction::iterator>;
2448 
2449   static nodes_iterator nodes_begin(bolt::BinaryFunction *F) {
2450     llvm_unreachable("Not implemented");
2451     return nodes_iterator(F->begin());
2452   }
2453   static nodes_iterator nodes_end(bolt::BinaryFunction *F) {
2454     llvm_unreachable("Not implemented");
2455     return nodes_iterator(F->end());
2456   }
2457   static size_t size(bolt::BinaryFunction *F) { return F->size(); }
2458 };
2459 
2460 template <>
2461 struct GraphTraits<const bolt::BinaryFunction *>
2462     : public GraphTraits<const bolt::BinaryBasicBlock *> {
2463   static NodeRef getEntryNode(const bolt::BinaryFunction *F) {
2464     return F->getLayout().block_front();
2465   }
2466 
2467   using nodes_iterator = pointer_iterator<bolt::BinaryFunction::const_iterator>;
2468 
2469   static nodes_iterator nodes_begin(const bolt::BinaryFunction *F) {
2470     llvm_unreachable("Not implemented");
2471     return nodes_iterator(F->begin());
2472   }
2473   static nodes_iterator nodes_end(const bolt::BinaryFunction *F) {
2474     llvm_unreachable("Not implemented");
2475     return nodes_iterator(F->end());
2476   }
2477   static size_t size(const bolt::BinaryFunction *F) { return F->size(); }
2478 };
2479 
2480 template <>
2481 struct GraphTraits<Inverse<bolt::BinaryFunction *>>
2482     : public GraphTraits<Inverse<bolt::BinaryBasicBlock *>> {
2483   static NodeRef getEntryNode(Inverse<bolt::BinaryFunction *> G) {
2484     return G.Graph->getLayout().block_front();
2485   }
2486 };
2487 
2488 template <>
2489 struct GraphTraits<Inverse<const bolt::BinaryFunction *>>
2490     : public GraphTraits<Inverse<const bolt::BinaryBasicBlock *>> {
2491   static NodeRef getEntryNode(Inverse<const bolt::BinaryFunction *> G) {
2492     return G.Graph->getLayout().block_front();
2493   }
2494 };
2495 
2496 } // namespace llvm
2497 
2498 #endif
2499