xref: /llvm-project/bolt/include/bolt/Core/BinaryContext.h (revision 1fa02b968431ed0830eb57a229fb864659a1f43f)
1 //===- bolt/Core/BinaryContext.h - Low-level context ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Context for processing binary executable/library files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef BOLT_CORE_BINARY_CONTEXT_H
14 #define BOLT_CORE_BINARY_CONTEXT_H
15 
16 #include "bolt/Core/AddressMap.h"
17 #include "bolt/Core/BinaryData.h"
18 #include "bolt/Core/BinarySection.h"
19 #include "bolt/Core/DebugData.h"
20 #include "bolt/Core/DynoStats.h"
21 #include "bolt/Core/JumpTable.h"
22 #include "bolt/Core/MCPlusBuilder.h"
23 #include "bolt/RuntimeLibs/RuntimeLibrary.h"
24 #include "llvm/ADT/AddressRanges.h"
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/EquivalenceClasses.h"
27 #include "llvm/ADT/StringMap.h"
28 #include "llvm/ADT/iterator.h"
29 #include "llvm/BinaryFormat/Dwarf.h"
30 #include "llvm/BinaryFormat/MachO.h"
31 #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
32 #include "llvm/MC/MCAsmInfo.h"
33 #include "llvm/MC/MCCodeEmitter.h"
34 #include "llvm/MC/MCContext.h"
35 #include "llvm/MC/MCObjectFileInfo.h"
36 #include "llvm/MC/MCObjectWriter.h"
37 #include "llvm/MC/MCPseudoProbe.h"
38 #include "llvm/MC/MCSectionELF.h"
39 #include "llvm/MC/MCSectionMachO.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSymbol.h"
42 #include "llvm/MC/TargetRegistry.h"
43 #include "llvm/Support/ErrorOr.h"
44 #include "llvm/Support/RWMutex.h"
45 #include "llvm/Support/raw_ostream.h"
46 #include "llvm/TargetParser/Triple.h"
47 #include <functional>
48 #include <list>
49 #include <map>
50 #include <optional>
51 #include <set>
52 #include <string>
53 #include <system_error>
54 #include <type_traits>
55 #include <unordered_map>
56 #include <vector>
57 
58 namespace llvm {
59 class MCDisassembler;
60 class MCInstPrinter;
61 
62 using namespace object;
63 
64 namespace bolt {
65 
66 class BinaryFunction;
67 
68 /// Information on loadable part of the file.
69 struct SegmentInfo {
70   uint64_t Address;           /// Address of the segment in memory.
71   uint64_t Size;              /// Size of the segment in memory.
72   uint64_t FileOffset;        /// Offset in the file.
73   uint64_t FileSize;          /// Size in file.
74   uint64_t Alignment;         /// Alignment of the segment.
75   bool IsExecutable;          /// Is the executable bit set on the Segment?
76 
77   void print(raw_ostream &OS) const {
78     OS << "SegmentInfo { Address: 0x" << Twine::utohexstr(Address)
79        << ", Size: 0x" << Twine::utohexstr(Size) << ", FileOffset: 0x"
80        << Twine::utohexstr(FileOffset) << ", FileSize: 0x"
81        << Twine::utohexstr(FileSize) << ", Alignment: 0x"
82        << Twine::utohexstr(Alignment) << ", " << (IsExecutable ? "x" : " ")
83        << "}";
84   };
85 };
86 
87 inline raw_ostream &operator<<(raw_ostream &OS, const SegmentInfo &SegInfo) {
88   SegInfo.print(OS);
89   return OS;
90 }
91 
/// AArch64-specific symbol markers used to delimit code/data in .text.
enum class MarkerSymType : char {
  NONE = 0, ///< No marker.
  CODE = 1, ///< Marker for code.
  DATA = 2, ///< Marker for data.
};
98 
/// Classification of the contents found at a memory location.
enum class MemoryContentsType : char {
  UNKNOWN = 0,                 ///< Unknown contents.
  POSSIBLE_JUMP_TABLE = 1,     ///< Possibly a non-PIC jump table.
  POSSIBLE_PIC_JUMP_TABLE = 2, ///< Possibly a PIC jump table.
};
104 
/// Helper function to truncate a \p Value to given size in \p Bytes.
///
/// Keeps the low \p Bytes * 8 bits of \p Value and clears all higher bits.
/// A size of zero yields 0 and a size of 8 or more returns \p Value unchanged.
/// Both cases are handled explicitly because shifting a 64-bit value by 64 or
/// more bits is undefined behavior.
inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
  if (Bytes == 0)
    return 0;
  if (Bytes >= sizeof(uint64_t))
    return Value;
  // Here 1 <= Bytes <= 7, so the shift amount is in the range [8, 56].
  const uint64_t Mask = ~static_cast<uint64_t>(0) >> (64 - Bytes * 8);
  return static_cast<int64_t>(static_cast<uint64_t>(Value) & Mask);
}
109 
/// Filter iterator.
///
/// Wraps the iterator range [Itr, End) and only stops on positions accepted
/// by the predicate. Note that the predicate receives the underlying iterator
/// itself, not the element it points to.
template <typename ItrType,
          typename PredType = std::function<bool(const ItrType &)>>
class FilterIterator {
  using inner_traits = std::iterator_traits<ItrType>;
  using Iterator = FilterIterator;

  PredType Pred;
  ItrType Itr, End;

  /// Step backwards to the closest preceding position accepted by the
  /// predicate. The beginning of the range is not known to this class, so the
  /// caller must guarantee that such a position exists.
  void prev() {
    while (!Pred(--Itr))
      ;
  }

  /// Advance by one position, then skip forward to the next accepted one.
  void next() {
    ++Itr;
    nextMatching();
  }

  /// Skip forward (without an initial advance) until the predicate accepts the
  /// current position or End is reached.
  void nextMatching() {
    while (Itr != End && !Pred(Itr))
      ++Itr;
  }

public:
  using iterator_category = std::bidirectional_iterator_tag;
  using value_type = typename inner_traits::value_type;
  using difference_type = typename inner_traits::difference_type;
  using pointer = typename inner_traits::pointer;
  using reference = typename inner_traits::reference;

  Iterator &operator++() { next(); return *this; }
  Iterator &operator--() { prev(); return *this; }
  // The postfix forms must snapshot the whole filter iterator (not just the
  // wrapped iterator) so that the returned value has the declared type.
  Iterator operator++(int) { Iterator Tmp(*this); next(); return Tmp; }
  Iterator operator--(int) { Iterator Tmp(*this); prev(); return Tmp; }
  /// Two filter iterators compare equal when their wrapped positions are
  /// equal; the predicate and the end position are not compared.
  bool operator==(const Iterator &Other) const { return Itr == Other.Itr; }
  bool operator!=(const Iterator &Other) const { return !operator==(Other); }
  reference operator*() const { return *Itr; }
  pointer operator->() const { return &operator*(); }

  /// Construct a filter over [Itr, End) and position it on the first element
  /// accepted by \p Pred (or on \p End if there is none).
  FilterIterator(PredType Pred, ItrType Itr, ItrType End)
      : Pred(Pred), Itr(Itr), End(End) {
    nextMatching();
  }
};
153 
154 /// BOLT-exclusive errors generated in core BOLT libraries, optionally holding a
155 /// string message and whether it is fatal or not. In case it is fatal and if
156 /// BOLT is running as a standalone process, the process might be killed as soon
157 /// as the error is checked.
158 class BOLTError : public ErrorInfo<BOLTError> {
159 public:
160   static char ID;
161 
162   BOLTError(bool IsFatal, const Twine &S = Twine());
163   void log(raw_ostream &OS) const override;
164   bool isFatal() const { return IsFatal; }
165 
166   const std::string &getMessage() const { return Msg; }
167   std::error_code convertToErrorCode() const override;
168 
169 private:
170   bool IsFatal;
171   std::string Msg;
172 };
173 
174 /// Streams used by BOLT to log regular or error events
175 struct JournalingStreams {
176   raw_ostream &Out;
177   raw_ostream &Err;
178 };
179 
180 Error createNonFatalBOLTError(const Twine &S);
181 Error createFatalBOLTError(const Twine &S);
182 
183 class BinaryContext {
184   BinaryContext() = delete;
185 
186   /// Name of the binary file the context originated from.
187   std::string Filename;
188 
189   /// Unique build ID if available for the binary.
190   std::optional<std::string> FileBuildID;
191 
192   /// Set of all sections.
193   struct CompareSections {
194     bool operator()(const BinarySection *A, const BinarySection *B) const {
195       return *A < *B;
196     }
197   };
198   using SectionSetType = std::set<BinarySection *, CompareSections>;
199   SectionSetType Sections;
200 
201   using SectionIterator = pointee_iterator<SectionSetType::iterator>;
202   using SectionConstIterator = pointee_iterator<SectionSetType::const_iterator>;
203 
204   using FilteredSectionIterator = FilterIterator<SectionIterator>;
205   using FilteredSectionConstIterator = FilterIterator<SectionConstIterator>;
206 
207   /// Map virtual address to a section.  It is possible to have more than one
208   /// section mapped to the same address, e.g. non-allocatable sections.
209   using AddressToSectionMapType = std::multimap<uint64_t, BinarySection *>;
210   AddressToSectionMapType AddressToSection;
211 
212   /// multimap of section name to BinarySection object.  Some binaries
213   /// have multiple sections with the same name.
214   using NameToSectionMapType = std::multimap<std::string, BinarySection *>;
215   NameToSectionMapType NameToSection;
216 
217   /// Map section references to BinarySection for matching sections in the
218   /// input file to internal section representation.
219   DenseMap<SectionRef, BinarySection *> SectionRefToBinarySection;
220 
221   /// Low level section registration.
222   BinarySection &registerSection(BinarySection *Section);
223 
224   /// Store all functions in the binary, sorted by original address.
225   std::map<uint64_t, BinaryFunction> BinaryFunctions;
226 
227   /// A mutex that is used to control parallel accesses to BinaryFunctions
228   mutable llvm::sys::RWMutex BinaryFunctionsMutex;
229 
230   /// Functions injected by BOLT
231   std::vector<BinaryFunction *> InjectedBinaryFunctions;
232 
233   /// Jump tables for all functions mapped by address.
234   std::map<uint64_t, JumpTable *> JumpTables;
235 
236   /// Locations of PC-relative relocations in data objects.
237   std::unordered_set<uint64_t> DataPCRelocations;
238 
239   /// Used in duplicateJumpTable() to uniquely identify a JT clone
240   /// Start our IDs with a high number so getJumpTableContainingAddress checks
241   /// with size won't overflow
242   uint32_t DuplicatedJumpTables{0x10000000};
243 
244   /// Function fragments to skip.
245   std::unordered_set<BinaryFunction *> FragmentsToSkip;
246 
247   /// Fragment equivalence classes to query belonging to the same "family" in
248   /// presence of multiple fragments/multiple parents.
249   EquivalenceClasses<const BinaryFunction *> FragmentClasses;
250 
251   /// The runtime library.
252   std::unique_ptr<RuntimeLibrary> RtLibrary;
253 
254   /// DWP Context.
255   std::shared_ptr<DWARFContext> DWPContext;
256 
257   /// Decoded pseudo probes.
258   std::shared_ptr<MCPseudoProbeDecoder> PseudoProbeDecoder;
259 
260   /// A map of DWO Ids to CUs.
261   using DWOIdToCUMapType = std::unordered_map<uint64_t, DWARFUnit *>;
262   DWOIdToCUMapType DWOCUs;
263 
264   bool ContainsDwarf5{false};
265   bool ContainsDwarfLegacy{false};
266 
267   /// Mapping from input to output addresses.
268   std::optional<AddressMap> IOAddressMap;
269 
270   /// Preprocess DWO debug information.
271   void preprocessDWODebugInfo();
272 
273   /// DWARF line info for CUs.
274   std::map<unsigned, DwarfLineTable> DwarfLineTablesCUMap;
275 
276   /// Internal helper for removing section name from a lookup table.
277   void deregisterSectionName(const BinarySection &Section);
278 
279 public:
280   static Expected<std::unique_ptr<BinaryContext>> createBinaryContext(
281       Triple TheTriple, std::shared_ptr<orc::SymbolStringPool> SSP,
282       StringRef InputFileName, SubtargetFeatures *Features, bool IsPIC,
283       std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger);
284 
285   /// Superset of compiler units that will contain overwritten code that needs
286   /// new debug info. In a few cases, functions may end up not being
287   /// overwritten, but it is okay to re-generate debug info for them.
288   std::set<const DWARFUnit *> ProcessedCUs;
289 
290   // Setup MCPlus target builder
291   void initializeTarget(std::unique_ptr<MCPlusBuilder> TargetBuilder) {
292     MIB = std::move(TargetBuilder);
293   }
294 
295   /// Return function fragments to skip.
296   const std::unordered_set<BinaryFunction *> &getFragmentsToSkip() {
297     return FragmentsToSkip;
298   }
299 
300   /// Add function fragment to skip
301   void addFragmentsToSkip(BinaryFunction *Function) {
302     FragmentsToSkip.insert(Function);
303   }
304 
305   void clearFragmentsToSkip() { FragmentsToSkip.clear(); }
306 
307   /// Given DWOId returns CU if it exists in DWOCUs.
308   std::optional<DWARFUnit *> getDWOCU(uint64_t DWOId);
309 
310   /// Returns DWOContext if it exists.
311   DWARFContext *getDWOContext() const;
312 
313   /// Get Number of DWOCUs in a map.
314   uint32_t getNumDWOCUs() { return DWOCUs.size(); }
315 
316   /// Returns true if DWARF5 is used.
317   bool isDWARF5Used() const { return ContainsDwarf5; }
318 
319   /// Returns true if DWARF4 or lower is used.
320   bool isDWARFLegacyUsed() const { return ContainsDwarfLegacy; }
321 
322   std::map<unsigned, DwarfLineTable> &getDwarfLineTables() {
323     return DwarfLineTablesCUMap;
324   }
325 
326   DwarfLineTable &getDwarfLineTable(unsigned CUID) {
327     return DwarfLineTablesCUMap[CUID];
328   }
329 
330   Expected<unsigned> getDwarfFile(StringRef Directory, StringRef FileName,
331                                   unsigned FileNumber,
332                                   std::optional<MD5::MD5Result> Checksum,
333                                   std::optional<StringRef> Source,
334                                   unsigned CUID, unsigned DWARFVersion);
335 
336   /// [start memory address] -> [segment info] mapping.
337   std::map<uint64_t, SegmentInfo> SegmentMapInfo;
338 
339   /// Symbols that are expected to be undefined in MCContext during emission.
340   std::unordered_set<MCSymbol *> UndefinedSymbols;
341 
342   /// [name] -> [BinaryData*] map used for global symbol resolution.
343   using SymbolMapType = StringMap<BinaryData *>;
344   SymbolMapType GlobalSymbols;
345 
346   /// [address] -> [BinaryData], ...
347   /// Addresses never change.
348   /// Note: it is important that clients do not hold on to instances of
349   /// BinaryData* while the map is still being modified during BinaryFunction
350   /// disassembly.  This is because of the possibility that a regular
351   /// BinaryData is later discovered to be a JumpTable.
352   using BinaryDataMapType = std::map<uint64_t, BinaryData *>;
353   using binary_data_iterator = BinaryDataMapType::iterator;
354   using binary_data_const_iterator = BinaryDataMapType::const_iterator;
355   BinaryDataMapType BinaryDataMap;
356 
357   using FilteredBinaryDataConstIterator =
358       FilterIterator<binary_data_const_iterator>;
359   using FilteredBinaryDataIterator = FilterIterator<binary_data_iterator>;
360 
361   StringRef getFilename() const { return Filename; }
362   void setFilename(StringRef Name) { Filename = std::string(Name); }
363 
364   std::optional<StringRef> getFileBuildID() const {
365     if (FileBuildID)
366       return StringRef(*FileBuildID);
367 
368     return std::nullopt;
369   }
370   void setFileBuildID(StringRef ID) { FileBuildID = std::string(ID); }
371 
372   bool hasSymbolsWithFileName() const { return HasSymbolsWithFileName; }
373   void setHasSymbolsWithFileName(bool Value) { HasSymbolsWithFileName = Value; }
374 
375   std::shared_ptr<orc::SymbolStringPool> getSymbolStringPool() { return SSP; }
376   /// Return true if relocations against symbol with a given name
377   /// must be created.
378   bool forceSymbolRelocations(StringRef SymbolName) const;
379 
380   uint64_t getNumUnusedProfiledObjects() const {
381     return NumUnusedProfiledObjects;
382   }
383   void setNumUnusedProfiledObjects(uint64_t N) { NumUnusedProfiledObjects = N; }
384 
385   RuntimeLibrary *getRuntimeLibrary() { return RtLibrary.get(); }
386   void setRuntimeLibrary(std::unique_ptr<RuntimeLibrary> Lib) {
387     assert(!RtLibrary && "Cannot set runtime library twice.");
388     RtLibrary = std::move(Lib);
389   }
390 
391   const MCPseudoProbeDecoder *getPseudoProbeDecoder() const {
392     return PseudoProbeDecoder.get();
393   }
394 
395   void setPseudoProbeDecoder(std::shared_ptr<MCPseudoProbeDecoder> Decoder) {
396     assert(!PseudoProbeDecoder && "Cannot set pseudo probe decoder twice.");
397     PseudoProbeDecoder = Decoder;
398   }
399 
400   /// Return BinaryFunction containing a given \p Address or nullptr if
401   /// no registered function contains the \p Address.
402   ///
403   /// In a binary a function has somewhat vague boundaries. E.g. a function can
404   /// refer to the first byte past the end of the function, and it will still be
405   /// referring to this function, not the function following it in the address
406   /// space. Thus we have the following flags that allow to lookup for
407   /// a function where a caller has more context for the search.
408   ///
409   /// If \p CheckPastEnd is true and the \p Address falls on a byte
410   /// immediately following the last byte of some function and there's no other
411   /// function that starts there, then return the function as the one containing
412   /// the \p Address. This is useful when we need to locate functions for
413   /// references pointing immediately past a function body.
414   ///
415   /// If \p UseMaxSize is true, then include the space between this function
416   /// body and the next object in address ranges that we check.
417   BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address,
418                                                      bool CheckPastEnd = false,
419                                                      bool UseMaxSize = false);
420   const BinaryFunction *
421   getBinaryFunctionContainingAddress(uint64_t Address,
422                                      bool CheckPastEnd = false,
423                                      bool UseMaxSize = false) const {
424     return const_cast<BinaryContext *>(this)
425         ->getBinaryFunctionContainingAddress(Address, CheckPastEnd, UseMaxSize);
426   }
427 
428   /// Return a BinaryFunction that starts at a given \p Address.
429   BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address);
430 
431   const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address) const {
432     return const_cast<BinaryContext *>(this)->getBinaryFunctionAtAddress(
433         Address);
434   }
435 
436   /// Return size of an entry for the given jump table \p Type.
437   uint64_t getJumpTableEntrySize(JumpTable::JumpTableType Type) const {
438     return Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize();
439   }
440 
441   /// Return JumpTable containing a given \p Address.
442   JumpTable *getJumpTableContainingAddress(uint64_t Address) {
443     auto JTI = JumpTables.upper_bound(Address);
444     if (JTI == JumpTables.begin())
445       return nullptr;
446     --JTI;
447     if (JTI->first + JTI->second->getSize() > Address)
448       return JTI->second;
449     if (JTI->second->getSize() == 0 && JTI->first == Address)
450       return JTI->second;
451     return nullptr;
452   }
453 
454   /// Deregister JumpTable registered at a given \p Address and delete it.
455   void deleteJumpTable(uint64_t Address);
456 
457   unsigned getDWARFEncodingSize(unsigned Encoding) {
458     if (Encoding == dwarf::DW_EH_PE_omit)
459       return 0;
460     switch (Encoding & 0x0f) {
461     default:
462       llvm_unreachable("unknown encoding");
463     case dwarf::DW_EH_PE_absptr:
464     case dwarf::DW_EH_PE_signed:
465       return AsmInfo->getCodePointerSize();
466     case dwarf::DW_EH_PE_udata2:
467     case dwarf::DW_EH_PE_sdata2:
468       return 2;
469     case dwarf::DW_EH_PE_udata4:
470     case dwarf::DW_EH_PE_sdata4:
471       return 4;
472     case dwarf::DW_EH_PE_udata8:
473     case dwarf::DW_EH_PE_sdata8:
474       return 8;
475     }
476   }
477 
478   /// [MCSymbol] -> [BinaryFunction]
479   ///
480   /// As we fold identical functions, multiple symbols can point
481   /// to the same BinaryFunction.
482   std::unordered_map<const MCSymbol *, BinaryFunction *> SymbolToFunctionMap;
483 
484   /// A mutex that is used to control parallel accesses to SymbolToFunctionMap
485   mutable llvm::sys::RWMutex SymbolToFunctionMapMutex;
486 
487   /// Look up the symbol entry that contains the given \p Address (based on
488   /// the start address and size for each symbol).  Returns a pointer to
489   /// the BinaryData for that symbol.  If no data is found, nullptr is returned.
490   const BinaryData *getBinaryDataContainingAddressImpl(uint64_t Address) const;
491 
492   /// Update the Parent fields in BinaryDatas after adding a new entry into
493   /// \p BinaryDataMap.
494   void updateObjectNesting(BinaryDataMapType::iterator GAI);
495 
496   /// Validate that if object address ranges overlap that the object with
497   /// the larger range is a parent of the object with the smaller range.
498   bool validateObjectNesting() const;
499 
500   /// Validate that there are no top level "holes" in each section
501   /// and that all relocations with a section are mapped to a valid
502   /// top level BinaryData.
503   bool validateHoles() const;
504 
505   /// Produce output address ranges based on input ranges for some module.
506   DebugAddressRangesVector translateModuleAddressRanges(
507       const DWARFAddressRangesVector &InputRanges) const;
508 
509   /// Get a bogus "absolute" section that will be associated with all
510   /// absolute BinaryDatas.
511   BinarySection &absoluteSection();
512 
513   /// Process "holes" in between known BinaryData objects.  For now,
514   /// symbols are padded with the space before the next BinaryData object.
515   void fixBinaryDataHoles();
516 
517   /// Generate names based on data hashes for unknown symbols.
518   void generateSymbolHashes();
519 
520   /// Construct BinaryFunction object and add it to internal maps.
521   BinaryFunction *createBinaryFunction(const std::string &Name,
522                                        BinarySection &Section, uint64_t Address,
523                                        uint64_t Size, uint64_t SymbolSize = 0,
524                                        uint16_t Alignment = 0);
525 
526   /// Return all functions for this rewrite instance.
527   std::map<uint64_t, BinaryFunction> &getBinaryFunctions() {
528     return BinaryFunctions;
529   }
530 
531   /// Return all functions for this rewrite instance.
532   const std::map<uint64_t, BinaryFunction> &getBinaryFunctions() const {
533     return BinaryFunctions;
534   }
535 
536   /// Create BOLT-injected function
537   BinaryFunction *createInjectedBinaryFunction(const std::string &Name,
538                                                bool IsSimple = true);
539 
540   std::vector<BinaryFunction *> &getInjectedBinaryFunctions() {
541     return InjectedBinaryFunctions;
542   }
543 
544   /// Return vector with all functions, i.e. include functions from the input
545   /// binary and functions created by BOLT.
546   std::vector<BinaryFunction *> getAllBinaryFunctions();
547 
548   /// Construct a jump table for \p Function at \p Address or return an existing
549   /// one at that location.
550   ///
551   /// May create an embedded jump table. Returns the label (symbol) of the
552   /// jump table.
553   const MCSymbol *getOrCreateJumpTable(BinaryFunction &Function,
554                                        uint64_t Address,
555                                        JumpTable::JumpTableType Type);
556 
557   /// Analyze a possible jump table of type \p Type at a given \p Address.
558   /// \p BF is a function referencing the jump table.
559   /// Return true if the jump table was detected at \p Address, and false
560   /// otherwise.
561   ///
562   /// If \p NextJTAddress is different from zero, it is used as an upper
563   /// bound for jump table memory layout.
564   ///
565   /// Optionally, populate \p EntriesAsAddress from jump table entries. The entries
566   /// could be partially populated if the jump table detection fails.
567   bool analyzeJumpTable(const uint64_t Address,
568                         const JumpTable::JumpTableType Type,
569                         const BinaryFunction &BF,
570                         const uint64_t NextJTAddress = 0,
571                         JumpTable::AddressesType *EntriesAsAddress = nullptr,
572                         bool *HasEntryInFragment = nullptr) const;
573 
574   /// After jump table locations are established, this function will populate
575   /// their EntriesAsAddress based on memory contents.
576   void populateJumpTables();
577 
578   /// Returns a jump table ID and label pointing to the duplicated jump table.
579   /// Ordinarily, jump tables are identified by their address in the input
580   /// binary. We return an ID with the high bit set to differentiate it from
581   /// regular addresses, avoiding conflicts with standard jump tables.
582   std::pair<uint64_t, const MCSymbol *>
583   duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
584                      const MCSymbol *OldLabel);
585 
586   /// Generate a unique name for jump table at a given \p Address belonging
587   /// to function \p BF.
588   std::string generateJumpTableName(const BinaryFunction &BF, uint64_t Address);
589 
590   /// Free memory used by JumpTable's EntriesAsAddress
591   void clearJumpTableTempData() {
592     for (auto &JTI : JumpTables) {
593       JumpTable &JT = *JTI.second;
594       JumpTable::AddressesType Temp;
595       Temp.swap(JT.EntriesAsAddress);
596     }
597   }
598   /// Return true if the array of bytes represents a valid code padding.
599   bool hasValidCodePadding(const BinaryFunction &BF);
600 
601   /// Verify padding area between functions, and adjust max function size
602   /// accordingly.
603   void adjustCodePadding();
604 
605   /// Regular page size.
606   unsigned RegularPageSize{0x1000};
607   static constexpr unsigned RegularPageSizeX86 = 0x1000;
608   static constexpr unsigned RegularPageSizeAArch64 = 0x10000;
609 
610   /// Huge page size to use.
611   static constexpr unsigned HugePageSize = 0x200000;
612 
613   /// Addresses reserved for kernel on x86_64 start at this location.
614   static constexpr uint64_t KernelStartX86_64 = 0xFFFF'FFFF'8000'0000;
615 
616   /// Map address to a constant island owner (constant data in code section)
617   std::map<uint64_t, BinaryFunction *> AddressToConstantIslandMap;
618 
619   /// A map from jump table address to insertion order.  Used for generating
620   /// jump table names.
621   std::map<uint64_t, size_t> JumpTableIds;
622 
623   std::unique_ptr<MCContext> Ctx;
624 
625   /// A mutex that is used to control parallel accesses to Ctx
626   mutable llvm::sys::RWMutex CtxMutex;
627   std::unique_lock<llvm::sys::RWMutex> scopeLock() const {
628     return std::unique_lock<llvm::sys::RWMutex>(CtxMutex);
629   }
630 
631   std::unique_ptr<DWARFContext> DwCtx;
632 
633   std::unique_ptr<Triple> TheTriple;
634 
635   std::shared_ptr<orc::SymbolStringPool> SSP;
636 
637   const Target *TheTarget;
638 
639   std::string TripleName;
640 
641   std::unique_ptr<MCCodeEmitter> MCE;
642 
643   std::unique_ptr<MCObjectFileInfo> MOFI;
644 
645   std::unique_ptr<const MCAsmInfo> AsmInfo;
646 
647   std::unique_ptr<const MCInstrInfo> MII;
648 
649   std::unique_ptr<const MCSubtargetInfo> STI;
650 
651   std::unique_ptr<MCInstPrinter> InstPrinter;
652 
653   std::unique_ptr<const MCInstrAnalysis> MIA;
654 
655   std::unique_ptr<MCPlusBuilder> MIB;
656 
657   std::unique_ptr<const MCRegisterInfo> MRI;
658 
659   std::unique_ptr<MCDisassembler> DisAsm;
660 
661   /// Symbolic disassembler.
662   std::unique_ptr<MCDisassembler> SymbolicDisAsm;
663 
664   std::unique_ptr<MCAsmBackend> MAB;
665 
666   /// Allows BOLT to print to log whenever it is necessary (with or without
667   /// const references)
668   mutable JournalingStreams Logger;
669 
670   /// Indicates if the binary is Linux kernel.
671   bool IsLinuxKernel{false};
672 
673   /// Indicates if relocations are available for usage.
674   bool HasRelocations{false};
675 
676   /// Indicates if the binary is stripped
677   bool IsStripped{false};
678 
679   /// Indicates if the binary contains split functions.
680   bool HasSplitFunctions{false};
681 
682   /// Indicates if the function ordering of the binary is finalized.
683   bool HasFinalizedFunctionOrder{false};
684 
685   /// Indicates if a separate .text.warm section is needed that contains
686   /// function fragments with
687   /// FunctionFragment::getFragmentNum() == FragmentNum::warm()
688   bool HasWarmSection{false};
689 
690   /// Is the binary always loaded at a fixed address. Shared objects and
691   /// position-independent executables (PIEs) are examples of binaries that
692   /// will have HasFixedLoadAddress set to false.
693   bool HasFixedLoadAddress{true};
694 
695   /// True if the binary has no dynamic dependencies, i.e., if it was statically
696   /// linked.
697   bool IsStaticExecutable{false};
698 
699   /// Set to true if the binary contains PT_INTERP header.
700   bool HasInterpHeader{false};
701 
702   /// Indicates if any of local symbols used for functions or data objects
703   /// have an origin file name available.
704   bool HasSymbolsWithFileName{false};
705 
706   /// Does the binary have BAT section.
707   bool HasBATSection{false};
708 
709   /// Sum of execution count of all functions
710   uint64_t SumExecutionCount{0};
711 
712   /// Number of functions with profile information
713   uint64_t NumProfiledFuncs{0};
714 
715   /// Number of functions with stale profile information
716   uint64_t NumStaleProfileFuncs{0};
717 
718   /// Number of objects in profile whose profile was ignored.
719   uint64_t NumUnusedProfiledObjects{0};
720 
721   /// Total hotness score according to profiling data for this binary.
722   uint64_t TotalScore{0};
723 
724   /// Binary-wide aggregated stats.
725   struct BinaryStats {
726     /// Stats for stale profile matching:
727     ///   the total number of basic blocks in the profile
728     uint32_t NumStaleBlocks{0};
729     ///   the number of exactly matched basic blocks
730     uint32_t NumExactMatchedBlocks{0};
731     ///   the number of loosely matched basic blocks
732     uint32_t NumLooseMatchedBlocks{0};
733     ///   the number of exactly pseudo probe matched basic blocks
734     uint32_t NumPseudoProbeExactMatchedBlocks{0};
735     ///   the number of loosely pseudo probe matched basic blocks
736     uint32_t NumPseudoProbeLooseMatchedBlocks{0};
737     ///   the number of call matched basic blocks
738     uint32_t NumCallMatchedBlocks{0};
739     ///   the total count of samples in the profile
740     uint64_t StaleSampleCount{0};
741     ///   the count of exactly matched samples
742     uint64_t ExactMatchedSampleCount{0};
743     ///   the count of loosely matched samples
744     uint64_t LooseMatchedSampleCount{0};
745     ///   the count of exactly pseudo probe matched samples
746     uint64_t PseudoProbeExactMatchedSampleCount{0};
747     ///   the count of loosely pseudo probe matched samples
748     uint64_t PseudoProbeLooseMatchedSampleCount{0};
749     ///   the count of call matched samples
750     uint64_t CallMatchedSampleCount{0};
751     ///   the number of stale functions that have matching number of blocks in
752     ///   the profile
753     uint64_t NumStaleFuncsWithEqualBlockCount{0};
754     ///   the number of blocks that have matching size but a differing hash
755     uint64_t NumStaleBlocksWithEqualIcount{0};
756   } Stats;
757 
758   // Original binary execution count stats.
759   DynoStats InitialDynoStats;
760 
761   // Address of the first allocated segment.
762   uint64_t FirstAllocAddress{std::numeric_limits<uint64_t>::max()};
763 
764   /// Track next available address for new allocatable sections. RewriteInstance
765   /// sets this prior to running BOLT passes, so layout passes are aware of the
766   /// final addresses functions will have.
767   uint64_t LayoutStartAddress{0};
768 
769   /// Old .text info.
770   uint64_t OldTextSectionAddress{0};
771   uint64_t OldTextSectionOffset{0};
772   uint64_t OldTextSectionSize{0};
773 
774   /// Area in the input binary reserved for BOLT.
775   AddressRange BOLTReserved;
776 
777   /// Address of the code/function that is executed before any other code in
778   /// the binary.
779   std::optional<uint64_t> StartFunctionAddress;
780 
781   /// Address of the code/function that is going to be executed right before
782   /// the execution of the binary is completed.
783   std::optional<uint64_t> FiniFunctionAddress;
784 
785   /// DT_FINI.
786   std::optional<uint64_t> FiniAddress;
787 
788   /// DT_FINI_ARRAY. Only used when DT_FINI is not set.
789   std::optional<uint64_t> FiniArrayAddress;
790 
791   /// DT_FINI_ARRAYSZ. Only used when DT_FINI is not set.
792   std::optional<uint64_t> FiniArraySize;
793 
794   /// Page alignment used for code layout.
795   uint64_t PageAlign{HugePageSize};
796 
797   /// True if the binary requires immediate relocation processing.
798   bool RequiresZNow{false};
799 
800   /// List of functions that always trap.
801   std::vector<const BinaryFunction *> TrappedFunctions;
802 
803   /// List of external addresses in the code that are not a function start
804   /// and are referenced from BinaryFunction.
805   std::list<std::pair<BinaryFunction *, uint64_t>> InterproceduralReferences;
806 
807   /// DWARF encoding. Available encoding types defined in BinaryFormat/Dwarf.h
808   /// enum Constants, e.g. DW_EH_PE_omit.
809   unsigned LSDAEncoding = dwarf::DW_EH_PE_omit;
810 
811   BinaryContext(std::unique_ptr<MCContext> Ctx,
812                 std::unique_ptr<DWARFContext> DwCtx,
813                 std::unique_ptr<Triple> TheTriple,
814                 std::shared_ptr<orc::SymbolStringPool> SSP,
815                 const Target *TheTarget, std::string TripleName,
816                 std::unique_ptr<MCCodeEmitter> MCE,
817                 std::unique_ptr<MCObjectFileInfo> MOFI,
818                 std::unique_ptr<const MCAsmInfo> AsmInfo,
819                 std::unique_ptr<const MCInstrInfo> MII,
820                 std::unique_ptr<const MCSubtargetInfo> STI,
821                 std::unique_ptr<MCInstPrinter> InstPrinter,
822                 std::unique_ptr<const MCInstrAnalysis> MIA,
823                 std::unique_ptr<MCPlusBuilder> MIB,
824                 std::unique_ptr<const MCRegisterInfo> MRI,
825                 std::unique_ptr<MCDisassembler> DisAsm,
826                 JournalingStreams Logger);
827 
828   ~BinaryContext();
829 
830   std::unique_ptr<MCObjectWriter> createObjectWriter(raw_pwrite_stream &OS);
831 
832   bool isELF() const { return TheTriple->isOSBinFormatELF(); }
833 
834   bool isMachO() const { return TheTriple->isOSBinFormatMachO(); }
835 
836   bool isAArch64() const {
837     return TheTriple->getArch() == llvm::Triple::aarch64;
838   }
839 
840   bool isX86() const {
841     return TheTriple->getArch() == llvm::Triple::x86 ||
842            TheTriple->getArch() == llvm::Triple::x86_64;
843   }
844 
845   bool isRISCV() const { return TheTriple->getArch() == llvm::Triple::riscv64; }
846 
847   // AArch64-specific functions to check if symbol is used to delimit
848   // code/data in .text. Code is marked by $x, data by $d.
849   MarkerSymType getMarkerType(const SymbolRef &Symbol) const;
850   bool isMarker(const SymbolRef &Symbol) const;
851 
852   /// Iterate over all BinaryData.
853   iterator_range<binary_data_const_iterator> getBinaryData() const {
854     return make_range(BinaryDataMap.begin(), BinaryDataMap.end());
855   }
856 
857   /// Iterate over all BinaryData.
858   iterator_range<binary_data_iterator> getBinaryData() {
859     return make_range(BinaryDataMap.begin(), BinaryDataMap.end());
860   }
861 
862   /// Iterate over all BinaryData associated with the given \p Section.
863   iterator_range<FilteredBinaryDataConstIterator>
864   getBinaryDataForSection(const BinarySection &Section) const {
865     auto Begin = BinaryDataMap.lower_bound(Section.getAddress());
866     if (Begin != BinaryDataMap.begin())
867       --Begin;
868     auto End = BinaryDataMap.upper_bound(Section.getEndAddress());
869     auto pred = [&Section](const binary_data_const_iterator &Itr) -> bool {
870       return Itr->second->getSection() == Section;
871     };
872     return make_range(FilteredBinaryDataConstIterator(pred, Begin, End),
873                       FilteredBinaryDataConstIterator(pred, End, End));
874   }
875 
876   /// Iterate over all BinaryData associated with the given \p Section.
877   iterator_range<FilteredBinaryDataIterator>
878   getBinaryDataForSection(BinarySection &Section) {
879     auto Begin = BinaryDataMap.lower_bound(Section.getAddress());
880     if (Begin != BinaryDataMap.begin())
881       --Begin;
882     auto End = BinaryDataMap.upper_bound(Section.getEndAddress());
883     auto pred = [&Section](const binary_data_iterator &Itr) -> bool {
884       return Itr->second->getSection() == Section;
885     };
886     return make_range(FilteredBinaryDataIterator(pred, Begin, End),
887                       FilteredBinaryDataIterator(pred, End, End));
888   }
889 
890   /// Iterate over all the sub-symbols of \p BD (if any).
891   iterator_range<binary_data_iterator> getSubBinaryData(BinaryData *BD);
892 
893   /// Clear the global symbol address -> name(s) map.
894   void clearBinaryData() {
895     GlobalSymbols.clear();
896     for (auto &Entry : BinaryDataMap)
897       delete Entry.second;
898     BinaryDataMap.clear();
899   }
900 
901   /// Process \p Address reference from code in function \p BF.
902   /// \p IsPCRel indicates if the reference is PC-relative.
903   /// Return <Symbol, Addend> pair corresponding to the \p Address.
904   std::pair<const MCSymbol *, uint64_t>
905   handleAddressRef(uint64_t Address, BinaryFunction &BF, bool IsPCRel);
906 
907   /// Analyze memory contents at the given \p Address and return the type of
908   /// memory contents (such as a possible jump table).
909   MemoryContentsType analyzeMemoryAt(uint64_t Address, BinaryFunction &BF);
910 
911   /// Return a value of the global \p Symbol or an error if the value
912   /// was not set.
913   ErrorOr<uint64_t> getSymbolValue(const MCSymbol &Symbol) const {
914     const BinaryData *BD = getBinaryDataByName(Symbol.getName());
915     if (!BD)
916       return std::make_error_code(std::errc::bad_address);
917     return BD->getAddress();
918   }
919 
920   /// Return a global symbol registered at a given \p Address and \p Size.
921   /// If no symbol exists, create one with unique name using \p Prefix.
922   /// If there are multiple symbols registered at the \p Address, then
923   /// return the first one.
924   MCSymbol *getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
925                                     uint64_t Size = 0, uint16_t Alignment = 0,
926                                     unsigned Flags = 0);
927 
928   /// Create a global symbol without registering an address.
929   MCSymbol *getOrCreateUndefinedGlobalSymbol(StringRef Name);
930 
931   /// Register a symbol with \p Name at a given \p Address using \p Size,
932   /// \p Alignment, and \p Flags. See llvm::SymbolRef::Flags for the definition
933   /// of \p Flags.
934   MCSymbol *registerNameAtAddress(StringRef Name, uint64_t Address,
935                                   uint64_t Size, uint16_t Alignment,
936                                   unsigned Flags = 0);
937 
938   /// Return BinaryData registered at a given \p Address or nullptr if no
939   /// global symbol was registered at the location.
940   const BinaryData *getBinaryDataAtAddress(uint64_t Address) const {
941     auto NI = BinaryDataMap.find(Address);
942     return NI != BinaryDataMap.end() ? NI->second : nullptr;
943   }
944 
945   BinaryData *getBinaryDataAtAddress(uint64_t Address) {
946     auto NI = BinaryDataMap.find(Address);
947     return NI != BinaryDataMap.end() ? NI->second : nullptr;
948   }
949 
950   /// Look up the symbol entry that contains the given \p Address (based on
951   /// the start address and size for each symbol).  Returns a pointer to
952   /// the BinaryData for that symbol.  If no data is found, nullptr is returned.
953   const BinaryData *getBinaryDataContainingAddress(uint64_t Address) const {
954     return getBinaryDataContainingAddressImpl(Address);
955   }
956 
957   BinaryData *getBinaryDataContainingAddress(uint64_t Address) {
958     return const_cast<BinaryData *>(
959         getBinaryDataContainingAddressImpl(Address));
960   }
961 
962   /// Return BinaryData for the given \p Name or nullptr if no
963   /// global symbol with that name exists.
964   const BinaryData *getBinaryDataByName(StringRef Name) const {
965     return GlobalSymbols.lookup(Name);
966   }
967 
968   BinaryData *getBinaryDataByName(StringRef Name) {
969     return GlobalSymbols.lookup(Name);
970   }
971 
972   /// Return registered PLT entry BinaryData with the given \p Name
973   /// or nullptr if no global PLT symbol with that name exists.
974   const BinaryData *getPLTBinaryDataByName(StringRef Name) const {
975     if (const BinaryData *Data = getBinaryDataByName(Name.str() + "@PLT"))
976       return Data;
977 
978     // The symbol name might contain versioning information e.g
979     // memcpy@@GLIBC_2.17. Remove it and try to locate binary data
980     // without it.
981     size_t At = Name.find("@");
982     if (At != std::string::npos)
983       return getBinaryDataByName(Name.str().substr(0, At) + "@PLT");
984 
985     return nullptr;
986   }
987 
988   /// Retrieves a reference to ELF's _GLOBAL_OFFSET_TABLE_ symbol, which points
989   /// at GOT, or null if it is not present in the input binary symtab.
990   BinaryData *getGOTSymbol();
991 
992   /// Checks if symbol name refers to ELF's _GLOBAL_OFFSET_TABLE_ symbol
993   bool isGOTSymbol(StringRef SymName) const {
994     return SymName == "_GLOBAL_OFFSET_TABLE_";
995   }
996 
997   /// Return true if \p Name was generated internally and was not present
998   /// in the input binary.
999   bool isInternalSymbolName(const StringRef Name) {
1000     return Name.starts_with("SYMBOLat") || Name.starts_with("DATAat") ||
1001            Name.starts_with("HOLEat");
1002   }
1003 
1004   MCSymbol *getHotTextStartSymbol() const {
1005     return Ctx->getOrCreateSymbol("__hot_start");
1006   }
1007 
1008   MCSymbol *getHotTextEndSymbol() const {
1009     return Ctx->getOrCreateSymbol("__hot_end");
1010   }
1011 
1012   MCSection *getTextSection() const { return MOFI->getTextSection(); }
1013 
1014   /// Return code section with a given name.
1015   MCSection *getCodeSection(StringRef SectionName) const {
1016     if (isELF())
1017       return Ctx->getELFSection(SectionName, ELF::SHT_PROGBITS,
1018                                 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
1019     else
1020       return Ctx->getMachOSection("__TEXT", SectionName,
1021                                   MachO::S_ATTR_PURE_INSTRUCTIONS,
1022                                   SectionKind::getText());
1023   }
1024 
1025   /// Return data section with a given name.
1026   MCSection *getDataSection(StringRef SectionName) const {
1027     return Ctx->getELFSection(SectionName, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
1028   }
1029 
1030   /// \name Pre-assigned Section Names
1031   /// @{
1032 
1033   const char *getMainCodeSectionName() const { return ".text"; }
1034 
1035   const char *getWarmCodeSectionName() const { return ".text.warm"; }
1036 
1037   const char *getColdCodeSectionName() const { return ".text.cold"; }
1038 
1039   const char *getHotTextMoverSectionName() const { return ".text.mover"; }
1040 
1041   const char *getInjectedCodeSectionName() const { return ".text.injected"; }
1042 
1043   const char *getInjectedColdCodeSectionName() const {
1044     return ".text.injected.cold";
1045   }
1046 
1047   ErrorOr<BinarySection &> getGdbIndexSection() const {
1048     return getUniqueSectionByName(".gdb_index");
1049   }
1050 
1051   ErrorOr<BinarySection &> getDebugNamesSection() const {
1052     return getUniqueSectionByName(".debug_names");
1053   }
1054 
1055   /// @}
1056 
1057   /// Register \p TargetFunction as a fragment of \p Function if checks pass:
1058   /// - if \p TargetFunction name matches \p Function name with a suffix:
1059   ///   fragment_name == parent_name.cold(.\d+)?
1060   /// True if the Function is registered, false if the check failed.
1061   bool registerFragment(BinaryFunction &TargetFunction,
1062                         BinaryFunction &Function);
1063 
1064   /// Return true if two functions belong to the same "family": are fragments
1065   /// of one another, or fragments of the same parent, or transitively fragment-
1066   /// related.
1067   bool areRelatedFragments(const BinaryFunction *LHS,
1068                            const BinaryFunction *RHS) const {
1069     return FragmentClasses.isEquivalent(LHS, RHS);
1070   }
1071 
1072   /// Add interprocedural reference for \p Function to \p Address
1073   void addInterproceduralReference(BinaryFunction *Function, uint64_t Address) {
1074     InterproceduralReferences.push_back({Function, Address});
1075   }
1076 
1077   /// Used to fix the target of linker-generated AArch64 adrp + add
1078   /// sequence with no relocation info.
1079   void addAdrpAddRelocAArch64(BinaryFunction &BF, MCInst &LoadLowBits,
1080                               MCInst &LoadHiBits, uint64_t Target);
1081 
1082   /// Return true if AArch64 veneer was successfully matched at a given
1083   /// \p Address and register veneer binary function if \p MatchOnly
1084   /// argument is false.
1085   bool handleAArch64Veneer(uint64_t Address, bool MatchOnly = false);
1086 
1087   /// Resolve inter-procedural dependencies from the references recorded via
1087   /// addInterproceduralReference().
1088   void processInterproceduralReferences();
1089 
1090   /// Skip functions with all parent and child fragments transitively.
1091   void skipMarkedFragments();
1092 
1093   /// Perform any necessary post processing on the symbol table after
1094   /// function disassembly is complete.  This processing fixes top
1095   /// level data holes and makes sure the symbol table is valid.
1096   /// It also assigns all memory profiling info to the appropriate
1097   /// BinaryData objects.
1098   void postProcessSymbolTable();
1099 
1100   /// Set the size of the global symbol located at \p Address.  Return
1101   /// false if no symbol exists, true otherwise.
1102   bool setBinaryDataSize(uint64_t Address, uint64_t Size);
1103 
1104   /// Print the global symbol table.
1105   void printGlobalSymbols(raw_ostream &OS) const;
1106 
1107   /// Register information about the given \p Section so we can look up
1108   /// sections by address.
1109   BinarySection &registerSection(SectionRef Section);
1110 
1111   /// Register a copy of \p OriginalSection under a different name.
1112   BinarySection &registerSection(const Twine &SectionName,
1113                                  const BinarySection &OriginalSection);
1114 
1115   /// Register or update the information for the section with the given
1116   /// \p Name.  If the section already exists, the information in the
1117   /// section will be updated with the new data.
1118   BinarySection &registerOrUpdateSection(const Twine &Name, unsigned ELFType,
1119                                          unsigned ELFFlags,
1120                                          uint8_t *Data = nullptr,
1121                                          uint64_t Size = 0,
1122                                          unsigned Alignment = 1);
1123 
1124   /// Register the information for the note (non-allocatable) section
1125   /// with the given \p Name.  If the section already exists, the
1126   /// information in the section will be updated with the new data.
1127   BinarySection &
1128   registerOrUpdateNoteSection(const Twine &Name, uint8_t *Data = nullptr,
1129                               uint64_t Size = 0, unsigned Alignment = 1,
1130                               bool IsReadOnly = true,
1131                               unsigned ELFType = ELF::SHT_PROGBITS) {
1132     return registerOrUpdateSection(Name, ELFType,
1133                                    BinarySection::getFlags(IsReadOnly), Data,
1134                                    Size, Alignment);
1135   }
1136 
1137   /// Remove sections that were preregistered but never used.
1138   void deregisterUnusedSections();
1139 
1140   /// Remove the given \p Section from the set of all sections.  Return
1141   /// true if the section was removed (and deleted), otherwise false.
1142   bool deregisterSection(BinarySection &Section);
1143 
1144   /// Re-register \p Section under the \p NewName.
1145   void renameSection(BinarySection &Section, const Twine &NewName);
1146 
1147   /// Iterate over all registered sections.
1148   iterator_range<FilteredSectionIterator> sections() {
1149     auto notNull = [](const SectionIterator &Itr) { return (bool)*Itr; };
1150     return make_range(
1151         FilteredSectionIterator(notNull, Sections.begin(), Sections.end()),
1152         FilteredSectionIterator(notNull, Sections.end(), Sections.end()));
1153   }
1154 
1155   /// Iterate over all registered sections.
1156   iterator_range<FilteredSectionConstIterator> sections() const {
1157     return const_cast<BinaryContext *>(this)->sections();
1158   }
1159 
1160   /// Iterate over all registered allocatable sections.
1161   iterator_range<FilteredSectionIterator> allocatableSections() {
1162     auto isAllocatable = [](const SectionIterator &Itr) {
1163       return *Itr && Itr->isAllocatable();
1164     };
1165     return make_range(
1166         FilteredSectionIterator(isAllocatable, Sections.begin(),
1167                                 Sections.end()),
1168         FilteredSectionIterator(isAllocatable, Sections.end(), Sections.end()));
1169   }
1170 
1171   /// Iterate over all registered code sections.
1172   iterator_range<FilteredSectionIterator> textSections() {
1173     auto isText = [](const SectionIterator &Itr) {
1174       return *Itr && Itr->isAllocatable() && Itr->isText();
1175     };
1176     return make_range(
1177         FilteredSectionIterator(isText, Sections.begin(), Sections.end()),
1178         FilteredSectionIterator(isText, Sections.end(), Sections.end()));
1179   }
1180 
1181   /// Iterate over all registered allocatable sections.
1182   iterator_range<FilteredSectionConstIterator> allocatableSections() const {
1183     return const_cast<BinaryContext *>(this)->allocatableSections();
1184   }
1185 
1186   /// Iterate over all registered non-allocatable sections.
1187   iterator_range<FilteredSectionIterator> nonAllocatableSections() {
1188     auto notAllocated = [](const SectionIterator &Itr) {
1189       return *Itr && !Itr->isAllocatable();
1190     };
1191     return make_range(
1192         FilteredSectionIterator(notAllocated, Sections.begin(), Sections.end()),
1193         FilteredSectionIterator(notAllocated, Sections.end(), Sections.end()));
1194   }
1195 
1196   /// Iterate over all registered non-allocatable sections.
1197   iterator_range<FilteredSectionConstIterator> nonAllocatableSections() const {
1198     return const_cast<BinaryContext *>(this)->nonAllocatableSections();
1199   }
1200 
1201   /// Iterate over all allocatable relocation sections.
1202   iterator_range<FilteredSectionIterator> allocatableRelaSections() {
1203     auto isAllocatableRela = [](const SectionIterator &Itr) {
1204       return *Itr && Itr->isAllocatable() && Itr->isRela();
1205     };
1206     return make_range(FilteredSectionIterator(isAllocatableRela,
1207                                               Sections.begin(), Sections.end()),
1208                       FilteredSectionIterator(isAllocatableRela, Sections.end(),
1209                                               Sections.end()));
1210   }
1211 
1212   /// Return base address for the shared object or PIE based on the segment
1213   /// mapping information. \p MMapAddress is an address where one of the
1214   /// segments was mapped. \p FileOffset is the offset in the file of the
1215   /// mapping. Note that \p FileOffset should be page-aligned and could be
1216   /// different from the file offset of the segment which could be unaligned.
1217   /// If no segment is found that matches \p FileOffset, return std::nullopt.
1218   std::optional<uint64_t> getBaseAddressForMapping(uint64_t MMapAddress,
1219                                                    uint64_t FileOffset) const;
1220 
1221   /// Check if the address belongs to this binary's static allocation space.
1222   bool containsAddress(uint64_t Address) const {
1223     return Address >= FirstAllocAddress && Address < LayoutStartAddress;
1224   }
1225 
1226   /// Return section name containing the given \p Address.
1227   ErrorOr<StringRef> getSectionNameForAddress(uint64_t Address) const;
1228 
1229   /// Print all sections.
1230   void printSections(raw_ostream &OS) const;
1231 
1232   /// Return largest section containing the given \p Address.  These
1233   /// functions only work for allocatable sections, i.e. ones with non-zero
1234   /// addresses.
1235   ErrorOr<BinarySection &> getSectionForAddress(uint64_t Address);
1236   ErrorOr<const BinarySection &> getSectionForAddress(uint64_t Address) const {
1237     return const_cast<BinaryContext *>(this)->getSectionForAddress(Address);
1238   }
1239 
1240   /// Return internal section representation for a section in a file.
1241   BinarySection *getSectionForSectionRef(SectionRef Section) const {
1242     return SectionRefToBinarySection.lookup(Section);
1243   }
1244 
1245   /// Return section(s) associated with given \p Name.
1246   iterator_range<NameToSectionMapType::iterator>
1247   getSectionByName(const Twine &Name) {
1248     return make_range(NameToSection.equal_range(Name.str()));
1249   }
1250   iterator_range<NameToSectionMapType::const_iterator>
1251   getSectionByName(const Twine &Name) const {
1252     return make_range(NameToSection.equal_range(Name.str()));
1253   }
1254 
1255   /// Return the unique section associated with given \p Name.
1256   /// If there is more than one section with the same name, return an error
1257   /// object.
1258   ErrorOr<BinarySection &>
1259   getUniqueSectionByName(const Twine &SectionName) const {
1260     auto Sections = getSectionByName(SectionName);
1261     if (Sections.begin() != Sections.end() &&
1262         std::next(Sections.begin()) == Sections.end())
1263       return *Sections.begin()->second;
1264     return std::make_error_code(std::errc::bad_address);
1265   }
1266 
1267   /// Return an unsigned value of \p Size stored at \p Address. The address has
1268   /// to be a valid statically allocated address for the binary.
1269   ErrorOr<uint64_t> getUnsignedValueAtAddress(uint64_t Address,
1270                                               size_t Size) const;
1271 
1272   /// Return a signed value of \p Size stored at \p Address. The address has
1273   /// to be a valid statically allocated address for the binary.
1274   ErrorOr<int64_t> getSignedValueAtAddress(uint64_t Address, size_t Size) const;
1275 
1276   /// Special case of getUnsignedValueAtAddress() that uses a pointer size.
1277   ErrorOr<uint64_t> getPointerAtAddress(uint64_t Address) const {
1278     return getUnsignedValueAtAddress(Address, AsmInfo->getCodePointerSize());
1279   }
1280 
1281   /// Replaces all references to \p ChildBF with \p ParentBF. \p ChildBF is then
1282   /// removed from the list of functions \p BFs. The profile data of \p ChildBF
1283   /// is merged into that of \p ParentBF. This function is thread safe.
1284   void foldFunction(BinaryFunction &ChildBF, BinaryFunction &ParentBF);
1285 
1286   /// Add a Section relocation at a given \p Address.
1287   void addRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t Type,
1288                      uint64_t Addend = 0, uint64_t Value = 0);
1289 
1290   /// Return a relocation registered at a given \p Address, or nullptr if there
1291   /// is no relocation at such address.
1292   const Relocation *getRelocationAt(uint64_t Address) const;
1293 
1294   /// Register a presence of PC-relative relocation at the given \p Address.
1295   void addPCRelativeDataRelocation(uint64_t Address) {
1296     DataPCRelocations.emplace(Address);
1297   }
1298 
1299   /// Register dynamic relocation at \p Address.
1300   void addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t Type,
1301                             uint64_t Addend, uint64_t Value = 0);
1302 
1303   /// Return a dynamic relocation registered at a given \p Address, or nullptr
1304   /// if there is no dynamic relocation at such address.
1305   const Relocation *getDynamicRelocationAt(uint64_t Address) const;
1306 
1307   /// Remove registered relocation at a given \p Address.
1308   bool removeRelocationAt(uint64_t Address);
1309 
1310   /// This function makes sure that symbols referenced by ambiguous relocations
1311   /// are marked as immovable. For now, if a section relocation points at the
1312   /// boundary between two symbols then those symbols are marked as immovable.
1313   void markAmbiguousRelocations(BinaryData &BD, const uint64_t Address);
1314 
1315   /// Return BinaryFunction corresponding to \p Symbol. If \p EntryDesc is not
1316   /// nullptr, set it to entry discriminator corresponding to \p Symbol
1317   /// (0 for single-entry functions). This function is thread safe.
1318   BinaryFunction *getFunctionForSymbol(const MCSymbol *Symbol,
1319                                        uint64_t *EntryDesc = nullptr);
1320 
1321   const BinaryFunction *
1322   getFunctionForSymbol(const MCSymbol *Symbol,
1323                        uint64_t *EntryDesc = nullptr) const {
1324     return const_cast<BinaryContext *>(this)->getFunctionForSymbol(Symbol,
1325                                                                    EntryDesc);
1326   }
1327 
1328   /// Associate the symbol \p Sym with the function \p BF for lookups with
1329   /// getFunctionForSymbol().
1330   void setSymbolToFunctionMap(const MCSymbol *Sym, BinaryFunction *BF) {
1331     SymbolToFunctionMap[Sym] = BF;
1332   }
1333 
1334   /// Populate some internal data structures with debug info.
1335   void preprocessDebugInfo();
1336 
1337   /// Add a filename entry from SrcCUID to DestCUID.
1338   unsigned addDebugFilenameToUnit(const uint32_t DestCUID,
1339                                   const uint32_t SrcCUID, unsigned FileIndex);
1340 
1341   /// Return functions in output layout order
1342   std::vector<BinaryFunction *> getSortedFunctions();
1343 
1344   /// Do the best effort to calculate the size of the function by emitting
1345   /// its code, and relaxing branch instructions. By default, branch
1346   /// instructions are updated to match the layout. Pass \p FixBranches set to
1347   /// false if the branches are known to be up to date with the code layout.
1348   ///
1349   /// Return the pair where the first size is for the main part, and the second
1350   /// size is for the cold one.
1351   /// Modify BinaryBasicBlock::OutputAddressRange for each basic block in the
1352   /// function in place so that BinaryBasicBlock::getOutputSize() gives the
1353   /// emitted size of the basic block.
1354   std::pair<size_t, size_t> calculateEmittedSize(BinaryFunction &BF,
1355                                                  bool FixBranches = true);
1356 
1357   /// Calculate the size of the instruction \p Inst optionally using a
1358   /// user-supplied emitter for lock-free multi-thread work. MCCodeEmitter is
1359   /// not thread safe and each thread should operate with its own copy of it.
1360   uint64_t
1361   computeInstructionSize(const MCInst &Inst,
1362                          const MCCodeEmitter *Emitter = nullptr) const {
1363     if (std::optional<uint32_t> Size = MIB->getSize(Inst))
1364       return *Size;
1365 
1366     if (MIB->isPseudo(Inst))
1367       return 0;
1368 
1369     if (std::optional<uint32_t> Size = MIB->getInstructionSize(Inst))
1370       return *Size;
1371 
1372     if (!Emitter)
1373       Emitter = this->MCE.get();
1374     SmallString<256> Code;
1375     SmallVector<MCFixup, 4> Fixups;
1376     Emitter->encodeInstruction(Inst, Code, Fixups, *STI);
1377     return Code.size();
1378   }
1379 
1380   /// Compute the native code size for a range of instructions.
1381   /// Note: this can be imprecise wrt the final binary since happening prior to
1382   /// relaxation, as well as wrt the original binary because of opcode
1383   /// shortening. MCCodeEmitter is not thread safe and each thread should operate
1384   /// with its own copy of it.
1385   template <typename Itr>
1386   uint64_t computeCodeSize(Itr Beg, Itr End,
1387                            const MCCodeEmitter *Emitter = nullptr) const {
1388     uint64_t Size = 0;
1389     while (Beg != End) {
1390       if (!MIB->isPseudo(*Beg))
1391         Size += computeInstructionSize(*Beg, Emitter);
1392       ++Beg;
1393     }
1394     return Size;
1395   }
1396 
1397   /// Validate that disassembling the \p Sequence of bytes into an instruction
1398   /// and assembling the instruction again, results in a byte sequence identical
1399   /// to the original one.
1400   bool validateInstructionEncoding(ArrayRef<uint8_t> Sequence) const;
1401 
1402   /// Return a function execution count threshold for determining whether
1403   /// the function is 'hot'. Consider it hot if count is above the average exec
1404   /// count of profiled functions.
1405   uint64_t getHotThreshold() const;
1406 
1407   /// Return true if instruction \p Inst requires an offset for further
1408   /// processing (e.g. assigning a profile).
1409   bool keepOffsetForInstruction(const MCInst &Inst) const {
1410     if (MIB->isCall(Inst) || MIB->isBranch(Inst) || MIB->isReturn(Inst) ||
1411         MIB->isPrefix(Inst) || MIB->isIndirectBranch(Inst)) {
1412       return true;
1413     }
1414     return false;
1415   }
1416 
1417   /// Return true if the function should be emitted to the output file.
1418   bool shouldEmit(const BinaryFunction &Function) const;
1419 
1420   /// Dump the assembly representation of MCInst to debug output.
1421   void dump(const MCInst &Inst) const;
1422 
1423   /// Print the string name for a CFI operation.
1424   static void printCFI(raw_ostream &OS, const MCCFIInstruction &Inst);
1425 
1426   /// Print a single MCInst in native format.  If Function is non-null,
1427   /// the instruction will be annotated with CFI and possibly DWARF line table
1428   /// info.
1429   /// If printMCInst is true, the instruction is also printed in the
1430   /// architecture independent format.
1431   void printInstruction(raw_ostream &OS, const MCInst &Instruction,
1432                         uint64_t Offset = 0,
1433                         const BinaryFunction *Function = nullptr,
1434                         bool PrintMCInst = false, bool PrintMemData = false,
1435                         bool PrintRelocations = false,
1436                         StringRef Endl = "\n") const;
1437 
1438   /// Print a range of instructions.
1439   template <typename Itr>
1440   uint64_t
1441   printInstructions(raw_ostream &OS, Itr Begin, Itr End, uint64_t Offset = 0,
1442                     const BinaryFunction *Function = nullptr,
1443                     bool PrintMCInst = false, bool PrintMemData = false,
1444                     bool PrintRelocations = false,
1445                     StringRef Endl = "\n") const {
1446     while (Begin != End) {
1447       printInstruction(OS, *Begin, Offset, Function, PrintMCInst, PrintMemData,
1448                        PrintRelocations, Endl);
1449       Offset += computeCodeSize(Begin, Begin + 1);
1450       ++Begin;
1451     }
1452     return Offset;
1453   }
1454 
1455   /// Log BOLT errors to journaling streams and quit process with non-zero error
1456   /// code 1 if error is fatal.
1457   void logBOLTErrorsAndQuitOnFatal(Error E);
1458 
1459   std::string generateBugReportMessage(StringRef Message,
1460                                        const BinaryFunction &Function) const;
1461 
  /// Bundle of the objects that back a standalone MCCodeEmitter, as filled in
  /// by createIndependentMCCodeEmitter().
  ///
  /// NOTE: members are destroyed in reverse declaration order, i.e. MCE first,
  /// then LocalCtx, then LocalMOFI. MCE is created against LocalCtx, and
  /// LocalCtx is handed a pointer to LocalMOFI, so do not reorder the members
  /// without checking those dependencies.
  struct IndependentCodeEmitter {
    /// Object file info registered with LocalCtx.
    std::unique_ptr<MCObjectFileInfo> LocalMOFI;
    /// Private MC context the emitter is created against.
    std::unique_ptr<MCContext> LocalCtx;
    /// The code emitter itself.
    std::unique_ptr<MCCodeEmitter> MCE;
  };
1467 
1468   /// Encapsulates an independent MCCodeEmitter that doesn't share resources
1469   /// with the main one available through BinaryContext::MCE, managed by
1470   /// BinaryContext.
1471   /// This is intended to create a lock-free environment for an auxiliary thread
1472   /// that needs to perform work with an MCCodeEmitter that can be transient or
1473   /// won't be used in the main code emitter.
1474   IndependentCodeEmitter createIndependentMCCodeEmitter() const {
1475     IndependentCodeEmitter MCEInstance;
1476     MCEInstance.LocalCtx.reset(
1477         new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
1478     MCEInstance.LocalMOFI.reset(
1479         TheTarget->createMCObjectFileInfo(*MCEInstance.LocalCtx.get(),
1480                                           /*PIC=*/!HasFixedLoadAddress));
1481     MCEInstance.LocalCtx->setObjectFileInfo(MCEInstance.LocalMOFI.get());
1482     MCEInstance.MCE.reset(
1483         TheTarget->createMCCodeEmitter(*MII, *MCEInstance.LocalCtx));
1484     return MCEInstance;
1485   }
1486 
1487   /// Creating MCStreamer instance.
1488   std::unique_ptr<MCStreamer>
1489   createStreamer(llvm::raw_pwrite_stream &OS) const {
1490     MCCodeEmitter *MCE = TheTarget->createMCCodeEmitter(*MII, *Ctx);
1491     MCAsmBackend *MAB =
1492         TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions());
1493     std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS);
1494     std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
1495         *TheTriple, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW),
1496         std::unique_ptr<MCCodeEmitter>(MCE), *STI));
1497     return Streamer;
1498   }
1499 
  /// Store \p Map in IOAddressMap. The argument is taken by value and moved
  /// into the member, so a caller can std::move() its map into this call.
  /// NOTE(review): presumably this is the input-to-output address mapping -
  /// confirm against bolt/Core/AddressMap.h.
  void setIOAddressMap(AddressMap Map) { IOAddressMap = std::move(Map); }
  /// Return a reference to the address map held in IOAddressMap (see
  /// setIOAddressMap()).
  ///
  /// The map must already have been set: this is checked only by an assert,
  /// so with assertions disabled an unset IOAddressMap is dereferenced
  /// unchecked.
  const AddressMap &getIOAddressMap() const {
    assert(IOAddressMap && "Address map not set yet");
    return *IOAddressMap;
  }
1505 
  /// Return the stream held in Logger.Out.
  /// NOTE(review): presumably the destination for regular (non-error) BOLT
  /// output - confirm against the definition of Logger.
  raw_ostream &outs() const { return Logger.Out; }
1507 
  /// Return the stream held in Logger.Err.
  /// NOTE(review): presumably the destination for BOLT error and warning
  /// output - confirm against the definition of Logger.
  raw_ostream &errs() const { return Logger.Err; }
1509 };
1510 
1511 template <typename T, typename = std::enable_if_t<sizeof(T) == 1>>
1512 inline raw_ostream &operator<<(raw_ostream &OS, const ArrayRef<T> &ByteArray) {
1513   const char *Sep = "";
1514   for (const auto Byte : ByteArray) {
1515     OS << Sep << format("%.2x", Byte);
1516     Sep = " ";
1517   }
1518   return OS;
1519 }
1520 
1521 } // namespace bolt
1522 } // namespace llvm
1523 
1524 #endif
1525