1 //===- bolt/Core/BinaryContext.h - Low-level context ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Context for processing binary executable/library files. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef BOLT_CORE_BINARY_CONTEXT_H 14 #define BOLT_CORE_BINARY_CONTEXT_H 15 16 #include "bolt/Core/AddressMap.h" 17 #include "bolt/Core/BinaryData.h" 18 #include "bolt/Core/BinarySection.h" 19 #include "bolt/Core/DebugData.h" 20 #include "bolt/Core/DynoStats.h" 21 #include "bolt/Core/JumpTable.h" 22 #include "bolt/Core/MCPlusBuilder.h" 23 #include "bolt/RuntimeLibs/RuntimeLibrary.h" 24 #include "llvm/ADT/AddressRanges.h" 25 #include "llvm/ADT/ArrayRef.h" 26 #include "llvm/ADT/EquivalenceClasses.h" 27 #include "llvm/ADT/StringMap.h" 28 #include "llvm/ADT/iterator.h" 29 #include "llvm/BinaryFormat/Dwarf.h" 30 #include "llvm/BinaryFormat/MachO.h" 31 #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" 32 #include "llvm/MC/MCAsmInfo.h" 33 #include "llvm/MC/MCCodeEmitter.h" 34 #include "llvm/MC/MCContext.h" 35 #include "llvm/MC/MCObjectFileInfo.h" 36 #include "llvm/MC/MCObjectWriter.h" 37 #include "llvm/MC/MCPseudoProbe.h" 38 #include "llvm/MC/MCSectionELF.h" 39 #include "llvm/MC/MCSectionMachO.h" 40 #include "llvm/MC/MCStreamer.h" 41 #include "llvm/MC/MCSymbol.h" 42 #include "llvm/MC/TargetRegistry.h" 43 #include "llvm/Support/ErrorOr.h" 44 #include "llvm/Support/RWMutex.h" 45 #include "llvm/Support/raw_ostream.h" 46 #include "llvm/TargetParser/Triple.h" 47 #include <functional> 48 #include <list> 49 #include <map> 50 #include <optional> 51 #include <set> 52 #include <string> 53 #include <system_error> 54 #include <type_traits> 55 
#include <unordered_map> 56 #include <vector> 57 58 namespace llvm { 59 class MCDisassembler; 60 class MCInstPrinter; 61 62 using namespace object; 63 64 namespace bolt { 65 66 class BinaryFunction; 67 68 /// Information on loadable part of the file. 69 struct SegmentInfo { 70 uint64_t Address; /// Address of the segment in memory. 71 uint64_t Size; /// Size of the segment in memory. 72 uint64_t FileOffset; /// Offset in the file. 73 uint64_t FileSize; /// Size in file. 74 uint64_t Alignment; /// Alignment of the segment. 75 bool IsExecutable; /// Is the executable bit set on the Segment? 76 77 void print(raw_ostream &OS) const { 78 OS << "SegmentInfo { Address: 0x" << Twine::utohexstr(Address) 79 << ", Size: 0x" << Twine::utohexstr(Size) << ", FileOffset: 0x" 80 << Twine::utohexstr(FileOffset) << ", FileSize: 0x" 81 << Twine::utohexstr(FileSize) << ", Alignment: 0x" 82 << Twine::utohexstr(Alignment) << ", " << (IsExecutable ? "x" : " ") 83 << "}"; 84 }; 85 }; 86 87 inline raw_ostream &operator<<(raw_ostream &OS, const SegmentInfo &SegInfo) { 88 SegInfo.print(OS); 89 return OS; 90 } 91 92 // AArch64-specific symbol markers used to delimit code/data in .text. 93 enum class MarkerSymType : char { 94 NONE = 0, 95 CODE, 96 DATA, 97 }; 98 99 enum class MemoryContentsType : char { 100 UNKNOWN = 0, /// Unknown contents. 101 POSSIBLE_JUMP_TABLE, /// Possibly a non-PIC jump table. 102 POSSIBLE_PIC_JUMP_TABLE, /// Possibly a PIC jump table. 103 }; 104 105 /// Helper function to truncate a \p Value to given size in \p Bytes. 106 inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { 107 return Value & ((uint64_t)(int64_t)-1 >> (64 - Bytes * 8)); 108 } 109 110 /// Filter iterator. 
/// Wraps an underlying iterator range [Itr, End) and exposes only those
/// positions for which the predicate returns true. The predicate receives the
/// underlying *iterator* (not the element it points to).
///
/// \tparam ItrType  underlying bidirectional iterator type.
/// \tparam PredType callable taking `const ItrType &` and returning bool.
template <typename ItrType,
          typename PredType = std::function<bool(const ItrType &)>>
class FilterIterator {
  using inner_traits = std::iterator_traits<ItrType>;
  using Iterator = FilterIterator;

  PredType Pred;
  ItrType Itr, End;

  /// Step backwards to the closest preceding position accepted by Pred.
  /// There is no lower bound stored in this class, so the caller must
  /// guarantee that such a position exists; otherwise the underlying iterator
  /// is decremented past the start of its range.
  void prev() {
    while (!Pred(--Itr))
      ;
  }

  /// Advance by one underlying position, then skip rejected positions.
  void next() {
    ++Itr;
    nextMatching();
  }

  /// Skip forward until Pred accepts the current position or End is reached.
  void nextMatching() {
    while (Itr != End && !Pred(Itr))
      ++Itr;
  }

public:
  using iterator_category = std::bidirectional_iterator_tag;
  using value_type = typename inner_traits::value_type;
  using difference_type = typename inner_traits::difference_type;
  using pointer = typename inner_traits::pointer;
  using reference = typename inner_traits::reference;

  Iterator &operator++() { next(); return *this; }
  Iterator &operator--() { prev(); return *this; }

  // The postfix forms must hand back a copy of the whole filter iterator
  // (predicate, position and end). Copying only the underlying iterator does
  // not convert to FilterIterator and fails to compile once instantiated.
  Iterator operator++(int) { Iterator Tmp(*this); next(); return Tmp; }
  Iterator operator--(int) { Iterator Tmp(*this); prev(); return Tmp; }

  /// Equality compares the current underlying position only; the predicate
  /// and the end position do not take part in the comparison.
  bool operator==(const Iterator &Other) const { return Itr == Other.Itr; }
  bool operator!=(const Iterator &Other) const { return !operator==(Other); }
  reference operator*() { return *Itr; }
  pointer operator->() { return &operator*(); }

  /// Build a filter over [Itr, End). The iterator is immediately advanced to
  /// the first accepted position, or to \p End if nothing is accepted.
  FilterIterator(PredType Pred, ItrType Itr, ItrType End)
      : Pred(Pred), Itr(Itr), End(End) {
    nextMatching();
  }
};

/// BOLT-exclusive errors generated in core BOLT libraries, optionally holding a
/// string message and whether it is fatal or not. In case it is fatal and if
/// BOLT is running as a standalone process, the process might be killed as soon
/// as the error is checked.
class BOLTError : public ErrorInfo<BOLTError> {
public:
  // Identifier required by the ErrorInfo<> base class; defined out of line.
  static char ID;

  /// Create an error that is fatal when \p IsFatal is true, with an optional
  /// message \p S (an empty Twine by default).
  BOLTError(bool IsFatal, const Twine &S = Twine());
  void log(raw_ostream &OS) const override;

  /// Return the fatality flag stored in this error.
  bool isFatal() const { return IsFatal; }

  /// Return the stored message string.
  const std::string &getMessage() const { return Msg; }
  std::error_code convertToErrorCode() const override;

private:
  bool IsFatal;
  std::string Msg;
};

/// Streams used by BOLT to log regular or error events
struct JournalingStreams {
  raw_ostream &Out; // Stream for regular events.
  raw_ostream &Err; // Stream for error events.
};

// Factory helpers, defined out of line. Presumably each wraps \p S in a
// BOLTError with the matching fatality flag -- confirm against the definitions.
Error createNonFatalBOLTError(const Twine &S);
Error createFatalBOLTError(const Twine &S);

class BinaryContext {
  // No default construction: a context is built through the multi-argument
  // constructor or through createBinaryContext().
  BinaryContext() = delete;

  /// Name of the binary file the context originated from.
  std::string Filename;

  /// Unique build ID if available for the binary.
  std::optional<std::string> FileBuildID;

  /// Set of all sections.
  // The comparator orders section pointers by comparing the pointed-to
  // BinarySection objects with operator<, not by pointer value.
  struct CompareSections {
    bool operator()(const BinarySection *A, const BinarySection *B) const {
      return *A < *B;
    }
  };
  using SectionSetType = std::set<BinarySection *, CompareSections>;
  SectionSetType Sections;

  // Iterators over Sections that dereference to BinarySection rather than to
  // the stored pointer.
  using SectionIterator = pointee_iterator<SectionSetType::iterator>;
  using SectionConstIterator = pointee_iterator<SectionSetType::const_iterator>;

  // Predicate-filtered variants of the section iterators.
  using FilteredSectionIterator = FilterIterator<SectionIterator>;
  using FilteredSectionConstIterator = FilterIterator<SectionConstIterator>;

  /// Map virtual address to a section. It is possible to have more than one
  /// section mapped to the same address, e.g. non-allocatable sections.
  using AddressToSectionMapType = std::multimap<uint64_t, BinarySection *>;
  AddressToSectionMapType AddressToSection;

  /// multimap of section name to BinarySection object. Some binaries
  /// have multiple sections with the same name.
214 using NameToSectionMapType = std::multimap<std::string, BinarySection *>; 215 NameToSectionMapType NameToSection; 216 217 /// Map section references to BinarySection for matching sections in the 218 /// input file to internal section representation. 219 DenseMap<SectionRef, BinarySection *> SectionRefToBinarySection; 220 221 /// Low level section registration. 222 BinarySection ®isterSection(BinarySection *Section); 223 224 /// Store all functions in the binary, sorted by original address. 225 std::map<uint64_t, BinaryFunction> BinaryFunctions; 226 227 /// A mutex that is used to control parallel accesses to BinaryFunctions 228 mutable llvm::sys::RWMutex BinaryFunctionsMutex; 229 230 /// Functions injected by BOLT 231 std::vector<BinaryFunction *> InjectedBinaryFunctions; 232 233 /// Jump tables for all functions mapped by address. 234 std::map<uint64_t, JumpTable *> JumpTables; 235 236 /// Locations of PC-relative relocations in data objects. 237 std::unordered_set<uint64_t> DataPCRelocations; 238 239 /// Used in duplicateJumpTable() to uniquely identify a JT clone 240 /// Start our IDs with a high number so getJumpTableContainingAddress checks 241 /// with size won't overflow 242 uint32_t DuplicatedJumpTables{0x10000000}; 243 244 /// Function fragments to skip. 245 std::unordered_set<BinaryFunction *> FragmentsToSkip; 246 247 /// Fragment equivalence classes to query belonging to the same "family" in 248 /// presence of multiple fragments/multiple parents. 249 EquivalenceClasses<const BinaryFunction *> FragmentClasses; 250 251 /// The runtime library. 252 std::unique_ptr<RuntimeLibrary> RtLibrary; 253 254 /// DWP Context. 255 std::shared_ptr<DWARFContext> DWPContext; 256 257 /// Decoded pseudo probes. 258 std::shared_ptr<MCPseudoProbeDecoder> PseudoProbeDecoder; 259 260 /// A map of DWO Ids to CUs. 
using DWOIdToCUMapType = std::unordered_map<uint64_t, DWARFUnit *>;
DWOIdToCUMapType DWOCUs;

// DWARF version flags, reported by isDWARF5Used() and isDWARFLegacyUsed().
bool ContainsDwarf5{false};
bool ContainsDwarfLegacy{false};

/// Mapping from input to output addresses.
// Empty until a map is installed.
std::optional<AddressMap> IOAddressMap;

/// Preprocess DWO debug information.
void preprocessDWODebugInfo();

/// DWARF line info for CUs.
// Keyed by an unsigned ID; getDwarfLineTable() indexes it with a CU ID.
std::map<unsigned, DwarfLineTable> DwarfLineTablesCUMap;

/// Internal helper for removing section name from a lookup table.
void deregisterSectionName(const BinarySection &Section);

public:
/// Factory for a BinaryContext. Returns either the newly created context or
/// an error (Expected<>). The roles of the individual arguments are defined by
/// the out-of-line implementation.
static Expected<std::unique_ptr<BinaryContext>> createBinaryContext(
    Triple TheTriple, std::shared_ptr<orc::SymbolStringPool> SSP,
    StringRef InputFileName, SubtargetFeatures *Features, bool IsPIC,
    std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger);

/// Superset of compiler units that will contain overwritten code that needs
/// new debug info. In a few cases, functions may end up not being
/// overwritten, but it is okay to re-generate debug info for them.
std::set<const DWARFUnit *> ProcessedCUs;

// Setup MCPlus target builder
// Takes ownership of \p TargetBuilder and stores it in MIB.
void initializeTarget(std::unique_ptr<MCPlusBuilder> TargetBuilder) {
  MIB = std::move(TargetBuilder);
}

/// Return function fragments to skip.
const std::unordered_set<BinaryFunction *> &getFragmentsToSkip() {
  return FragmentsToSkip;
}

/// Add function fragment to skip
void addFragmentsToSkip(BinaryFunction *Function) {
  FragmentsToSkip.insert(Function);
}

/// Remove every entry from the set of fragments to skip.
void clearFragmentsToSkip() { FragmentsToSkip.clear(); }

/// Given DWOId returns CU if it exists in DWOCUs.
std::optional<DWARFUnit *> getDWOCU(uint64_t DWOId);

/// Returns DWOContext if it exists.
DWARFContext *getDWOContext() const;

/// Get Number of DWOCUs in a map.
// The container size is narrowed to uint32_t on return.
uint32_t getNumDWOCUs() { return DWOCUs.size(); }

/// Returns true if DWARF5 is used.
bool isDWARF5Used() const { return ContainsDwarf5; }

/// Returns true if DWARF4 or lower is used.
bool isDWARFLegacyUsed() const { return ContainsDwarfLegacy; }

/// Return the whole CU ID -> line table map by mutable reference.
std::map<unsigned, DwarfLineTable> &getDwarfLineTables() {
  return DwarfLineTablesCUMap;
}

/// Return the line table for \p CUID.
// std::map::operator[] default-constructs and inserts an entry when \p CUID
// is not present yet, so this accessor never fails but may grow the map.
DwarfLineTable &getDwarfLineTable(unsigned CUID) {
  return DwarfLineTablesCUMap[CUID];
}

/// Defined out of line; returns an unsigned value or an error. Presumably
/// registers a file in the line table of \p CUID and yields its file number --
/// confirm against the definition.
Expected<unsigned> getDwarfFile(StringRef Directory, StringRef FileName,
                                unsigned FileNumber,
                                std::optional<MD5::MD5Result> Checksum,
                                std::optional<StringRef> Source,
                                unsigned CUID, unsigned DWARFVersion);

/// [start memory address] -> [segment info] mapping.
std::map<uint64_t, SegmentInfo> SegmentMapInfo;

/// Symbols that are expected to be undefined in MCContext during emission.
std::unordered_set<MCSymbol *> UndefinedSymbols;

/// [name] -> [BinaryData*] map used for global symbol resolution.
using SymbolMapType = StringMap<BinaryData *>;
SymbolMapType GlobalSymbols;

/// [address] -> [BinaryData], ...
/// Addresses never change.
/// Note: it is important that clients do not hold on to instances of
/// BinaryData* while the map is still being modified during BinaryFunction
/// disassembly. This is because of the possibility that a regular
/// BinaryData is later discovered to be a JumpTable.
using BinaryDataMapType = std::map<uint64_t, BinaryData *>;
using binary_data_iterator = BinaryDataMapType::iterator;
using binary_data_const_iterator = BinaryDataMapType::const_iterator;
BinaryDataMapType BinaryDataMap;

// Predicate-filtered iterators over BinaryDataMap. The predicate is handed
// the map iterator itself, not the mapped BinaryData.
using FilteredBinaryDataConstIterator =
    FilterIterator<binary_data_const_iterator>;
using FilteredBinaryDataIterator = FilterIterator<binary_data_iterator>;

/// Return the stored file name as a view into this object's string.
StringRef getFilename() const { return Filename; }

/// Store a copy of \p Name as the file name.
void setFilename(StringRef Name) { Filename = std::string(Name); }

/// Return the build ID as a view into this object's string, or std::nullopt
/// when no build ID has been set.
std::optional<StringRef> getFileBuildID() const {
  if (FileBuildID)
    return StringRef(*FileBuildID);

  return std::nullopt;
}

/// Store a copy of \p ID as the build ID.
void setFileBuildID(StringRef ID) { FileBuildID = std::string(ID); }

// Accessors for the HasSymbolsWithFileName flag.
bool hasSymbolsWithFileName() const { return HasSymbolsWithFileName; }
void setHasSymbolsWithFileName(bool Value) { HasSymbolsWithFileName = Value; }

/// Return a shared owner of the symbol string pool (copies the shared_ptr).
std::shared_ptr<orc::SymbolStringPool> getSymbolStringPool() { return SSP; }
/// Return true if relocations against symbol with a given name
/// must be created.
bool forceSymbolRelocations(StringRef SymbolName) const;

// Accessors for the NumUnusedProfiledObjects counter.
uint64_t getNumUnusedProfiledObjects() const {
  return NumUnusedProfiledObjects;
}
void setNumUnusedProfiledObjects(uint64_t N) { NumUnusedProfiledObjects = N; }

/// Return a non-owning pointer to the runtime library; null when none is set.
RuntimeLibrary *getRuntimeLibrary() { return RtLibrary.get(); }

/// Take ownership of \p Lib. Asserts that no runtime library was set before.
void setRuntimeLibrary(std::unique_ptr<RuntimeLibrary> Lib) {
  assert(!RtLibrary && "Cannot set runtime library twice.");
  RtLibrary = std::move(Lib);
}

/// Return a non-owning pointer to the pseudo probe decoder; null when none
/// is set.
const MCPseudoProbeDecoder *getPseudoProbeDecoder() const {
  return PseudoProbeDecoder.get();
}

/// Share ownership of \p Decoder. Asserts that no decoder was set before.
void setPseudoProbeDecoder(std::shared_ptr<MCPseudoProbeDecoder> Decoder) {
  assert(!PseudoProbeDecoder && "Cannot set pseudo probe decoder twice.");
  PseudoProbeDecoder = Decoder;
}

/// Return BinaryFunction containing a given \p Address or nullptr if
/// no registered function contains the \p Address.
///
/// In a binary a function has somewhat vague boundaries. E.g. a function can
/// refer to the first byte past the end of the function, and it will still be
/// referring to this function, not the function following it in the address
/// space. Thus we have the following flags that allow to lookup for
/// a function where a caller has more context for the search.
///
/// If \p CheckPastEnd is true and the \p Address falls on a byte
/// immediately following the last byte of some function and there's no other
/// function that starts there, then return the function as the one containing
/// the \p Address. This is useful when we need to locate functions for
/// references pointing immediately past a function body.
///
/// If \p UseMaxSize is true, then include the space between this function
/// body and the next object in address ranges that we check.
BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address,
                                                   bool CheckPastEnd = false,
                                                   bool UseMaxSize = false);
// Const overload: forwards to the non-const overload via const_cast and
// returns the result as a pointer-to-const.
const BinaryFunction *
getBinaryFunctionContainingAddress(uint64_t Address,
                                   bool CheckPastEnd = false,
                                   bool UseMaxSize = false) const {
  return const_cast<BinaryContext *>(this)
      ->getBinaryFunctionContainingAddress(Address, CheckPastEnd, UseMaxSize);
}

/// Return a BinaryFunction that starts at a given \p Address.
BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address);

// Const overload: forwards to the non-const overload via const_cast.
const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address) const {
  return const_cast<BinaryContext *>(this)->getBinaryFunctionAtAddress(
      Address);
}

/// Return size of an entry for the given jump table \p Type.
// JTT_PIC entries are 4 bytes; every other type uses the code pointer size
// reported by AsmInfo.
uint64_t getJumpTableEntrySize(JumpTable::JumpTableType Type) const {
  return Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize();
}

/// Return JumpTable containing a given \p Address.
/// Returns nullptr when no registered jump table covers \p Address.
JumpTable *getJumpTableContainingAddress(uint64_t Address) {
  // upper_bound yields the first table starting strictly after Address, so
  // the candidate is the entry right before it (if there is one).
  auto JTI = JumpTables.upper_bound(Address);
  if (JTI == JumpTables.begin())
    return nullptr;
  --JTI;
  // Address lies inside [start, start + size).
  if (JTI->first + JTI->second->getSize() > Address)
    return JTI->second;
  // A table whose size is zero still matches its own start address.
  if (JTI->second->getSize() == 0 && JTI->first == Address)
    return JTI->second;
  return nullptr;
}

/// Deregister JumpTable registered at a given \p Address and delete it.
void deleteJumpTable(uint64_t Address);

/// Return the size in bytes of a value stored with the given DWARF exception
/// handling pointer \p Encoding (the dwarf::DW_EH_PE_* constants).
/// DW_EH_PE_omit yields 0. Only the low four bits of \p Encoding are
/// examined otherwise; an unrecognized value reaches llvm_unreachable.
unsigned getDWARFEncodingSize(unsigned Encoding) {
  if (Encoding == dwarf::DW_EH_PE_omit)
    return 0;
  switch (Encoding & 0x0f) {
  default:
    llvm_unreachable("unknown encoding");
  case dwarf::DW_EH_PE_absptr:
  case dwarf::DW_EH_PE_signed:
    // Pointer-sized formats: size comes from the target's AsmInfo.
    return AsmInfo->getCodePointerSize();
  case dwarf::DW_EH_PE_udata2:
  case dwarf::DW_EH_PE_sdata2:
    return 2;
  case dwarf::DW_EH_PE_udata4:
  case dwarf::DW_EH_PE_sdata4:
    return 4;
  case dwarf::DW_EH_PE_udata8:
  case dwarf::DW_EH_PE_sdata8:
    return 8;
  }
}

/// [MCSymbol] -> [BinaryFunction]
///
/// As we fold identical functions, multiple symbols can point
/// to the same BinaryFunction.
std::unordered_map<const MCSymbol *, BinaryFunction *> SymbolToFunctionMap;

/// A mutex that is used to control parallel accesses to SymbolToFunctionMap
mutable llvm::sys::RWMutex SymbolToFunctionMapMutex;

/// Look up the symbol entry that contains the given \p Address (based on
/// the start address and size for each symbol). Returns a pointer to
/// the BinaryData for that symbol. If no data is found, nullptr is returned.
const BinaryData *getBinaryDataContainingAddressImpl(uint64_t Address) const;

/// Update the Parent fields in BinaryDatas after adding a new entry into
/// \p BinaryDataMap.
void updateObjectNesting(BinaryDataMapType::iterator GAI);

/// Validate that if object address ranges overlap that the object with
/// the larger range is a parent of the object with the smaller range.
bool validateObjectNesting() const;

/// Validate that there are no top level "holes" in each section
/// and that all relocations with a section are mapped to a valid
/// top level BinaryData.
bool validateHoles() const;

/// Produce output address ranges based on input ranges for some module.
DebugAddressRangesVector translateModuleAddressRanges(
    const DWARFAddressRangesVector &InputRanges) const;

/// Get a bogus "absolute" section that will be associated with all
/// absolute BinaryDatas.
BinarySection &absoluteSection();

/// Process "holes" in between known BinaryData objects. For now,
/// symbols are padded with the space before the next BinaryData object.
void fixBinaryDataHoles();

/// Generate names based on data hashes for unknown symbols.
void generateSymbolHashes();

/// Construct BinaryFunction object and add it to internal maps.
/// \p SymbolSize and \p Alignment both default to 0.
BinaryFunction *createBinaryFunction(const std::string &Name,
                                     BinarySection &Section, uint64_t Address,
                                     uint64_t Size, uint64_t SymbolSize = 0,
                                     uint16_t Alignment = 0);

/// Return all functions for this rewrite instance.
// Mutable view of the address-keyed function map.
std::map<uint64_t, BinaryFunction> &getBinaryFunctions() {
  return BinaryFunctions;
}

/// Return all functions for this rewrite instance.
// Read-only view of the address-keyed function map.
const std::map<uint64_t, BinaryFunction> &getBinaryFunctions() const {
  return BinaryFunctions;
}

/// Create BOLT-injected function
/// \p IsSimple defaults to true.
BinaryFunction *createInjectedBinaryFunction(const std::string &Name,
                                             bool IsSimple = true);

/// Return the list of BOLT-injected functions by mutable reference.
std::vector<BinaryFunction *> &getInjectedBinaryFunctions() {
  return InjectedBinaryFunctions;
}

/// Return vector with all functions, i.e. include functions from the input
/// binary and functions created by BOLT.
std::vector<BinaryFunction *> getAllBinaryFunctions();

/// Construct a jump table for \p Function at \p Address or return an existing
/// one at that location.
///
/// May create an embedded jump table.
/// NOTE(review): earlier wording said the label is returned "as the second
/// element of the pair", but the declaration returns a single
/// `const MCSymbol *` -- presumably the jump table label; confirm against the
/// definition.
const MCSymbol *getOrCreateJumpTable(BinaryFunction &Function,
                                     uint64_t Address,
                                     JumpTable::JumpTableType Type);

/// Analyze a possible jump table of type \p Type at a given \p Address.
/// \p BF is a function referencing the jump table.
/// Return true if the jump table was detected at \p Address, and false
/// otherwise.
///
/// If \p NextJTAddress is different from zero, it is used as an upper
/// bound for jump table memory layout.
///
/// Optionally, populate \p EntriesAsAddress from jump table entries. The
/// entries could be partially populated if the jump table detection fails.
///
/// \p HasEntryInFragment is an optional out-parameter (nullptr by default).
/// NOTE(review): presumably set when an entry targets another fragment of the
/// function -- confirm against the definition.
bool analyzeJumpTable(const uint64_t Address,
                      const JumpTable::JumpTableType Type,
                      const BinaryFunction &BF,
                      const uint64_t NextJTAddress = 0,
                      JumpTable::AddressesType *EntriesAsAddress = nullptr,
                      bool *HasEntryInFragment = nullptr) const;

/// After jump table locations are established, this function will populate
/// their EntriesAsAddress based on memory contents.
void populateJumpTables();

/// Returns a jump table ID and label pointing to the duplicated jump table.
/// Ordinarily, jump tables are identified by their address in the input
/// binary. We return an ID with the high bit set to differentiate it from
/// regular addresses, avoiding conflicts with standard jump tables.
std::pair<uint64_t, const MCSymbol *>
duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
                   const MCSymbol *OldLabel);

/// Generate a unique name for jump table at a given \p Address belonging
/// to function \p BF.
std::string generateJumpTableName(const BinaryFunction &BF, uint64_t Address);

/// Free memory used by JumpTable's EntriesAsAddress
void clearJumpTableTempData() {
  for (auto &JTI : JumpTables) {
    JumpTable &JT = *JTI.second;
    // Swap with an empty temporary: the old contents move into Temp and are
    // destroyed with it at the end of this iteration.
    JumpTable::AddressesType Temp;
    Temp.swap(JT.EntriesAsAddress);
  }
}
/// Return true if the array of bytes represents a valid code padding.
bool hasValidCodePadding(const BinaryFunction &BF);

/// Verify padding area between functions, and adjust max function size
/// accordingly.
void adjustCodePadding();

/// Regular page size.
// The per-instance value starts at 0x1000. The two constants below carry the
// x86 and AArch64 values; where RegularPageSize is updated is not shown here.
unsigned RegularPageSize{0x1000};
static constexpr unsigned RegularPageSizeX86 = 0x1000;
static constexpr unsigned RegularPageSizeAArch64 = 0x10000;

/// Huge page size to use.
static constexpr unsigned HugePageSize = 0x200000;

/// Addresses reserved for kernel on x86_64 start at this location.
static constexpr uint64_t KernelStartX86_64 = 0xFFFF'FFFF'8000'0000;

/// Map address to a constant island owner (constant data in code section)
std::map<uint64_t, BinaryFunction *> AddressToConstantIslandMap;

/// A map from jump table address to insertion order. Used for generating
/// jump table names.
std::map<uint64_t, size_t> JumpTableIds;

// The members below are owned MC-layer and target objects. They match the
// constructor parameters of the same names and are documented here only by
// their types.
std::unique_ptr<MCContext> Ctx;

/// A mutex that is used to control parallel accesses to Ctx
mutable llvm::sys::RWMutex CtxMutex;

/// Acquire CtxMutex and return the owning lock; the mutex is released when
/// the returned std::unique_lock is destroyed.
std::unique_lock<llvm::sys::RWMutex> scopeLock() const {
  return std::unique_lock<llvm::sys::RWMutex>(CtxMutex);
}

std::unique_ptr<DWARFContext> DwCtx;

std::unique_ptr<Triple> TheTriple;

std::shared_ptr<orc::SymbolStringPool> SSP;

// Non-owning pointer to the target description.
const Target *TheTarget;

std::string TripleName;

std::unique_ptr<MCCodeEmitter> MCE;

std::unique_ptr<MCObjectFileInfo> MOFI;

std::unique_ptr<const MCAsmInfo> AsmInfo;

std::unique_ptr<const MCInstrInfo> MII;

std::unique_ptr<const MCSubtargetInfo> STI;

std::unique_ptr<MCInstPrinter> InstPrinter;

std::unique_ptr<const MCInstrAnalysis> MIA;

// MCPlus target builder; installed through initializeTarget().
std::unique_ptr<MCPlusBuilder> MIB;

std::unique_ptr<const MCRegisterInfo> MRI;

std::unique_ptr<MCDisassembler> DisAsm;

/// Symbolic disassembler.
std::unique_ptr<MCDisassembler> SymbolicDisAsm;

std::unique_ptr<MCAsmBackend> MAB;

/// Allows BOLT to print to log whenever it is necessary (with or without
/// const references)
mutable JournalingStreams Logger;

/// Indicates if the binary is Linux kernel.
bool IsLinuxKernel{false};

/// Indicates if relocations are available for usage.
bool HasRelocations{false};

/// Indicates if the binary is stripped
bool IsStripped{false};

/// Indicates if the binary contains split functions.
bool HasSplitFunctions{false};

/// Indicates if the function ordering of the binary is finalized.
bool HasFinalizedFunctionOrder{false};

/// Indicates if a separate .text.warm section is needed that contains
/// function fragments with
/// FunctionFragment::getFragmentNum() == FragmentNum::warm()
bool HasWarmSection{false};

/// Is the binary always loaded at a fixed address. Shared objects and
/// position-independent executables (PIEs) are examples of binaries that
/// will have HasFixedLoadAddress set to false.
// Note that this flag defaults to true, unlike the other flags here.
bool HasFixedLoadAddress{true};

/// True if the binary has no dynamic dependencies, i.e., if it was statically
/// linked.
bool IsStaticExecutable{false};

/// Set to true if the binary contains PT_INTERP header.
bool HasInterpHeader{false};

/// Indicates if any of local symbols used for functions or data objects
/// have an origin file name available.
// Read and written through hasSymbolsWithFileName() and
// setHasSymbolsWithFileName().
bool HasSymbolsWithFileName{false};

/// Does the binary have BAT section.
// NOTE(review): "BAT" presumably stands for BOLT Address Translation --
// confirm.
bool HasBATSection{false};

/// Sum of execution count of all functions
uint64_t SumExecutionCount{0};

/// Number of functions with profile information
uint64_t NumProfiledFuncs{0};

/// Number of functions with stale profile information
uint64_t NumStaleProfileFuncs{0};

/// Number of objects in profile whose profile was ignored.
// Read and written through getNumUnusedProfiledObjects() and
// setNumUnusedProfiledObjects().
uint64_t NumUnusedProfiledObjects{0};

/// Total hotness score according to profiling data for this binary.
uint64_t TotalScore{0};

/// Binary-wide aggregated stats.
// All counters start at zero. The struct is instantiated once, as the `Stats`
// member declared right after its definition.
struct BinaryStats {
  /// Stats for stale profile matching:
  ///   the total number of basic blocks in the profile
  uint32_t NumStaleBlocks{0};
  ///   the number of exactly matched basic blocks
  uint32_t NumExactMatchedBlocks{0};
  ///   the number of loosely matched basic blocks
  uint32_t NumLooseMatchedBlocks{0};
  ///   the number of exactly pseudo probe matched basic blocks
  uint32_t NumPseudoProbeExactMatchedBlocks{0};
  ///   the number of loosely pseudo probe matched basic blocks
  uint32_t NumPseudoProbeLooseMatchedBlocks{0};
  ///   the number of call matched basic blocks
  uint32_t NumCallMatchedBlocks{0};
  ///   the total count of samples in the profile
  uint64_t StaleSampleCount{0};
  ///   the count of exactly matched samples
  uint64_t ExactMatchedSampleCount{0};
  ///   the count of loosely matched samples
  uint64_t LooseMatchedSampleCount{0};
  ///   the count of exactly pseudo probe matched samples
  uint64_t PseudoProbeExactMatchedSampleCount{0};
  ///   the count of loosely pseudo probe matched samples
  uint64_t PseudoProbeLooseMatchedSampleCount{0};
  ///   the count of call matched samples
  uint64_t CallMatchedSampleCount{0};
  /// the number of stale functions that have matching number of blocks in
  /// the profile
  uint64_t NumStaleFuncsWithEqualBlockCount{0};
  /// the number of blocks that have matching size but a differing hash
  uint64_t NumStaleBlocksWithEqualIcount{0};
} Stats;

// Original binary execution count stats.
DynoStats InitialDynoStats;

// Address of the first allocated segment.
// Starts at the largest uint64_t value. Presumably lowered as allocated
// segments are discovered -- confirm against the code that writes it.
uint64_t FirstAllocAddress{std::numeric_limits<uint64_t>::max()};

/// Track next available address for new allocatable sections. RewriteInstance
/// sets this prior to running BOLT passes, so layout passes are aware of the
/// final addresses functions will have.
uint64_t LayoutStartAddress{0};

/// Old .text info.
// Address, offset and size recorded for the old .text section. The offset is
// presumably a file offset -- confirm.
uint64_t OldTextSectionAddress{0};
uint64_t OldTextSectionOffset{0};
uint64_t OldTextSectionSize{0};

/// Area in the input binary reserved for BOLT.
AddressRange BOLTReserved;

/// Address of the code/function that is executed before any other code in
/// the binary.
// Empty until a value is assigned.
std::optional<uint64_t> StartFunctionAddress;

/// Address of the code/function that is going to be executed right before
/// the execution of the binary is completed.
// Empty until a value is assigned.
std::optional<uint64_t> FiniFunctionAddress;

/// DT_FINI.
std::optional<uint64_t> FiniAddress;

/// DT_FINI_ARRAY. Only used when DT_FINI is not set.
std::optional<uint64_t> FiniArrayAddress;

/// DT_FINI_ARRAYSZ. Only used when DT_FINI is not set.
std::optional<uint64_t> FiniArraySize;

/// Page alignment used for code layout.
// Defaults to HugePageSize.
uint64_t PageAlign{HugePageSize};

/// True if the binary requires immediate relocation processing.
bool RequiresZNow{false};

/// List of functions that always trap.
std::vector<const BinaryFunction *> TrappedFunctions;

/// List of external addresses in the code that are not a function start
/// and are referenced from BinaryFunction.
// Each entry pairs a BinaryFunction pointer with an address. Presumably the
// function is the referencing one -- confirm against the code that fills it.
std::list<std::pair<BinaryFunction *, uint64_t>> InterproceduralReferences;

/// DWARF encoding. Available encoding types defined in BinaryFormat/Dwarf.h
/// enum Constants, e.g. DW_EH_PE_omit.
unsigned LSDAEncoding = dwarf::DW_EH_PE_omit;

/// Construct a context from already-created MC-layer and target objects.
/// Ownership of every std::unique_ptr argument is transferred to the context;
/// \p TheTarget is a non-owning pointer. Each parameter has a data member of
/// the same name in this class.
BinaryContext(std::unique_ptr<MCContext> Ctx,
              std::unique_ptr<DWARFContext> DwCtx,
              std::unique_ptr<Triple> TheTriple,
              std::shared_ptr<orc::SymbolStringPool> SSP,
              const Target *TheTarget, std::string TripleName,
              std::unique_ptr<MCCodeEmitter> MCE,
              std::unique_ptr<MCObjectFileInfo> MOFI,
              std::unique_ptr<const MCAsmInfo> AsmInfo,
              std::unique_ptr<const MCInstrInfo> MII,
              std::unique_ptr<const MCSubtargetInfo> STI,
              std::unique_ptr<MCInstPrinter> InstPrinter,
              std::unique_ptr<const MCInstrAnalysis> MIA,
              std::unique_ptr<MCPlusBuilder> MIB,
              std::unique_ptr<const MCRegisterInfo> MRI,
              std::unique_ptr<MCDisassembler> DisAsm,
              JournalingStreams Logger);

~BinaryContext();

/// Create an object writer that emits to \p OS; the caller owns the result.
std::unique_ptr<MCObjectWriter> createObjectWriter(raw_pwrite_stream &OS);

// Object-format and architecture queries; all of them inspect TheTriple.
bool isELF() const { return TheTriple->isOSBinFormatELF(); }

bool isMachO() const { return TheTriple->isOSBinFormatMachO(); }

bool isAArch64() const {
  return TheTriple->getArch() == llvm::Triple::aarch64;
}

// True for both 32-bit x86 and x86_64.
bool isX86() const {
  return TheTriple->getArch() == llvm::Triple::x86 ||
         TheTriple->getArch() == llvm::Triple::x86_64;
}

// Only the 64-bit RISC-V architecture is recognized.
bool isRISCV() const { return TheTriple->getArch() == llvm::Triple::riscv64; }

// AArch64-specific functions to check if symbol is used to delimit
// code/data in .text. Code is marked by $x, data by $d.
MarkerSymType getMarkerType(const SymbolRef &Symbol) const;
bool isMarker(const SymbolRef &Symbol) const;

/// Iterate over all BinaryData.
iterator_range<binary_data_const_iterator> getBinaryData() const {
  return make_range(BinaryDataMap.begin(), BinaryDataMap.end());
}

/// Iterate over all BinaryData.
858 iterator_range<binary_data_iterator> getBinaryData() { 859 return make_range(BinaryDataMap.begin(), BinaryDataMap.end()); 860 } 861 862 /// Iterate over all BinaryData associated with the given \p Section. 863 iterator_range<FilteredBinaryDataConstIterator> 864 getBinaryDataForSection(const BinarySection &Section) const { 865 auto Begin = BinaryDataMap.lower_bound(Section.getAddress()); 866 if (Begin != BinaryDataMap.begin()) 867 --Begin; 868 auto End = BinaryDataMap.upper_bound(Section.getEndAddress()); 869 auto pred = [&Section](const binary_data_const_iterator &Itr) -> bool { 870 return Itr->second->getSection() == Section; 871 }; 872 return make_range(FilteredBinaryDataConstIterator(pred, Begin, End), 873 FilteredBinaryDataConstIterator(pred, End, End)); 874 } 875 876 /// Iterate over all BinaryData associated with the given \p Section. 877 iterator_range<FilteredBinaryDataIterator> 878 getBinaryDataForSection(BinarySection &Section) { 879 auto Begin = BinaryDataMap.lower_bound(Section.getAddress()); 880 if (Begin != BinaryDataMap.begin()) 881 --Begin; 882 auto End = BinaryDataMap.upper_bound(Section.getEndAddress()); 883 auto pred = [&Section](const binary_data_iterator &Itr) -> bool { 884 return Itr->second->getSection() == Section; 885 }; 886 return make_range(FilteredBinaryDataIterator(pred, Begin, End), 887 FilteredBinaryDataIterator(pred, End, End)); 888 } 889 890 /// Iterate over all the sub-symbols of /p BD (if any). 891 iterator_range<binary_data_iterator> getSubBinaryData(BinaryData *BD); 892 893 /// Clear the global symbol address -> name(s) map. 894 void clearBinaryData() { 895 GlobalSymbols.clear(); 896 for (auto &Entry : BinaryDataMap) 897 delete Entry.second; 898 BinaryDataMap.clear(); 899 } 900 901 /// Process \p Address reference from code in function \BF. 902 /// \p IsPCRel indicates if the reference is PC-relative. 903 /// Return <Symbol, Addend> pair corresponding to the \p Address. 
904 std::pair<const MCSymbol *, uint64_t> 905 handleAddressRef(uint64_t Address, BinaryFunction &BF, bool IsPCRel); 906 907 /// Analyze memory contents at the given \p Address and return the type of 908 /// memory contents (such as a possible jump table). 909 MemoryContentsType analyzeMemoryAt(uint64_t Address, BinaryFunction &BF); 910 911 /// Return a value of the global \p Symbol or an error if the value 912 /// was not set. 913 ErrorOr<uint64_t> getSymbolValue(const MCSymbol &Symbol) const { 914 const BinaryData *BD = getBinaryDataByName(Symbol.getName()); 915 if (!BD) 916 return std::make_error_code(std::errc::bad_address); 917 return BD->getAddress(); 918 } 919 920 /// Return a global symbol registered at a given \p Address and \p Size. 921 /// If no symbol exists, create one with unique name using \p Prefix. 922 /// If there are multiple symbols registered at the \p Address, then 923 /// return the first one. 924 MCSymbol *getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix, 925 uint64_t Size = 0, uint16_t Alignment = 0, 926 unsigned Flags = 0); 927 928 /// Create a global symbol without registering an address. 929 MCSymbol *getOrCreateUndefinedGlobalSymbol(StringRef Name); 930 931 /// Register a symbol with \p Name at a given \p Address using \p Size, 932 /// \p Alignment, and \p Flags. See llvm::SymbolRef::Flags for the definition 933 /// of \p Flags. 934 MCSymbol *registerNameAtAddress(StringRef Name, uint64_t Address, 935 uint64_t Size, uint16_t Alignment, 936 unsigned Flags = 0); 937 938 /// Return BinaryData registered at a given \p Address or nullptr if no 939 /// global symbol was registered at the location. 940 const BinaryData *getBinaryDataAtAddress(uint64_t Address) const { 941 auto NI = BinaryDataMap.find(Address); 942 return NI != BinaryDataMap.end() ? NI->second : nullptr; 943 } 944 945 BinaryData *getBinaryDataAtAddress(uint64_t Address) { 946 auto NI = BinaryDataMap.find(Address); 947 return NI != BinaryDataMap.end() ? 
NI->second : nullptr; 948 } 949 950 /// Look up the symbol entry that contains the given \p Address (based on 951 /// the start address and size for each symbol). Returns a pointer to 952 /// the BinaryData for that symbol. If no data is found, nullptr is returned. 953 const BinaryData *getBinaryDataContainingAddress(uint64_t Address) const { 954 return getBinaryDataContainingAddressImpl(Address); 955 } 956 957 BinaryData *getBinaryDataContainingAddress(uint64_t Address) { 958 return const_cast<BinaryData *>( 959 getBinaryDataContainingAddressImpl(Address)); 960 } 961 962 /// Return BinaryData for the given \p Name or nullptr if no 963 /// global symbol with that name exists. 964 const BinaryData *getBinaryDataByName(StringRef Name) const { 965 return GlobalSymbols.lookup(Name); 966 } 967 968 BinaryData *getBinaryDataByName(StringRef Name) { 969 return GlobalSymbols.lookup(Name); 970 } 971 972 /// Return registered PLT entry BinaryData with the given \p Name 973 /// or nullptr if no global PLT symbol with that name exists. 974 const BinaryData *getPLTBinaryDataByName(StringRef Name) const { 975 if (const BinaryData *Data = getBinaryDataByName(Name.str() + "@PLT")) 976 return Data; 977 978 // The symbol name might contain versioning information e.g 979 // memcpy@@GLIBC_2.17. Remove it and try to locate binary data 980 // without it. 981 size_t At = Name.find("@"); 982 if (At != std::string::npos) 983 return getBinaryDataByName(Name.str().substr(0, At) + "@PLT"); 984 985 return nullptr; 986 } 987 988 /// Retrieves a reference to ELF's _GLOBAL_OFFSET_TABLE_ symbol, which points 989 /// at GOT, or null if it is not present in the input binary symtab. 
990 BinaryData *getGOTSymbol(); 991 992 /// Checks if symbol name refers to ELF's _GLOBAL_OFFSET_TABLE_ symbol 993 bool isGOTSymbol(StringRef SymName) const { 994 return SymName == "_GLOBAL_OFFSET_TABLE_"; 995 } 996 997 /// Return true if \p SymbolName was generated internally and was not present 998 /// in the input binary. 999 bool isInternalSymbolName(const StringRef Name) { 1000 return Name.starts_with("SYMBOLat") || Name.starts_with("DATAat") || 1001 Name.starts_with("HOLEat"); 1002 } 1003 1004 MCSymbol *getHotTextStartSymbol() const { 1005 return Ctx->getOrCreateSymbol("__hot_start"); 1006 } 1007 1008 MCSymbol *getHotTextEndSymbol() const { 1009 return Ctx->getOrCreateSymbol("__hot_end"); 1010 } 1011 1012 MCSection *getTextSection() const { return MOFI->getTextSection(); } 1013 1014 /// Return code section with a given name. 1015 MCSection *getCodeSection(StringRef SectionName) const { 1016 if (isELF()) 1017 return Ctx->getELFSection(SectionName, ELF::SHT_PROGBITS, 1018 ELF::SHF_EXECINSTR | ELF::SHF_ALLOC); 1019 else 1020 return Ctx->getMachOSection("__TEXT", SectionName, 1021 MachO::S_ATTR_PURE_INSTRUCTIONS, 1022 SectionKind::getText()); 1023 } 1024 1025 /// Return data section with a given name. 
MCSection *getDataSection(StringRef SectionName) const {
  // Always requests an ELF section (SHT_PROGBITS, SHF_ALLOC); there is no
  // Mach-O branch here.
  return Ctx->getELFSection(SectionName, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
}

/// \name Pre-assigned Section Names
/// @{

/// Name of the main code section.
const char *getMainCodeSectionName() const { return ".text"; }

/// Name of the warm code section.
const char *getWarmCodeSectionName() const { return ".text.warm"; }

/// Name of the cold code section.
const char *getColdCodeSectionName() const { return ".text.cold"; }

/// Name of the hot text mover section.
const char *getHotTextMoverSectionName() const { return ".text.mover"; }

/// Name of the section for injected code.
const char *getInjectedCodeSectionName() const { return ".text.injected"; }

/// Name of the section for cold injected code.
const char *getInjectedColdCodeSectionName() const {
  return ".text.injected.cold";
}

/// Return the unique ".gdb_index" section, or an error if there is not
/// exactly one section with that name.
ErrorOr<BinarySection &> getGdbIndexSection() const {
  return getUniqueSectionByName(".gdb_index");
}

/// Return the unique ".debug_names" section, or an error if there is not
/// exactly one section with that name.
ErrorOr<BinarySection &> getDebugNamesSection() const {
  return getUniqueSectionByName(".debug_names");
}

/// @}

/// Register \p TargetFunction as a fragment of \p Function if checks pass:
/// - if \p TargetFunction name matches \p Function name with a suffix:
///   fragment_name == parent_name.cold(.\d+)?
/// True if the Function is registered, false if the check failed.
bool registerFragment(BinaryFunction &TargetFunction,
                      BinaryFunction &Function);

/// Return true if two functions belong to the same "family": are fragments
/// of one another, or fragments of the same parent, or transitively fragment-
/// related.
1067 bool areRelatedFragments(const BinaryFunction *LHS, 1068 const BinaryFunction *RHS) const { 1069 return FragmentClasses.isEquivalent(LHS, RHS); 1070 } 1071 1072 /// Add interprocedural reference for \p Function to \p Address 1073 void addInterproceduralReference(BinaryFunction *Function, uint64_t Address) { 1074 InterproceduralReferences.push_back({Function, Address}); 1075 } 1076 1077 /// Used to fix the target of linker-generated AArch64 adrp + add 1078 /// sequence with no relocation info. 1079 void addAdrpAddRelocAArch64(BinaryFunction &BF, MCInst &LoadLowBits, 1080 MCInst &LoadHiBits, uint64_t Target); 1081 1082 /// Return true if AARch64 veneer was successfully matched at a given 1083 /// \p Address and register veneer binary function if \p MatchOnly 1084 /// argument is false. 1085 bool handleAArch64Veneer(uint64_t Address, bool MatchOnly = false); 1086 1087 /// Resolve inter-procedural dependencies from 1088 void processInterproceduralReferences(); 1089 1090 /// Skip functions with all parent and child fragments transitively. 1091 void skipMarkedFragments(); 1092 1093 /// Perform any necessary post processing on the symbol table after 1094 /// function disassembly is complete. This processing fixes top 1095 /// level data holes and makes sure the symbol table is valid. 1096 /// It also assigns all memory profiling info to the appropriate 1097 /// BinaryData objects. 1098 void postProcessSymbolTable(); 1099 1100 /// Set the size of the global symbol located at \p Address. Return 1101 /// false if no symbol exists, true otherwise. 1102 bool setBinaryDataSize(uint64_t Address, uint64_t Size); 1103 1104 /// Print the global symbol table. 1105 void printGlobalSymbols(raw_ostream &OS) const; 1106 1107 /// Register information about the given \p Section so we can look up 1108 /// sections by address. 1109 BinarySection ®isterSection(SectionRef Section); 1110 1111 /// Register a copy of /p OriginalSection under a different name. 
1112 BinarySection ®isterSection(const Twine &SectionName, 1113 const BinarySection &OriginalSection); 1114 1115 /// Register or update the information for the section with the given 1116 /// /p Name. If the section already exists, the information in the 1117 /// section will be updated with the new data. 1118 BinarySection ®isterOrUpdateSection(const Twine &Name, unsigned ELFType, 1119 unsigned ELFFlags, 1120 uint8_t *Data = nullptr, 1121 uint64_t Size = 0, 1122 unsigned Alignment = 1); 1123 1124 /// Register the information for the note (non-allocatable) section 1125 /// with the given /p Name. If the section already exists, the 1126 /// information in the section will be updated with the new data. 1127 BinarySection & 1128 registerOrUpdateNoteSection(const Twine &Name, uint8_t *Data = nullptr, 1129 uint64_t Size = 0, unsigned Alignment = 1, 1130 bool IsReadOnly = true, 1131 unsigned ELFType = ELF::SHT_PROGBITS) { 1132 return registerOrUpdateSection(Name, ELFType, 1133 BinarySection::getFlags(IsReadOnly), Data, 1134 Size, Alignment); 1135 } 1136 1137 /// Remove sections that were preregistered but never used. 1138 void deregisterUnusedSections(); 1139 1140 /// Remove the given /p Section from the set of all sections. Return 1141 /// true if the section was removed (and deleted), otherwise false. 1142 bool deregisterSection(BinarySection &Section); 1143 1144 /// Re-register \p Section under the \p NewName. 1145 void renameSection(BinarySection &Section, const Twine &NewName); 1146 1147 /// Iterate over all registered sections. 1148 iterator_range<FilteredSectionIterator> sections() { 1149 auto notNull = [](const SectionIterator &Itr) { return (bool)*Itr; }; 1150 return make_range( 1151 FilteredSectionIterator(notNull, Sections.begin(), Sections.end()), 1152 FilteredSectionIterator(notNull, Sections.end(), Sections.end())); 1153 } 1154 1155 /// Iterate over all registered sections. 
1156 iterator_range<FilteredSectionConstIterator> sections() const { 1157 return const_cast<BinaryContext *>(this)->sections(); 1158 } 1159 1160 /// Iterate over all registered allocatable sections. 1161 iterator_range<FilteredSectionIterator> allocatableSections() { 1162 auto isAllocatable = [](const SectionIterator &Itr) { 1163 return *Itr && Itr->isAllocatable(); 1164 }; 1165 return make_range( 1166 FilteredSectionIterator(isAllocatable, Sections.begin(), 1167 Sections.end()), 1168 FilteredSectionIterator(isAllocatable, Sections.end(), Sections.end())); 1169 } 1170 1171 /// Iterate over all registered code sections. 1172 iterator_range<FilteredSectionIterator> textSections() { 1173 auto isText = [](const SectionIterator &Itr) { 1174 return *Itr && Itr->isAllocatable() && Itr->isText(); 1175 }; 1176 return make_range( 1177 FilteredSectionIterator(isText, Sections.begin(), Sections.end()), 1178 FilteredSectionIterator(isText, Sections.end(), Sections.end())); 1179 } 1180 1181 /// Iterate over all registered allocatable sections. 1182 iterator_range<FilteredSectionConstIterator> allocatableSections() const { 1183 return const_cast<BinaryContext *>(this)->allocatableSections(); 1184 } 1185 1186 /// Iterate over all registered non-allocatable sections. 1187 iterator_range<FilteredSectionIterator> nonAllocatableSections() { 1188 auto notAllocated = [](const SectionIterator &Itr) { 1189 return *Itr && !Itr->isAllocatable(); 1190 }; 1191 return make_range( 1192 FilteredSectionIterator(notAllocated, Sections.begin(), Sections.end()), 1193 FilteredSectionIterator(notAllocated, Sections.end(), Sections.end())); 1194 } 1195 1196 /// Iterate over all registered non-allocatable sections. 1197 iterator_range<FilteredSectionConstIterator> nonAllocatableSections() const { 1198 return const_cast<BinaryContext *>(this)->nonAllocatableSections(); 1199 } 1200 1201 /// Iterate over all allocatable relocation sections. 
1202 iterator_range<FilteredSectionIterator> allocatableRelaSections() { 1203 auto isAllocatableRela = [](const SectionIterator &Itr) { 1204 return *Itr && Itr->isAllocatable() && Itr->isRela(); 1205 }; 1206 return make_range(FilteredSectionIterator(isAllocatableRela, 1207 Sections.begin(), Sections.end()), 1208 FilteredSectionIterator(isAllocatableRela, Sections.end(), 1209 Sections.end())); 1210 } 1211 1212 /// Return base address for the shared object or PIE based on the segment 1213 /// mapping information. \p MMapAddress is an address where one of the 1214 /// segments was mapped. \p FileOffset is the offset in the file of the 1215 /// mapping. Note that \p FileOffset should be page-aligned and could be 1216 /// different from the file offset of the segment which could be unaligned. 1217 /// If no segment is found that matches \p FileOffset, return std::nullopt. 1218 std::optional<uint64_t> getBaseAddressForMapping(uint64_t MMapAddress, 1219 uint64_t FileOffset) const; 1220 1221 /// Check if the address belongs to this binary's static allocation space. 1222 bool containsAddress(uint64_t Address) const { 1223 return Address >= FirstAllocAddress && Address < LayoutStartAddress; 1224 } 1225 1226 /// Return section name containing the given \p Address. 1227 ErrorOr<StringRef> getSectionNameForAddress(uint64_t Address) const; 1228 1229 /// Print all sections. 1230 void printSections(raw_ostream &OS) const; 1231 1232 /// Return largest section containing the given \p Address. These 1233 /// functions only work for allocatable sections, i.e. ones with non-zero 1234 /// addresses. 1235 ErrorOr<BinarySection &> getSectionForAddress(uint64_t Address); 1236 ErrorOr<const BinarySection &> getSectionForAddress(uint64_t Address) const { 1237 return const_cast<BinaryContext *>(this)->getSectionForAddress(Address); 1238 } 1239 1240 /// Return internal section representation for a section in a file. 
1241 BinarySection *getSectionForSectionRef(SectionRef Section) const { 1242 return SectionRefToBinarySection.lookup(Section); 1243 } 1244 1245 /// Return section(s) associated with given \p Name. 1246 iterator_range<NameToSectionMapType::iterator> 1247 getSectionByName(const Twine &Name) { 1248 return make_range(NameToSection.equal_range(Name.str())); 1249 } 1250 iterator_range<NameToSectionMapType::const_iterator> 1251 getSectionByName(const Twine &Name) const { 1252 return make_range(NameToSection.equal_range(Name.str())); 1253 } 1254 1255 /// Return the unique section associated with given \p Name. 1256 /// If there is more than one section with the same name, return an error 1257 /// object. 1258 ErrorOr<BinarySection &> 1259 getUniqueSectionByName(const Twine &SectionName) const { 1260 auto Sections = getSectionByName(SectionName); 1261 if (Sections.begin() != Sections.end() && 1262 std::next(Sections.begin()) == Sections.end()) 1263 return *Sections.begin()->second; 1264 return std::make_error_code(std::errc::bad_address); 1265 } 1266 1267 /// Return an unsigned value of \p Size stored at \p Address. The address has 1268 /// to be a valid statically allocated address for the binary. 1269 ErrorOr<uint64_t> getUnsignedValueAtAddress(uint64_t Address, 1270 size_t Size) const; 1271 1272 /// Return a signed value of \p Size stored at \p Address. The address has 1273 /// to be a valid statically allocated address for the binary. 1274 ErrorOr<int64_t> getSignedValueAtAddress(uint64_t Address, size_t Size) const; 1275 1276 /// Special case of getUnsignedValueAtAddress() that uses a pointer size. 1277 ErrorOr<uint64_t> getPointerAtAddress(uint64_t Address) const { 1278 return getUnsignedValueAtAddress(Address, AsmInfo->getCodePointerSize()); 1279 } 1280 1281 /// Replaces all references to \p ChildBF with \p ParentBF. \p ChildBF is then 1282 /// removed from the list of functions \p BFs. The profile data of \p ChildBF 1283 /// is merged into that of \p ParentBF. 
This function is thread safe. 1284 void foldFunction(BinaryFunction &ChildBF, BinaryFunction &ParentBF); 1285 1286 /// Add a Section relocation at a given \p Address. 1287 void addRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t Type, 1288 uint64_t Addend = 0, uint64_t Value = 0); 1289 1290 /// Return a relocation registered at a given \p Address, or nullptr if there 1291 /// is no relocation at such address. 1292 const Relocation *getRelocationAt(uint64_t Address) const; 1293 1294 /// Register a presence of PC-relative relocation at the given \p Address. 1295 void addPCRelativeDataRelocation(uint64_t Address) { 1296 DataPCRelocations.emplace(Address); 1297 } 1298 1299 /// Register dynamic relocation at \p Address. 1300 void addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t Type, 1301 uint64_t Addend, uint64_t Value = 0); 1302 1303 /// Return a dynamic relocation registered at a given \p Address, or nullptr 1304 /// if there is no dynamic relocation at such address. 1305 const Relocation *getDynamicRelocationAt(uint64_t Address) const; 1306 1307 /// Remove registered relocation at a given \p Address. 1308 bool removeRelocationAt(uint64_t Address); 1309 1310 /// This function makes sure that symbols referenced by ambiguous relocations 1311 /// are marked as immovable. For now, if a section relocation points at the 1312 /// boundary between two symbols then those symbols are marked as immovable. 1313 void markAmbiguousRelocations(BinaryData &BD, const uint64_t Address); 1314 1315 /// Return BinaryFunction corresponding to \p Symbol. If \p EntryDesc is not 1316 /// nullptr, set it to entry descriminator corresponding to \p Symbol 1317 /// (0 for single-entry functions). This function is thread safe. 
1318 BinaryFunction *getFunctionForSymbol(const MCSymbol *Symbol, 1319 uint64_t *EntryDesc = nullptr); 1320 1321 const BinaryFunction * 1322 getFunctionForSymbol(const MCSymbol *Symbol, 1323 uint64_t *EntryDesc = nullptr) const { 1324 return const_cast<BinaryContext *>(this)->getFunctionForSymbol(Symbol, 1325 EntryDesc); 1326 } 1327 1328 /// Associate the symbol \p Sym with the function \p BF for lookups with 1329 /// getFunctionForSymbol(). 1330 void setSymbolToFunctionMap(const MCSymbol *Sym, BinaryFunction *BF) { 1331 SymbolToFunctionMap[Sym] = BF; 1332 } 1333 1334 /// Populate some internal data structures with debug info. 1335 void preprocessDebugInfo(); 1336 1337 /// Add a filename entry from SrcCUID to DestCUID. 1338 unsigned addDebugFilenameToUnit(const uint32_t DestCUID, 1339 const uint32_t SrcCUID, unsigned FileIndex); 1340 1341 /// Return functions in output layout order 1342 std::vector<BinaryFunction *> getSortedFunctions(); 1343 1344 /// Do the best effort to calculate the size of the function by emitting 1345 /// its code, and relaxing branch instructions. By default, branch 1346 /// instructions are updated to match the layout. Pass \p FixBranches set to 1347 /// false if the branches are known to be up to date with the code layout. 1348 /// 1349 /// Return the pair where the first size is for the main part, and the second 1350 /// size is for the cold one. 1351 /// Modify BinaryBasicBlock::OutputAddressRange for each basic block in the 1352 /// function in place so that BinaryBasicBlock::getOutputSize() gives the 1353 /// emitted size of the basic block. 1354 std::pair<size_t, size_t> calculateEmittedSize(BinaryFunction &BF, 1355 bool FixBranches = true); 1356 1357 /// Calculate the size of the instruction \p Inst optionally using a 1358 /// user-supplied emitter for lock-free multi-thread work. MCCodeEmitter is 1359 /// not thread safe and each thread should operate with its own copy of it. 
1360 uint64_t 1361 computeInstructionSize(const MCInst &Inst, 1362 const MCCodeEmitter *Emitter = nullptr) const { 1363 if (std::optional<uint32_t> Size = MIB->getSize(Inst)) 1364 return *Size; 1365 1366 if (MIB->isPseudo(Inst)) 1367 return 0; 1368 1369 if (std::optional<uint32_t> Size = MIB->getInstructionSize(Inst)) 1370 return *Size; 1371 1372 if (!Emitter) 1373 Emitter = this->MCE.get(); 1374 SmallString<256> Code; 1375 SmallVector<MCFixup, 4> Fixups; 1376 Emitter->encodeInstruction(Inst, Code, Fixups, *STI); 1377 return Code.size(); 1378 } 1379 1380 /// Compute the native code size for a range of instructions. 1381 /// Note: this can be imprecise wrt the final binary since happening prior to 1382 /// relaxation, as well as wrt the original binary because of opcode 1383 /// shortening.MCCodeEmitter is not thread safe and each thread should operate 1384 /// with its own copy of it. 1385 template <typename Itr> 1386 uint64_t computeCodeSize(Itr Beg, Itr End, 1387 const MCCodeEmitter *Emitter = nullptr) const { 1388 uint64_t Size = 0; 1389 while (Beg != End) { 1390 if (!MIB->isPseudo(*Beg)) 1391 Size += computeInstructionSize(*Beg, Emitter); 1392 ++Beg; 1393 } 1394 return Size; 1395 } 1396 1397 /// Validate that disassembling the \p Sequence of bytes into an instruction 1398 /// and assembling the instruction again, results in a byte sequence identical 1399 /// to the original one. 1400 bool validateInstructionEncoding(ArrayRef<uint8_t> Sequence) const; 1401 1402 /// Return a function execution count threshold for determining whether 1403 /// the function is 'hot'. Consider it hot if count is above the average exec 1404 /// count of profiled functions. 1405 uint64_t getHotThreshold() const; 1406 1407 /// Return true if instruction \p Inst requires an offset for further 1408 /// processing (e.g. assigning a profile). 
1409 bool keepOffsetForInstruction(const MCInst &Inst) const { 1410 if (MIB->isCall(Inst) || MIB->isBranch(Inst) || MIB->isReturn(Inst) || 1411 MIB->isPrefix(Inst) || MIB->isIndirectBranch(Inst)) { 1412 return true; 1413 } 1414 return false; 1415 } 1416 1417 /// Return true if the function should be emitted to the output file. 1418 bool shouldEmit(const BinaryFunction &Function) const; 1419 1420 /// Dump the assembly representation of MCInst to debug output. 1421 void dump(const MCInst &Inst) const; 1422 1423 /// Print the string name for a CFI operation. 1424 static void printCFI(raw_ostream &OS, const MCCFIInstruction &Inst); 1425 1426 /// Print a single MCInst in native format. If Function is non-null, 1427 /// the instruction will be annotated with CFI and possibly DWARF line table 1428 /// info. 1429 /// If printMCInst is true, the instruction is also printed in the 1430 /// architecture independent format. 1431 void printInstruction(raw_ostream &OS, const MCInst &Instruction, 1432 uint64_t Offset = 0, 1433 const BinaryFunction *Function = nullptr, 1434 bool PrintMCInst = false, bool PrintMemData = false, 1435 bool PrintRelocations = false, 1436 StringRef Endl = "\n") const; 1437 1438 /// Print a range of instructions. 1439 template <typename Itr> 1440 uint64_t 1441 printInstructions(raw_ostream &OS, Itr Begin, Itr End, uint64_t Offset = 0, 1442 const BinaryFunction *Function = nullptr, 1443 bool PrintMCInst = false, bool PrintMemData = false, 1444 bool PrintRelocations = false, 1445 StringRef Endl = "\n") const { 1446 while (Begin != End) { 1447 printInstruction(OS, *Begin, Offset, Function, PrintMCInst, PrintMemData, 1448 PrintRelocations, Endl); 1449 Offset += computeCodeSize(Begin, Begin + 1); 1450 ++Begin; 1451 } 1452 return Offset; 1453 } 1454 1455 /// Log BOLT errors to journaling streams and quit process with non-zero error 1456 /// code 1 if error is fatal. 
1457 void logBOLTErrorsAndQuitOnFatal(Error E); 1458 1459 std::string generateBugReportMessage(StringRef Message, 1460 const BinaryFunction &Function) const; 1461 1462 struct IndependentCodeEmitter { 1463 std::unique_ptr<MCObjectFileInfo> LocalMOFI; 1464 std::unique_ptr<MCContext> LocalCtx; 1465 std::unique_ptr<MCCodeEmitter> MCE; 1466 }; 1467 1468 /// Encapsulates an independent MCCodeEmitter that doesn't share resources 1469 /// with the main one available through BinaryContext::MCE, managed by 1470 /// BinaryContext. 1471 /// This is intended to create a lock-free environment for an auxiliary thread 1472 /// that needs to perform work with an MCCodeEmitter that can be transient or 1473 /// won't be used in the main code emitter. 1474 IndependentCodeEmitter createIndependentMCCodeEmitter() const { 1475 IndependentCodeEmitter MCEInstance; 1476 MCEInstance.LocalCtx.reset( 1477 new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get())); 1478 MCEInstance.LocalMOFI.reset( 1479 TheTarget->createMCObjectFileInfo(*MCEInstance.LocalCtx.get(), 1480 /*PIC=*/!HasFixedLoadAddress)); 1481 MCEInstance.LocalCtx->setObjectFileInfo(MCEInstance.LocalMOFI.get()); 1482 MCEInstance.MCE.reset( 1483 TheTarget->createMCCodeEmitter(*MII, *MCEInstance.LocalCtx)); 1484 return MCEInstance; 1485 } 1486 1487 /// Creating MCStreamer instance. 
1488 std::unique_ptr<MCStreamer> 1489 createStreamer(llvm::raw_pwrite_stream &OS) const { 1490 MCCodeEmitter *MCE = TheTarget->createMCCodeEmitter(*MII, *Ctx); 1491 MCAsmBackend *MAB = 1492 TheTarget->createMCAsmBackend(*STI, *MRI, MCTargetOptions()); 1493 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS); 1494 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer( 1495 *TheTriple, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), std::move(OW), 1496 std::unique_ptr<MCCodeEmitter>(MCE), *STI)); 1497 return Streamer; 1498 } 1499 1500 void setIOAddressMap(AddressMap Map) { IOAddressMap = std::move(Map); } 1501 const AddressMap &getIOAddressMap() const { 1502 assert(IOAddressMap && "Address map not set yet"); 1503 return *IOAddressMap; 1504 } 1505 1506 raw_ostream &outs() const { return Logger.Out; } 1507 1508 raw_ostream &errs() const { return Logger.Err; } 1509 }; 1510 1511 template <typename T, typename = std::enable_if_t<sizeof(T) == 1>> 1512 inline raw_ostream &operator<<(raw_ostream &OS, const ArrayRef<T> &ByteArray) { 1513 const char *Sep = ""; 1514 for (const auto Byte : ByteArray) { 1515 OS << Sep << format("%.2x", Byte); 1516 Sep = " "; 1517 } 1518 return OS; 1519 } 1520 1521 } // namespace bolt 1522 } // namespace llvm 1523 1524 #endif 1525