1 //===- bolt/Core/BinarySection.h - Section in a binary file -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the declaration of the BinarySection class, which 10 // represents a section in an executable file and contains its properties, 11 // flags, contents, and relocations. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef BOLT_CORE_BINARY_SECTION_H 16 #define BOLT_CORE_BINARY_SECTION_H 17 18 #include "bolt/Core/DebugData.h" 19 #include "bolt/Core/Relocation.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/BinaryFormat/ELF.h" 22 #include "llvm/Object/ELFObjectFile.h" 23 #include "llvm/Object/MachO.h" 24 #include "llvm/Object/ObjectFile.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <map> 27 #include <memory> 28 #include <set> 29 30 namespace llvm { 31 class MCStreamer; 32 class MCSymbol; 33 34 using namespace object; 35 36 namespace bolt { 37 38 class BinaryContext; 39 class BinaryData; 40 41 /// A class to manage binary sections that also manages related relocations. 42 class BinarySection { 43 friend class BinaryContext; 44 45 /// Count the number of sections created. 46 static uint64_t Count; 47 48 BinaryContext &BC; // Owning BinaryContext 49 std::string Name; // Section name 50 const SectionRef Section; // SectionRef for input binary sections. 51 StringRef Contents; // Input section contents 52 const uint64_t Address; // Address of section in input binary (may be 0) 53 const uint64_t Size; // Input section size 54 uint64_t InputFileOffset{0}; // Offset in the input binary 55 unsigned Alignment; // alignment in bytes (must be > 0) 56 unsigned ELFType; // ELF section type 57 unsigned ELFFlags; // ELF section flags 58 bool IsRelro{false}; // GNU RELRO section (read-only after relocation) 59 60 // Relocations associated with this section. Relocation offsets are 61 // wrt. to the original section address and size. 62 using RelocationSetType = std::multiset<Relocation, std::less<>>; 63 RelocationSetType Relocations; 64 65 // Dynamic relocations associated with this section. Relocation offsets are 66 // from the original section address. 67 RelocationSetType DynamicRelocations; 68 69 // Pending relocations for this section. 70 std::vector<Relocation> PendingRelocations; 71 72 struct BinaryPatch { 73 uint64_t Offset; 74 SmallString<8> Bytes; 75 76 BinaryPatch(uint64_t Offset, const SmallVectorImpl<char> &Bytes) 77 : Offset(Offset), Bytes(Bytes.begin(), Bytes.end()) {} 78 }; 79 std::vector<BinaryPatch> Patches; 80 /// Patcher used to apply simple changes to sections of the input binary. 81 std::unique_ptr<BinaryPatcher> Patcher; 82 83 // Output info 84 bool IsFinalized{false}; // Has this section had output information 85 // finalized? 86 std::string OutputName; // Output section name (if the section has 87 // been renamed) 88 uint64_t OutputAddress{0}; // Section address for the rewritten binary. 89 uint64_t OutputSize{0}; // Section size in the rewritten binary. 90 // Can exceed OutputContents with padding. 91 uint64_t OutputFileOffset{0}; // File offset in the rewritten binary file. 92 StringRef OutputContents; // Rewritten section contents. 93 const uint64_t SectionNumber; // Order in which the section was created. 94 std::string SectionID; // Unique ID used for address mapping. 95 // Set by ExecutableFileMemoryManager. 96 uint32_t Index{0}; // Section index in the output file. 97 mutable bool IsReordered{false}; // Have the contents been reordered? 98 bool IsAnonymous{false}; // True if the name should not be included 99 // in the output file. 100 bool IsLinkOnly{false}; // True if the section should not be included 101 // in the output file. 102 103 uint64_t hash(const BinaryData &BD, 104 std::map<const BinaryData *, uint64_t> &Cache) const; 105 106 // non-copyable 107 BinarySection(const BinarySection &) = delete; 108 BinarySection(BinarySection &&) = delete; 109 BinarySection &operator=(const BinarySection &) = delete; 110 BinarySection &operator=(BinarySection &&) = delete; 111 112 static StringRef getName(SectionRef Section) { 113 return cantFail(Section.getName()); 114 } 115 static StringRef getContentsOrQuit(SectionRef Section) { 116 if (Section.getObject()->isELF() && 117 ELFSectionRef(Section).getType() == ELF::SHT_NOBITS) 118 return StringRef(); 119 120 Expected<StringRef> ContentsOrErr = Section.getContents(); 121 if (!ContentsOrErr) { 122 Error E = ContentsOrErr.takeError(); 123 errs() << "BOLT-ERROR: cannot get section contents for " 124 << getName(Section) << ": " << E << ".\n"; 125 exit(1); 126 } 127 return *ContentsOrErr; 128 } 129 130 /// Get the set of relocations referring to data in this section that 131 /// has been reordered. The relocation offsets will be modified to 132 /// reflect the new data locations. 133 RelocationSetType reorderRelocations(bool Inplace) const; 134 135 /// Set output info for this section. 136 void update(uint8_t *NewData, uint64_t NewSize, unsigned NewAlignment, 137 unsigned NewELFType, unsigned NewELFFlags) { 138 assert(NewAlignment > 0 && "section alignment must be > 0"); 139 Alignment = NewAlignment; 140 ELFType = NewELFType; 141 ELFFlags = NewELFFlags; 142 updateContents(NewData, NewSize); 143 } 144 145 public: 146 /// Copy a section. 147 explicit BinarySection(BinaryContext &BC, const Twine &Name, 148 const BinarySection &Section) 149 : BC(BC), Name(Name.str()), Section(SectionRef()), 150 Contents(Section.getContents()), Address(Section.getAddress()), 151 Size(Section.getSize()), Alignment(Section.getAlignment()), 152 ELFType(Section.getELFType()), ELFFlags(Section.getELFFlags()), 153 Relocations(Section.Relocations), 154 PendingRelocations(Section.PendingRelocations), OutputName(Name.str()), 155 SectionNumber(++Count) {} 156 157 BinarySection(BinaryContext &BC, SectionRef Section) 158 : BC(BC), Name(getName(Section)), Section(Section), 159 Contents(getContentsOrQuit(Section)), Address(Section.getAddress()), 160 Size(Section.getSize()), Alignment(Section.getAlignment().value()), 161 OutputName(Name), SectionNumber(++Count) { 162 if (isELF()) { 163 ELFType = ELFSectionRef(Section).getType(); 164 ELFFlags = ELFSectionRef(Section).getFlags(); 165 InputFileOffset = ELFSectionRef(Section).getOffset(); 166 } else if (isMachO()) { 167 auto *O = cast<MachOObjectFile>(Section.getObject()); 168 InputFileOffset = 169 O->is64Bit() ? O->getSection64(Section.getRawDataRefImpl()).offset 170 : O->getSection(Section.getRawDataRefImpl()).offset; 171 } 172 } 173 174 // TODO: pass Data as StringRef/ArrayRef? use StringRef::copy method. 175 BinarySection(BinaryContext &BC, const Twine &Name, uint8_t *Data, 176 uint64_t Size, unsigned Alignment, unsigned ELFType, 177 unsigned ELFFlags) 178 : BC(BC), Name(Name.str()), 179 Contents(reinterpret_cast<const char *>(Data), Data ? Size : 0), 180 Address(0), Size(Size), Alignment(Alignment), ELFType(ELFType), 181 ELFFlags(ELFFlags), IsFinalized(true), OutputName(Name.str()), 182 OutputSize(Size), OutputContents(Contents), SectionNumber(++Count) { 183 assert(Alignment > 0 && "section alignment must be > 0"); 184 } 185 186 ~BinarySection(); 187 188 /// Helper function to generate the proper ELF flags from section properties. 189 static unsigned getFlags(bool IsReadOnly = true, bool IsText = false, 190 bool IsAllocatable = false) { 191 unsigned Flags = 0; 192 if (IsAllocatable) 193 Flags |= ELF::SHF_ALLOC; 194 if (!IsReadOnly) 195 Flags |= ELF::SHF_WRITE; 196 if (IsText) 197 Flags |= ELF::SHF_EXECINSTR; 198 return Flags; 199 } 200 201 operator bool() const { return ELFType != ELF::SHT_NULL; } 202 203 bool operator==(const BinarySection &Other) const { 204 return (Name == Other.Name && Address == Other.Address && 205 Size == Other.Size && getData() == Other.getData() && 206 Alignment == Other.Alignment && ELFType == Other.ELFType && 207 ELFFlags == Other.ELFFlags); 208 } 209 210 bool operator!=(const BinarySection &Other) const { 211 return !operator==(Other); 212 } 213 214 // Order sections by their immutable properties. 215 bool operator<(const BinarySection &Other) const { 216 // Allocatable before non-allocatable. 217 if (isAllocatable() != Other.isAllocatable()) 218 return isAllocatable() > Other.isAllocatable(); 219 220 // Input sections take precedence. 221 if (hasSectionRef() != Other.hasSectionRef()) 222 return hasSectionRef() > Other.hasSectionRef(); 223 224 // Compare allocatable input sections by their address. 225 if (hasSectionRef() && getAddress() != Other.getAddress()) 226 return getAddress() < Other.getAddress(); 227 if (hasSectionRef() && getAddress() && getSize() != Other.getSize()) 228 return getSize() < Other.getSize(); 229 230 // Code before data. 231 if (isText() != Other.isText()) 232 return isText() > Other.isText(); 233 234 // Read-only before writable. 235 if (isWritable() != Other.isWritable()) 236 return isWritable() < Other.isWritable(); 237 238 // BSS at the end. 239 if (isBSS() != Other.isBSS()) 240 return isBSS() < Other.isBSS(); 241 242 // Otherwise, preserve the order of creation. 243 return SectionNumber < Other.SectionNumber; 244 } 245 246 /// 247 /// Basic property access. 248 /// 249 BinaryContext &getBinaryContext() { return BC; } 250 bool isELF() const; 251 bool isMachO() const; 252 StringRef getName() const { return Name; } 253 uint64_t getAddress() const { return Address; } 254 uint64_t getEndAddress() const { return Address + Size; } 255 uint64_t getSize() const { return Size; } 256 uint64_t getInputFileOffset() const { return InputFileOffset; } 257 Align getAlign() const { return Align(Alignment); } 258 uint64_t getAlignment() const { return Alignment; } 259 bool isText() const { 260 if (isELF()) 261 return (ELFFlags & ELF::SHF_EXECINSTR); 262 return hasSectionRef() && getSectionRef().isText(); 263 } 264 bool isData() const { 265 if (isELF()) 266 return (ELFType == ELF::SHT_PROGBITS && 267 (ELFFlags & (ELF::SHF_ALLOC | ELF::SHF_WRITE))); 268 return hasSectionRef() && getSectionRef().isData(); 269 } 270 bool isBSS() const { 271 return (ELFType == ELF::SHT_NOBITS && 272 (ELFFlags & (ELF::SHF_ALLOC | ELF::SHF_WRITE))); 273 } 274 bool isTLS() const { return (ELFFlags & ELF::SHF_TLS); } 275 bool isTBSS() const { return isBSS() && isTLS(); } 276 bool isVirtual() const { return ELFType == ELF::SHT_NOBITS; } 277 bool isRela() const { return ELFType == ELF::SHT_RELA; } 278 bool isRelr() const { return ELFType == ELF::SHT_RELR; } 279 bool isWritable() const { return (ELFFlags & ELF::SHF_WRITE); } 280 bool isAllocatable() const { 281 if (isELF()) { 282 return (ELFFlags & ELF::SHF_ALLOC) && !isTBSS(); 283 } else { 284 // On non-ELF assume all sections are allocatable. 285 return true; 286 } 287 } 288 bool isNote() const { return isELF() && ELFType == ELF::SHT_NOTE; } 289 bool isReordered() const { return IsReordered; } 290 bool isAnonymous() const { return IsAnonymous; } 291 bool isRelro() const { return IsRelro; } 292 void setRelro() { IsRelro = true; } 293 unsigned getELFType() const { return ELFType; } 294 unsigned getELFFlags() const { return ELFFlags; } 295 296 uint8_t *getData() { 297 return reinterpret_cast<uint8_t *>( 298 const_cast<char *>(getContents().data())); 299 } 300 const uint8_t *getData() const { 301 return reinterpret_cast<const uint8_t *>(getContents().data()); 302 } 303 StringRef getContents() const { return Contents; } 304 void clearContents() { Contents = {}; } 305 bool hasSectionRef() const { return Section != SectionRef(); } 306 SectionRef getSectionRef() const { return Section; } 307 308 /// Does this section contain the given \p Address? 309 /// Note: this is in terms of the original mapped binary addresses. 310 bool containsAddress(uint64_t Address) const { 311 return (getAddress() <= Address && Address < getEndAddress()) || 312 (getSize() == 0 && getAddress() == Address); 313 } 314 315 /// Does this section contain the range [\p Address, \p Address + \p Size)? 316 /// Note: this is in terms of the original mapped binary addresses. 317 bool containsRange(uint64_t Address, uint64_t Size) const { 318 return containsAddress(Address) && Address + Size <= getEndAddress(); 319 } 320 321 /// Iterate over all non-pending relocations for this section. 322 iterator_range<RelocationSetType::iterator> relocations() { 323 return make_range(Relocations.begin(), Relocations.end()); 324 } 325 326 /// Iterate over all non-pending relocations for this section. 327 iterator_range<RelocationSetType::const_iterator> relocations() const { 328 return make_range(Relocations.begin(), Relocations.end()); 329 } 330 331 /// Iterate over all dynamic relocations for this section. 332 iterator_range<RelocationSetType::iterator> dynamicRelocations() { 333 return make_range(DynamicRelocations.begin(), DynamicRelocations.end()); 334 } 335 336 /// Iterate over all dynamic relocations for this section. 337 iterator_range<RelocationSetType::const_iterator> dynamicRelocations() const { 338 return make_range(DynamicRelocations.begin(), DynamicRelocations.end()); 339 } 340 341 /// Does this section have any non-pending relocations? 342 bool hasRelocations() const { return !Relocations.empty(); } 343 344 /// Does this section have any pending relocations? 345 bool hasPendingRelocations() const { return !PendingRelocations.empty(); } 346 347 /// Remove non-pending relocation with the given /p Offset. 348 bool removeRelocationAt(uint64_t Offset) { 349 auto Itr = Relocations.find(Offset); 350 if (Itr != Relocations.end()) { 351 auto End = Relocations.upper_bound(Offset); 352 Relocations.erase(Itr, End); 353 return true; 354 } 355 return false; 356 } 357 358 void clearRelocations(); 359 360 /// Add a new relocation at the given /p Offset. 361 void addRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type, 362 uint64_t Addend, uint64_t Value = 0, 363 bool Pending = false) { 364 assert(Offset < getSize() && "offset not within section bounds"); 365 if (!Pending) { 366 Relocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value}); 367 } else { 368 PendingRelocations.emplace_back( 369 Relocation{Offset, Symbol, Type, Addend, Value}); 370 } 371 } 372 373 /// Add a dynamic relocation at the given /p Offset. 374 void addDynamicRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type, 375 uint64_t Addend, uint64_t Value = 0) { 376 addDynamicRelocation(Relocation{Offset, Symbol, Type, Addend, Value}); 377 } 378 379 void addDynamicRelocation(const Relocation &Reloc) { 380 assert(Reloc.Offset < getSize() && "offset not within section bounds"); 381 DynamicRelocations.emplace(Reloc); 382 } 383 384 /// Add relocation against the original contents of this section. 385 void addPendingRelocation(const Relocation &Rel) { 386 PendingRelocations.push_back(Rel); 387 } 388 389 /// Add patch to the input contents of this section. 390 void addPatch(uint64_t Offset, const SmallVectorImpl<char> &Bytes) { 391 Patches.emplace_back(BinaryPatch(Offset, Bytes)); 392 } 393 394 /// Register patcher for this section. 395 void registerPatcher(std::unique_ptr<BinaryPatcher> BPatcher) { 396 Patcher = std::move(BPatcher); 397 } 398 399 /// Returns the patcher 400 BinaryPatcher *getPatcher() { return Patcher.get(); } 401 402 /// Lookup the relocation (if any) at the given /p Offset. 403 const Relocation *getRelocationAt(uint64_t Offset) const { 404 auto Itr = Relocations.find(Offset); 405 return Itr != Relocations.end() ? &*Itr : nullptr; 406 } 407 408 /// Lookup the relocation (if any) at the given /p Offset. 409 const Relocation *getDynamicRelocationAt(uint64_t Offset) const { 410 Relocation Key{Offset, 0, 0, 0, 0}; 411 auto Itr = DynamicRelocations.find(Key); 412 return Itr != DynamicRelocations.end() ? &*Itr : nullptr; 413 } 414 415 std::optional<Relocation> takeDynamicRelocationAt(uint64_t Offset) { 416 Relocation Key{Offset, 0, 0, 0, 0}; 417 auto Itr = DynamicRelocations.find(Key); 418 419 if (Itr == DynamicRelocations.end()) 420 return std::nullopt; 421 422 Relocation Reloc = *Itr; 423 DynamicRelocations.erase(Itr); 424 return Reloc; 425 } 426 427 uint64_t hash(const BinaryData &BD) const { 428 std::map<const BinaryData *, uint64_t> Cache; 429 return hash(BD, Cache); 430 } 431 432 /// 433 /// Property accessors related to output data. 434 /// 435 436 bool isFinalized() const { return IsFinalized; } 437 void setIsFinalized() { IsFinalized = true; } 438 StringRef getOutputName() const { return OutputName; } 439 uint64_t getOutputSize() const { return OutputSize; } 440 uint8_t *getOutputData() { 441 return reinterpret_cast<uint8_t *>( 442 const_cast<char *>(getOutputContents().data())); 443 } 444 const uint8_t *getOutputData() const { 445 return reinterpret_cast<const uint8_t *>(getOutputContents().data()); 446 } 447 StringRef getOutputContents() const { return OutputContents; } 448 uint64_t getAllocAddress() const { 449 return reinterpret_cast<uint64_t>(getOutputData()); 450 } 451 uint64_t getOutputAddress() const { return OutputAddress; } 452 uint64_t getOutputFileOffset() const { return OutputFileOffset; } 453 StringRef getSectionID() const { 454 assert(hasValidSectionID() && "trying to use uninitialized section id"); 455 return SectionID; 456 } 457 bool hasValidSectionID() const { return !SectionID.empty(); } 458 bool hasValidIndex() { return Index != 0; } 459 uint32_t getIndex() const { return Index; } 460 461 // mutation 462 void setOutputAddress(uint64_t Address) { OutputAddress = Address; } 463 void setOutputFileOffset(uint64_t Offset) { OutputFileOffset = Offset; } 464 void setSectionID(StringRef ID) { 465 assert(!hasValidSectionID() && "trying to set section id twice"); 466 SectionID = ID; 467 } 468 void setIndex(uint32_t I) { Index = I; } 469 void setOutputName(const Twine &Name) { OutputName = Name.str(); } 470 void setAnonymous(bool Flag) { IsAnonymous = Flag; } 471 bool isLinkOnly() const { return IsLinkOnly; } 472 void setLinkOnly() { IsLinkOnly = true; } 473 474 /// Emit the section as data, possibly with relocations. 475 /// Use name \p SectionName for the section during the emission. 476 void emitAsData(MCStreamer &Streamer, const Twine &SectionName) const; 477 478 /// Write finalized contents of the section. If OutputSize exceeds the size of 479 /// the OutputContents, append zero padding to the stream and return the 480 /// number of byte written which should match the OutputSize. 481 uint64_t write(raw_ostream &OS) const; 482 483 using SymbolResolverFuncTy = llvm::function_ref<uint64_t(const MCSymbol *)>; 484 485 /// Flush all pending relocations to patch original contents of sections 486 /// that were not emitted via MCStreamer. 487 void flushPendingRelocations(raw_pwrite_stream &OS, 488 SymbolResolverFuncTy Resolver); 489 490 /// Change contents of the section. Unless the section has a valid SectionID, 491 /// the memory passed in \p NewData will be managed by the instance of 492 /// BinarySection. 493 void updateContents(const uint8_t *NewData, size_t NewSize) { 494 if (getOutputData() && !hasValidSectionID() && 495 (!hasSectionRef() || 496 OutputContents.data() != getContentsOrQuit(Section).data())) { 497 delete[] getOutputData(); 498 } 499 500 OutputContents = StringRef(reinterpret_cast<const char *>(NewData), 501 NewData ? NewSize : 0); 502 OutputSize = NewSize; 503 IsFinalized = true; 504 } 505 506 /// When writing section contents, add \p PaddingSize zero bytes at the end. 507 void addPadding(uint64_t PaddingSize) { OutputSize += PaddingSize; } 508 509 /// Reorder the contents of this section according to /p Order. If 510 /// /p Inplace is true, the entire contents of the section is reordered, 511 /// otherwise the new contents contain only the reordered data. 512 void reorderContents(const std::vector<BinaryData *> &Order, bool Inplace); 513 514 void print(raw_ostream &OS) const; 515 516 /// Write the contents of an ELF note section given the name of the producer, 517 /// a number identifying the type of note and the contents of the note in 518 /// \p DescStr. 519 static std::string encodeELFNote(StringRef NameStr, StringRef DescStr, 520 uint32_t Type); 521 522 /// Code for ELF notes written by producer 'BOLT' 523 enum { NT_BOLT_BAT = 1, NT_BOLT_INSTRUMENTATION_TABLES = 2 }; 524 }; 525 526 inline uint8_t *copyByteArray(const uint8_t *Data, uint64_t Size) { 527 auto *Array = new uint8_t[Size]; 528 memcpy(Array, Data, Size); 529 return Array; 530 } 531 532 inline uint8_t *copyByteArray(StringRef Buffer) { 533 return copyByteArray(reinterpret_cast<const uint8_t *>(Buffer.data()), 534 Buffer.size()); 535 } 536 537 inline uint8_t *copyByteArray(ArrayRef<char> Buffer) { 538 return copyByteArray(reinterpret_cast<const uint8_t *>(Buffer.data()), 539 Buffer.size()); 540 } 541 542 inline raw_ostream &operator<<(raw_ostream &OS, const BinarySection &Section) { 543 Section.print(OS); 544 return OS; 545 } 546 547 } // namespace bolt 548 } // namespace llvm 549 550 #endif 551