xref: /llvm-project/bolt/include/bolt/Core/BinarySection.h (revision 996553228f8b2f3219451a2514bd6f9380f13e28)
1 //===- bolt/Core/BinarySection.h - Section in a binary file -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the declaration of the BinarySection class, which
10 // represents a section in an executable file and contains its properties,
11 // flags, contents, and relocations.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef BOLT_CORE_BINARY_SECTION_H
16 #define BOLT_CORE_BINARY_SECTION_H
17 
18 #include "bolt/Core/DebugData.h"
19 #include "bolt/Core/Relocation.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/BinaryFormat/ELF.h"
22 #include "llvm/Object/ELFObjectFile.h"
23 #include "llvm/Object/MachO.h"
24 #include "llvm/Object/ObjectFile.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <map>
27 #include <memory>
28 #include <set>
29 
30 namespace llvm {
31 class MCStreamer;
32 class MCSymbol;
33 
34 using namespace object;
35 
36 namespace bolt {
37 
38 class BinaryContext;
39 class BinaryData;
40 
41 /// A class to manage binary sections that also manages related relocations.
42 class BinarySection {
43   friend class BinaryContext;
44 
45   /// Count the number of sections created.
46   static uint64_t Count;
47 
48   BinaryContext &BC;           // Owning BinaryContext
49   std::string Name;            // Section name
50   const SectionRef Section;    // SectionRef for input binary sections.
51   StringRef Contents;          // Input section contents
52   const uint64_t Address;      // Address of section in input binary (may be 0)
53   const uint64_t Size;         // Input section size
54   uint64_t InputFileOffset{0}; // Offset in the input binary
55   unsigned Alignment;          // alignment in bytes (must be > 0)
56   unsigned ELFType;            // ELF section type
57   unsigned ELFFlags;           // ELF section flags
58   bool IsRelro{false};         // GNU RELRO section (read-only after relocation)
59 
60   // Relocations associated with this section. Relocation offsets are
61   // wrt. to the original section address and size.
62   using RelocationSetType = std::multiset<Relocation, std::less<>>;
63   RelocationSetType Relocations;
64 
65   // Dynamic relocations associated with this section. Relocation offsets are
66   // from the original section address.
67   RelocationSetType DynamicRelocations;
68 
69   // Pending relocations for this section.
70   std::vector<Relocation> PendingRelocations;
71 
72   struct BinaryPatch {
73     uint64_t Offset;
74     SmallString<8> Bytes;
75 
76     BinaryPatch(uint64_t Offset, const SmallVectorImpl<char> &Bytes)
77         : Offset(Offset), Bytes(Bytes.begin(), Bytes.end()) {}
78   };
79   std::vector<BinaryPatch> Patches;
80   /// Patcher used to apply simple changes to sections of the input binary.
81   std::unique_ptr<BinaryPatcher> Patcher;
82 
83   // Output info
84   bool IsFinalized{false};         // Has this section had output information
85                                    // finalized?
86   std::string OutputName;          // Output section name (if the section has
87                                    // been renamed)
88   uint64_t OutputAddress{0};       // Section address for the rewritten binary.
89   uint64_t OutputSize{0};          // Section size in the rewritten binary.
90                                    // Can exceed OutputContents with padding.
91   uint64_t OutputFileOffset{0};    // File offset in the rewritten binary file.
92   StringRef OutputContents;        // Rewritten section contents.
93   const uint64_t SectionNumber;    // Order in which the section was created.
94   std::string SectionID;           // Unique ID used for address mapping.
95                                    // Set by ExecutableFileMemoryManager.
96   uint32_t Index{0};               // Section index in the output file.
97   mutable bool IsReordered{false}; // Have the contents been reordered?
98   bool IsAnonymous{false};         // True if the name should not be included
99                                    // in the output file.
100   bool IsLinkOnly{false};          // True if the section should not be included
101                                    // in the output file.
102 
103   uint64_t hash(const BinaryData &BD,
104                 std::map<const BinaryData *, uint64_t> &Cache) const;
105 
106   // non-copyable
107   BinarySection(const BinarySection &) = delete;
108   BinarySection(BinarySection &&) = delete;
109   BinarySection &operator=(const BinarySection &) = delete;
110   BinarySection &operator=(BinarySection &&) = delete;
111 
112   static StringRef getName(SectionRef Section) {
113     return cantFail(Section.getName());
114   }
115   static StringRef getContentsOrQuit(SectionRef Section) {
116     if (Section.getObject()->isELF() &&
117         ELFSectionRef(Section).getType() == ELF::SHT_NOBITS)
118       return StringRef();
119 
120     Expected<StringRef> ContentsOrErr = Section.getContents();
121     if (!ContentsOrErr) {
122       Error E = ContentsOrErr.takeError();
123       errs() << "BOLT-ERROR: cannot get section contents for "
124              << getName(Section) << ": " << E << ".\n";
125       exit(1);
126     }
127     return *ContentsOrErr;
128   }
129 
130   /// Get the set of relocations referring to data in this section that
131   /// has been reordered.  The relocation offsets will be modified to
132   /// reflect the new data locations.
133   RelocationSetType reorderRelocations(bool Inplace) const;
134 
135   /// Set output info for this section.
136   void update(uint8_t *NewData, uint64_t NewSize, unsigned NewAlignment,
137               unsigned NewELFType, unsigned NewELFFlags) {
138     assert(NewAlignment > 0 && "section alignment must be > 0");
139     Alignment = NewAlignment;
140     ELFType = NewELFType;
141     ELFFlags = NewELFFlags;
142     updateContents(NewData, NewSize);
143   }
144 
145 public:
146   /// Copy a section.
147   explicit BinarySection(BinaryContext &BC, const Twine &Name,
148                          const BinarySection &Section)
149       : BC(BC), Name(Name.str()), Section(SectionRef()),
150         Contents(Section.getContents()), Address(Section.getAddress()),
151         Size(Section.getSize()), Alignment(Section.getAlignment()),
152         ELFType(Section.getELFType()), ELFFlags(Section.getELFFlags()),
153         Relocations(Section.Relocations),
154         PendingRelocations(Section.PendingRelocations), OutputName(Name.str()),
155         SectionNumber(++Count) {}
156 
157   BinarySection(BinaryContext &BC, SectionRef Section)
158       : BC(BC), Name(getName(Section)), Section(Section),
159         Contents(getContentsOrQuit(Section)), Address(Section.getAddress()),
160         Size(Section.getSize()), Alignment(Section.getAlignment().value()),
161         OutputName(Name), SectionNumber(++Count) {
162     if (isELF()) {
163       ELFType = ELFSectionRef(Section).getType();
164       ELFFlags = ELFSectionRef(Section).getFlags();
165       InputFileOffset = ELFSectionRef(Section).getOffset();
166     } else if (isMachO()) {
167       auto *O = cast<MachOObjectFile>(Section.getObject());
168       InputFileOffset =
169           O->is64Bit() ? O->getSection64(Section.getRawDataRefImpl()).offset
170                        : O->getSection(Section.getRawDataRefImpl()).offset;
171     }
172   }
173 
174   // TODO: pass Data as StringRef/ArrayRef? use StringRef::copy method.
175   BinarySection(BinaryContext &BC, const Twine &Name, uint8_t *Data,
176                 uint64_t Size, unsigned Alignment, unsigned ELFType,
177                 unsigned ELFFlags)
178       : BC(BC), Name(Name.str()),
179         Contents(reinterpret_cast<const char *>(Data), Data ? Size : 0),
180         Address(0), Size(Size), Alignment(Alignment), ELFType(ELFType),
181         ELFFlags(ELFFlags), IsFinalized(true), OutputName(Name.str()),
182         OutputSize(Size), OutputContents(Contents), SectionNumber(++Count) {
183     assert(Alignment > 0 && "section alignment must be > 0");
184   }
185 
186   ~BinarySection();
187 
188   /// Helper function to generate the proper ELF flags from section properties.
189   static unsigned getFlags(bool IsReadOnly = true, bool IsText = false,
190                            bool IsAllocatable = false) {
191     unsigned Flags = 0;
192     if (IsAllocatable)
193       Flags |= ELF::SHF_ALLOC;
194     if (!IsReadOnly)
195       Flags |= ELF::SHF_WRITE;
196     if (IsText)
197       Flags |= ELF::SHF_EXECINSTR;
198     return Flags;
199   }
200 
201   operator bool() const { return ELFType != ELF::SHT_NULL; }
202 
203   bool operator==(const BinarySection &Other) const {
204     return (Name == Other.Name && Address == Other.Address &&
205             Size == Other.Size && getData() == Other.getData() &&
206             Alignment == Other.Alignment && ELFType == Other.ELFType &&
207             ELFFlags == Other.ELFFlags);
208   }
209 
210   bool operator!=(const BinarySection &Other) const {
211     return !operator==(Other);
212   }
213 
214   // Order sections by their immutable properties.
215   bool operator<(const BinarySection &Other) const {
216     // Allocatable before non-allocatable.
217     if (isAllocatable() != Other.isAllocatable())
218       return isAllocatable() > Other.isAllocatable();
219 
220     // Input sections take precedence.
221     if (hasSectionRef() != Other.hasSectionRef())
222       return hasSectionRef() > Other.hasSectionRef();
223 
224     // Compare allocatable input sections by their address.
225     if (hasSectionRef() && getAddress() != Other.getAddress())
226       return getAddress() < Other.getAddress();
227     if (hasSectionRef() && getAddress() && getSize() != Other.getSize())
228       return getSize() < Other.getSize();
229 
230     // Code before data.
231     if (isText() != Other.isText())
232       return isText() > Other.isText();
233 
234     // Read-only before writable.
235     if (isWritable() != Other.isWritable())
236       return isWritable() < Other.isWritable();
237 
238     // BSS at the end.
239     if (isBSS() != Other.isBSS())
240       return isBSS() < Other.isBSS();
241 
242     // Otherwise, preserve the order of creation.
243     return SectionNumber < Other.SectionNumber;
244   }
245 
246   ///
247   /// Basic property access.
248   ///
249   BinaryContext &getBinaryContext() { return BC; }
250   bool isELF() const;
251   bool isMachO() const;
252   StringRef getName() const { return Name; }
253   uint64_t getAddress() const { return Address; }
254   uint64_t getEndAddress() const { return Address + Size; }
255   uint64_t getSize() const { return Size; }
256   uint64_t getInputFileOffset() const { return InputFileOffset; }
257   Align getAlign() const { return Align(Alignment); }
258   uint64_t getAlignment() const { return Alignment; }
259   bool isText() const {
260     if (isELF())
261       return (ELFFlags & ELF::SHF_EXECINSTR);
262     return hasSectionRef() && getSectionRef().isText();
263   }
264   bool isData() const {
265     if (isELF())
266       return (ELFType == ELF::SHT_PROGBITS &&
267               (ELFFlags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)));
268     return hasSectionRef() && getSectionRef().isData();
269   }
270   bool isBSS() const {
271     return (ELFType == ELF::SHT_NOBITS &&
272             (ELFFlags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)));
273   }
274   bool isTLS() const { return (ELFFlags & ELF::SHF_TLS); }
275   bool isTBSS() const { return isBSS() && isTLS(); }
276   bool isVirtual() const { return ELFType == ELF::SHT_NOBITS; }
277   bool isRela() const { return ELFType == ELF::SHT_RELA; }
278   bool isRelr() const { return ELFType == ELF::SHT_RELR; }
279   bool isWritable() const { return (ELFFlags & ELF::SHF_WRITE); }
280   bool isAllocatable() const {
281     if (isELF()) {
282       return (ELFFlags & ELF::SHF_ALLOC) && !isTBSS();
283     } else {
284       // On non-ELF assume all sections are allocatable.
285       return true;
286     }
287   }
288   bool isNote() const { return isELF() && ELFType == ELF::SHT_NOTE; }
289   bool isReordered() const { return IsReordered; }
290   bool isAnonymous() const { return IsAnonymous; }
291   bool isRelro() const { return IsRelro; }
292   void setRelro() { IsRelro = true; }
293   unsigned getELFType() const { return ELFType; }
294   unsigned getELFFlags() const { return ELFFlags; }
295 
296   uint8_t *getData() {
297     return reinterpret_cast<uint8_t *>(
298         const_cast<char *>(getContents().data()));
299   }
300   const uint8_t *getData() const {
301     return reinterpret_cast<const uint8_t *>(getContents().data());
302   }
303   StringRef getContents() const { return Contents; }
304   void clearContents() { Contents = {}; }
305   bool hasSectionRef() const { return Section != SectionRef(); }
306   SectionRef getSectionRef() const { return Section; }
307 
308   /// Does this section contain the given \p Address?
309   /// Note: this is in terms of the original mapped binary addresses.
310   bool containsAddress(uint64_t Address) const {
311     return (getAddress() <= Address && Address < getEndAddress()) ||
312            (getSize() == 0 && getAddress() == Address);
313   }
314 
315   /// Does this section contain the range [\p Address, \p Address + \p Size)?
316   /// Note: this is in terms of the original mapped binary addresses.
317   bool containsRange(uint64_t Address, uint64_t Size) const {
318     return containsAddress(Address) && Address + Size <= getEndAddress();
319   }
320 
321   /// Iterate over all non-pending relocations for this section.
322   iterator_range<RelocationSetType::iterator> relocations() {
323     return make_range(Relocations.begin(), Relocations.end());
324   }
325 
326   /// Iterate over all non-pending relocations for this section.
327   iterator_range<RelocationSetType::const_iterator> relocations() const {
328     return make_range(Relocations.begin(), Relocations.end());
329   }
330 
331   /// Iterate over all dynamic relocations for this section.
332   iterator_range<RelocationSetType::iterator> dynamicRelocations() {
333     return make_range(DynamicRelocations.begin(), DynamicRelocations.end());
334   }
335 
336   /// Iterate over all dynamic relocations for this section.
337   iterator_range<RelocationSetType::const_iterator> dynamicRelocations() const {
338     return make_range(DynamicRelocations.begin(), DynamicRelocations.end());
339   }
340 
341   /// Does this section have any non-pending relocations?
342   bool hasRelocations() const { return !Relocations.empty(); }
343 
344   /// Does this section have any pending relocations?
345   bool hasPendingRelocations() const { return !PendingRelocations.empty(); }
346 
347   /// Remove non-pending relocation with the given /p Offset.
348   bool removeRelocationAt(uint64_t Offset) {
349     auto Itr = Relocations.find(Offset);
350     if (Itr != Relocations.end()) {
351       auto End = Relocations.upper_bound(Offset);
352       Relocations.erase(Itr, End);
353       return true;
354     }
355     return false;
356   }
357 
358   void clearRelocations();
359 
360   /// Add a new relocation at the given /p Offset.
361   void addRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type,
362                      uint64_t Addend, uint64_t Value = 0,
363                      bool Pending = false) {
364     assert(Offset < getSize() && "offset not within section bounds");
365     if (!Pending) {
366       Relocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value});
367     } else {
368       PendingRelocations.emplace_back(
369           Relocation{Offset, Symbol, Type, Addend, Value});
370     }
371   }
372 
373   /// Add a dynamic relocation at the given /p Offset.
374   void addDynamicRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type,
375                             uint64_t Addend, uint64_t Value = 0) {
376     addDynamicRelocation(Relocation{Offset, Symbol, Type, Addend, Value});
377   }
378 
379   void addDynamicRelocation(const Relocation &Reloc) {
380     assert(Reloc.Offset < getSize() && "offset not within section bounds");
381     DynamicRelocations.emplace(Reloc);
382   }
383 
384   /// Add relocation against the original contents of this section.
385   void addPendingRelocation(const Relocation &Rel) {
386     PendingRelocations.push_back(Rel);
387   }
388 
389   /// Add patch to the input contents of this section.
390   void addPatch(uint64_t Offset, const SmallVectorImpl<char> &Bytes) {
391     Patches.emplace_back(BinaryPatch(Offset, Bytes));
392   }
393 
394   /// Register patcher for this section.
395   void registerPatcher(std::unique_ptr<BinaryPatcher> BPatcher) {
396     Patcher = std::move(BPatcher);
397   }
398 
399   /// Returns the patcher
400   BinaryPatcher *getPatcher() { return Patcher.get(); }
401 
402   /// Lookup the relocation (if any) at the given /p Offset.
403   const Relocation *getRelocationAt(uint64_t Offset) const {
404     auto Itr = Relocations.find(Offset);
405     return Itr != Relocations.end() ? &*Itr : nullptr;
406   }
407 
408   /// Lookup the relocation (if any) at the given /p Offset.
409   const Relocation *getDynamicRelocationAt(uint64_t Offset) const {
410     Relocation Key{Offset, 0, 0, 0, 0};
411     auto Itr = DynamicRelocations.find(Key);
412     return Itr != DynamicRelocations.end() ? &*Itr : nullptr;
413   }
414 
415   std::optional<Relocation> takeDynamicRelocationAt(uint64_t Offset) {
416     Relocation Key{Offset, 0, 0, 0, 0};
417     auto Itr = DynamicRelocations.find(Key);
418 
419     if (Itr == DynamicRelocations.end())
420       return std::nullopt;
421 
422     Relocation Reloc = *Itr;
423     DynamicRelocations.erase(Itr);
424     return Reloc;
425   }
426 
427   uint64_t hash(const BinaryData &BD) const {
428     std::map<const BinaryData *, uint64_t> Cache;
429     return hash(BD, Cache);
430   }
431 
432   ///
433   /// Property accessors related to output data.
434   ///
435 
436   bool isFinalized() const { return IsFinalized; }
437   void setIsFinalized() { IsFinalized = true; }
438   StringRef getOutputName() const { return OutputName; }
439   uint64_t getOutputSize() const { return OutputSize; }
440   uint8_t *getOutputData() {
441     return reinterpret_cast<uint8_t *>(
442         const_cast<char *>(getOutputContents().data()));
443   }
444   const uint8_t *getOutputData() const {
445     return reinterpret_cast<const uint8_t *>(getOutputContents().data());
446   }
447   StringRef getOutputContents() const { return OutputContents; }
448   uint64_t getAllocAddress() const {
449     return reinterpret_cast<uint64_t>(getOutputData());
450   }
451   uint64_t getOutputAddress() const { return OutputAddress; }
452   uint64_t getOutputFileOffset() const { return OutputFileOffset; }
453   StringRef getSectionID() const {
454     assert(hasValidSectionID() && "trying to use uninitialized section id");
455     return SectionID;
456   }
457   bool hasValidSectionID() const { return !SectionID.empty(); }
458   bool hasValidIndex() { return Index != 0; }
459   uint32_t getIndex() const { return Index; }
460 
461   // mutation
462   void setOutputAddress(uint64_t Address) { OutputAddress = Address; }
463   void setOutputFileOffset(uint64_t Offset) { OutputFileOffset = Offset; }
464   void setSectionID(StringRef ID) {
465     assert(!hasValidSectionID() && "trying to set section id twice");
466     SectionID = ID;
467   }
468   void setIndex(uint32_t I) { Index = I; }
469   void setOutputName(const Twine &Name) { OutputName = Name.str(); }
470   void setAnonymous(bool Flag) { IsAnonymous = Flag; }
471   bool isLinkOnly() const { return IsLinkOnly; }
472   void setLinkOnly() { IsLinkOnly = true; }
473 
474   /// Emit the section as data, possibly with relocations.
475   /// Use name \p SectionName for the section during the emission.
476   void emitAsData(MCStreamer &Streamer, const Twine &SectionName) const;
477 
478   /// Write finalized contents of the section. If OutputSize exceeds the size of
479   /// the OutputContents, append zero padding to the stream and return the
480   /// number of byte written which should match the OutputSize.
481   uint64_t write(raw_ostream &OS) const;
482 
483   using SymbolResolverFuncTy = llvm::function_ref<uint64_t(const MCSymbol *)>;
484 
485   /// Flush all pending relocations to patch original contents of sections
486   /// that were not emitted via MCStreamer.
487   void flushPendingRelocations(raw_pwrite_stream &OS,
488                                SymbolResolverFuncTy Resolver);
489 
490   /// Change contents of the section. Unless the section has a valid SectionID,
491   /// the memory passed in \p NewData will be managed by the instance of
492   /// BinarySection.
493   void updateContents(const uint8_t *NewData, size_t NewSize) {
494     if (getOutputData() && !hasValidSectionID() &&
495         (!hasSectionRef() ||
496          OutputContents.data() != getContentsOrQuit(Section).data())) {
497       delete[] getOutputData();
498     }
499 
500     OutputContents = StringRef(reinterpret_cast<const char *>(NewData),
501                                NewData ? NewSize : 0);
502     OutputSize = NewSize;
503     IsFinalized = true;
504   }
505 
506   /// When writing section contents, add \p PaddingSize zero bytes at the end.
507   void addPadding(uint64_t PaddingSize) { OutputSize += PaddingSize; }
508 
509   /// Reorder the contents of this section according to /p Order.  If
510   /// /p Inplace is true, the entire contents of the section is reordered,
511   /// otherwise the new contents contain only the reordered data.
512   void reorderContents(const std::vector<BinaryData *> &Order, bool Inplace);
513 
514   void print(raw_ostream &OS) const;
515 
516   /// Write the contents of an ELF note section given the name of the producer,
517   /// a number identifying the type of note and the contents of the note in
518   /// \p DescStr.
519   static std::string encodeELFNote(StringRef NameStr, StringRef DescStr,
520                                    uint32_t Type);
521 
522   /// Code for ELF notes written by producer 'BOLT'
523   enum { NT_BOLT_BAT = 1, NT_BOLT_INSTRUMENTATION_TABLES = 2 };
524 };
525 
/// Allocate a new array of \p Size bytes with new[] and copy \p Size bytes
/// from \p Data into it.
///
/// \returns the newly allocated array; the caller is responsible for
/// releasing it with delete[]. A zero \p Size yields a valid zero-length
/// allocation and \p Data is not read, so it may be null in that case.
inline uint8_t *copyByteArray(const uint8_t *Data, uint64_t Size) {
  auto *Array = new uint8_t[Size];
  // memcpy() requires valid pointers even for a zero-length copy; an empty
  // buffer may carry a null data pointer, so skip the call in that case.
  if (Size != 0)
    memcpy(Array, Data, Size);
  return Array;
}
531 
532 inline uint8_t *copyByteArray(StringRef Buffer) {
533   return copyByteArray(reinterpret_cast<const uint8_t *>(Buffer.data()),
534                        Buffer.size());
535 }
536 
537 inline uint8_t *copyByteArray(ArrayRef<char> Buffer) {
538   return copyByteArray(reinterpret_cast<const uint8_t *>(Buffer.data()),
539                        Buffer.size());
540 }
541 
542 inline raw_ostream &operator<<(raw_ostream &OS, const BinarySection &Section) {
543   Section.print(OS);
544   return OS;
545 }
546 
547 } // namespace bolt
548 } // namespace llvm
549 
550 #endif
551