xref: /llvm-project/bolt/include/bolt/Core/BinaryData.h (revision 6e8a1a45a783c13e4cd19bfd20b7a56cab6f7d81)
1 //===- bolt/Core/BinaryData.h - Objects in a binary file --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the declaration of the BinaryData class, which represents
10 // an allocatable entity in a binary file, such as a data object, a jump table,
11 // or a function.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef BOLT_CORE_BINARY_DATA_H
16 #define BOLT_CORE_BINARY_DATA_H
17 
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/MC/MCSymbol.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include <string>
22 #include <vector>
23 
24 namespace llvm {
25 namespace bolt {
26 
27 class BinarySection;
28 
29 /// \p BinaryData represents an indivisible part of a data section section.
30 /// BinaryData's may contain sub-components, e.g. jump tables but they are
31 /// considered to be part of the parent symbol in terms of divisibility and
32 /// reordering.
33 class BinaryData {
34   friend class BinaryContext;
35   /// Non-null if this BinaryData is contained in a larger BinaryData object,
36   /// i.e. the start and end addresses are contained within another object.
37   BinaryData *Parent{nullptr};
38 
39   // non-copyable
40   BinaryData() = delete;
41   BinaryData(const BinaryData &) = delete;
42   BinaryData &operator=(const BinaryData &) = delete;
43 
44 protected:
45   /// All symbols associated with this data.
46   std::vector<MCSymbol *> Symbols;
47 
48   /// Section this data belongs to.
49   BinarySection *Section{nullptr};
50 
51   /// Start address of this symbol.
52   uint64_t Address{0};
53   /// Size of this data (can be 0).
54   uint64_t Size{0};
55   /// Alignment of this data.
56   uint16_t Alignment{1};
57 
58   bool IsMoveable{true};
59 
60   /// Symbol flags (same as llvm::SymbolRef::Flags)
61   unsigned Flags{0};
62 
63   /// Output section for this data if it has been moved from the original
64   /// section.
65   BinarySection *OutputSection{nullptr};
66 
67   /// The offset of this symbol in the output section.  This is different
68   /// from \p Address - Section.getAddress() when the data has been reordered.
69   uint64_t OutputOffset{0};
70 
71   BinaryData *getRootData() {
72     BinaryData *BD = this;
73     while (BD->Parent)
74       BD = BD->Parent;
75     return BD;
76   }
77 
78 public:
79   BinaryData(BinaryData &&) = default;
80   BinaryData(MCSymbol &Symbol, uint64_t Address, uint64_t Size,
81              uint16_t Alignment, BinarySection &Section, unsigned Flags = 0);
82   virtual ~BinaryData() {}
83 
84   virtual bool isJumpTable() const { return false; }
85   virtual bool isObject() const { return !isJumpTable(); }
86   virtual void merge(const BinaryData *Other);
87 
88   bool isTopLevelJumpTable() const {
89     return (isJumpTable() &&
90             (!Parent || (!Parent->Parent && Parent->isObject())));
91   }
92 
93   // BinaryData that is considered atomic and potentially moveable.  All
94   // MemInfo data and relocations should be wrt. to atomic data.
95   bool isAtomic() const { return isTopLevelJumpTable() || !Parent; }
96 
97   iterator_range<std::vector<MCSymbol *>::const_iterator> symbols() const {
98     return make_range(Symbols.begin(), Symbols.end());
99   }
100 
101   StringRef getName() const { return getSymbol()->getName(); }
102 
103   MCSymbol *getSymbol() { return Symbols.front(); }
104   const MCSymbol *getSymbol() const { return Symbols.front(); }
105 
106   const std::vector<MCSymbol *> &getSymbols() const { return Symbols; }
107   std::vector<MCSymbol *> &getSymbols() { return Symbols; }
108 
109   bool hasName(StringRef Name) const;
110   bool nameStartsWith(StringRef Prefix) const;
111 
112   bool hasSymbol(const MCSymbol *Symbol) const {
113     return llvm::is_contained(Symbols, Symbol);
114   }
115 
116   bool isAbsolute() const;
117   bool isMoveable() const;
118 
119   uint64_t getAddress() const { return Address; }
120   uint64_t getEndAddress() const { return Address + Size; }
121   uint64_t getOffset() const;
122   uint64_t getSize() const { return Size; }
123   uint16_t getAlignment() const { return Alignment; }
124 
125   BinarySection &getSection() { return *Section; }
126   const BinarySection &getSection() const { return *Section; }
127   StringRef getSectionName() const;
128 
129   BinarySection &getOutputSection() { return *OutputSection; }
130   const BinarySection &getOutputSection() const { return *OutputSection; }
131   StringRef getOutputSectionName() const;
132   uint64_t getOutputAddress() const;
133   uint64_t getOutputOffset() const { return OutputOffset; }
134   uint64_t getOutputSize() const { return Size; }
135 
136   bool isMoved() const;
137   bool containsAddress(uint64_t Address) const {
138     return ((getAddress() <= Address && Address < getEndAddress()) ||
139             (getAddress() == Address && !getSize()));
140   }
141   bool containsRange(uint64_t Address, uint64_t Size) const {
142     return containsAddress(Address) && Address + Size <= getEndAddress();
143   }
144 
145   const BinaryData *getParent() const { return Parent; }
146 
147   const BinaryData *getRootData() const {
148     const BinaryData *BD = this;
149     while (BD->Parent)
150       BD = BD->Parent;
151     return BD;
152   }
153 
154   BinaryData *getAtomicRoot() {
155     BinaryData *BD = this;
156     while (!BD->isAtomic() && BD->Parent)
157       BD = BD->Parent;
158     return BD;
159   }
160 
161   const BinaryData *getAtomicRoot() const {
162     const BinaryData *BD = this;
163     while (!BD->isAtomic() && BD->Parent)
164       BD = BD->Parent;
165     return BD;
166   }
167 
168   bool isAncestorOf(const BinaryData *BD) const {
169     return Parent && (Parent == BD || Parent->isAncestorOf(BD));
170   }
171 
172   void updateSize(uint64_t N) {
173     if (N > Size)
174       Size = N;
175   }
176 
177   void setIsMoveable(bool Flag) { IsMoveable = Flag; }
178   void setSection(BinarySection &NewSection);
179   void setOutputSection(BinarySection &NewSection) {
180     OutputSection = &NewSection;
181   }
182   void setOutputOffset(uint64_t Offset) { OutputOffset = Offset; }
183   void setOutputLocation(BinarySection &NewSection, uint64_t NewOffset) {
184     setOutputSection(NewSection);
185     setOutputOffset(NewOffset);
186   }
187 
188   virtual void printBrief(raw_ostream &OS) const;
189   virtual void print(raw_ostream &OS) const;
190 };
191 
192 inline raw_ostream &operator<<(raw_ostream &OS, const BinaryData &BD) {
193   BD.printBrief(OS);
194   return OS;
195 }
196 
197 /// Address access info used for memory profiling.
198 struct AddressAccess {
199   BinaryData *MemoryObject; /// Object accessed or nullptr
200   uint64_t Offset;          /// Offset within the object or absolute address
201   uint64_t Count;           /// Number of accesses
202   bool operator==(const AddressAccess &Other) const {
203     return MemoryObject == Other.MemoryObject && Offset == Other.Offset &&
204            Count == Other.Count;
205   }
206 };
207 
208 /// Aggregated memory access info per instruction.
209 struct MemoryAccessProfile {
210   uint64_t NextInstrOffset;
211   SmallVector<AddressAccess, 4> AddressAccessInfo;
212   bool operator==(const MemoryAccessProfile &Other) const {
213     return NextInstrOffset == Other.NextInstrOffset &&
214            AddressAccessInfo == Other.AddressAccessInfo;
215   }
216 };
217 
218 inline raw_ostream &operator<<(raw_ostream &OS,
219                                const bolt::MemoryAccessProfile &MAP) {
220   std::string TempString;
221   raw_string_ostream SS(TempString);
222 
223   const char *Sep = "\n        ";
224   uint64_t TotalCount = 0;
225   for (const AddressAccess &AccessInfo : MAP.AddressAccessInfo) {
226     SS << Sep << "{ ";
227     if (AccessInfo.MemoryObject)
228       SS << AccessInfo.MemoryObject->getName() << " + ";
229     SS << "0x" << Twine::utohexstr(AccessInfo.Offset) << ": "
230        << AccessInfo.Count << " }";
231     Sep = ",\n        ";
232     TotalCount += AccessInfo.Count;
233   }
234 
235   OS << TotalCount << " total counts : " << TempString;
236   return OS;
237 }
238 
239 } // namespace bolt
240 } // namespace llvm
241 
242 #endif
243