xref: /llvm-project/bolt/include/bolt/Profile/BoltAddressTranslation.h (revision 79d695f049343c96eccbce9c06357256bc567be3)
1 //===- bolt/Profile/BoltAddressTranslation.h --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef BOLT_PROFILE_BOLTADDRESSTRANSLATION_H
10 #define BOLT_PROFILE_BOLTADDRESSTRANSLATION_H
11 
12 #include "llvm/ADT/SmallVector.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/DataExtractor.h"
15 #include <cstdint>
16 #include <map>
17 #include <optional>
18 #include <system_error>
19 #include <unordered_map>
20 
21 namespace llvm {
22 class MCSymbol;
23 class raw_ostream;
24 
25 namespace object {
26 class ELFObjectFileBase;
27 } // namespace object
28 
29 namespace bolt {
30 class BinaryBasicBlock;
31 class BinaryContext;
32 class BinaryFunction;
33 
34 /// The map of output addresses to input ones to be used when translating
35 /// samples collected in a binary that was already processed by BOLT. We do not
36 /// support reoptimizing a binary already processed by BOLT, but we do support
37 /// collecting samples in a binary processed by BOLT. We then translate samples
38 /// back to addresses from the input (original) binary, one that can be
39 /// optimized. The goal is to avoid special deployments of non-bolted binaries
40 /// just for the purposes of data collection.
41 ///
42 /// The in-memory representation of the map is as follows. Each function has its
43 /// own map. A function is identified by its output address. This is the key to
44 /// retrieve a translation map. The translation map is a collection of ordered
45 /// keys identifying the start of a region (relative to the function start) in
46 /// the output address space (addresses in the binary processed by BOLT).
47 ///
48 /// A translation then happens when perf2bolt needs to convert sample addresses
49 /// in the output address space back to input addresses, valid to run BOLT in
50 /// the original input binary. To convert, perf2bolt first needs to fetch the
51 /// translation map for a sample recorded in a given function. It then finds
52 /// the largest key that is still smaller or equal than the recorded address.
53 /// It then converts this address to use the value of this key.
54 ///
55 ///   Example translation Map for function foo
56 ///      KEY                             VALUE                    BB?
57 ///    Output offset1 (first BB)         Original input offset1   Y
58 ///    ...
59 ///    Output offsetN (last branch)      Original input offsetN   N
60 ///
61 /// The information on whether a given entry is a BB start or an instruction
62 /// that changes control flow is encoded in the last (highest) bit of VALUE.
63 ///
64 /// Notes:
65 /// Instructions that will never appear in LBR because they do not cause control
66 /// flow change are omitted from this map. Basic block locations are recorded
67 /// because they can be a target of a jump (To address in the LBR) and also to
68 /// recreate the BB layout of this function. We use the BB layout map to
69 /// recreate fall-through jumps in the profile, given an LBR trace.
70 class BoltAddressTranslation {
71 public:
72   // In-memory representation of the address translation table
73   using MapTy = std::multimap<uint32_t, uint32_t>;
74 
75   // List of taken fall-throughs
76   using FallthroughListTy = SmallVector<std::pair<uint64_t, uint64_t>, 16>;
77 
78   /// Name of the ELF section where the table will be serialized to in the
79   /// output binary
80   static const char *SECTION_NAME;
81 
82   BoltAddressTranslation() {}
83 
84   /// Write the serialized address translation tables for each reordered
85   /// function
86   void write(const BinaryContext &BC, raw_ostream &OS);
87 
88   /// Read the serialized address translation tables and load them internally
89   /// in memory. Return a parse error if failed.
90   std::error_code parse(raw_ostream &OS, StringRef Buf);
91 
92   /// Dump the parsed address translation tables
93   void dump(raw_ostream &OS) const;
94 
95   /// If the maps are loaded in memory, perform the lookup to translate LBR
96   /// addresses in function located at \p FuncAddress.
97   uint64_t translate(uint64_t FuncAddress, uint64_t Offset,
98                      bool IsBranchSrc) const;
99 
100   /// Use the map keys containing basic block addresses to infer fall-throughs
101   /// taken in the path started at FirstLBR.To and ending at SecondLBR.From.
102   /// Return std::nullopt if trace is invalid or the list of fall-throughs
103   /// otherwise.
104   std::optional<FallthroughListTy> getFallthroughsInTrace(uint64_t FuncAddress,
105                                                           uint64_t From,
106                                                           uint64_t To) const;
107 
108   /// If available, fetch the address of the hot part linked to the cold part
109   /// at \p Address. Return 0 otherwise.
110   uint64_t fetchParentAddress(uint64_t Address) const {
111     auto Iter = ColdPartSource.find(Address);
112     if (Iter == ColdPartSource.end())
113       return 0;
114     return Iter->second;
115   }
116 
117   /// True if the input binary has a translation table we can use to convert
118   /// addresses when aggregating profile
119   bool enabledFor(llvm::object::ELFObjectFileBase *InputFile) const;
120 
121   /// Save function and basic block hashes used for metadata dump.
122   void saveMetadata(BinaryContext &BC);
123 
124   /// True if a given \p Address is a function with translation table entry.
125   bool isBATFunction(uint64_t Address) const { return Maps.count(Address); }
126 
127   /// For a given \p Symbol in the output binary and known \p InputOffset
128   /// return a corresponding pair of parent BinaryFunction and secondary entry
129   /// point in it.
130   std::pair<const BinaryFunction *, unsigned>
131   translateSymbol(const BinaryContext &BC, const MCSymbol &Symbol,
132                   uint32_t InputOffset) const;
133 
134 private:
135   /// Helper to update \p Map by inserting one or more BAT entries reflecting
136   /// \p BB for function located at \p FuncAddress. At least one entry will be
137   /// emitted for the start of the BB. More entries may be emitted to cover
138   /// the location of calls or any instruction that may change control flow.
139   void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB,
140                          uint64_t FuncInputAddress,
141                          uint64_t FuncOutputAddress) const;
142 
143   /// Write the serialized address translation table for a function.
144   template <bool Cold> void writeMaps(uint64_t &PrevAddress, raw_ostream &OS);
145 
146   /// Read the serialized address translation table for a function.
147   /// Return a parse error if failed.
148   template <bool Cold>
149   void parseMaps(uint64_t &PrevAddress, DataExtractor &DE, uint64_t &Offset,
150                  Error &Err);
151 
152   /// Returns the bitmask with set bits corresponding to indices of BRANCHENTRY
153   /// entries in function address translation map.
154   APInt calculateBranchEntriesBitMask(MapTy &Map, size_t EqualElems) const;
155 
156   /// Calculate the number of equal offsets (output = input - skew) in the
157   /// beginning of the function.
158   size_t getNumEqualOffsets(const MapTy &Map, uint32_t Skew) const;
159 
160   std::map<uint64_t, MapTy> Maps;
161 
162   /// Ordered vector with addresses of hot functions.
163   std::vector<uint64_t> HotFuncs;
164 
165   /// Map a function to its basic blocks count
166   std::unordered_map<uint64_t, size_t> NumBasicBlocksMap;
167 
168   /// Map a function to its secondary entry points vector
169   std::unordered_map<uint64_t, std::vector<uint32_t>> SecondaryEntryPointsMap;
170 
171   /// Return a secondary entry point ID for a function located at \p Address and
172   /// \p Offset within that function.
173   unsigned getSecondaryEntryPointId(uint64_t Address, uint32_t Offset) const;
174 
175   /// Links outlined cold bocks to their original function
176   std::map<uint64_t, uint64_t> ColdPartSource;
177 
178   /// Links output address of a main fragment back to input address.
179   std::unordered_map<uint64_t, uint64_t> ReverseMap;
180 
181   /// Identifies the address of a control-flow changing instructions in a
182   /// translation map entry
183   const static uint32_t BRANCHENTRY = 0x1;
184 
185 public:
186   /// Map basic block input offset to a basic block index and hash pair.
187   class BBHashMapTy {
188     struct EntryTy {
189       unsigned Index;
190       size_t Hash;
191     };
192 
193     std::map<uint32_t, EntryTy> Map;
194     const EntryTy &getEntry(uint32_t BBInputOffset) const {
195       auto It = Map.find(BBInputOffset);
196       assert(It != Map.end());
197       return It->second;
198     }
199 
200   public:
201     bool isInputBlock(uint32_t InputOffset) const {
202       return Map.count(InputOffset);
203     }
204 
205     unsigned getBBIndex(uint32_t BBInputOffset) const {
206       return getEntry(BBInputOffset).Index;
207     }
208 
209     size_t getBBHash(uint32_t BBInputOffset) const {
210       return getEntry(BBInputOffset).Hash;
211     }
212 
213     void addEntry(uint32_t BBInputOffset, unsigned BBIndex, size_t BBHash) {
214       Map.emplace(BBInputOffset, EntryTy{BBIndex, BBHash});
215     }
216 
217     size_t getNumBasicBlocks() const { return Map.size(); }
218 
219     auto begin() const { return Map.begin(); }
220     auto end() const { return Map.end(); }
221     auto upper_bound(uint32_t Offset) const { return Map.upper_bound(Offset); }
222     auto size() const { return Map.size(); }
223   };
224 
225   /// Map function output address to its hash and basic blocks hash map.
226   class FuncHashesTy {
227     struct EntryTy {
228       size_t Hash;
229       BBHashMapTy BBHashMap;
230     };
231 
232     std::unordered_map<uint64_t, EntryTy> Map;
233     const EntryTy &getEntry(uint64_t FuncOutputAddress) const {
234       auto It = Map.find(FuncOutputAddress);
235       assert(It != Map.end());
236       return It->second;
237     }
238 
239   public:
240     size_t getBFHash(uint64_t FuncOutputAddress) const {
241       return getEntry(FuncOutputAddress).Hash;
242     }
243 
244     const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
245       return getEntry(FuncOutputAddress).BBHashMap;
246     }
247 
248     void addEntry(uint64_t FuncOutputAddress, size_t BFHash) {
249       Map.emplace(FuncOutputAddress, EntryTy{BFHash, BBHashMapTy()});
250     }
251 
252     size_t getNumFunctions() const { return Map.size(); };
253 
254     size_t getNumBasicBlocks() const {
255       size_t NumBasicBlocks{0};
256       for (auto &I : Map)
257         NumBasicBlocks += I.second.BBHashMap.getNumBasicBlocks();
258       return NumBasicBlocks;
259     }
260   };
261 
262   /// Returns BF hash by function output address (after BOLT).
263   size_t getBFHash(uint64_t FuncOutputAddress) const {
264     return FuncHashes.getBFHash(FuncOutputAddress);
265   }
266 
267   /// Returns BBHashMap by function output address (after BOLT).
268   const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
269     return FuncHashes.getBBHashMap(FuncOutputAddress);
270   }
271 
272   BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) {
273     return const_cast<BBHashMapTy &>(
274         std::as_const(*this).getBBHashMap(FuncOutputAddress));
275   }
276 
277   /// Returns the number of basic blocks in a function.
278   size_t getNumBasicBlocks(uint64_t OutputAddress) const {
279     auto It = NumBasicBlocksMap.find(OutputAddress);
280     assert(It != NumBasicBlocksMap.end());
281     return It->second;
282   }
283 
284 private:
285   FuncHashesTy FuncHashes;
286 };
287 } // namespace bolt
288 
289 } // namespace llvm
290 
291 #endif
292