1 //===- bolt/Profile/BoltAddressTranslation.h --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef BOLT_PROFILE_BOLTADDRESSTRANSLATION_H 10 #define BOLT_PROFILE_BOLTADDRESSTRANSLATION_H 11 12 #include "llvm/ADT/SmallVector.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/Support/DataExtractor.h" 15 #include <cstdint> 16 #include <map> 17 #include <optional> 18 #include <system_error> 19 #include <unordered_map> 20 21 namespace llvm { 22 class MCSymbol; 23 class raw_ostream; 24 25 namespace object { 26 class ELFObjectFileBase; 27 } // namespace object 28 29 namespace bolt { 30 class BinaryBasicBlock; 31 class BinaryContext; 32 class BinaryFunction; 33 34 /// The map of output addresses to input ones to be used when translating 35 /// samples collected in a binary that was already processed by BOLT. We do not 36 /// support reoptimizing a binary already processed by BOLT, but we do support 37 /// collecting samples in a binary processed by BOLT. We then translate samples 38 /// back to addresses from the input (original) binary, one that can be 39 /// optimized. The goal is to avoid special deployments of non-bolted binaries 40 /// just for the purposes of data collection. 41 /// 42 /// The in-memory representation of the map is as follows. Each function has its 43 /// own map. A function is identified by its output address. This is the key to 44 /// retrieve a translation map. The translation map is a collection of ordered 45 /// keys identifying the start of a region (relative to the function start) in 46 /// the output address space (addresses in the binary processed by BOLT). 47 /// 48 /// A translation then happens when perf2bolt needs to convert sample addresses 49 /// in the output address space back to input addresses, valid to run BOLT in 50 /// the original input binary. To convert, perf2bolt first needs to fetch the 51 /// translation map for a sample recorded in a given function. It then finds 52 /// the largest key that is still smaller or equal than the recorded address. 53 /// It then converts this address to use the value of this key. 54 /// 55 /// Example translation Map for function foo 56 /// KEY VALUE BB? 57 /// Output offset1 (first BB) Original input offset1 Y 58 /// ... 59 /// Output offsetN (last branch) Original input offsetN N 60 /// 61 /// The information on whether a given entry is a BB start or an instruction 62 /// that changes control flow is encoded in the last (highest) bit of VALUE. 63 /// 64 /// Notes: 65 /// Instructions that will never appear in LBR because they do not cause control 66 /// flow change are omitted from this map. Basic block locations are recorded 67 /// because they can be a target of a jump (To address in the LBR) and also to 68 /// recreate the BB layout of this function. We use the BB layout map to 69 /// recreate fall-through jumps in the profile, given an LBR trace. 70 class BoltAddressTranslation { 71 public: 72 // In-memory representation of the address translation table 73 using MapTy = std::multimap<uint32_t, uint32_t>; 74 75 // List of taken fall-throughs 76 using FallthroughListTy = SmallVector<std::pair<uint64_t, uint64_t>, 16>; 77 78 /// Name of the ELF section where the table will be serialized to in the 79 /// output binary 80 static const char *SECTION_NAME; 81 82 BoltAddressTranslation() {} 83 84 /// Write the serialized address translation tables for each reordered 85 /// function 86 void write(const BinaryContext &BC, raw_ostream &OS); 87 88 /// Read the serialized address translation tables and load them internally 89 /// in memory. Return a parse error if failed. 90 std::error_code parse(raw_ostream &OS, StringRef Buf); 91 92 /// Dump the parsed address translation tables 93 void dump(raw_ostream &OS) const; 94 95 /// If the maps are loaded in memory, perform the lookup to translate LBR 96 /// addresses in function located at \p FuncAddress. 97 uint64_t translate(uint64_t FuncAddress, uint64_t Offset, 98 bool IsBranchSrc) const; 99 100 /// Use the map keys containing basic block addresses to infer fall-throughs 101 /// taken in the path started at FirstLBR.To and ending at SecondLBR.From. 102 /// Return std::nullopt if trace is invalid or the list of fall-throughs 103 /// otherwise. 104 std::optional<FallthroughListTy> getFallthroughsInTrace(uint64_t FuncAddress, 105 uint64_t From, 106 uint64_t To) const; 107 108 /// If available, fetch the address of the hot part linked to the cold part 109 /// at \p Address. Return 0 otherwise. 110 uint64_t fetchParentAddress(uint64_t Address) const { 111 auto Iter = ColdPartSource.find(Address); 112 if (Iter == ColdPartSource.end()) 113 return 0; 114 return Iter->second; 115 } 116 117 /// True if the input binary has a translation table we can use to convert 118 /// addresses when aggregating profile 119 bool enabledFor(llvm::object::ELFObjectFileBase *InputFile) const; 120 121 /// Save function and basic block hashes used for metadata dump. 122 void saveMetadata(BinaryContext &BC); 123 124 /// True if a given \p Address is a function with translation table entry. 125 bool isBATFunction(uint64_t Address) const { return Maps.count(Address); } 126 127 /// For a given \p Symbol in the output binary and known \p InputOffset 128 /// return a corresponding pair of parent BinaryFunction and secondary entry 129 /// point in it. 130 std::pair<const BinaryFunction *, unsigned> 131 translateSymbol(const BinaryContext &BC, const MCSymbol &Symbol, 132 uint32_t InputOffset) const; 133 134 private: 135 /// Helper to update \p Map by inserting one or more BAT entries reflecting 136 /// \p BB for function located at \p FuncAddress. At least one entry will be 137 /// emitted for the start of the BB. More entries may be emitted to cover 138 /// the location of calls or any instruction that may change control flow. 139 void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB, 140 uint64_t FuncInputAddress, 141 uint64_t FuncOutputAddress) const; 142 143 /// Write the serialized address translation table for a function. 144 template <bool Cold> void writeMaps(uint64_t &PrevAddress, raw_ostream &OS); 145 146 /// Read the serialized address translation table for a function. 147 /// Return a parse error if failed. 148 template <bool Cold> 149 void parseMaps(uint64_t &PrevAddress, DataExtractor &DE, uint64_t &Offset, 150 Error &Err); 151 152 /// Returns the bitmask with set bits corresponding to indices of BRANCHENTRY 153 /// entries in function address translation map. 154 APInt calculateBranchEntriesBitMask(MapTy &Map, size_t EqualElems) const; 155 156 /// Calculate the number of equal offsets (output = input - skew) in the 157 /// beginning of the function. 158 size_t getNumEqualOffsets(const MapTy &Map, uint32_t Skew) const; 159 160 std::map<uint64_t, MapTy> Maps; 161 162 /// Ordered vector with addresses of hot functions. 163 std::vector<uint64_t> HotFuncs; 164 165 /// Map a function to its basic blocks count 166 std::unordered_map<uint64_t, size_t> NumBasicBlocksMap; 167 168 /// Map a function to its secondary entry points vector 169 std::unordered_map<uint64_t, std::vector<uint32_t>> SecondaryEntryPointsMap; 170 171 /// Return a secondary entry point ID for a function located at \p Address and 172 /// \p Offset within that function. 173 unsigned getSecondaryEntryPointId(uint64_t Address, uint32_t Offset) const; 174 175 /// Links outlined cold bocks to their original function 176 std::map<uint64_t, uint64_t> ColdPartSource; 177 178 /// Links output address of a main fragment back to input address. 179 std::unordered_map<uint64_t, uint64_t> ReverseMap; 180 181 /// Identifies the address of a control-flow changing instructions in a 182 /// translation map entry 183 const static uint32_t BRANCHENTRY = 0x1; 184 185 public: 186 /// Map basic block input offset to a basic block index and hash pair. 187 class BBHashMapTy { 188 struct EntryTy { 189 unsigned Index; 190 size_t Hash; 191 }; 192 193 std::map<uint32_t, EntryTy> Map; 194 const EntryTy &getEntry(uint32_t BBInputOffset) const { 195 auto It = Map.find(BBInputOffset); 196 assert(It != Map.end()); 197 return It->second; 198 } 199 200 public: 201 bool isInputBlock(uint32_t InputOffset) const { 202 return Map.count(InputOffset); 203 } 204 205 unsigned getBBIndex(uint32_t BBInputOffset) const { 206 return getEntry(BBInputOffset).Index; 207 } 208 209 size_t getBBHash(uint32_t BBInputOffset) const { 210 return getEntry(BBInputOffset).Hash; 211 } 212 213 void addEntry(uint32_t BBInputOffset, unsigned BBIndex, size_t BBHash) { 214 Map.emplace(BBInputOffset, EntryTy{BBIndex, BBHash}); 215 } 216 217 size_t getNumBasicBlocks() const { return Map.size(); } 218 219 auto begin() const { return Map.begin(); } 220 auto end() const { return Map.end(); } 221 auto upper_bound(uint32_t Offset) const { return Map.upper_bound(Offset); } 222 auto size() const { return Map.size(); } 223 }; 224 225 /// Map function output address to its hash and basic blocks hash map. 226 class FuncHashesTy { 227 struct EntryTy { 228 size_t Hash; 229 BBHashMapTy BBHashMap; 230 }; 231 232 std::unordered_map<uint64_t, EntryTy> Map; 233 const EntryTy &getEntry(uint64_t FuncOutputAddress) const { 234 auto It = Map.find(FuncOutputAddress); 235 assert(It != Map.end()); 236 return It->second; 237 } 238 239 public: 240 size_t getBFHash(uint64_t FuncOutputAddress) const { 241 return getEntry(FuncOutputAddress).Hash; 242 } 243 244 const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const { 245 return getEntry(FuncOutputAddress).BBHashMap; 246 } 247 248 void addEntry(uint64_t FuncOutputAddress, size_t BFHash) { 249 Map.emplace(FuncOutputAddress, EntryTy{BFHash, BBHashMapTy()}); 250 } 251 252 size_t getNumFunctions() const { return Map.size(); }; 253 254 size_t getNumBasicBlocks() const { 255 size_t NumBasicBlocks{0}; 256 for (auto &I : Map) 257 NumBasicBlocks += I.second.BBHashMap.getNumBasicBlocks(); 258 return NumBasicBlocks; 259 } 260 }; 261 262 /// Returns BF hash by function output address (after BOLT). 263 size_t getBFHash(uint64_t FuncOutputAddress) const { 264 return FuncHashes.getBFHash(FuncOutputAddress); 265 } 266 267 /// Returns BBHashMap by function output address (after BOLT). 268 const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const { 269 return FuncHashes.getBBHashMap(FuncOutputAddress); 270 } 271 272 BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) { 273 return const_cast<BBHashMapTy &>( 274 std::as_const(*this).getBBHashMap(FuncOutputAddress)); 275 } 276 277 /// Returns the number of basic blocks in a function. 278 size_t getNumBasicBlocks(uint64_t OutputAddress) const { 279 auto It = NumBasicBlocksMap.find(OutputAddress); 280 assert(It != NumBasicBlocksMap.end()); 281 return It->second; 282 } 283 284 private: 285 FuncHashesTy FuncHashes; 286 }; 287 } // namespace bolt 288 289 } // namespace llvm 290 291 #endif 292