1 //===- GsymReader.h ---------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H 10 #define LLVM_DEBUGINFO_GSYM_GSYMREADER_H 11 12 #include "llvm/ADT/ArrayRef.h" 13 #include "llvm/DebugInfo/GSYM/FileEntry.h" 14 #include "llvm/DebugInfo/GSYM/FunctionInfo.h" 15 #include "llvm/DebugInfo/GSYM/Header.h" 16 #include "llvm/DebugInfo/GSYM/LineEntry.h" 17 #include "llvm/DebugInfo/GSYM/StringTable.h" 18 #include "llvm/Support/DataExtractor.h" 19 #include "llvm/Support/Endian.h" 20 #include "llvm/Support/ErrorOr.h" 21 #include <inttypes.h> 22 #include <memory> 23 #include <stdint.h> 24 #include <vector> 25 26 namespace llvm { 27 class MemoryBuffer; 28 class raw_ostream; 29 30 namespace gsym { 31 32 /// GsymReader is used to read GSYM data from a file or buffer. 33 /// 34 /// This class is optimized for very quick lookups when the endianness matches 35 /// the host system. The Header, address table, address info offsets, and file 36 /// table is designed to be mmap'ed as read only into memory and used without 37 /// any parsing needed. If the endianness doesn't match, we swap these objects 38 /// and tables into GsymReader::SwappedData and then point our header and 39 /// ArrayRefs to this swapped internal data. 40 /// 41 /// GsymReader objects must use one of the static functions to create an 42 /// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...). 43 44 class GsymReader { 45 GsymReader(std::unique_ptr<MemoryBuffer> Buffer); 46 llvm::Error parse(); 47 48 std::unique_ptr<MemoryBuffer> MemBuffer; 49 StringRef GsymBytes; 50 llvm::endianness Endian; 51 const Header *Hdr = nullptr; 52 ArrayRef<uint8_t> AddrOffsets; 53 ArrayRef<uint32_t> AddrInfoOffsets; 54 ArrayRef<FileEntry> Files; 55 StringTable StrTab; 56 /// When the GSYM file's endianness doesn't match the host system then 57 /// we must decode all data structures that need to be swapped into 58 /// local storage and set point the ArrayRef objects above to these swapped 59 /// copies. 60 struct SwappedData { 61 Header Hdr; 62 std::vector<uint8_t> AddrOffsets; 63 std::vector<uint32_t> AddrInfoOffsets; 64 std::vector<FileEntry> Files; 65 }; 66 std::unique_ptr<SwappedData> Swap; 67 68 public: 69 GsymReader(GsymReader &&RHS); 70 ~GsymReader(); 71 72 /// Construct a GsymReader from a file on disk. 73 /// 74 /// \param Path The file path the GSYM file to read. 75 /// \returns An expected GsymReader that contains the object or an error 76 /// object that indicates reason for failing to read the GSYM. 77 static llvm::Expected<GsymReader> openFile(StringRef Path); 78 79 /// Construct a GsymReader from a buffer. 80 /// 81 /// \param Bytes A set of bytes that will be copied and owned by the 82 /// returned object on success. 83 /// \returns An expected GsymReader that contains the object or an error 84 /// object that indicates reason for failing to read the GSYM. 85 static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes); 86 87 /// Access the GSYM header. 88 /// \returns A native endian version of the GSYM header. 89 const Header &getHeader() const; 90 91 /// Get the full function info for an address. 92 /// 93 /// This should be called when a client will store a copy of the complete 94 /// FunctionInfo for a given address. For one off lookups, use the lookup() 95 /// function below. 96 /// 97 /// Symbolication server processes might want to parse the entire function 98 /// info for a given address and cache it if the process stays around to 99 /// service many symbolication addresses, like for parsing profiling 100 /// information. 101 /// 102 /// \param Addr A virtual address from the orignal object file to lookup. 103 /// 104 /// \returns An expected FunctionInfo that contains the function info object 105 /// or an error object that indicates reason for failing to lookup the 106 /// address. 107 llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const; 108 109 /// Get the full function info given an address index. 110 /// 111 /// \param AddrIdx A address index for an address in the address table. 112 /// 113 /// \returns An expected FunctionInfo that contains the function info object 114 /// or an error object that indicates reason for failing get the function 115 /// info object. 116 llvm::Expected<FunctionInfo> getFunctionInfoAtIndex(uint64_t AddrIdx) const; 117 118 /// Lookup an address in the a GSYM. 119 /// 120 /// Lookup just the information needed for a specific address \a Addr. This 121 /// function is faster that calling getFunctionInfo() as it will only return 122 /// information that pertains to \a Addr and allows the parsing to skip any 123 /// extra information encoded for other addresses. For example the line table 124 /// parsing can stop when a matching LineEntry has been fouhnd, and the 125 /// InlineInfo can stop parsing early once a match has been found and also 126 /// skip information that doesn't match. This avoids memory allocations and 127 /// is much faster for lookups. 128 /// 129 /// \param Addr A virtual address from the orignal object file to lookup. 130 /// 131 /// \param MergedFuncsData A pointer to an optional DataExtractor that, if 132 /// non-null, will be set to the raw data of the MergedFunctionInfo, if 133 /// present. 134 /// 135 /// \returns An expected LookupResult that contains only the information 136 /// needed for the current address, or an error object that indicates reason 137 /// for failing to lookup the address. 138 llvm::Expected<LookupResult> 139 lookup(uint64_t Addr, 140 std::optional<DataExtractor> *MergedFuncsData = nullptr) const; 141 142 /// Lookup all merged functions for a given address. 143 /// 144 /// This function performs a lookup for the specified address and then 145 /// retrieves additional LookupResults from any merged functions associated 146 /// with the primary LookupResult. 147 /// 148 /// \param Addr The address to lookup. 149 /// 150 /// \returns A vector of LookupResult objects, where the first element is the 151 /// primary result, followed by results for any merged functions 152 llvm::Expected<std::vector<LookupResult>> lookupAll(uint64_t Addr) const; 153 154 /// Get a string from the string table. 155 /// 156 /// \param Offset The string table offset for the string to retrieve. 157 /// \returns The string from the strin table. 158 StringRef getString(uint32_t Offset) const { return StrTab[Offset]; } 159 160 /// Get the a file entry for the suppplied file index. 161 /// 162 /// Used to convert any file indexes in the FunctionInfo data back into 163 /// files. This function can be used for iteration, but is more commonly used 164 /// for random access when doing lookups. 165 /// 166 /// \param Index An index into the file table. 167 /// \returns An optional FileInfo that will be valid if the file index is 168 /// valid, or std::nullopt if the file index is out of bounds, 169 std::optional<FileEntry> getFile(uint32_t Index) const { 170 if (Index < Files.size()) 171 return Files[Index]; 172 return std::nullopt; 173 } 174 175 /// Dump the entire Gsym data contained in this object. 176 /// 177 /// \param OS The output stream to dump to. 178 void dump(raw_ostream &OS); 179 180 /// Dump a FunctionInfo object. 181 /// 182 /// This function will convert any string table indexes and file indexes 183 /// into human readable format. 184 /// 185 /// \param OS The output stream to dump to. 186 /// 187 /// \param FI The object to dump. 188 /// 189 /// \param Indent The indentation as number of spaces. Used when dumping as an 190 /// item within MergedFunctionsInfo. 191 void dump(raw_ostream &OS, const FunctionInfo &FI, uint32_t Indent = 0); 192 193 /// Dump a MergedFunctionsInfo object. 194 /// 195 /// This function will dump a MergedFunctionsInfo object - basically by 196 /// dumping the contained FunctionInfo objects with indentation. 197 /// 198 /// \param OS The output stream to dump to. 199 /// 200 /// \param MFI The object to dump. 201 void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI); 202 203 /// Dump a CallSiteInfo object. 204 /// 205 /// This function will output the details of a CallSiteInfo object in a 206 /// human-readable format. 207 /// 208 /// \param OS The output stream to dump to. 209 /// 210 /// \param CSI The CallSiteInfo object to dump. 211 void dump(raw_ostream &OS, const CallSiteInfo &CSI); 212 213 /// Dump a CallSiteInfoCollection object. 214 /// 215 /// This function will iterate over a collection of CallSiteInfo objects and 216 /// dump each one. 217 /// 218 /// \param OS The output stream to dump to. 219 /// 220 /// \param CSIC The CallSiteInfoCollection object to dump. 221 /// 222 /// \param Indent The indentation as number of spaces. Used when dumping as an 223 /// item from within MergedFunctionsInfo. 224 void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC, 225 uint32_t Indent = 0); 226 227 /// Dump a LineTable object. 228 /// 229 /// This function will convert any string table indexes and file indexes 230 /// into human readable format. 231 /// 232 /// 233 /// \param OS The output stream to dump to. 234 /// 235 /// \param LT The object to dump. 236 /// 237 /// \param Indent The indentation as number of spaces. Used when dumping as an 238 /// item from within MergedFunctionsInfo. 239 void dump(raw_ostream &OS, const LineTable <, uint32_t Indent = 0); 240 241 /// Dump a InlineInfo object. 242 /// 243 /// This function will convert any string table indexes and file indexes 244 /// into human readable format. 245 /// 246 /// \param OS The output stream to dump to. 247 /// 248 /// \param II The object to dump. 249 /// 250 /// \param Indent The indentation as number of spaces. Used for recurive 251 /// dumping. 252 void dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent = 0); 253 254 /// Dump a FileEntry object. 255 /// 256 /// This function will convert any string table indexes into human readable 257 /// format. 258 /// 259 /// \param OS The output stream to dump to. 260 /// 261 /// \param FE The object to dump. 262 void dump(raw_ostream &OS, std::optional<FileEntry> FE); 263 264 /// Get the number of addresses in this Gsym file. 265 uint32_t getNumAddresses() const { 266 return Hdr->NumAddresses; 267 } 268 269 /// Gets an address from the address table. 270 /// 271 /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress. 272 /// 273 /// \param Index A index into the address table. 274 /// \returns A resolved virtual address for adddress in the address table 275 /// or std::nullopt if Index is out of bounds. 276 std::optional<uint64_t> getAddress(size_t Index) const; 277 278 protected: 279 280 /// Get an appropriate address info offsets array. 281 /// 282 /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 283 /// byte offsets from the The gsym::Header::BaseAddress. The table is stored 284 /// internally as a array of bytes that are in the correct endianness. When 285 /// we access this table we must get an array that matches those sizes. This 286 /// templatized helper function is used when accessing address offsets in the 287 /// AddrOffsets member variable. 288 /// 289 /// \returns An ArrayRef of an appropriate address offset size. 290 template <class T> ArrayRef<T> 291 getAddrOffsets() const { 292 return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()), 293 AddrOffsets.size()/sizeof(T)); 294 } 295 296 /// Get an appropriate address from the address table. 297 /// 298 /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 299 /// byte address offsets from the The gsym::Header::BaseAddress. The table is 300 /// stored internally as a array of bytes that are in the correct endianness. 301 /// In order to extract an address from the address table we must access the 302 /// address offset using the correct size and then add it to the BaseAddress 303 /// in the header. 304 /// 305 /// \param Index An index into the AddrOffsets array. 306 /// \returns An virtual address that matches the original object file for the 307 /// address as the specified index, or std::nullopt if Index is out of bounds. 308 template <class T> 309 std::optional<uint64_t> addressForIndex(size_t Index) const { 310 ArrayRef<T> AIO = getAddrOffsets<T>(); 311 if (Index < AIO.size()) 312 return AIO[Index] + Hdr->BaseAddress; 313 return std::nullopt; 314 } 315 /// Lookup an address offset in the AddrOffsets table. 316 /// 317 /// Given an address offset, look it up using a binary search of the 318 /// AddrOffsets table. 319 /// 320 /// \param AddrOffset An address offset, that has already been computed by 321 /// subtracting the gsym::Header::BaseAddress. 322 /// \returns The matching address offset index. This index will be used to 323 /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. 324 template <class T> 325 std::optional<uint64_t> 326 getAddressOffsetIndex(const uint64_t AddrOffset) const { 327 ArrayRef<T> AIO = getAddrOffsets<T>(); 328 const auto Begin = AIO.begin(); 329 const auto End = AIO.end(); 330 auto Iter = std::lower_bound(Begin, End, AddrOffset); 331 // Watch for addresses that fall between the gsym::Header::BaseAddress and 332 // the first address offset. 333 if (Iter == Begin && AddrOffset < *Begin) 334 return std::nullopt; 335 if (Iter == End || AddrOffset < *Iter) 336 --Iter; 337 338 // GSYM files have sorted function infos with the most information (line 339 // table and/or inline info) first in the array of function infos, so 340 // always backup as much as possible as long as the address offset is the 341 // same as the previous entry. 342 while (Iter != Begin) { 343 auto Prev = Iter - 1; 344 if (*Prev == *Iter) 345 Iter = Prev; 346 else 347 break; 348 } 349 350 return std::distance(Begin, Iter); 351 } 352 353 /// Create a GSYM from a memory buffer. 354 /// 355 /// Called by both openFile() and copyBuffer(), this function does all of the 356 /// work of parsing the GSYM file and returning an error. 357 /// 358 /// \param MemBuffer A memory buffer that will transfer ownership into the 359 /// GsymReader. 360 /// \returns An expected GsymReader that contains the object or an error 361 /// object that indicates reason for failing to read the GSYM. 362 static llvm::Expected<llvm::gsym::GsymReader> 363 create(std::unique_ptr<MemoryBuffer> &MemBuffer); 364 365 366 /// Given an address, find the address index. 367 /// 368 /// Binary search the address table and find the matching address index. 369 /// 370 /// \param Addr A virtual address that matches the original object file 371 /// to lookup. 372 /// \returns An index into the address table. This index can be used to 373 /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. 374 /// Returns an error if the address isn't in the GSYM with details of why. 375 Expected<uint64_t> getAddressIndex(const uint64_t Addr) const; 376 377 /// Given an address index, get the offset for the FunctionInfo. 378 /// 379 /// Looking up an address is done by finding the corresponding address 380 /// index for the address. This index is then used to get the offset of the 381 /// FunctionInfo data that we will decode using this function. 382 /// 383 /// \param Index An index into the address table. 384 /// \returns An optional GSYM data offset for the offset of the FunctionInfo 385 /// that needs to be decoded. 386 std::optional<uint64_t> getAddressInfoOffset(size_t Index) const; 387 388 /// Given an address, find the correct function info data and function 389 /// address. 390 /// 391 /// Binary search the address table and find the matching address info 392 /// and make sure that the function info contains the address. GSYM allows 393 /// functions to overlap, and the most debug info is contained in the first 394 /// entries due to the sorting when GSYM files are created. We can have 395 /// multiple function info that start at the same address only if their 396 /// address range doesn't match. So find the first entry that matches \a Addr 397 /// and iterate forward until we find one that contains the address. 398 /// 399 /// \param[in] Addr A virtual address that matches the original object file 400 /// to lookup. 401 /// 402 /// \param[out] FuncStartAddr A virtual address that is the base address of 403 /// the function that is used for decoding the FunctionInfo. 404 /// 405 /// \returns An valid data extractor on success, or an error if we fail to 406 /// find the address in a function info or corrrectly decode the data 407 llvm::Expected<llvm::DataExtractor> 408 getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const; 409 410 /// Get the function data and address given an address index. 411 /// 412 /// \param AddrIdx A address index from the address table. 413 /// 414 /// \returns An expected FunctionInfo that contains the function info object 415 /// or an error object that indicates reason for failing to lookup the 416 /// address. 417 llvm::Expected<llvm::DataExtractor> 418 getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const; 419 }; 420 421 } // namespace gsym 422 } // namespace llvm 423 424 #endif // LLVM_DEBUGINFO_GSYM_GSYMREADER_H 425