xref: /llvm-project/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h (revision 6f28b4b5e960e1c4eeebad18b48e667df1e806a8)
1 //===- GsymReader.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
10 #define LLVM_DEBUGINFO_GSYM_GSYMREADER_H
11 
12 #include "llvm/ADT/ArrayRef.h"
13 #include "llvm/DebugInfo/GSYM/FileEntry.h"
14 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
15 #include "llvm/DebugInfo/GSYM/Header.h"
16 #include "llvm/DebugInfo/GSYM/LineEntry.h"
17 #include "llvm/DebugInfo/GSYM/StringTable.h"
18 #include "llvm/Support/DataExtractor.h"
19 #include "llvm/Support/Endian.h"
20 #include "llvm/Support/ErrorOr.h"
21 #include <inttypes.h>
22 #include <memory>
23 #include <stdint.h>
24 #include <vector>
25 
26 namespace llvm {
27 class MemoryBuffer;
28 class raw_ostream;
29 
30 namespace gsym {
31 
32 /// GsymReader is used to read GSYM data from a file or buffer.
33 ///
34 /// This class is optimized for very quick lookups when the endianness matches
35 /// the host system. The Header, address table, address info offsets, and file
36 /// table is designed to be mmap'ed as read only into memory and used without
37 /// any parsing needed. If the endianness doesn't match, we swap these objects
38 /// and tables into GsymReader::SwappedData and then point our header and
39 /// ArrayRefs to this swapped internal data.
40 ///
41 /// GsymReader objects must use one of the static functions to create an
42 /// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
43 
44 class GsymReader {
45   GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
46   llvm::Error parse();
47 
48   std::unique_ptr<MemoryBuffer> MemBuffer;
49   StringRef GsymBytes;
50   llvm::endianness Endian;
51   const Header *Hdr = nullptr;
52   ArrayRef<uint8_t> AddrOffsets;
53   ArrayRef<uint32_t> AddrInfoOffsets;
54   ArrayRef<FileEntry> Files;
55   StringTable StrTab;
56   /// When the GSYM file's endianness doesn't match the host system then
57   /// we must decode all data structures that need to be swapped into
58   /// local storage and set point the ArrayRef objects above to these swapped
59   /// copies.
60   struct SwappedData {
61     Header Hdr;
62     std::vector<uint8_t> AddrOffsets;
63     std::vector<uint32_t> AddrInfoOffsets;
64     std::vector<FileEntry> Files;
65   };
66   std::unique_ptr<SwappedData> Swap;
67 
68 public:
69   GsymReader(GsymReader &&RHS);
70   ~GsymReader();
71 
72   /// Construct a GsymReader from a file on disk.
73   ///
74   /// \param Path The file path the GSYM file to read.
75   /// \returns An expected GsymReader that contains the object or an error
76   /// object that indicates reason for failing to read the GSYM.
77   static llvm::Expected<GsymReader> openFile(StringRef Path);
78 
79   /// Construct a GsymReader from a buffer.
80   ///
81   /// \param Bytes A set of bytes that will be copied and owned by the
82   /// returned object on success.
83   /// \returns An expected GsymReader that contains the object or an error
84   /// object that indicates reason for failing to read the GSYM.
85   static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes);
86 
87   /// Access the GSYM header.
88   /// \returns A native endian version of the GSYM header.
89   const Header &getHeader() const;
90 
91   /// Get the full function info for an address.
92   ///
93   /// This should be called when a client will store a copy of the complete
94   /// FunctionInfo for a given address. For one off lookups, use the lookup()
95   /// function below.
96   ///
97   /// Symbolication server processes might want to parse the entire function
98   /// info for a given address and cache it if the process stays around to
99   /// service many symbolication addresses, like for parsing profiling
100   /// information.
101   ///
102   /// \param Addr A virtual address from the orignal object file to lookup.
103   ///
104   /// \returns An expected FunctionInfo that contains the function info object
105   /// or an error object that indicates reason for failing to lookup the
106   /// address.
107   llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
108 
109   /// Get the full function info given an address index.
110   ///
111   /// \param AddrIdx A address index for an address in the address table.
112   ///
113   /// \returns An expected FunctionInfo that contains the function info object
114   /// or an error object that indicates reason for failing get the function
115   /// info object.
116   llvm::Expected<FunctionInfo> getFunctionInfoAtIndex(uint64_t AddrIdx) const;
117 
118   /// Lookup an address in the a GSYM.
119   ///
120   /// Lookup just the information needed for a specific address \a Addr. This
121   /// function is faster that calling getFunctionInfo() as it will only return
122   /// information that pertains to \a Addr and allows the parsing to skip any
123   /// extra information encoded for other addresses. For example the line table
124   /// parsing can stop when a matching LineEntry has been fouhnd, and the
125   /// InlineInfo can stop parsing early once a match has been found and also
126   /// skip information that doesn't match. This avoids memory allocations and
127   /// is much faster for lookups.
128   ///
129   /// \param Addr A virtual address from the orignal object file to lookup.
130   ///
131   /// \param MergedFuncsData A pointer to an optional DataExtractor that, if
132   /// non-null, will be set to the raw data of the MergedFunctionInfo, if
133   /// present.
134   ///
135   /// \returns An expected LookupResult that contains only the information
136   /// needed for the current address, or an error object that indicates reason
137   /// for failing to lookup the address.
138   llvm::Expected<LookupResult>
139   lookup(uint64_t Addr,
140          std::optional<DataExtractor> *MergedFuncsData = nullptr) const;
141 
142   /// Lookup all merged functions for a given address.
143   ///
144   /// This function performs a lookup for the specified address and then
145   /// retrieves additional LookupResults from any merged functions associated
146   /// with the primary LookupResult.
147   ///
148   /// \param Addr The address to lookup.
149   ///
150   /// \returns A vector of LookupResult objects, where the first element is the
151   /// primary result, followed by results for any merged functions
152   llvm::Expected<std::vector<LookupResult>> lookupAll(uint64_t Addr) const;
153 
154   /// Get a string from the string table.
155   ///
156   /// \param Offset The string table offset for the string to retrieve.
157   /// \returns The string from the strin table.
158   StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
159 
160   /// Get the a file entry for the suppplied file index.
161   ///
162   /// Used to convert any file indexes in the FunctionInfo data back into
163   /// files. This function can be used for iteration, but is more commonly used
164   /// for random access when doing lookups.
165   ///
166   /// \param Index An index into the file table.
167   /// \returns An optional FileInfo that will be valid if the file index is
168   /// valid, or std::nullopt if the file index is out of bounds,
169   std::optional<FileEntry> getFile(uint32_t Index) const {
170     if (Index < Files.size())
171       return Files[Index];
172     return std::nullopt;
173   }
174 
175   /// Dump the entire Gsym data contained in this object.
176   ///
177   /// \param  OS The output stream to dump to.
178   void dump(raw_ostream &OS);
179 
180   /// Dump a FunctionInfo object.
181   ///
182   /// This function will convert any string table indexes and file indexes
183   /// into human readable format.
184   ///
185   /// \param  OS The output stream to dump to.
186   ///
187   /// \param FI The object to dump.
188   ///
189   /// \param Indent The indentation as number of spaces. Used when dumping as an
190   /// item within MergedFunctionsInfo.
191   void dump(raw_ostream &OS, const FunctionInfo &FI, uint32_t Indent = 0);
192 
193   /// Dump a MergedFunctionsInfo object.
194   ///
195   /// This function will dump a MergedFunctionsInfo object - basically by
196   /// dumping the contained FunctionInfo objects with indentation.
197   ///
198   /// \param  OS The output stream to dump to.
199   ///
200   /// \param MFI The object to dump.
201   void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI);
202 
203   /// Dump a CallSiteInfo object.
204   ///
205   /// This function will output the details of a CallSiteInfo object in a
206   /// human-readable format.
207   ///
208   /// \param OS The output stream to dump to.
209   ///
210   /// \param CSI The CallSiteInfo object to dump.
211   void dump(raw_ostream &OS, const CallSiteInfo &CSI);
212 
213   /// Dump a CallSiteInfoCollection object.
214   ///
215   /// This function will iterate over a collection of CallSiteInfo objects and
216   /// dump each one.
217   ///
218   /// \param OS The output stream to dump to.
219   ///
220   /// \param CSIC The CallSiteInfoCollection object to dump.
221   ///
222   /// \param Indent The indentation as number of spaces. Used when dumping as an
223   /// item from within MergedFunctionsInfo.
224   void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
225             uint32_t Indent = 0);
226 
227   /// Dump a LineTable object.
228   ///
229   /// This function will convert any string table indexes and file indexes
230   /// into human readable format.
231   ///
232   ///
233   /// \param  OS The output stream to dump to.
234   ///
235   /// \param LT The object to dump.
236   ///
237   /// \param Indent The indentation as number of spaces. Used when dumping as an
238   /// item from within MergedFunctionsInfo.
239   void dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent = 0);
240 
241   /// Dump a InlineInfo object.
242   ///
243   /// This function will convert any string table indexes and file indexes
244   /// into human readable format.
245   ///
246   /// \param  OS The output stream to dump to.
247   ///
248   /// \param II The object to dump.
249   ///
250   /// \param Indent The indentation as number of spaces. Used for recurive
251   /// dumping.
252   void dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent = 0);
253 
254   /// Dump a FileEntry object.
255   ///
256   /// This function will convert any string table indexes into human readable
257   /// format.
258   ///
259   /// \param  OS The output stream to dump to.
260   ///
261   /// \param FE The object to dump.
262   void dump(raw_ostream &OS, std::optional<FileEntry> FE);
263 
264   /// Get the number of addresses in this Gsym file.
265   uint32_t getNumAddresses() const {
266     return Hdr->NumAddresses;
267   }
268 
269   /// Gets an address from the address table.
270   ///
271   /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress.
272   ///
273   /// \param Index A index into the address table.
274   /// \returns A resolved virtual address for adddress in the address table
275   /// or std::nullopt if Index is out of bounds.
276   std::optional<uint64_t> getAddress(size_t Index) const;
277 
278 protected:
279 
280   /// Get an appropriate address info offsets array.
281   ///
282   /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
283   /// byte offsets from the The gsym::Header::BaseAddress. The table is stored
284   /// internally as a array of bytes that are in the correct endianness. When
285   /// we access this table we must get an array that matches those sizes. This
286   /// templatized helper function is used when accessing address offsets in the
287   /// AddrOffsets member variable.
288   ///
289   /// \returns An ArrayRef of an appropriate address offset size.
290   template <class T> ArrayRef<T>
291   getAddrOffsets() const {
292     return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
293                        AddrOffsets.size()/sizeof(T));
294   }
295 
296   /// Get an appropriate address from the address table.
297   ///
298   /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
299   /// byte address offsets from the The gsym::Header::BaseAddress. The table is
300   /// stored internally as a array of bytes that are in the correct endianness.
301   /// In order to extract an address from the address table we must access the
302   /// address offset using the correct size and then add it to the BaseAddress
303   /// in the header.
304   ///
305   /// \param Index An index into the AddrOffsets array.
306   /// \returns An virtual address that matches the original object file for the
307   /// address as the specified index, or std::nullopt if Index is out of bounds.
308   template <class T>
309   std::optional<uint64_t> addressForIndex(size_t Index) const {
310     ArrayRef<T> AIO = getAddrOffsets<T>();
311     if (Index < AIO.size())
312       return AIO[Index] + Hdr->BaseAddress;
313     return std::nullopt;
314   }
315   /// Lookup an address offset in the AddrOffsets table.
316   ///
317   /// Given an address offset, look it up using a binary search of the
318   /// AddrOffsets table.
319   ///
320   /// \param AddrOffset An address offset, that has already been computed by
321   /// subtracting the gsym::Header::BaseAddress.
322   /// \returns The matching address offset index. This index will be used to
323   /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
324   template <class T>
325   std::optional<uint64_t>
326   getAddressOffsetIndex(const uint64_t AddrOffset) const {
327     ArrayRef<T> AIO = getAddrOffsets<T>();
328     const auto Begin = AIO.begin();
329     const auto End = AIO.end();
330     auto Iter = std::lower_bound(Begin, End, AddrOffset);
331     // Watch for addresses that fall between the gsym::Header::BaseAddress and
332     // the first address offset.
333     if (Iter == Begin && AddrOffset < *Begin)
334       return std::nullopt;
335     if (Iter == End || AddrOffset < *Iter)
336       --Iter;
337 
338     // GSYM files have sorted function infos with the most information (line
339     // table and/or inline info) first in the array of function infos, so
340     // always backup as much as possible as long as the address offset is the
341     // same as the previous entry.
342     while (Iter != Begin) {
343       auto Prev = Iter - 1;
344       if (*Prev == *Iter)
345         Iter = Prev;
346       else
347         break;
348     }
349 
350     return std::distance(Begin, Iter);
351   }
352 
353   /// Create a GSYM from a memory buffer.
354   ///
355   /// Called by both openFile() and copyBuffer(), this function does all of the
356   /// work of parsing the GSYM file and returning an error.
357   ///
358   /// \param MemBuffer A memory buffer that will transfer ownership into the
359   /// GsymReader.
360   /// \returns An expected GsymReader that contains the object or an error
361   /// object that indicates reason for failing to read the GSYM.
362   static llvm::Expected<llvm::gsym::GsymReader>
363   create(std::unique_ptr<MemoryBuffer> &MemBuffer);
364 
365 
366   /// Given an address, find the address index.
367   ///
368   /// Binary search the address table and find the matching address index.
369   ///
370   /// \param Addr A virtual address that matches the original object file
371   /// to lookup.
372   /// \returns An index into the address table. This index can be used to
373   /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
374   /// Returns an error if the address isn't in the GSYM with details of why.
375   Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
376 
377   /// Given an address index, get the offset for the FunctionInfo.
378   ///
379   /// Looking up an address is done by finding the corresponding address
380   /// index for the address. This index is then used to get the offset of the
381   /// FunctionInfo data that we will decode using this function.
382   ///
383   /// \param Index An index into the address table.
384   /// \returns An optional GSYM data offset for the offset of the FunctionInfo
385   /// that needs to be decoded.
386   std::optional<uint64_t> getAddressInfoOffset(size_t Index) const;
387 
388   /// Given an address, find the correct function info data and function
389   /// address.
390   ///
391   /// Binary search the address table and find the matching address info
392   /// and make sure that the function info contains the address. GSYM allows
393   /// functions to overlap, and the most debug info is contained in the first
394   /// entries due to the sorting when GSYM files are created. We can have
395   /// multiple function info that start at the same address only if their
396   /// address range doesn't match. So find the first entry that matches \a Addr
397   /// and iterate forward until we find one that contains the address.
398   ///
399   /// \param[in] Addr A virtual address that matches the original object file
400   /// to lookup.
401   ///
402   /// \param[out] FuncStartAddr A virtual address that is the base address of
403   /// the function that is used for decoding the FunctionInfo.
404   ///
405   /// \returns An valid data extractor on success, or an error if we fail to
406   /// find the address in a function info or corrrectly decode the data
407   llvm::Expected<llvm::DataExtractor>
408   getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const;
409 
410   /// Get the function data and address given an address index.
411   ///
412   /// \param AddrIdx A address index from the address table.
413   ///
414   /// \returns An expected FunctionInfo that contains the function info object
415   /// or an error object that indicates reason for failing to lookup the
416   /// address.
417   llvm::Expected<llvm::DataExtractor>
418   getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const;
419 };
420 
421 } // namespace gsym
422 } // namespace llvm
423 
424 #endif // LLVM_DEBUGINFO_GSYM_GSYMREADER_H
425