xref: /llvm-project/llvm/lib/DebugInfo/GSYM/GsymReader.cpp (revision 6f28b4b5e960e1c4eeebad18b48e667df1e806a8)
1 //===- GsymReader.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/DebugInfo/GSYM/GsymReader.h"
10 
11 #include <assert.h>
12 #include <inttypes.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 
16 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
17 #include "llvm/DebugInfo/GSYM/LineTable.h"
18 #include "llvm/Support/BinaryStreamReader.h"
19 #include "llvm/Support/DataExtractor.h"
20 #include "llvm/Support/MemoryBuffer.h"
21 
22 using namespace llvm;
23 using namespace gsym;
24 
25 GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer)
26     : MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {}
27 
28 GsymReader::GsymReader(GsymReader &&RHS) = default;
29 
30 GsymReader::~GsymReader() = default;
31 
32 llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) {
33   // Open the input file and return an appropriate error if needed.
34   ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
35       MemoryBuffer::getFileOrSTDIN(Filename);
36   auto Err = BuffOrErr.getError();
37   if (Err)
38     return llvm::errorCodeToError(Err);
39   return create(BuffOrErr.get());
40 }
41 
42 llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) {
43   auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");
44   return create(MemBuffer);
45 }
46 
47 llvm::Expected<llvm::gsym::GsymReader>
48 GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
49   if (!MemBuffer)
50     return createStringError(std::errc::invalid_argument,
51                              "invalid memory buffer");
52   GsymReader GR(std::move(MemBuffer));
53   llvm::Error Err = GR.parse();
54   if (Err)
55     return std::move(Err);
56   return std::move(GR);
57 }
58 
59 llvm::Error
60 GsymReader::parse() {
61   BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native);
62   // Check for the magic bytes. This file format is designed to be mmap'ed
63   // into a process and accessed as read only. This is done for performance
64   // and efficiency for symbolicating and parsing GSYM data.
65   if (FileData.readObject(Hdr))
66     return createStringError(std::errc::invalid_argument,
67                              "not enough data for a GSYM header");
68 
69   const auto HostByteOrder = llvm::endianness::native;
70   switch (Hdr->Magic) {
71     case GSYM_MAGIC:
72       Endian = HostByteOrder;
73       break;
74     case GSYM_CIGAM:
75       // This is a GSYM file, but not native endianness.
76       Endian = sys::IsBigEndianHost ? llvm::endianness::little
77                                     : llvm::endianness::big;
78       Swap.reset(new SwappedData);
79       break;
80     default:
81       return createStringError(std::errc::invalid_argument,
82                                "not a GSYM file");
83   }
84 
85   bool DataIsLittleEndian = HostByteOrder != llvm::endianness::little;
86   // Read a correctly byte swapped header if we need to.
87   if (Swap) {
88     DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
89     if (auto ExpectedHdr = Header::decode(Data))
90       Swap->Hdr = ExpectedHdr.get();
91     else
92       return ExpectedHdr.takeError();
93     Hdr = &Swap->Hdr;
94   }
95 
96   // Detect errors in the header and report any that are found. If we make it
97   // past this without errors, we know we have a good magic value, a supported
98   // version number, verified address offset size and a valid UUID size.
99   if (Error Err = Hdr->checkForError())
100     return Err;
101 
102   if (!Swap) {
103     // This is the native endianness case that is most common and optimized for
104     // efficient lookups. Here we just grab pointers to the native data and
105     // use ArrayRef objects to allow efficient read only access.
106 
107     // Read the address offsets.
108     if (FileData.padToAlignment(Hdr->AddrOffSize) ||
109         FileData.readArray(AddrOffsets,
110                            Hdr->NumAddresses * Hdr->AddrOffSize))
111       return createStringError(std::errc::invalid_argument,
112                               "failed to read address table");
113 
114     // Read the address info offsets.
115     if (FileData.padToAlignment(4) ||
116         FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses))
117       return createStringError(std::errc::invalid_argument,
118                               "failed to read address info offsets table");
119 
120     // Read the file table.
121     uint32_t NumFiles = 0;
122     if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles))
123       return createStringError(std::errc::invalid_argument,
124                               "failed to read file table");
125 
126     // Get the string table.
127     FileData.setOffset(Hdr->StrtabOffset);
128     if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize))
129       return createStringError(std::errc::invalid_argument,
130                               "failed to read string table");
131 } else {
132   // This is the non native endianness case that is not common and not
133   // optimized for lookups. Here we decode the important tables into local
134   // storage and then set the ArrayRef objects to point to these swapped
135   // copies of the read only data so lookups can be as efficient as possible.
136   DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
137 
138   // Read the address offsets.
139   uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize);
140   Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize);
141   switch (Hdr->AddrOffSize) {
142     case 1:
143       if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses))
144         return createStringError(std::errc::invalid_argument,
145                                   "failed to read address table");
146       break;
147     case 2:
148       if (!Data.getU16(&Offset,
149                         reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()),
150                         Hdr->NumAddresses))
151         return createStringError(std::errc::invalid_argument,
152                                   "failed to read address table");
153       break;
154     case 4:
155       if (!Data.getU32(&Offset,
156                         reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()),
157                         Hdr->NumAddresses))
158         return createStringError(std::errc::invalid_argument,
159                                   "failed to read address table");
160       break;
161     case 8:
162       if (!Data.getU64(&Offset,
163                         reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()),
164                         Hdr->NumAddresses))
165         return createStringError(std::errc::invalid_argument,
166                                   "failed to read address table");
167     }
168     AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);
169 
170     // Read the address info offsets.
171     Offset = alignTo(Offset, 4);
172     Swap->AddrInfoOffsets.resize(Hdr->NumAddresses);
173     if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses))
174       AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets);
175     else
176       return createStringError(std::errc::invalid_argument,
177                                "failed to read address table");
178     // Read the file table.
179     const uint32_t NumFiles = Data.getU32(&Offset);
180     if (NumFiles > 0) {
181       Swap->Files.resize(NumFiles);
182       if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2))
183         Files = ArrayRef<FileEntry>(Swap->Files);
184       else
185         return createStringError(std::errc::invalid_argument,
186                                  "failed to read file table");
187     }
188     // Get the string table.
189     StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset,
190                                                 Hdr->StrtabSize);
191     if (StrTab.Data.empty())
192       return createStringError(std::errc::invalid_argument,
193                                "failed to read string table");
194   }
195   return Error::success();
196 
197 }
198 
199 const Header &GsymReader::getHeader() const {
200   // The only way to get a GsymReader is from GsymReader::openFile(...) or
201   // GsymReader::copyBuffer() and the header must be valid and initialized to
202   // a valid pointer value, so the assert below should not trigger.
203   assert(Hdr);
204   return *Hdr;
205 }
206 
207 std::optional<uint64_t> GsymReader::getAddress(size_t Index) const {
208   switch (Hdr->AddrOffSize) {
209   case 1: return addressForIndex<uint8_t>(Index);
210   case 2: return addressForIndex<uint16_t>(Index);
211   case 4: return addressForIndex<uint32_t>(Index);
212   case 8: return addressForIndex<uint64_t>(Index);
213   }
214   return std::nullopt;
215 }
216 
217 std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {
218   const auto NumAddrInfoOffsets = AddrInfoOffsets.size();
219   if (Index < NumAddrInfoOffsets)
220     return AddrInfoOffsets[Index];
221   return std::nullopt;
222 }
223 
224 Expected<uint64_t>
225 GsymReader::getAddressIndex(const uint64_t Addr) const {
226   if (Addr >= Hdr->BaseAddress) {
227     const uint64_t AddrOffset = Addr - Hdr->BaseAddress;
228     std::optional<uint64_t> AddrOffsetIndex;
229     switch (Hdr->AddrOffSize) {
230     case 1:
231       AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset);
232       break;
233     case 2:
234       AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset);
235       break;
236     case 4:
237       AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset);
238       break;
239     case 8:
240       AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset);
241       break;
242     default:
243       return createStringError(std::errc::invalid_argument,
244                                "unsupported address offset size %u",
245                                Hdr->AddrOffSize);
246     }
247     if (AddrOffsetIndex)
248       return *AddrOffsetIndex;
249   }
250   return createStringError(std::errc::invalid_argument,
251                            "address 0x%" PRIx64 " is not in GSYM", Addr);
252 
253 }
254 
255 llvm::Expected<DataExtractor>
256 GsymReader::getFunctionInfoDataForAddress(uint64_t Addr,
257                                           uint64_t &FuncStartAddr) const {
258   Expected<uint64_t> ExpectedAddrIdx = getAddressIndex(Addr);
259   if (!ExpectedAddrIdx)
260     return ExpectedAddrIdx.takeError();
261   const uint64_t FirstAddrIdx = *ExpectedAddrIdx;
262   // The AddrIdx is the first index of the function info entries that match
263   // \a Addr. We need to iterate over all function info objects that start with
264   // the same address until we find a range that contains \a Addr.
265   std::optional<uint64_t> FirstFuncStartAddr;
266   const size_t NumAddresses = getNumAddresses();
267   for (uint64_t AddrIdx = FirstAddrIdx; AddrIdx < NumAddresses; ++AddrIdx) {
268     auto ExpextedData = getFunctionInfoDataAtIndex(AddrIdx, FuncStartAddr);
269     // If there was an error, return the error.
270     if (!ExpextedData)
271       return ExpextedData;
272 
273     // Remember the first function start address if it hasn't already been set.
274     // If it is already valid, check to see if it matches the first function
275     // start address and only continue if it matches.
276     if (FirstFuncStartAddr.has_value()) {
277       if (*FirstFuncStartAddr != FuncStartAddr)
278         break; // Done with consecutive function entries with same address.
279     } else {
280       FirstFuncStartAddr = FuncStartAddr;
281     }
282     // Make sure the current function address ranges contains \a Addr.
283     // Some symbols on Darwin don't have valid sizes, so if we run into a
284     // symbol with zero size, then we have found a match for our address.
285 
286     // The first thing the encoding of a FunctionInfo object is the function
287     // size.
288     uint64_t Offset = 0;
289     uint32_t FuncSize = ExpextedData->getU32(&Offset);
290     if (FuncSize == 0 ||
291         AddressRange(FuncStartAddr, FuncStartAddr + FuncSize).contains(Addr))
292       return ExpextedData;
293   }
294   return createStringError(std::errc::invalid_argument,
295                            "address 0x%" PRIx64 " is not in GSYM", Addr);
296 }
297 
298 llvm::Expected<DataExtractor>
299 GsymReader::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
300                                        uint64_t &FuncStartAddr) const {
301   if (AddrIdx >= getNumAddresses())
302     return createStringError(std::errc::invalid_argument,
303                              "invalid address index %" PRIu64, AddrIdx);
304   const uint32_t AddrInfoOffset = AddrInfoOffsets[AddrIdx];
305   assert((Endian == endianness::big || Endian == endianness::little) &&
306          "Endian must be either big or little");
307   StringRef Bytes = MemBuffer->getBuffer().substr(AddrInfoOffset);
308   if (Bytes.empty())
309     return createStringError(std::errc::invalid_argument,
310                              "invalid address info offset 0x%" PRIx32,
311                              AddrInfoOffset);
312   std::optional<uint64_t> OptFuncStartAddr = getAddress(AddrIdx);
313   if (!OptFuncStartAddr)
314     return createStringError(std::errc::invalid_argument,
315                              "failed to extract address[%" PRIu64 "]", AddrIdx);
316   FuncStartAddr = *OptFuncStartAddr;
317   return DataExtractor(Bytes, Endian == llvm::endianness::little, 4);
318 }
319 
320 llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
321   uint64_t FuncStartAddr = 0;
322   if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
323     return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
324   else
325     return ExpectedData.takeError();
326 }
327 
328 llvm::Expected<FunctionInfo>
329 GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const {
330   uint64_t FuncStartAddr = 0;
331   if (auto ExpectedData = getFunctionInfoDataAtIndex(Idx, FuncStartAddr))
332     return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
333   else
334     return ExpectedData.takeError();
335 }
336 
337 llvm::Expected<LookupResult>
338 GsymReader::lookup(uint64_t Addr,
339                    std::optional<DataExtractor> *MergedFunctionsData) const {
340   uint64_t FuncStartAddr = 0;
341   if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
342     return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr,
343                                 MergedFunctionsData);
344   else
345     return ExpectedData.takeError();
346 }
347 
348 llvm::Expected<std::vector<LookupResult>>
349 GsymReader::lookupAll(uint64_t Addr) const {
350   std::vector<LookupResult> Results;
351   std::optional<DataExtractor> MergedFunctionsData;
352 
353   // First perform a lookup to get the primary function info result.
354   auto MainResult = lookup(Addr, &MergedFunctionsData);
355   if (!MainResult)
356     return MainResult.takeError();
357 
358   // Add the main result as the first entry.
359   Results.push_back(std::move(*MainResult));
360 
361   // Now process any merged functions data that was found during the lookup.
362   if (MergedFunctionsData) {
363     // Get data extractors for each merged function.
364     auto ExpectedMergedFuncExtractors =
365         MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData);
366     if (!ExpectedMergedFuncExtractors)
367       return ExpectedMergedFuncExtractors.takeError();
368 
369     // Process each merged function data.
370     for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
371       if (auto FI = FunctionInfo::lookup(MergedData, *this,
372                                          MainResult->FuncRange.start(), Addr)) {
373         Results.push_back(std::move(*FI));
374       } else {
375         return FI.takeError();
376       }
377     }
378   }
379 
380   return Results;
381 }
382 
383 void GsymReader::dump(raw_ostream &OS) {
384   const auto &Header = getHeader();
385   // Dump the GSYM header.
386   OS << Header << "\n";
387   // Dump the address table.
388   OS << "Address Table:\n";
389   OS << "INDEX  OFFSET";
390 
391   switch (Hdr->AddrOffSize) {
392   case 1: OS << "8 "; break;
393   case 2: OS << "16"; break;
394   case 4: OS << "32"; break;
395   case 8: OS << "64"; break;
396   default: OS << "??"; break;
397   }
398   OS << " (ADDRESS)\n";
399   OS << "====== =============================== \n";
400   for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
401     OS << format("[%4u] ", I);
402     switch (Hdr->AddrOffSize) {
403     case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break;
404     case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break;
405     case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break;
406     case 8: OS << HEX32(getAddrOffsets<uint64_t>()[I]); break;
407     default: break;
408     }
409     OS << " (" << HEX64(*getAddress(I)) << ")\n";
410   }
411   // Dump the address info offsets table.
412   OS << "\nAddress Info Offsets:\n";
413   OS << "INDEX  Offset\n";
414   OS << "====== ==========\n";
415   for (uint32_t I = 0; I < Header.NumAddresses; ++I)
416     OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n";
417   // Dump the file table.
418   OS << "\nFiles:\n";
419   OS << "INDEX  DIRECTORY  BASENAME   PATH\n";
420   OS << "====== ========== ========== ==============================\n";
421   for (uint32_t I = 0; I < Files.size(); ++I) {
422     OS << format("[%4u] ", I) << HEX32(Files[I].Dir) << ' '
423        << HEX32(Files[I].Base) << ' ';
424     dump(OS, getFile(I));
425     OS << "\n";
426   }
427   OS << "\n" << StrTab << "\n";
428 
429   for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
430     OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": ";
431     if (auto FI = getFunctionInfoAtIndex(I))
432       dump(OS, *FI);
433     else
434       logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:");
435   }
436 }
437 
438 void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI,
439                       uint32_t Indent) {
440   OS.indent(Indent);
441   OS << FI.Range << " \"" << getString(FI.Name) << "\"\n";
442   if (FI.OptLineTable)
443     dump(OS, *FI.OptLineTable, Indent);
444   if (FI.Inline)
445     dump(OS, *FI.Inline, Indent);
446 
447   if (FI.CallSites)
448     dump(OS, *FI.CallSites, Indent);
449 
450   if (FI.MergedFunctions) {
451     assert(Indent == 0 && "MergedFunctionsInfo should only exist at top level");
452     dump(OS, *FI.MergedFunctions);
453   }
454 }
455 
456 void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
457   for (uint32_t inx = 0; inx < MFI.MergedFunctions.size(); inx++) {
458     OS << "++ Merged FunctionInfos[" << inx << "]:\n";
459     dump(OS, MFI.MergedFunctions[inx], 4);
460   }
461 }
462 
463 void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
464   OS << HEX16(CSI.ReturnOffset);
465 
466   std::string Flags;
467   auto addFlag = [&](const char *Flag) {
468     if (!Flags.empty())
469       Flags += " | ";
470     Flags += Flag;
471   };
472 
473   if (CSI.Flags == CallSiteInfo::Flags::None)
474     Flags = "None";
475   else {
476     if (CSI.Flags & CallSiteInfo::Flags::InternalCall)
477       addFlag("InternalCall");
478 
479     if (CSI.Flags & CallSiteInfo::Flags::ExternalCall)
480       addFlag("ExternalCall");
481   }
482   OS << " Flags[" << Flags << "]";
483 
484   if (!CSI.MatchRegex.empty()) {
485     OS << " MatchRegex[";
486     for (uint32_t i = 0; i < CSI.MatchRegex.size(); ++i) {
487       if (i > 0)
488         OS << ";";
489       OS << getString(CSI.MatchRegex[i]);
490     }
491     OS << "]";
492   }
493 }
494 
495 void GsymReader::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
496                       uint32_t Indent) {
497   OS.indent(Indent);
498   OS << "CallSites (by relative return offset):\n";
499   for (const auto &CS : CSIC.CallSites) {
500     OS.indent(Indent);
501     OS << "  ";
502     dump(OS, CS);
503     OS << "\n";
504   }
505 }
506 
507 void GsymReader::dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent) {
508   OS.indent(Indent);
509   OS << "LineTable:\n";
510   for (auto &LE: LT) {
511     OS.indent(Indent);
512     OS << "  " << HEX64(LE.Addr) << ' ';
513     if (LE.File)
514       dump(OS, getFile(LE.File));
515     OS << ':' << LE.Line << '\n';
516   }
517 }
518 
519 void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
520   if (Indent == 0)
521     OS << "InlineInfo:\n";
522   else
523     OS.indent(Indent);
524   OS << II.Ranges << ' ' << getString(II.Name);
525   if (II.CallFile != 0) {
526     if (auto File = getFile(II.CallFile)) {
527       OS << " called from ";
528       dump(OS, File);
529       OS << ':' << II.CallLine;
530     }
531   }
532   OS << '\n';
533   for (const auto &ChildII: II.Children)
534     dump(OS, ChildII, Indent + 2);
535 }
536 
537 void GsymReader::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
538   if (FE) {
539     // IF we have the file from index 0, then don't print anything
540     if (FE->Dir == 0 && FE->Base == 0)
541       return;
542     StringRef Dir = getString(FE->Dir);
543     StringRef Base = getString(FE->Base);
544     if (!Dir.empty()) {
545       OS << Dir;
546       if (Dir.contains('\\') && !Dir.contains('/'))
547         OS << '\\';
548       else
549         OS << '/';
550     }
551     if (!Base.empty()) {
552       OS << Base;
553     }
554     if (!Dir.empty() || !Base.empty())
555       return;
556   }
557   OS << "<invalid-file>";
558 }
559