xref: /openbsd-src/gnu/llvm/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
1 //===-- gsymutil.cpp - GSYM dumping and creation utility for llvm ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ADT/STLExtras.h"
10 #include "llvm/ADT/StringSet.h"
11 #include "llvm/ADT/Triple.h"
12 #include "llvm/DebugInfo/DIContext.h"
13 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
14 #include "llvm/Object/Archive.h"
15 #include "llvm/Object/ELFObjectFile.h"
16 #include "llvm/Object/MachOUniversal.h"
17 #include "llvm/Object/ObjectFile.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/Format.h"
21 #include "llvm/Support/ManagedStatic.h"
22 #include "llvm/Support/MemoryBuffer.h"
23 #include "llvm/Support/PrettyStackTrace.h"
24 #include "llvm/Support/Regex.h"
25 #include "llvm/Support/Signals.h"
26 #include "llvm/Support/TargetSelect.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <cstring>
30 #include <inttypes.h>
31 #include <iostream>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 
37 #include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
38 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
39 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
40 #include "llvm/DebugInfo/GSYM/GsymReader.h"
41 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
42 #include "llvm/DebugInfo/GSYM/LookupResult.h"
43 #include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
44 #include <optional>
45 
46 using namespace llvm;
47 using namespace gsym;
48 using namespace object;
49 
50 /// @}
51 /// Command line options.
52 /// @{
53 
54 namespace {
55 using namespace cl;
56 
57 OptionCategory GeneralOptions("Options");
58 OptionCategory ConversionOptions("Conversion Options");
59 OptionCategory LookupOptions("Lookup Options");
60 
61 static opt<bool> Help("h", desc("Alias for -help"), Hidden,
62                       cat(GeneralOptions));
63 
64 static opt<bool> Verbose("verbose",
65                          desc("Enable verbose logging and encoding details."),
66                          cat(GeneralOptions));
67 
68 static list<std::string> InputFilenames(Positional, desc("<input GSYM files>"),
69                                         cat(GeneralOptions));
70 
71 static opt<std::string>
72     ConvertFilename("convert", cl::init(""),
73                     cl::desc("Convert the specified file to the GSYM format.\n"
74                              "Supported files include ELF and mach-o files "
75                              "that will have their debug info (DWARF) and "
76                              "symbol table converted."),
77                     cl::value_desc("path"), cat(ConversionOptions));
78 
79 static list<std::string>
80     ArchFilters("arch",
81                 desc("Process debug information for the specified CPU "
82                      "architecture only.\nArchitectures may be specified by "
83                      "name or by number.\nThis option can be specified "
84                      "multiple times, once for each desired architecture."),
85                 cl::value_desc("arch"), cat(ConversionOptions));
86 
87 static opt<std::string>
88     OutputFilename("out-file", cl::init(""),
89                    cl::desc("Specify the path where the converted GSYM file "
90                             "will be saved.\nWhen not specified, a '.gsym' "
91                             "extension will be appended to the file name "
92                             "specified in the --convert option."),
93                    cl::value_desc("path"), cat(ConversionOptions));
94 static alias OutputFilenameAlias("o", desc("Alias for -out-file."),
95                                  aliasopt(OutputFilename),
96                                  cat(ConversionOptions));
97 
98 static opt<bool> Verify("verify",
99                         desc("Verify the generated GSYM file against the "
100                              "information in the file that was converted."),
101                         cat(ConversionOptions));
102 
103 static opt<unsigned>
104     NumThreads("num-threads",
105                desc("Specify the maximum number (n) of simultaneous threads "
106                     "to use when converting files to GSYM.\nDefaults to the "
107                     "number of cores on the current machine."),
108                cl::value_desc("n"), cat(ConversionOptions));
109 
110 static opt<bool>
111     Quiet("quiet", desc("Do not output warnings about the debug information"),
112           cat(ConversionOptions));
113 
114 static list<uint64_t> LookupAddresses("address",
115                                       desc("Lookup an address in a GSYM file"),
116                                       cl::value_desc("addr"),
117                                       cat(LookupOptions));
118 
119 static opt<bool> LookupAddressesFromStdin(
120     "addresses-from-stdin",
121     desc("Lookup addresses in a GSYM file that are read from stdin\nEach input "
122          "line is expected to be of the following format: <addr> <gsym-path>"),
123     cat(LookupOptions));
124 
125 } // namespace
126 /// @}
127 //===----------------------------------------------------------------------===//
128 
error(Error Err)129 static void error(Error Err) {
130   if (!Err)
131     return;
132   WithColor::error() << toString(std::move(Err)) << "\n";
133   exit(1);
134 }
135 
error(StringRef Prefix,llvm::Error Err)136 static void error(StringRef Prefix, llvm::Error Err) {
137   if (!Err)
138     return;
139   errs() << Prefix << ": " << Err << "\n";
140   consumeError(std::move(Err));
141   exit(1);
142 }
143 
error(StringRef Prefix,std::error_code EC)144 static void error(StringRef Prefix, std::error_code EC) {
145   if (!EC)
146     return;
147   errs() << Prefix << ": " << EC.message() << "\n";
148   exit(1);
149 }
150 
getCPUType(MachOObjectFile & MachO)151 static uint32_t getCPUType(MachOObjectFile &MachO) {
152   if (MachO.is64Bit())
153     return MachO.getHeader64().cputype;
154   else
155     return MachO.getHeader().cputype;
156 }
157 
158 /// Return true if the object file has not been filtered by an --arch option.
filterArch(MachOObjectFile & Obj)159 static bool filterArch(MachOObjectFile &Obj) {
160   if (ArchFilters.empty())
161     return true;
162 
163   Triple ObjTriple(Obj.getArchTriple());
164   StringRef ObjArch = ObjTriple.getArchName();
165 
166   for (auto Arch : ArchFilters) {
167     // Match name.
168     if (Arch == ObjArch)
169       return true;
170 
171     // Match architecture number.
172     unsigned Value;
173     if (!StringRef(Arch).getAsInteger(0, Value))
174       if (Value == getCPUType(Obj))
175         return true;
176   }
177   return false;
178 }
179 
180 /// Determine the virtual address that is considered the base address of an ELF
181 /// object file.
182 ///
183 /// The base address of an ELF file is the the "p_vaddr" of the first program
184 /// header whose "p_type" is PT_LOAD.
185 ///
186 /// \param ELFFile An ELF object file we will search.
187 ///
188 /// \returns A valid image base address if we are able to extract one.
189 template <class ELFT>
190 static std::optional<uint64_t>
getImageBaseAddress(const object::ELFFile<ELFT> & ELFFile)191 getImageBaseAddress(const object::ELFFile<ELFT> &ELFFile) {
192   auto PhdrRangeOrErr = ELFFile.program_headers();
193   if (!PhdrRangeOrErr) {
194     consumeError(PhdrRangeOrErr.takeError());
195     return std::nullopt;
196   }
197   for (const typename ELFT::Phdr &Phdr : *PhdrRangeOrErr)
198     if (Phdr.p_type == ELF::PT_LOAD)
199       return (uint64_t)Phdr.p_vaddr;
200   return std::nullopt;
201 }
202 
203 /// Determine the virtual address that is considered the base address of mach-o
204 /// object file.
205 ///
206 /// The base address of a mach-o file is the vmaddr of the  "__TEXT" segment.
207 ///
208 /// \param MachO A mach-o object file we will search.
209 ///
210 /// \returns A valid image base address if we are able to extract one.
211 static std::optional<uint64_t>
getImageBaseAddress(const object::MachOObjectFile * MachO)212 getImageBaseAddress(const object::MachOObjectFile *MachO) {
213   for (const auto &Command : MachO->load_commands()) {
214     if (Command.C.cmd == MachO::LC_SEGMENT) {
215       MachO::segment_command SLC = MachO->getSegmentLoadCommand(Command);
216       StringRef SegName = SLC.segname;
217       if (SegName == "__TEXT")
218         return SLC.vmaddr;
219     } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
220       MachO::segment_command_64 SLC = MachO->getSegment64LoadCommand(Command);
221       StringRef SegName = SLC.segname;
222       if (SegName == "__TEXT")
223         return SLC.vmaddr;
224     }
225   }
226   return std::nullopt;
227 }
228 
229 /// Determine the virtual address that is considered the base address of an
230 /// object file.
231 ///
232 /// Since GSYM files are used for symbolication, many clients will need to
233 /// easily adjust addresses they find in stack traces so the lookups happen
234 /// on unslid addresses from the original object file. If the base address of
235 /// a GSYM file is set to the base address of the image, then this address
236 /// adjusting is much easier.
237 ///
238 /// \param Obj An object file we will search.
239 ///
240 /// \returns A valid image base address if we are able to extract one.
getImageBaseAddress(object::ObjectFile & Obj)241 static std::optional<uint64_t> getImageBaseAddress(object::ObjectFile &Obj) {
242   if (const auto *MachO = dyn_cast<object::MachOObjectFile>(&Obj))
243     return getImageBaseAddress(MachO);
244   else if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(&Obj))
245     return getImageBaseAddress(ELFObj->getELFFile());
246   else if (const auto *ELFObj = dyn_cast<object::ELF32BEObjectFile>(&Obj))
247     return getImageBaseAddress(ELFObj->getELFFile());
248   else if (const auto *ELFObj = dyn_cast<object::ELF64LEObjectFile>(&Obj))
249     return getImageBaseAddress(ELFObj->getELFFile());
250   else if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(&Obj))
251     return getImageBaseAddress(ELFObj->getELFFile());
252   return std::nullopt;
253 }
254 
handleObjectFile(ObjectFile & Obj,const std::string & OutFile)255 static llvm::Error handleObjectFile(ObjectFile &Obj,
256                                     const std::string &OutFile) {
257   auto ThreadCount =
258       NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
259   auto &OS = outs();
260 
261   GsymCreator Gsym(Quiet);
262 
263   // See if we can figure out the base address for a given object file, and if
264   // we can, then set the base address to use to this value. This will ease
265   // symbolication since clients can slide the GSYM lookup addresses by using
266   // the load bias of the shared library.
267   if (auto ImageBaseAddr = getImageBaseAddress(Obj))
268     Gsym.setBaseAddress(*ImageBaseAddr);
269 
270   // We need to know where the valid sections are that contain instructions.
271   // See header documentation for DWARFTransformer::SetValidTextRanges() for
272   // defails.
273   AddressRanges TextRanges;
274   for (const object::SectionRef &Sect : Obj.sections()) {
275     if (!Sect.isText())
276       continue;
277     const uint64_t Size = Sect.getSize();
278     if (Size == 0)
279       continue;
280     const uint64_t StartAddr = Sect.getAddress();
281     TextRanges.insert(AddressRange(StartAddr, StartAddr + Size));
282   }
283 
284   // Make sure there is DWARF to convert first.
285   std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(Obj);
286   if (!DICtx)
287     return createStringError(std::errc::invalid_argument,
288                              "unable to create DWARF context");
289 
290   // Make a DWARF transformer object and populate the ranges of the code
291   // so we don't end up adding invalid functions to GSYM data.
292   DwarfTransformer DT(*DICtx, OS, Gsym);
293   if (!TextRanges.empty())
294     Gsym.SetValidTextRanges(TextRanges);
295 
296   // Convert all DWARF to GSYM.
297   if (auto Err = DT.convert(ThreadCount))
298     return Err;
299 
300   // Get the UUID and convert symbol table to GSYM.
301   if (auto Err = ObjectFileTransformer::convert(Obj, OS, Gsym))
302     return Err;
303 
304   // Finalize the GSYM to make it ready to save to disk. This will remove
305   // duplicate FunctionInfo entries where we might have found an entry from
306   // debug info and also a symbol table entry from the object file.
307   if (auto Err = Gsym.finalize(OS))
308     return Err;
309 
310   // Save the GSYM file to disk.
311   support::endianness Endian =
312       Obj.makeTriple().isLittleEndian() ? support::little : support::big;
313   if (auto Err = Gsym.save(OutFile, Endian))
314     return Err;
315 
316   // Verify the DWARF if requested. This will ensure all the info in the DWARF
317   // can be looked up in the GSYM and that all lookups get matching data.
318   if (Verify) {
319     if (auto Err = DT.verify(OutFile))
320       return Err;
321   }
322 
323   return Error::success();
324 }
325 
handleBuffer(StringRef Filename,MemoryBufferRef Buffer,const std::string & OutFile)326 static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
327                                 const std::string &OutFile) {
328   Expected<std::unique_ptr<Binary>> BinOrErr = object::createBinary(Buffer);
329   error(Filename, errorToErrorCode(BinOrErr.takeError()));
330 
331   if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
332     Triple ObjTriple(Obj->makeTriple());
333     auto ArchName = ObjTriple.getArchName();
334     outs() << "Output file (" << ArchName << "): " << OutFile << "\n";
335     if (auto Err = handleObjectFile(*Obj, OutFile))
336       return Err;
337   } else if (auto *Fat = dyn_cast<MachOUniversalBinary>(BinOrErr->get())) {
338     // Iterate over all contained architectures and filter out any that were
339     // not specified with the "--arch <arch>" option. If the --arch option was
340     // not specified on the command line, we will process all architectures.
341     std::vector<std::unique_ptr<MachOObjectFile>> FilterObjs;
342     for (auto &ObjForArch : Fat->objects()) {
343       if (auto MachOOrErr = ObjForArch.getAsObjectFile()) {
344         auto &Obj = **MachOOrErr;
345         if (filterArch(Obj))
346           FilterObjs.emplace_back(MachOOrErr->release());
347       } else {
348         error(Filename, MachOOrErr.takeError());
349       }
350     }
351     if (FilterObjs.empty())
352       error(Filename, createStringError(std::errc::invalid_argument,
353                                         "no matching architectures found"));
354 
355     // Now handle each architecture we need to convert.
356     for (auto &Obj : FilterObjs) {
357       Triple ObjTriple(Obj->getArchTriple());
358       auto ArchName = ObjTriple.getArchName();
359       std::string ArchOutFile(OutFile);
360       // If we are only handling a single architecture, then we will use the
361       // normal output file. If we are handling multiple architectures append
362       // the architecture name to the end of the out file path so that we
363       // don't overwrite the previous architecture's gsym file.
364       if (FilterObjs.size() > 1) {
365         ArchOutFile.append(1, '.');
366         ArchOutFile.append(ArchName.str());
367       }
368       outs() << "Output file (" << ArchName << "): " << ArchOutFile << "\n";
369       if (auto Err = handleObjectFile(*Obj, ArchOutFile))
370         return Err;
371     }
372   }
373   return Error::success();
374 }
375 
handleFileConversionToGSYM(StringRef Filename,const std::string & OutFile)376 static llvm::Error handleFileConversionToGSYM(StringRef Filename,
377                                               const std::string &OutFile) {
378   ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
379       MemoryBuffer::getFileOrSTDIN(Filename);
380   error(Filename, BuffOrErr.getError());
381   std::unique_ptr<MemoryBuffer> Buffer = std::move(BuffOrErr.get());
382   return handleBuffer(Filename, *Buffer, OutFile);
383 }
384 
convertFileToGSYM(raw_ostream & OS)385 static llvm::Error convertFileToGSYM(raw_ostream &OS) {
386   // Expand any .dSYM bundles to the individual object files contained therein.
387   std::vector<std::string> Objects;
388   std::string OutFile = OutputFilename;
389   if (OutFile.empty()) {
390     OutFile = ConvertFilename;
391     OutFile += ".gsym";
392   }
393 
394   OS << "Input file: " << ConvertFilename << "\n";
395 
396   if (auto DsymObjectsOrErr =
397           MachOObjectFile::findDsymObjectMembers(ConvertFilename)) {
398     if (DsymObjectsOrErr->empty())
399       Objects.push_back(ConvertFilename);
400     else
401       llvm::append_range(Objects, *DsymObjectsOrErr);
402   } else {
403     error(DsymObjectsOrErr.takeError());
404   }
405 
406   for (auto Object : Objects) {
407     if (auto Err = handleFileConversionToGSYM(Object, OutFile))
408       return Err;
409   }
410   return Error::success();
411 }
412 
doLookup(GsymReader & Gsym,uint64_t Addr,raw_ostream & OS)413 static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
414   if (auto Result = Gsym.lookup(Addr)) {
415     // If verbose is enabled dump the full function info for the address.
416     if (Verbose) {
417       if (auto FI = Gsym.getFunctionInfo(Addr)) {
418         OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
419         Gsym.dump(OS, *FI);
420         OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
421       }
422     }
423     OS << Result.get();
424   } else {
425     if (Verbose)
426       OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
427     OS << HEX64(Addr) << ": ";
428     logAllUnhandledErrors(Result.takeError(), OS, "error: ");
429   }
430   if (Verbose)
431     OS << "\n";
432 }
433 
main(int argc,char const * argv[])434 int main(int argc, char const *argv[]) {
435   // Print a stack trace if we signal out.
436   sys::PrintStackTraceOnErrorSignal(argv[0]);
437   PrettyStackTraceProgram X(argc, argv);
438   llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
439 
440   llvm::InitializeAllTargets();
441 
442   const char *Overview =
443       "A tool for dumping, searching and creating GSYM files.\n\n"
444       "Specify one or more GSYM paths as arguments to dump all of the "
445       "information in each GSYM file.\n"
446       "Specify a single GSYM file along with one or more --lookup options to "
447       "lookup addresses within that GSYM file.\n"
448       "Use the --convert option to specify a file with option --out-file "
449       "option to convert to GSYM format.\n";
450   HideUnrelatedOptions({&GeneralOptions, &ConversionOptions, &LookupOptions});
451   cl::ParseCommandLineOptions(argc, argv, Overview);
452 
453   if (Help) {
454     PrintHelpMessage(/*Hidden =*/false, /*Categorized =*/true);
455     return 0;
456   }
457 
458   raw_ostream &OS = outs();
459 
460   if (!ConvertFilename.empty()) {
461     // Convert DWARF to GSYM
462     if (!InputFilenames.empty()) {
463       OS << "error: no input files can be specified when using the --convert "
464             "option.\n";
465       return 1;
466     }
467     // Call error() if we have an error and it will exit with a status of 1
468     if (auto Err = convertFileToGSYM(OS))
469       error("DWARF conversion failed: ", std::move(Err));
470     return 0;
471   }
472 
473   if (LookupAddressesFromStdin) {
474     if (!LookupAddresses.empty() || !InputFilenames.empty()) {
475       OS << "error: no input files or addresses can be specified when using "
476             "the --addresses-from-stdin "
477             "option.\n";
478       return 1;
479     }
480 
481     std::string InputLine;
482     std::string CurrentGSYMPath;
483     std::optional<Expected<GsymReader>> CurrentGsym;
484 
485     while (std::getline(std::cin, InputLine)) {
486       // Strip newline characters.
487       std::string StrippedInputLine(InputLine);
488       llvm::erase_if(StrippedInputLine,
489                      [](char c) { return c == '\r' || c == '\n'; });
490 
491       StringRef AddrStr, GSYMPath;
492       std::tie(AddrStr, GSYMPath) =
493           llvm::StringRef{StrippedInputLine}.split(' ');
494 
495       if (GSYMPath != CurrentGSYMPath) {
496         CurrentGsym = GsymReader::openFile(GSYMPath);
497         if (!*CurrentGsym)
498           error(GSYMPath, CurrentGsym->takeError());
499         CurrentGSYMPath = GSYMPath;
500       }
501 
502       uint64_t Addr;
503       if (AddrStr.getAsInteger(0, Addr)) {
504         OS << "error: invalid address " << AddrStr
505            << ", expected: Address GsymFile.\n";
506         return 1;
507       }
508 
509       doLookup(**CurrentGsym, Addr, OS);
510 
511       OS << "\n";
512       OS.flush();
513     }
514 
515     return EXIT_SUCCESS;
516   }
517 
518   // Dump or access data inside GSYM files
519   for (const auto &GSYMPath : InputFilenames) {
520     auto Gsym = GsymReader::openFile(GSYMPath);
521     if (!Gsym)
522       error(GSYMPath, Gsym.takeError());
523 
524     if (LookupAddresses.empty()) {
525       Gsym->dump(outs());
526       continue;
527     }
528 
529     // Lookup an address in a GSYM file and print any matches.
530     OS << "Looking up addresses in \"" << GSYMPath << "\":\n";
531     for (auto Addr : LookupAddresses) {
532       doLookup(*Gsym, Addr, OS);
533     }
534   }
535   return EXIT_SUCCESS;
536 }
537