xref: /freebsd-src/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
1 //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This utility works much like "addr2line". It is capable of transforming
10 // tuples (module name, module offset) to code locations (function name,
11 // file, line number, column number). It is targeted for compiler-rt tools
12 // (especially AddressSanitizer and ThreadSanitizer) that can use it
13 // to symbolize stack traces in their error reports.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Opts.inc"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Config/config.h"
20 #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
21 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
22 #include "llvm/Debuginfod/HTTPClient.h"
23 #include "llvm/Option/Arg.h"
24 #include "llvm/Option/ArgList.h"
25 #include "llvm/Option/Option.h"
26 #include "llvm/Support/COM.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/FileSystem.h"
30 #include "llvm/Support/InitLLVM.h"
31 #include "llvm/Support/Path.h"
32 #include "llvm/Support/StringSaver.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include <algorithm>
35 #include <cstdio>
36 #include <cstring>
37 #include <string>
38 
39 using namespace llvm;
40 using namespace symbolize;
41 
42 namespace {
43 enum ID {
44   OPT_INVALID = 0, // This is not an option ID.
45 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
46                HELPTEXT, METAVAR, VALUES)                                      \
47   OPT_##ID,
48 #include "Opts.inc"
49 #undef OPTION
50 };
51 
52 #define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
53 #include "Opts.inc"
54 #undef PREFIX
55 
56 const opt::OptTable::Info InfoTable[] = {
57 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
58                HELPTEXT, METAVAR, VALUES)                                      \
59   {                                                                            \
60       PREFIX,      NAME,      HELPTEXT,                                        \
61       METAVAR,     OPT_##ID,  opt::Option::KIND##Class,                        \
62       PARAM,       FLAGS,     OPT_##GROUP,                                     \
63       OPT_##ALIAS, ALIASARGS, VALUES},
64 #include "Opts.inc"
65 #undef OPTION
66 };
67 
68 class SymbolizerOptTable : public opt::OptTable {
69 public:
70   SymbolizerOptTable() : OptTable(InfoTable) {
71     setGroupedShortOptions(true);
72   }
73 };
74 } // namespace
75 
76 template <typename T>
77 static void print(const Request &Request, Expected<T> &ResOrErr,
78                   DIPrinter &Printer) {
79   if (ResOrErr) {
80     // No error, print the result.
81     Printer.print(Request, *ResOrErr);
82     return;
83   }
84 
85   // Handle the error.
86   bool PrintEmpty = true;
87   handleAllErrors(std::move(ResOrErr.takeError()),
88                   [&](const ErrorInfoBase &EI) {
89                     PrintEmpty = Printer.printError(
90                         Request, EI, "LLVMSymbolizer: error reading file: ");
91                   });
92 
93   if (PrintEmpty)
94     Printer.print(Request, T());
95 }
96 
// Output format used to select the printer implementation.
enum class OutputStyle {
  LLVM,
  GNU,
  JSON,
};

// Kind of lookup requested by one input line.
enum class Command {
  Code,  // Selected by the "CODE " prefix, or when no prefix is given.
  Data,  // Selected by the "DATA " prefix.
  Frame, // Selected by the "FRAME " prefix.
};
104 
105 static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
106                          StringRef InputString, Command &Cmd,
107                          std::string &ModuleName, uint64_t &ModuleOffset) {
108   const char kDelimiters[] = " \n\r";
109   ModuleName = "";
110   if (InputString.consume_front("CODE ")) {
111     Cmd = Command::Code;
112   } else if (InputString.consume_front("DATA ")) {
113     Cmd = Command::Data;
114   } else if (InputString.consume_front("FRAME ")) {
115     Cmd = Command::Frame;
116   } else {
117     // If no cmd, assume it's CODE.
118     Cmd = Command::Code;
119   }
120   const char *Pos = InputString.data();
121   // Skip delimiters and parse input filename (if needed).
122   if (BinaryName.empty()) {
123     Pos += strspn(Pos, kDelimiters);
124     if (*Pos == '"' || *Pos == '\'') {
125       char Quote = *Pos;
126       Pos++;
127       const char *End = strchr(Pos, Quote);
128       if (!End)
129         return false;
130       ModuleName = std::string(Pos, End - Pos);
131       Pos = End + 1;
132     } else {
133       int NameLength = strcspn(Pos, kDelimiters);
134       ModuleName = std::string(Pos, NameLength);
135       Pos += NameLength;
136     }
137   } else {
138     ModuleName = BinaryName.str();
139   }
140   // Skip delimiters and parse module offset.
141   Pos += strspn(Pos, kDelimiters);
142   int OffsetLength = strcspn(Pos, kDelimiters);
143   StringRef Offset(Pos, OffsetLength);
144   // GNU addr2line assumes the offset is hexadecimal and allows a redundant
145   // "0x" or "0X" prefix; do the same for compatibility.
146   if (IsAddr2Line)
147     Offset.consume_front("0x") || Offset.consume_front("0X");
148   return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset);
149 }
150 
151 static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA,
152                            bool IsAddr2Line, OutputStyle Style,
153                            StringRef InputString, LLVMSymbolizer &Symbolizer,
154                            DIPrinter &Printer) {
155   Command Cmd;
156   std::string ModuleName;
157   uint64_t Offset = 0;
158   if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
159                     StringRef(InputString), Cmd, ModuleName, Offset)) {
160     Printer.printInvalidCommand({ModuleName, None}, InputString);
161     return;
162   }
163 
164   uint64_t AdjustedOffset = Offset - AdjustVMA;
165   if (Cmd == Command::Data) {
166     Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(
167         ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
168     print({ModuleName, Offset}, ResOrErr, Printer);
169   } else if (Cmd == Command::Frame) {
170     Expected<std::vector<DILocal>> ResOrErr = Symbolizer.symbolizeFrame(
171         ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
172     print({ModuleName, Offset}, ResOrErr, Printer);
173   } else if (Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line)) {
174     Expected<DIInliningInfo> ResOrErr = Symbolizer.symbolizeInlinedCode(
175         ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
176     print({ModuleName, Offset}, ResOrErr, Printer);
177   } else if (Style == OutputStyle::GNU) {
178     // With PrintFunctions == FunctionNameKind::LinkageName (default)
179     // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
180     // may override the name of an inlined function with the name of the topmost
181     // caller function in the inlining chain. This contradicts the existing
182     // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
183     // the topmost function, which suits our needs better.
184     Expected<DIInliningInfo> ResOrErr = Symbolizer.symbolizeInlinedCode(
185         ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
186     Expected<DILineInfo> Res0OrErr =
187         !ResOrErr
188             ? Expected<DILineInfo>(ResOrErr.takeError())
189             : ((ResOrErr->getNumberOfFrames() == 0) ? DILineInfo()
190                                                     : ResOrErr->getFrame(0));
191     print({ModuleName, Offset}, Res0OrErr, Printer);
192   } else {
193     Expected<DILineInfo> ResOrErr = Symbolizer.symbolizeCode(
194         ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
195     print({ModuleName, Offset}, ResOrErr, Printer);
196   }
197 }
198 
199 static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl,
200                       raw_ostream &OS) {
201   const char HelpText[] = " [options] addresses...";
202   Tbl.printHelp(OS, (ToolName + HelpText).str().c_str(),
203                 ToolName.str().c_str());
204   // TODO Replace this with OptTable API once it adds extrahelp support.
205   OS << "\nPass @FILE as argument to read options from FILE.\n";
206 }
207 
208 static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line,
209                                       StringSaver &Saver,
210                                       SymbolizerOptTable &Tbl) {
211   StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer";
212   // The environment variable specifies initial options which can be overridden
213   // by commnad line options.
214   Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS"
215                                                    : "LLVM_SYMBOLIZER_OPTS");
216   bool HasError = false;
217   opt::InputArgList Args =
218       Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
219         errs() << ("error: " + Msg + "\n");
220         HasError = true;
221       });
222   if (HasError)
223     exit(1);
224   if (Args.hasArg(OPT_help)) {
225     printHelp(ToolName, Tbl, outs());
226     exit(0);
227   }
228   if (Args.hasArg(OPT_version)) {
229     outs() << ToolName << '\n';
230     cl::PrintVersionMessage();
231     exit(0);
232   }
233 
234   return Args;
235 }
236 
237 template <typename T>
238 static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) {
239   if (const opt::Arg *A = Args.getLastArg(ID)) {
240     StringRef V(A->getValue());
241     if (!llvm::to_integer(V, Value, 0)) {
242       errs() << A->getSpelling() +
243                     ": expected a non-negative integer, but got '" + V + "'";
244       exit(1);
245     }
246   } else {
247     Value = 0;
248   }
249 }
250 
251 static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,
252                                                   bool IsAddr2Line) {
253   if (Args.hasArg(OPT_functions))
254     return FunctionNameKind::LinkageName;
255   if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ))
256     return StringSwitch<FunctionNameKind>(A->getValue())
257         .Case("none", FunctionNameKind::None)
258         .Case("short", FunctionNameKind::ShortName)
259         .Default(FunctionNameKind::LinkageName);
260   return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;
261 }
262 
263 int main(int argc, char **argv) {
264   InitLLVM X(argc, argv);
265   // The HTTPClient must be initialized for use by the debuginfod client.
266   HTTPClient::initialize();
267   sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
268 
269   bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
270   BumpPtrAllocator A;
271   StringSaver Saver(A);
272   SymbolizerOptTable Tbl;
273   opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl);
274 
275   LLVMSymbolizer::Options Opts;
276   uint64_t AdjustVMA;
277   PrinterConfig Config;
278   parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA);
279   if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) {
280     Opts.PathStyle =
281         A->getOption().matches(OPT_basenames)
282             ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly
283             : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath;
284   } else {
285     Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath;
286   }
287   Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ);
288   Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str();
289   Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line);
290   Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str();
291   Opts.FallbackDebugPath =
292       Args.getLastArgValue(OPT_fallback_debug_path_EQ).str();
293   Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line);
294   parseIntArg(Args, OPT_print_source_context_lines_EQ,
295               Config.SourceContextLines);
296   Opts.RelativeAddresses = Args.hasArg(OPT_relative_address);
297   Opts.UntagAddresses =
298       Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line);
299   Opts.UseDIA = Args.hasArg(OPT_use_dia);
300 #if !defined(LLVM_ENABLE_DIA_SDK)
301   if (Opts.UseDIA) {
302     WithColor::warning() << "DIA not available; using native PDB reader\n";
303     Opts.UseDIA = false;
304   }
305 #endif
306   Opts.UseSymbolTable = true;
307   Config.PrintAddress = Args.hasArg(OPT_addresses);
308   Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None;
309   Config.Pretty = Args.hasArg(OPT_pretty_print);
310   Config.Verbose = Args.hasArg(OPT_verbose);
311 
312   for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) {
313     StringRef Hint(A->getValue());
314     if (sys::path::extension(Hint) == ".dSYM") {
315       Opts.DsymHints.emplace_back(Hint);
316     } else {
317       errs() << "Warning: invalid dSYM hint: \"" << Hint
318              << "\" (must have the '.dSYM' extension).\n";
319     }
320   }
321 
322   auto Style = IsAddr2Line ? OutputStyle::GNU : OutputStyle::LLVM;
323   if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) {
324     if (strcmp(A->getValue(), "GNU") == 0)
325       Style = OutputStyle::GNU;
326     else if (strcmp(A->getValue(), "JSON") == 0)
327       Style = OutputStyle::JSON;
328     else
329       Style = OutputStyle::LLVM;
330   }
331 
332   LLVMSymbolizer Symbolizer(Opts);
333   std::unique_ptr<DIPrinter> Printer;
334   if (Style == OutputStyle::GNU)
335     Printer = std::make_unique<GNUPrinter>(outs(), errs(), Config);
336   else if (Style == OutputStyle::JSON)
337     Printer = std::make_unique<JSONPrinter>(outs(), Config);
338   else
339     Printer = std::make_unique<LLVMPrinter>(outs(), errs(), Config);
340 
341   std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT);
342   if (InputAddresses.empty()) {
343     const int kMaxInputStringLength = 1024;
344     char InputString[kMaxInputStringLength];
345 
346     while (fgets(InputString, sizeof(InputString), stdin)) {
347       // Strip newline characters.
348       std::string StrippedInputString(InputString);
349       llvm::erase_if(StrippedInputString,
350                      [](char c) { return c == '\r' || c == '\n'; });
351       symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, StrippedInputString,
352                      Symbolizer, *Printer);
353       outs().flush();
354     }
355   } else {
356     Printer->listBegin();
357     for (StringRef Address : InputAddresses)
358       symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, Address, Symbolizer,
359                      *Printer);
360     Printer->listEnd();
361   }
362 
363   return 0;
364 }
365