xref: /freebsd-src/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
1 //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This utility works much like "addr2line". It is capable of transforming
10 // tuples (module name, module offset) to code locations (function name,
11 // file, line number, column number). It is targeted for compiler-rt tools
12 // (especially AddressSanitizer and ThreadSanitizer) that can use it
13 // to symbolize stack traces in their error reports.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Opts.inc"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Config/config.h"
20 #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
21 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
22 #include "llvm/Option/Arg.h"
23 #include "llvm/Option/ArgList.h"
24 #include "llvm/Option/Option.h"
25 #include "llvm/Support/COM.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/FileSystem.h"
29 #include "llvm/Support/InitLLVM.h"
30 #include "llvm/Support/Path.h"
31 #include "llvm/Support/StringSaver.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include <algorithm>
34 #include <cstdio>
35 #include <cstring>
36 #include <string>
37 
38 using namespace llvm;
39 using namespace symbolize;
40 
namespace {
// Option identifiers: one OPT_<ID> enumerator is generated for every
// OPTION(...) entry that Opts.inc expands, following the OPT_INVALID
// placeholder.
enum ID {
  OPT_INVALID = 0, // This is not an option ID.
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
               HELPTEXT, METAVAR, VALUES)                                      \
  OPT_##ID,
#include "Opts.inc"
#undef OPTION
};

// Every PREFIX(NAME, VALUE) entry in Opts.inc becomes a constant array of C
// strings named NAME. Presumably these are the arrays that the PREFIX field of
// the OPTION entries below refers to -- verify against the generated Opts.inc.
#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
#include "Opts.inc"
#undef PREFIX

// One opt::OptTable::Info record per OPTION(...) entry in Opts.inc. The macro
// only reorders its arguments into the field order used by the initializer
// below and maps ID/GROUP/ALIAS to the OPT_* enumerators declared above.
static const opt::OptTable::Info InfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
               HELPTEXT, METAVAR, VALUES)                                      \
  {                                                                            \
      PREFIX,      NAME,      HELPTEXT,                                        \
      METAVAR,     OPT_##ID,  opt::Option::KIND##Class,                        \
      PARAM,       FLAGS,     OPT_##GROUP,                                     \
      OPT_##ALIAS, ALIASARGS, VALUES},
#include "Opts.inc"
#undef OPTION
};

// Option table for this tool, backed by InfoTable.
class SymbolizerOptTable : public opt::OptTable {
public:
  // NOTE(review): the second constructor argument is presumably OptTable's
  // "ignore case" flag -- confirm against the opt::OptTable declaration.
  SymbolizerOptTable() : OptTable(InfoTable, true) {}
};
} // namespace
72 
// Positional cl::list described as "<input addresses>...".
// NOTE(review): nothing in the visible code reads ClInputAddresses; main()
// obtains its addresses from Args.getAllArgValues(OPT_INPUT). Confirm whether
// this declaration is still needed.
static cl::list<std::string> ClInputAddresses(cl::Positional,
                                              cl::desc("<input addresses>..."),
                                              cl::ZeroOrMore);
76 
77 template<typename T>
78 static bool error(Expected<T> &ResOrErr) {
79   if (ResOrErr)
80     return false;
81   logAllUnhandledErrors(ResOrErr.takeError(), errs(),
82                         "LLVMSymbolizer: error reading file: ");
83   return true;
84 }
85 
// Kind of query carried by one input line. parseCommand() selects the value
// from the line's leading keyword and symbolizeInput() dispatches on it.
enum class Command {
  Code,  // Line starts with "CODE ", or has no recognised keyword at all.
  Data,  // Line starts with "DATA ".
  Frame, // Line starts with "FRAME ".
};
91 
92 static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
93                          StringRef InputString, Command &Cmd,
94                          std::string &ModuleName, uint64_t &ModuleOffset) {
95   const char kDelimiters[] = " \n\r";
96   ModuleName = "";
97   if (InputString.consume_front("CODE ")) {
98     Cmd = Command::Code;
99   } else if (InputString.consume_front("DATA ")) {
100     Cmd = Command::Data;
101   } else if (InputString.consume_front("FRAME ")) {
102     Cmd = Command::Frame;
103   } else {
104     // If no cmd, assume it's CODE.
105     Cmd = Command::Code;
106   }
107   const char *Pos = InputString.data();
108   // Skip delimiters and parse input filename (if needed).
109   if (BinaryName.empty()) {
110     Pos += strspn(Pos, kDelimiters);
111     if (*Pos == '"' || *Pos == '\'') {
112       char Quote = *Pos;
113       Pos++;
114       const char *End = strchr(Pos, Quote);
115       if (!End)
116         return false;
117       ModuleName = std::string(Pos, End - Pos);
118       Pos = End + 1;
119     } else {
120       int NameLength = strcspn(Pos, kDelimiters);
121       ModuleName = std::string(Pos, NameLength);
122       Pos += NameLength;
123     }
124   } else {
125     ModuleName = BinaryName.str();
126   }
127   // Skip delimiters and parse module offset.
128   Pos += strspn(Pos, kDelimiters);
129   int OffsetLength = strcspn(Pos, kDelimiters);
130   StringRef Offset(Pos, OffsetLength);
131   // GNU addr2line assumes the offset is hexadecimal and allows a redundant
132   // "0x" or "0X" prefix; do the same for compatibility.
133   if (IsAddr2Line)
134     Offset.consume_front("0x") || Offset.consume_front("0X");
135   return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset);
136 }
137 
138 static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA,
139                            bool IsAddr2Line, DIPrinter::OutputStyle OutputStyle,
140                            StringRef InputString, LLVMSymbolizer &Symbolizer,
141                            DIPrinter &Printer) {
142   Command Cmd;
143   std::string ModuleName;
144   uint64_t Offset = 0;
145   if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
146                     StringRef(InputString), Cmd, ModuleName, Offset)) {
147     outs() << InputString << "\n";
148     return;
149   }
150 
151   if (Args.hasArg(OPT_addresses)) {
152     outs() << "0x";
153     outs().write_hex(Offset);
154     StringRef Delimiter = Args.hasArg(OPT_pretty_print) ? ": " : "\n";
155     outs() << Delimiter;
156   }
157   Offset -= AdjustVMA;
158   if (Cmd == Command::Data) {
159     auto ResOrErr = Symbolizer.symbolizeData(
160         ModuleName, {Offset, object::SectionedAddress::UndefSection});
161     Printer << (error(ResOrErr) ? DIGlobal() : ResOrErr.get());
162   } else if (Cmd == Command::Frame) {
163     auto ResOrErr = Symbolizer.symbolizeFrame(
164         ModuleName, {Offset, object::SectionedAddress::UndefSection});
165     if (!error(ResOrErr)) {
166       for (DILocal Local : *ResOrErr)
167         Printer << Local;
168       if (ResOrErr->empty())
169         outs() << "??\n";
170     }
171   } else if (Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line)) {
172     auto ResOrErr = Symbolizer.symbolizeInlinedCode(
173         ModuleName, {Offset, object::SectionedAddress::UndefSection});
174     Printer << (error(ResOrErr) ? DIInliningInfo() : ResOrErr.get());
175   } else if (OutputStyle == DIPrinter::OutputStyle::GNU) {
176     // With PrintFunctions == FunctionNameKind::LinkageName (default)
177     // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
178     // may override the name of an inlined function with the name of the topmost
179     // caller function in the inlining chain. This contradicts the existing
180     // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
181     // the topmost function, which suits our needs better.
182     auto ResOrErr = Symbolizer.symbolizeInlinedCode(
183         ModuleName, {Offset, object::SectionedAddress::UndefSection});
184     Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get().getFrame(0));
185   } else {
186     auto ResOrErr = Symbolizer.symbolizeCode(
187         ModuleName, {Offset, object::SectionedAddress::UndefSection});
188     Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get());
189   }
190   if (OutputStyle == DIPrinter::OutputStyle::LLVM)
191     outs() << "\n";
192 }
193 
194 static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl,
195                       raw_ostream &OS) {
196   const char HelpText[] = " [options] addresses...";
197   Tbl.PrintHelp(OS, (ToolName + HelpText).str().c_str(),
198                 ToolName.str().c_str());
199   // TODO Replace this with OptTable API once it adds extrahelp support.
200   OS << "\nPass @FILE as argument to read options from FILE.\n";
201 }
202 
203 static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line,
204                                       StringSaver &Saver,
205                                       SymbolizerOptTable &Tbl) {
206   StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer";
207   Tbl.setGroupedShortOptions(true);
208   // The environment variable specifies initial options which can be overridden
209   // by commnad line options.
210   Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS"
211                                                    : "LLVM_SYMBOLIZER_OPTS");
212   bool HasError = false;
213   opt::InputArgList Args =
214       Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
215         errs() << ("error: " + Msg + "\n");
216         HasError = true;
217       });
218   if (HasError)
219     exit(1);
220   if (Args.hasArg(OPT_help)) {
221     printHelp(ToolName, Tbl, outs());
222     exit(0);
223   }
224   if (Args.hasArg(OPT_version)) {
225     outs() << ToolName << '\n';
226     cl::PrintVersionMessage();
227     exit(0);
228   }
229 
230   return Args;
231 }
232 
233 template <typename T>
234 static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) {
235   if (const opt::Arg *A = Args.getLastArg(ID)) {
236     StringRef V(A->getValue());
237     if (!llvm::to_integer(V, Value, 0)) {
238       errs() << A->getSpelling() +
239                     ": expected a non-negative integer, but got '" + V + "'";
240       exit(1);
241     }
242   } else {
243     Value = 0;
244   }
245 }
246 
247 static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,
248                                                   bool IsAddr2Line) {
249   if (Args.hasArg(OPT_functions))
250     return FunctionNameKind::LinkageName;
251   if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ))
252     return StringSwitch<FunctionNameKind>(A->getValue())
253         .Case("none", FunctionNameKind::None)
254         .Case("short", FunctionNameKind::ShortName)
255         .Default(FunctionNameKind::LinkageName);
256   return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;
257 }
258 
259 int main(int argc, char **argv) {
260   InitLLVM X(argc, argv);
261   sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
262 
263   bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
264   BumpPtrAllocator A;
265   StringSaver Saver(A);
266   SymbolizerOptTable Tbl;
267   opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl);
268 
269   LLVMSymbolizer::Options Opts;
270   uint64_t AdjustVMA;
271   unsigned SourceContextLines;
272   parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA);
273   if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) {
274     Opts.PathStyle =
275         A->getOption().matches(OPT_basenames)
276             ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly
277             : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath;
278   } else {
279     Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath;
280   }
281   Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ);
282   Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str();
283   Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line);
284   Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str();
285   Opts.FallbackDebugPath =
286       Args.getLastArgValue(OPT_fallback_debug_path_EQ).str();
287   Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line);
288   parseIntArg(Args, OPT_print_source_context_lines_EQ, SourceContextLines);
289   Opts.RelativeAddresses = Args.hasArg(OPT_relative_address);
290   Opts.UntagAddresses =
291       Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line);
292   Opts.UseDIA = Args.hasArg(OPT_use_dia);
293 #if !defined(LLVM_ENABLE_DIA_SDK)
294   if (Opts.UseDIA) {
295     WithColor::warning() << "DIA not available; using native PDB reader\n";
296     Opts.UseDIA = false;
297   }
298 #endif
299   Opts.UseSymbolTable = true;
300 
301   for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) {
302     StringRef Hint(A->getValue());
303     if (sys::path::extension(Hint) == ".dSYM") {
304       Opts.DsymHints.emplace_back(Hint);
305     } else {
306       errs() << "Warning: invalid dSYM hint: \"" << Hint
307              << "\" (must have the '.dSYM' extension).\n";
308     }
309   }
310 
311   auto OutputStyle =
312       IsAddr2Line ? DIPrinter::OutputStyle::GNU : DIPrinter::OutputStyle::LLVM;
313   if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) {
314     OutputStyle = strcmp(A->getValue(), "GNU") == 0
315                       ? DIPrinter::OutputStyle::GNU
316                       : DIPrinter::OutputStyle::LLVM;
317   }
318 
319   LLVMSymbolizer Symbolizer(Opts);
320   DIPrinter Printer(outs(), Opts.PrintFunctions != FunctionNameKind::None,
321                     Args.hasArg(OPT_pretty_print), SourceContextLines,
322                     Args.hasArg(OPT_verbose), OutputStyle);
323 
324   std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT);
325   if (InputAddresses.empty()) {
326     const int kMaxInputStringLength = 1024;
327     char InputString[kMaxInputStringLength];
328 
329     while (fgets(InputString, sizeof(InputString), stdin)) {
330       // Strip newline characters.
331       std::string StrippedInputString(InputString);
332       llvm::erase_if(StrippedInputString,
333                      [](char c) { return c == '\r' || c == '\n'; });
334       symbolizeInput(Args, AdjustVMA, IsAddr2Line, OutputStyle,
335                      StrippedInputString, Symbolizer, Printer);
336       outs().flush();
337     }
338   } else {
339     for (StringRef Address : InputAddresses)
340       symbolizeInput(Args, AdjustVMA, IsAddr2Line, OutputStyle, Address,
341                      Symbolizer, Printer);
342   }
343 
344   return 0;
345 }
346