xref: /netbsd-src/external/apache2/llvm/dist/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This utility works much like "addr2line". It is capable of transforming
10 // tuples (module name, module offset) to code locations (function name,
11 // file, line number, column number). It is targeted for compiler-rt tools
12 // (especially AddressSanitizer and ThreadSanitizer) that can use it
13 // to symbolize stack traces in their error reports.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Opts.inc"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Config/config.h"
20 #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
21 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
22 #include "llvm/Option/Arg.h"
23 #include "llvm/Option/ArgList.h"
24 #include "llvm/Option/Option.h"
25 #include "llvm/Support/COM.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/FileSystem.h"
29 #include "llvm/Support/InitLLVM.h"
30 #include "llvm/Support/Path.h"
31 #include "llvm/Support/StringSaver.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include <algorithm>
34 #include <cstdio>
35 #include <cstring>
36 #include <string>
37 
38 using namespace llvm;
39 using namespace symbolize;
40 
41 namespace {
42 enum ID {
43   OPT_INVALID = 0, // This is not an option ID.
44 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
45                HELPTEXT, METAVAR, VALUES)                                      \
46   OPT_##ID,
47 #include "Opts.inc"
48 #undef OPTION
49 };
50 
51 #define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
52 #include "Opts.inc"
53 #undef PREFIX
54 
55 static const opt::OptTable::Info InfoTable[] = {
56 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
57                HELPTEXT, METAVAR, VALUES)                                      \
58   {                                                                            \
59       PREFIX,      NAME,      HELPTEXT,                                        \
60       METAVAR,     OPT_##ID,  opt::Option::KIND##Class,                        \
61       PARAM,       FLAGS,     OPT_##GROUP,                                     \
62       OPT_##ALIAS, ALIASARGS, VALUES},
63 #include "Opts.inc"
64 #undef OPTION
65 };
66 
67 class SymbolizerOptTable : public opt::OptTable {
68 public:
SymbolizerOptTable()69   SymbolizerOptTable() : OptTable(InfoTable, true) {}
70 };
71 } // namespace
72 
73 template <typename T>
print(const Request & Request,Expected<T> & ResOrErr,DIPrinter & Printer)74 static void print(const Request &Request, Expected<T> &ResOrErr,
75                   DIPrinter &Printer) {
76   if (ResOrErr) {
77     // No error, print the result.
78     Printer.print(Request, *ResOrErr);
79     return;
80   }
81 
82   // Handle the error.
83   bool PrintEmpty = true;
84   handleAllErrors(std::move(ResOrErr.takeError()),
85                   [&](const ErrorInfoBase &EI) {
86                     PrintEmpty = Printer.printError(
87                         Request, EI, "LLVMSymbolizer: error reading file: ");
88                   });
89 
90   if (PrintEmpty)
91     Printer.print(Request, T());
92 }
93 
94 enum class OutputStyle { LLVM, GNU, JSON };
95 
96 enum class Command {
97   Code,
98   Data,
99   Frame,
100 };
101 
parseCommand(StringRef BinaryName,bool IsAddr2Line,StringRef InputString,Command & Cmd,std::string & ModuleName,uint64_t & ModuleOffset)102 static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
103                          StringRef InputString, Command &Cmd,
104                          std::string &ModuleName, uint64_t &ModuleOffset) {
105   const char kDelimiters[] = " \n\r";
106   ModuleName = "";
107   if (InputString.consume_front("CODE ")) {
108     Cmd = Command::Code;
109   } else if (InputString.consume_front("DATA ")) {
110     Cmd = Command::Data;
111   } else if (InputString.consume_front("FRAME ")) {
112     Cmd = Command::Frame;
113   } else {
114     // If no cmd, assume it's CODE.
115     Cmd = Command::Code;
116   }
117   const char *Pos = InputString.data();
118   // Skip delimiters and parse input filename (if needed).
119   if (BinaryName.empty()) {
120     Pos += strspn(Pos, kDelimiters);
121     if (*Pos == '"' || *Pos == '\'') {
122       char Quote = *Pos;
123       Pos++;
124       const char *End = strchr(Pos, Quote);
125       if (!End)
126         return false;
127       ModuleName = std::string(Pos, End - Pos);
128       Pos = End + 1;
129     } else {
130       int NameLength = strcspn(Pos, kDelimiters);
131       ModuleName = std::string(Pos, NameLength);
132       Pos += NameLength;
133     }
134   } else {
135     ModuleName = BinaryName.str();
136   }
137   // Skip delimiters and parse module offset.
138   Pos += strspn(Pos, kDelimiters);
139   int OffsetLength = strcspn(Pos, kDelimiters);
140   StringRef Offset(Pos, OffsetLength);
141   // GNU addr2line assumes the offset is hexadecimal and allows a redundant
142   // "0x" or "0X" prefix; do the same for compatibility.
143   if (IsAddr2Line)
144     Offset.consume_front("0x") || Offset.consume_front("0X");
145   return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset);
146 }
147 
symbolizeInput(const opt::InputArgList & Args,uint64_t AdjustVMA,bool IsAddr2Line,OutputStyle Style,StringRef InputString,LLVMSymbolizer & Symbolizer,DIPrinter & Printer)148 static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA,
149                            bool IsAddr2Line, OutputStyle Style,
150                            StringRef InputString, LLVMSymbolizer &Symbolizer,
151                            DIPrinter &Printer) {
152   Command Cmd;
153   std::string ModuleName;
154   uint64_t Offset = 0;
155   if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
156                     StringRef(InputString), Cmd, ModuleName, Offset)) {
157     Printer.printInvalidCommand({ModuleName, None}, InputString);
158     return;
159   }
160 
161   uint64_t AdjustedOffset = Offset - AdjustVMA;
162   if (Cmd == Command::Data) {
163     Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(
164         ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
165     print({ModuleName, Offset}, ResOrErr, Printer);
166   } else if (Cmd == Command::Frame) {
167     Expected<std::vector<DILocal>> ResOrErr = Symbolizer.symbolizeFrame(
168         ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
169     print({ModuleName, Offset}, ResOrErr, Printer);
170   } else if (Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line)) {
171     Expected<DIInliningInfo> ResOrErr = Symbolizer.symbolizeInlinedCode(
172         ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
173     print({ModuleName, Offset}, ResOrErr, Printer);
174   } else if (Style == OutputStyle::GNU) {
175     // With PrintFunctions == FunctionNameKind::LinkageName (default)
176     // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
177     // may override the name of an inlined function with the name of the topmost
178     // caller function in the inlining chain. This contradicts the existing
179     // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
180     // the topmost function, which suits our needs better.
181     Expected<DIInliningInfo> ResOrErr = Symbolizer.symbolizeInlinedCode(
182         ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
183     Expected<DILineInfo> Res0OrErr =
184         !ResOrErr
185             ? Expected<DILineInfo>(ResOrErr.takeError())
186             : ((ResOrErr->getNumberOfFrames() == 0) ? DILineInfo()
187                                                     : ResOrErr->getFrame(0));
188     print({ModuleName, Offset}, Res0OrErr, Printer);
189   } else {
190     Expected<DILineInfo> ResOrErr = Symbolizer.symbolizeCode(
191         ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
192     print({ModuleName, Offset}, ResOrErr, Printer);
193   }
194 }
195 
printHelp(StringRef ToolName,const SymbolizerOptTable & Tbl,raw_ostream & OS)196 static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl,
197                       raw_ostream &OS) {
198   const char HelpText[] = " [options] addresses...";
199   Tbl.PrintHelp(OS, (ToolName + HelpText).str().c_str(),
200                 ToolName.str().c_str());
201   // TODO Replace this with OptTable API once it adds extrahelp support.
202   OS << "\nPass @FILE as argument to read options from FILE.\n";
203 }
204 
parseOptions(int Argc,char * Argv[],bool IsAddr2Line,StringSaver & Saver,SymbolizerOptTable & Tbl)205 static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line,
206                                       StringSaver &Saver,
207                                       SymbolizerOptTable &Tbl) {
208   StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer";
209   Tbl.setGroupedShortOptions(true);
210   // The environment variable specifies initial options which can be overridden
211   // by commnad line options.
212   Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS"
213                                                    : "LLVM_SYMBOLIZER_OPTS");
214   bool HasError = false;
215   opt::InputArgList Args =
216       Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
217         errs() << ("error: " + Msg + "\n");
218         HasError = true;
219       });
220   if (HasError)
221     exit(1);
222   if (Args.hasArg(OPT_help)) {
223     printHelp(ToolName, Tbl, outs());
224     exit(0);
225   }
226   if (Args.hasArg(OPT_version)) {
227     outs() << ToolName << '\n';
228     cl::PrintVersionMessage();
229     exit(0);
230   }
231 
232   return Args;
233 }
234 
235 template <typename T>
parseIntArg(const opt::InputArgList & Args,int ID,T & Value)236 static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) {
237   if (const opt::Arg *A = Args.getLastArg(ID)) {
238     StringRef V(A->getValue());
239     if (!llvm::to_integer(V, Value, 0)) {
240       errs() << A->getSpelling() +
241                     ": expected a non-negative integer, but got '" + V + "'";
242       exit(1);
243     }
244   } else {
245     Value = 0;
246   }
247 }
248 
decideHowToPrintFunctions(const opt::InputArgList & Args,bool IsAddr2Line)249 static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,
250                                                   bool IsAddr2Line) {
251   if (Args.hasArg(OPT_functions))
252     return FunctionNameKind::LinkageName;
253   if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ))
254     return StringSwitch<FunctionNameKind>(A->getValue())
255         .Case("none", FunctionNameKind::None)
256         .Case("short", FunctionNameKind::ShortName)
257         .Default(FunctionNameKind::LinkageName);
258   return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;
259 }
260 
main(int argc,char ** argv)261 int main(int argc, char **argv) {
262   InitLLVM X(argc, argv);
263   sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
264 
265   bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
266   BumpPtrAllocator A;
267   StringSaver Saver(A);
268   SymbolizerOptTable Tbl;
269   opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl);
270 
271   LLVMSymbolizer::Options Opts;
272   uint64_t AdjustVMA;
273   PrinterConfig Config;
274   parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA);
275   if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) {
276     Opts.PathStyle =
277         A->getOption().matches(OPT_basenames)
278             ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly
279             : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath;
280   } else {
281     Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath;
282   }
283   Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ);
284   Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str();
285   Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line);
286   Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str();
287   Opts.FallbackDebugPath =
288       Args.getLastArgValue(OPT_fallback_debug_path_EQ).str();
289   Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line);
290   parseIntArg(Args, OPT_print_source_context_lines_EQ,
291               Config.SourceContextLines);
292   Opts.RelativeAddresses = Args.hasArg(OPT_relative_address);
293   Opts.UntagAddresses =
294       Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line);
295   Opts.UseDIA = Args.hasArg(OPT_use_dia);
296 #if !defined(LLVM_ENABLE_DIA_SDK)
297   if (Opts.UseDIA) {
298     WithColor::warning() << "DIA not available; using native PDB reader\n";
299     Opts.UseDIA = false;
300   }
301 #endif
302   Opts.UseSymbolTable = true;
303   Config.PrintAddress = Args.hasArg(OPT_addresses);
304   Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None;
305   Config.Pretty = Args.hasArg(OPT_pretty_print);
306   Config.Verbose = Args.hasArg(OPT_verbose);
307 
308   for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) {
309     StringRef Hint(A->getValue());
310     if (sys::path::extension(Hint) == ".dSYM") {
311       Opts.DsymHints.emplace_back(Hint);
312     } else {
313       errs() << "Warning: invalid dSYM hint: \"" << Hint
314              << "\" (must have the '.dSYM' extension).\n";
315     }
316   }
317 
318   auto Style = IsAddr2Line ? OutputStyle::GNU : OutputStyle::LLVM;
319   if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) {
320     if (strcmp(A->getValue(), "GNU") == 0)
321       Style = OutputStyle::GNU;
322     else if (strcmp(A->getValue(), "JSON") == 0)
323       Style = OutputStyle::JSON;
324     else
325       Style = OutputStyle::LLVM;
326   }
327 
328   LLVMSymbolizer Symbolizer(Opts);
329   std::unique_ptr<DIPrinter> Printer;
330   if (Style == OutputStyle::GNU)
331     Printer = std::make_unique<GNUPrinter>(outs(), errs(), Config);
332   else if (Style == OutputStyle::JSON)
333     Printer = std::make_unique<JSONPrinter>(outs(), Config);
334   else
335     Printer = std::make_unique<LLVMPrinter>(outs(), errs(), Config);
336 
337   std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT);
338   if (InputAddresses.empty()) {
339     const int kMaxInputStringLength = 1024;
340     char InputString[kMaxInputStringLength];
341 
342     while (fgets(InputString, sizeof(InputString), stdin)) {
343       // Strip newline characters.
344       std::string StrippedInputString(InputString);
345       llvm::erase_if(StrippedInputString,
346                      [](char c) { return c == '\r' || c == '\n'; });
347       symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, StrippedInputString,
348                      Symbolizer, *Printer);
349       outs().flush();
350     }
351   } else {
352     Printer->listBegin();
353     for (StringRef Address : InputAddresses)
354       symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, Address, Symbolizer,
355                      *Printer);
356     Printer->listEnd();
357   }
358 
359   return 0;
360 }
361