1 //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This utility works much like "addr2line". It is capable of transforming
10 // tuples (module name, module offset) to code locations (function name,
11 // file, line number, column number). It is targeted for compiler-rt tools
12 // (especially AddressSanitizer and ThreadSanitizer) that can use it
13 // to symbolize stack traces in their error reports.
14 //
15 //===----------------------------------------------------------------------===//
16
17 #include "Opts.inc"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Config/config.h"
21 #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
22 #include "llvm/DebugInfo/Symbolize/Markup.h"
23 #include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
24 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
25 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
26 #include "llvm/Debuginfod/BuildIDFetcher.h"
27 #include "llvm/Debuginfod/Debuginfod.h"
28 #include "llvm/Debuginfod/HTTPClient.h"
29 #include "llvm/Option/Arg.h"
30 #include "llvm/Option/ArgList.h"
31 #include "llvm/Option/Option.h"
32 #include "llvm/Support/COM.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/InitLLVM.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/StringSaver.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <algorithm>
41 #include <cstdio>
42 #include <cstring>
43 #include <iostream>
44 #include <string>
45
46 using namespace llvm;
47 using namespace symbolize;
48
49 namespace {
50 enum ID {
51 OPT_INVALID = 0, // This is not an option ID.
52 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
53 HELPTEXT, METAVAR, VALUES) \
54 OPT_##ID,
55 #include "Opts.inc"
56 #undef OPTION
57 };
58
59 #define PREFIX(NAME, VALUE) \
60 static constexpr StringLiteral NAME##_init[] = VALUE; \
61 static constexpr ArrayRef<StringLiteral> NAME(NAME##_init, \
62 std::size(NAME##_init) - 1);
63 #include "Opts.inc"
64 #undef PREFIX
65
66 static constexpr opt::OptTable::Info InfoTable[] = {
67 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
68 HELPTEXT, METAVAR, VALUES) \
69 { \
70 PREFIX, NAME, HELPTEXT, \
71 METAVAR, OPT_##ID, opt::Option::KIND##Class, \
72 PARAM, FLAGS, OPT_##GROUP, \
73 OPT_##ALIAS, ALIASARGS, VALUES},
74 #include "Opts.inc"
75 #undef OPTION
76 };
77
78 class SymbolizerOptTable : public opt::GenericOptTable {
79 public:
SymbolizerOptTable()80 SymbolizerOptTable() : GenericOptTable(InfoTable) {
81 setGroupedShortOptions(true);
82 }
83 };
84 } // namespace
85
86 template <typename T>
print(const Request & Request,Expected<T> & ResOrErr,DIPrinter & Printer)87 static void print(const Request &Request, Expected<T> &ResOrErr,
88 DIPrinter &Printer) {
89 if (ResOrErr) {
90 // No error, print the result.
91 Printer.print(Request, *ResOrErr);
92 return;
93 }
94
95 // Handle the error.
96 bool PrintEmpty = true;
97 handleAllErrors(std::move(ResOrErr.takeError()),
98 [&](const ErrorInfoBase &EI) {
99 PrintEmpty = Printer.printError(
100 Request, EI, "LLVMSymbolizer: error reading file: ");
101 });
102
103 if (PrintEmpty)
104 Printer.print(Request, T());
105 }
106
/// Output format selected for printing symbolization results.
enum class OutputStyle {
  LLVM,
  GNU,
  JSON,
};
108
/// Kind of lookup requested by an input line (selected by the "CODE ",
/// "DATA " or "FRAME " prefix of the line).
enum class Command { Code, Data, Frame };
114
enableDebuginfod(LLVMSymbolizer & Symbolizer,const opt::ArgList & Args)115 static void enableDebuginfod(LLVMSymbolizer &Symbolizer,
116 const opt::ArgList &Args) {
117 static bool IsEnabled = false;
118 if (IsEnabled)
119 return;
120 IsEnabled = true;
121 // Look up symbols using the debuginfod client.
122 Symbolizer.setBuildIDFetcher(std::make_unique<DebuginfodFetcher>(
123 Args.getAllArgValues(OPT_debug_file_directory_EQ)));
124 // The HTTPClient must be initialized for use by the debuginfod client.
125 HTTPClient::initialize();
126 }
127
parseBuildID(StringRef Str)128 static object::BuildID parseBuildID(StringRef Str) {
129 std::string Bytes;
130 if (!tryGetFromHex(Str, Bytes))
131 return {};
132 ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
133 Bytes.size());
134 return object::BuildID(BuildID.begin(), BuildID.end());
135 }
136
parseCommand(StringRef BinaryName,bool IsAddr2Line,StringRef InputString,Command & Cmd,std::string & ModuleName,object::BuildID & BuildID,uint64_t & ModuleOffset)137 static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
138 StringRef InputString, Command &Cmd,
139 std::string &ModuleName, object::BuildID &BuildID,
140 uint64_t &ModuleOffset) {
141 const char kDelimiters[] = " \n\r";
142 ModuleName = "";
143 if (InputString.consume_front("CODE ")) {
144 Cmd = Command::Code;
145 } else if (InputString.consume_front("DATA ")) {
146 Cmd = Command::Data;
147 } else if (InputString.consume_front("FRAME ")) {
148 Cmd = Command::Frame;
149 } else {
150 // If no cmd, assume it's CODE.
151 Cmd = Command::Code;
152 }
153
154 const char *Pos;
155 // Skip delimiters and parse input filename (if needed).
156 if (BinaryName.empty() && BuildID.empty()) {
157 bool HasFilePrefix = false;
158 bool HasBuildIDPrefix = false;
159 while (true) {
160 if (InputString.consume_front("FILE:")) {
161 if (HasFilePrefix)
162 return false;
163 HasFilePrefix = true;
164 continue;
165 }
166 if (InputString.consume_front("BUILDID:")) {
167 if (HasBuildIDPrefix)
168 return false;
169 HasBuildIDPrefix = true;
170 continue;
171 }
172 break;
173 }
174 if (HasFilePrefix && HasBuildIDPrefix)
175 return false;
176
177 Pos = InputString.data();
178 Pos += strspn(Pos, kDelimiters);
179 if (*Pos == '"' || *Pos == '\'') {
180 char Quote = *Pos;
181 Pos++;
182 const char *End = strchr(Pos, Quote);
183 if (!End)
184 return false;
185 ModuleName = std::string(Pos, End - Pos);
186 Pos = End + 1;
187 } else {
188 int NameLength = strcspn(Pos, kDelimiters);
189 ModuleName = std::string(Pos, NameLength);
190 Pos += NameLength;
191 }
192 if (HasBuildIDPrefix) {
193 BuildID = parseBuildID(ModuleName);
194 if (BuildID.empty())
195 return false;
196 ModuleName.clear();
197 }
198 } else {
199 Pos = InputString.data();
200 ModuleName = BinaryName.str();
201 }
202 // Skip delimiters and parse module offset.
203 Pos += strspn(Pos, kDelimiters);
204 int OffsetLength = strcspn(Pos, kDelimiters);
205 StringRef Offset(Pos, OffsetLength);
206 // GNU addr2line assumes the offset is hexadecimal and allows a redundant
207 // "0x" or "0X" prefix; do the same for compatibility.
208 if (IsAddr2Line)
209 Offset.consume_front("0x") || Offset.consume_front("0X");
210 return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset);
211 }
212
213 template <typename T>
executeCommand(StringRef ModuleName,const T & ModuleSpec,Command Cmd,uint64_t Offset,uint64_t AdjustVMA,bool ShouldInline,OutputStyle Style,LLVMSymbolizer & Symbolizer,DIPrinter & Printer)214 void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
215 uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline,
216 OutputStyle Style, LLVMSymbolizer &Symbolizer,
217 DIPrinter &Printer) {
218 uint64_t AdjustedOffset = Offset - AdjustVMA;
219 object::SectionedAddress Address = {AdjustedOffset,
220 object::SectionedAddress::UndefSection};
221 if (Cmd == Command::Data) {
222 Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address);
223 print({ModuleName, Offset}, ResOrErr, Printer);
224 } else if (Cmd == Command::Frame) {
225 Expected<std::vector<DILocal>> ResOrErr =
226 Symbolizer.symbolizeFrame(ModuleSpec, Address);
227 print({ModuleName, Offset}, ResOrErr, Printer);
228 } else if (ShouldInline) {
229 Expected<DIInliningInfo> ResOrErr =
230 Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
231 print({ModuleName, Offset}, ResOrErr, Printer);
232 } else if (Style == OutputStyle::GNU) {
233 // With PrintFunctions == FunctionNameKind::LinkageName (default)
234 // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
235 // may override the name of an inlined function with the name of the topmost
236 // caller function in the inlining chain. This contradicts the existing
237 // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
238 // the topmost function, which suits our needs better.
239 Expected<DIInliningInfo> ResOrErr =
240 Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
241 Expected<DILineInfo> Res0OrErr =
242 !ResOrErr
243 ? Expected<DILineInfo>(ResOrErr.takeError())
244 : ((ResOrErr->getNumberOfFrames() == 0) ? DILineInfo()
245 : ResOrErr->getFrame(0));
246 print({ModuleName, Offset}, Res0OrErr, Printer);
247 } else {
248 Expected<DILineInfo> ResOrErr =
249 Symbolizer.symbolizeCode(ModuleSpec, Address);
250 print({ModuleName, Offset}, ResOrErr, Printer);
251 }
252 Symbolizer.pruneCache();
253 }
254
symbolizeInput(const opt::InputArgList & Args,object::BuildIDRef IncomingBuildID,uint64_t AdjustVMA,bool IsAddr2Line,OutputStyle Style,StringRef InputString,LLVMSymbolizer & Symbolizer,DIPrinter & Printer)255 static void symbolizeInput(const opt::InputArgList &Args,
256 object::BuildIDRef IncomingBuildID,
257 uint64_t AdjustVMA, bool IsAddr2Line,
258 OutputStyle Style, StringRef InputString,
259 LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
260 Command Cmd;
261 std::string ModuleName;
262 object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end());
263 uint64_t Offset = 0;
264 if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
265 StringRef(InputString), Cmd, ModuleName, BuildID, Offset)) {
266 Printer.printInvalidCommand({ModuleName, std::nullopt}, InputString);
267 return;
268 }
269 bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);
270 if (!BuildID.empty()) {
271 assert(ModuleName.empty());
272 if (!Args.hasArg(OPT_no_debuginfod))
273 enableDebuginfod(Symbolizer, Args);
274 std::string BuildIDStr = toHex(BuildID);
275 executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline,
276 Style, Symbolizer, Printer);
277 } else {
278 executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline,
279 Style, Symbolizer, Printer);
280 }
281 }
282
printHelp(StringRef ToolName,const SymbolizerOptTable & Tbl,raw_ostream & OS)283 static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl,
284 raw_ostream &OS) {
285 const char HelpText[] = " [options] addresses...";
286 Tbl.printHelp(OS, (ToolName + HelpText).str().c_str(),
287 ToolName.str().c_str());
288 // TODO Replace this with OptTable API once it adds extrahelp support.
289 OS << "\nPass @FILE as argument to read options from FILE.\n";
290 }
291
parseOptions(int Argc,char * Argv[],bool IsAddr2Line,StringSaver & Saver,SymbolizerOptTable & Tbl)292 static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line,
293 StringSaver &Saver,
294 SymbolizerOptTable &Tbl) {
295 StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer";
296 // The environment variable specifies initial options which can be overridden
297 // by commnad line options.
298 Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS"
299 : "LLVM_SYMBOLIZER_OPTS");
300 bool HasError = false;
301 opt::InputArgList Args =
302 Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
303 errs() << ("error: " + Msg + "\n");
304 HasError = true;
305 });
306 if (HasError)
307 exit(1);
308 if (Args.hasArg(OPT_help)) {
309 printHelp(ToolName, Tbl, outs());
310 exit(0);
311 }
312 if (Args.hasArg(OPT_version)) {
313 outs() << ToolName << '\n';
314 cl::PrintVersionMessage();
315 exit(0);
316 }
317
318 return Args;
319 }
320
321 template <typename T>
parseIntArg(const opt::InputArgList & Args,int ID,T & Value)322 static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) {
323 if (const opt::Arg *A = Args.getLastArg(ID)) {
324 StringRef V(A->getValue());
325 if (!llvm::to_integer(V, Value, 0)) {
326 errs() << A->getSpelling() +
327 ": expected a non-negative integer, but got '" + V + "'";
328 exit(1);
329 }
330 } else {
331 Value = 0;
332 }
333 }
334
decideHowToPrintFunctions(const opt::InputArgList & Args,bool IsAddr2Line)335 static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,
336 bool IsAddr2Line) {
337 if (Args.hasArg(OPT_functions))
338 return FunctionNameKind::LinkageName;
339 if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ))
340 return StringSwitch<FunctionNameKind>(A->getValue())
341 .Case("none", FunctionNameKind::None)
342 .Case("short", FunctionNameKind::ShortName)
343 .Default(FunctionNameKind::LinkageName);
344 return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;
345 }
346
parseColorArg(const opt::InputArgList & Args)347 static std::optional<bool> parseColorArg(const opt::InputArgList &Args) {
348 if (Args.hasArg(OPT_color))
349 return true;
350 if (const opt::Arg *A = Args.getLastArg(OPT_color_EQ))
351 return StringSwitch<std::optional<bool>>(A->getValue())
352 .Case("always", true)
353 .Case("never", false)
354 .Case("auto", std::nullopt);
355 return std::nullopt;
356 }
357
parseBuildIDArg(const opt::InputArgList & Args,int ID)358 static object::BuildID parseBuildIDArg(const opt::InputArgList &Args, int ID) {
359 const opt::Arg *A = Args.getLastArg(ID);
360 if (!A)
361 return {};
362
363 StringRef V(A->getValue());
364 object::BuildID BuildID = parseBuildID(V);
365 if (BuildID.empty()) {
366 errs() << A->getSpelling() + ": expected a build ID, but got '" + V + "'\n";
367 exit(1);
368 }
369 return BuildID;
370 }
371
372 // Symbolize markup from stdin and write the result to stdout.
filterMarkup(const opt::InputArgList & Args,LLVMSymbolizer & Symbolizer)373 static void filterMarkup(const opt::InputArgList &Args, LLVMSymbolizer &Symbolizer) {
374 MarkupFilter Filter(outs(), Symbolizer, parseColorArg(Args));
375 std::string InputString;
376 while (std::getline(std::cin, InputString)) {
377 InputString += '\n';
378 Filter.filter(InputString);
379 }
380 Filter.finish();
381 }
382
// File-level ExitOnError helper. Nothing in the visible part of this file
// refers to it. NOTE(review): it has external linkage, so it may be used
// from elsewhere -- confirm before removing.
ExitOnError ExitOnErr;
384
main(int argc,char ** argv)385 int main(int argc, char **argv) {
386 InitLLVM X(argc, argv);
387 sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
388
389 bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
390 BumpPtrAllocator A;
391 StringSaver Saver(A);
392 SymbolizerOptTable Tbl;
393 opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl);
394
395 LLVMSymbolizer::Options Opts;
396 uint64_t AdjustVMA;
397 PrinterConfig Config;
398 parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA);
399 if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) {
400 Opts.PathStyle =
401 A->getOption().matches(OPT_basenames)
402 ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly
403 : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath;
404 } else {
405 Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath;
406 }
407 Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ);
408 Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str();
409 Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line);
410 Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str();
411 Opts.FallbackDebugPath =
412 Args.getLastArgValue(OPT_fallback_debug_path_EQ).str();
413 Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line);
414 parseIntArg(Args, OPT_print_source_context_lines_EQ,
415 Config.SourceContextLines);
416 Opts.RelativeAddresses = Args.hasArg(OPT_relative_address);
417 Opts.UntagAddresses =
418 Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line);
419 Opts.UseDIA = Args.hasArg(OPT_use_dia);
420 #if !defined(LLVM_ENABLE_DIA_SDK)
421 if (Opts.UseDIA) {
422 WithColor::warning() << "DIA not available; using native PDB reader\n";
423 Opts.UseDIA = false;
424 }
425 #endif
426 Opts.UseSymbolTable = true;
427 if (Args.hasArg(OPT_cache_size_EQ))
428 parseIntArg(Args, OPT_cache_size_EQ, Opts.MaxCacheSize);
429 Config.PrintAddress = Args.hasArg(OPT_addresses);
430 Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None;
431 Config.Pretty = Args.hasArg(OPT_pretty_print);
432 Config.Verbose = Args.hasArg(OPT_verbose);
433
434 for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) {
435 StringRef Hint(A->getValue());
436 if (sys::path::extension(Hint) == ".dSYM") {
437 Opts.DsymHints.emplace_back(Hint);
438 } else {
439 errs() << "Warning: invalid dSYM hint: \"" << Hint
440 << "\" (must have the '.dSYM' extension).\n";
441 }
442 }
443
444 LLVMSymbolizer Symbolizer(Opts);
445
446 if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod, canUseDebuginfod()))
447 enableDebuginfod(Symbolizer, Args);
448
449 if (Args.hasArg(OPT_filter_markup)) {
450 filterMarkup(Args, Symbolizer);
451 return 0;
452 }
453
454 auto Style = IsAddr2Line ? OutputStyle::GNU : OutputStyle::LLVM;
455 if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) {
456 if (strcmp(A->getValue(), "GNU") == 0)
457 Style = OutputStyle::GNU;
458 else if (strcmp(A->getValue(), "JSON") == 0)
459 Style = OutputStyle::JSON;
460 else
461 Style = OutputStyle::LLVM;
462 }
463
464 if (Args.hasArg(OPT_build_id_EQ) && Args.hasArg(OPT_obj_EQ)) {
465 errs() << "error: cannot specify both --build-id and --obj\n";
466 return EXIT_FAILURE;
467 }
468 object::BuildID BuildID = parseBuildIDArg(Args, OPT_build_id_EQ);
469
470 std::unique_ptr<DIPrinter> Printer;
471 if (Style == OutputStyle::GNU)
472 Printer = std::make_unique<GNUPrinter>(outs(), errs(), Config);
473 else if (Style == OutputStyle::JSON)
474 Printer = std::make_unique<JSONPrinter>(outs(), Config);
475 else
476 Printer = std::make_unique<LLVMPrinter>(outs(), errs(), Config);
477
478 std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT);
479 if (InputAddresses.empty()) {
480 const int kMaxInputStringLength = 1024;
481 char InputString[kMaxInputStringLength];
482
483 while (fgets(InputString, sizeof(InputString), stdin)) {
484 // Strip newline characters.
485 std::string StrippedInputString(InputString);
486 llvm::erase_if(StrippedInputString,
487 [](char c) { return c == '\r' || c == '\n'; });
488 symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style,
489 StrippedInputString, Symbolizer, *Printer);
490 outs().flush();
491 }
492 } else {
493 Printer->listBegin();
494 for (StringRef Address : InputAddresses)
495 symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, Address,
496 Symbolizer, *Printer);
497 Printer->listEnd();
498 }
499
500 return 0;
501 }
502