//===-- gsymutil.cpp - GSYM dumping and creation utility for llvm ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/GSYM/CallSiteInfo.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/JSON.h" #include "llvm/Support/LLVMDriver.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Regex.h" #include "llvm/Support/Signals.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" #include #include #include #include #include #include #include #include #include "llvm/DebugInfo/GSYM/DwarfTransformer.h" #include "llvm/DebugInfo/GSYM/FunctionInfo.h" #include "llvm/DebugInfo/GSYM/GsymCreator.h" #include "llvm/DebugInfo/GSYM/GsymReader.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" #include "llvm/DebugInfo/GSYM/LookupResult.h" #include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h" #include "llvm/DebugInfo/GSYM/OutputAggregator.h" #include using namespace llvm; using namespace gsym; using namespace object; /// @} /// Command line options. /// @{ using namespace llvm::opt; enum ID { OPT_INVALID = 0, // This is not an option ID. #define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), #include "Opts.inc" #undef OPTION }; #define OPTTABLE_STR_TABLE_CODE #include "Opts.inc" #undef OPTTABLE_STR_TABLE_CODE #define OPTTABLE_PREFIXES_TABLE_CODE #include "Opts.inc" #undef OPTTABLE_PREFIXES_TABLE_CODE const opt::OptTable::Info InfoTable[] = { #define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), #include "Opts.inc" #undef OPTION }; class GSYMUtilOptTable : public llvm::opt::GenericOptTable { public: GSYMUtilOptTable() : GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) { setGroupedShortOptions(true); } }; static bool Verbose; static std::vector InputFilenames; static std::string ConvertFilename; static std::vector ArchFilters; static std::string OutputFilename; static std::string JsonSummaryFile; static bool Verify; static unsigned NumThreads; static uint64_t SegmentSize; static bool Quiet; static std::vector LookupAddresses; static bool LookupAddressesFromStdin; static bool UseMergedFunctions = false; static bool LoadDwarfCallSites = false; static std::string CallSiteYamlPath; static std::vector MergedFunctionsFilters; static void parseArgs(int argc, char **argv) { GSYMUtilOptTable Tbl; llvm::StringRef ToolName = argv[0]; llvm::BumpPtrAllocator A; llvm::StringSaver Saver{A}; llvm::opt::InputArgList Args = Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) { llvm::errs() << Msg << '\n'; std::exit(1); }); if (Args.hasArg(OPT_help)) { const char *Overview = "A tool for dumping, searching and creating GSYM files.\n\n" "Specify one or more GSYM paths as arguments to dump all of the " "information in each GSYM file.\n" "Specify a single GSYM file along with one or more --lookup options to " "lookup addresses within that GSYM file.\n" "Use the --convert option to specify a file with option --out-file " "option to convert to GSYM format.\n"; Tbl.printHelp(llvm::outs(), "llvm-gsymutil [options] ", Overview); std::exit(0); } if (Args.hasArg(OPT_version)) { llvm::outs() << ToolName << '\n'; cl::PrintVersionMessage(); std::exit(0); } Verbose = Args.hasArg(OPT_verbose); for (const llvm::opt::Arg *A : Args.filtered(OPT_INPUT)) InputFilenames.emplace_back(A->getValue()); if (const llvm::opt::Arg *A = Args.getLastArg(OPT_convert_EQ)) ConvertFilename = A->getValue(); for (const llvm::opt::Arg *A : Args.filtered(OPT_arch_EQ)) ArchFilters.emplace_back(A->getValue()); if (const llvm::opt::Arg *A = Args.getLastArg(OPT_out_file_EQ)) OutputFilename = A->getValue(); if (const llvm::opt::Arg *A = Args.getLastArg(OPT_json_summary_file_EQ)) JsonSummaryFile = A->getValue(); Verify = Args.hasArg(OPT_verify); if (const llvm::opt::Arg *A = Args.getLastArg(OPT_num_threads_EQ)) { StringRef S{A->getValue()}; if (!llvm::to_integer(S, NumThreads, 0)) { llvm::errs() << ToolName << ": for the --num-threads option: '" << S << "' value invalid for uint argument!\n"; std::exit(1); } } if (const llvm::opt::Arg *A = Args.getLastArg(OPT_segment_size_EQ)) { StringRef S{A->getValue()}; if (!llvm::to_integer(S, SegmentSize, 0)) { llvm::errs() << ToolName << ": for the --segment-size option: '" << S << "' value invalid for uint argument!\n"; std::exit(1); } } Quiet = Args.hasArg(OPT_quiet); for (const llvm::opt::Arg *A : Args.filtered(OPT_address_EQ)) { StringRef S{A->getValue()}; if (!llvm::to_integer(S, LookupAddresses.emplace_back(), 0)) { llvm::errs() << ToolName << ": for the --address option: '" << S << "' value invalid for uint argument!\n"; std::exit(1); } } LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin); UseMergedFunctions = Args.hasArg(OPT_merged_functions); if (Args.hasArg(OPT_callsites_yaml_file_EQ)) { CallSiteYamlPath = Args.getLastArgValue(OPT_callsites_yaml_file_EQ); if (CallSiteYamlPath.empty()) { llvm::errs() << ToolName << ": --callsites-yaml-file option requires a non-empty argument.\n"; std::exit(1); } } LoadDwarfCallSites = Args.hasArg(OPT_dwarf_callsites); for (const llvm::opt::Arg *A : Args.filtered(OPT_merged_functions_filter_EQ)) { MergedFunctionsFilters.push_back(A->getValue()); // Validate the filter is only used with correct flags if (LookupAddresses.empty() && !LookupAddressesFromStdin) { llvm::errs() << ToolName << ": --merged-functions-filter can only be used with " "--address/--addresses-from-stdin\n"; std::exit(1); } if (!UseMergedFunctions) { llvm::errs() << ToolName << ": --merged-functions-filter requires --merged-functions\n"; std::exit(1); } } } /// @} //===----------------------------------------------------------------------===// static void error(Error Err) { if (!Err) return; WithColor::error() << toString(std::move(Err)) << "\n"; exit(1); } static void error(StringRef Prefix, llvm::Error Err) { if (!Err) return; errs() << Prefix << ": " << Err << "\n"; consumeError(std::move(Err)); exit(1); } static void error(StringRef Prefix, std::error_code EC) { if (!EC) return; errs() << Prefix << ": " << EC.message() << "\n"; exit(1); } static uint32_t getCPUType(MachOObjectFile &MachO) { if (MachO.is64Bit()) return MachO.getHeader64().cputype; else return MachO.getHeader().cputype; } /// Return true if the object file has not been filtered by an --arch option. static bool filterArch(MachOObjectFile &Obj) { if (ArchFilters.empty()) return true; Triple ObjTriple(Obj.getArchTriple()); StringRef ObjArch = ObjTriple.getArchName(); for (StringRef Arch : ArchFilters) { // Match name. if (Arch == ObjArch) return true; // Match architecture number. unsigned Value; if (!Arch.getAsInteger(0, Value)) if (Value == getCPUType(Obj)) return true; } return false; } /// Determine the virtual address that is considered the base address of an ELF /// object file. /// /// The base address of an ELF file is the "p_vaddr" of the first program /// header whose "p_type" is PT_LOAD. /// /// \param ELFFile An ELF object file we will search. /// /// \returns A valid image base address if we are able to extract one. template static std::optional getImageBaseAddress(const object::ELFFile &ELFFile) { auto PhdrRangeOrErr = ELFFile.program_headers(); if (!PhdrRangeOrErr) { consumeError(PhdrRangeOrErr.takeError()); return std::nullopt; } for (const typename ELFT::Phdr &Phdr : *PhdrRangeOrErr) if (Phdr.p_type == ELF::PT_LOAD) return (uint64_t)Phdr.p_vaddr; return std::nullopt; } /// Determine the virtual address that is considered the base address of mach-o /// object file. /// /// The base address of a mach-o file is the vmaddr of the "__TEXT" segment. /// /// \param MachO A mach-o object file we will search. /// /// \returns A valid image base address if we are able to extract one. static std::optional getImageBaseAddress(const object::MachOObjectFile *MachO) { for (const auto &Command : MachO->load_commands()) { if (Command.C.cmd == MachO::LC_SEGMENT) { MachO::segment_command SLC = MachO->getSegmentLoadCommand(Command); StringRef SegName = SLC.segname; if (SegName == "__TEXT") return SLC.vmaddr; } else if (Command.C.cmd == MachO::LC_SEGMENT_64) { MachO::segment_command_64 SLC = MachO->getSegment64LoadCommand(Command); StringRef SegName = SLC.segname; if (SegName == "__TEXT") return SLC.vmaddr; } } return std::nullopt; } /// Determine the virtual address that is considered the base address of an /// object file. /// /// Since GSYM files are used for symbolication, many clients will need to /// easily adjust addresses they find in stack traces so the lookups happen /// on unslid addresses from the original object file. If the base address of /// a GSYM file is set to the base address of the image, then this address /// adjusting is much easier. /// /// \param Obj An object file we will search. /// /// \returns A valid image base address if we are able to extract one. static std::optional getImageBaseAddress(object::ObjectFile &Obj) { if (const auto *MachO = dyn_cast(&Obj)) return getImageBaseAddress(MachO); else if (const auto *ELFObj = dyn_cast(&Obj)) return getImageBaseAddress(ELFObj->getELFFile()); else if (const auto *ELFObj = dyn_cast(&Obj)) return getImageBaseAddress(ELFObj->getELFFile()); else if (const auto *ELFObj = dyn_cast(&Obj)) return getImageBaseAddress(ELFObj->getELFFile()); else if (const auto *ELFObj = dyn_cast(&Obj)) return getImageBaseAddress(ELFObj->getELFFile()); return std::nullopt; } static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile, OutputAggregator &Out) { auto ThreadCount = NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency(); GsymCreator Gsym(Quiet); // See if we can figure out the base address for a given object file, and if // we can, then set the base address to use to this value. This will ease // symbolication since clients can slide the GSYM lookup addresses by using // the load bias of the shared library. if (auto ImageBaseAddr = getImageBaseAddress(Obj)) Gsym.setBaseAddress(*ImageBaseAddr); // We need to know where the valid sections are that contain instructions. // See header documentation for DWARFTransformer::SetValidTextRanges() for // defails. AddressRanges TextRanges; for (const object::SectionRef &Sect : Obj.sections()) { if (!Sect.isText()) continue; const uint64_t Size = Sect.getSize(); if (Size == 0) continue; const uint64_t StartAddr = Sect.getAddress(); TextRanges.insert(AddressRange(StartAddr, StartAddr + Size)); } // Make sure there is DWARF to convert first. std::unique_ptr DICtx = DWARFContext::create( Obj, /*RelocAction=*/DWARFContext::ProcessDebugRelocations::Process, nullptr, /*DWPName=*/"", /*RecoverableErrorHandler=*/WithColor::defaultErrorHandler, /*WarningHandler=*/WithColor::defaultWarningHandler, /*ThreadSafe*/true); if (!DICtx) return createStringError(std::errc::invalid_argument, "unable to create DWARF context"); // Make a DWARF transformer object and populate the ranges of the code // so we don't end up adding invalid functions to GSYM data. DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites); if (!TextRanges.empty()) Gsym.SetValidTextRanges(TextRanges); // Convert all DWARF to GSYM. if (auto Err = DT.convert(ThreadCount, Out)) return Err; // If enabled, merge functions with identical address ranges as merged // functions in the first FunctionInfo with that address range. Do this right // after loading the DWARF data so we don't have to deal with functions from // the symbol table. if (UseMergedFunctions) Gsym.prepareMergedFunctions(Out); // Get the UUID and convert symbol table to GSYM. if (auto Err = ObjectFileTransformer::convert(Obj, Out, Gsym)) return Err; // If any call site YAML files were specified, load them now. if (!CallSiteYamlPath.empty()) if (auto Err = Gsym.loadCallSitesFromYAML(CallSiteYamlPath)) return Err; // Finalize the GSYM to make it ready to save to disk. This will remove // duplicate FunctionInfo entries where we might have found an entry from // debug info and also a symbol table entry from the object file. if (auto Err = Gsym.finalize(Out)) return Err; // Save the GSYM file to disk. llvm::endianness Endian = Obj.makeTriple().isLittleEndian() ? llvm::endianness::little : llvm::endianness::big; std::optional OptSegmentSize; if (SegmentSize > 0) OptSegmentSize = SegmentSize; if (auto Err = Gsym.save(OutFile, Endian, OptSegmentSize)) return Err; // Verify the DWARF if requested. This will ensure all the info in the DWARF // can be looked up in the GSYM and that all lookups get matching data. if (Verify) { if (auto Err = DT.verify(OutFile, Out)) return Err; } return Error::success(); } static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer, const std::string &OutFile, OutputAggregator &Out) { Expected> BinOrErr = object::createBinary(Buffer); error(Filename, errorToErrorCode(BinOrErr.takeError())); if (auto *Obj = dyn_cast(BinOrErr->get())) { Triple ObjTriple(Obj->makeTriple()); auto ArchName = ObjTriple.getArchName(); outs() << "Output file (" << ArchName << "): " << OutFile << "\n"; if (auto Err = handleObjectFile(*Obj, OutFile, Out)) return Err; } else if (auto *Fat = dyn_cast(BinOrErr->get())) { // Iterate over all contained architectures and filter out any that were // not specified with the "--arch " option. If the --arch option was // not specified on the command line, we will process all architectures. std::vector> FilterObjs; for (auto &ObjForArch : Fat->objects()) { if (auto MachOOrErr = ObjForArch.getAsObjectFile()) { auto &Obj = **MachOOrErr; if (filterArch(Obj)) FilterObjs.emplace_back(MachOOrErr->release()); } else { error(Filename, MachOOrErr.takeError()); } } if (FilterObjs.empty()) error(Filename, createStringError(std::errc::invalid_argument, "no matching architectures found")); // Now handle each architecture we need to convert. for (auto &Obj : FilterObjs) { Triple ObjTriple(Obj->getArchTriple()); auto ArchName = ObjTriple.getArchName(); std::string ArchOutFile(OutFile); // If we are only handling a single architecture, then we will use the // normal output file. If we are handling multiple architectures append // the architecture name to the end of the out file path so that we // don't overwrite the previous architecture's gsym file. if (FilterObjs.size() > 1) { ArchOutFile.append(1, '.'); ArchOutFile.append(ArchName.str()); } outs() << "Output file (" << ArchName << "): " << ArchOutFile << "\n"; if (auto Err = handleObjectFile(*Obj, ArchOutFile, Out)) return Err; } } return Error::success(); } static llvm::Error handleFileConversionToGSYM(StringRef Filename, const std::string &OutFile, OutputAggregator &Out) { ErrorOr> BuffOrErr = MemoryBuffer::getFileOrSTDIN(Filename); error(Filename, BuffOrErr.getError()); std::unique_ptr Buffer = std::move(BuffOrErr.get()); return handleBuffer(Filename, *Buffer, OutFile, Out); } static llvm::Error convertFileToGSYM(OutputAggregator &Out) { // Expand any .dSYM bundles to the individual object files contained therein. std::vector Objects; std::string OutFile = OutputFilename; if (OutFile.empty()) { OutFile = ConvertFilename; OutFile += ".gsym"; } Out << "Input file: " << ConvertFilename << "\n"; if (auto DsymObjectsOrErr = MachOObjectFile::findDsymObjectMembers(ConvertFilename)) { if (DsymObjectsOrErr->empty()) Objects.push_back(ConvertFilename); else llvm::append_range(Objects, *DsymObjectsOrErr); } else { error(DsymObjectsOrErr.takeError()); } for (StringRef Object : Objects) if (Error Err = handleFileConversionToGSYM(Object, OutFile, Out)) return Err; return Error::success(); } static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) { if (UseMergedFunctions) { if (auto Results = Gsym.lookupAll(Addr)) { // If we have filters, count matching results first size_t NumMatching = Results->size(); if (!MergedFunctionsFilters.empty()) { NumMatching = 0; for (const auto &Result : *Results) { bool Matches = false; for (const auto &Filter : MergedFunctionsFilters) { Regex Pattern(Filter); if (Pattern.match(Result.FuncName)) { Matches = true; break; } } if (Matches) NumMatching++; } } OS << "Found " << NumMatching << " function" << (NumMatching != 1 ? "s" : "") << " at address " << HEX64(Addr) << ":\n"; for (size_t i = 0; i < Results->size(); ++i) { // Skip if doesn't match any filter if (!MergedFunctionsFilters.empty()) { bool Matches = false; for (const auto &Filter : MergedFunctionsFilters) { Regex Pattern(Filter); if (Pattern.match(Results->at(i).FuncName)) { Matches = true; break; } } if (!Matches) continue; } OS << " " << Results->at(i); if (i != Results->size() - 1) OS << "\n"; } } } else { /* UseMergedFunctions == false */ if (auto Result = Gsym.lookup(Addr)) { // If verbose is enabled dump the full function info for the address. if (Verbose) { if (auto FI = Gsym.getFunctionInfo(Addr)) { OS << "FunctionInfo for " << HEX64(Addr) << ":\n"; Gsym.dump(OS, *FI); OS << "\nLookupResult for " << HEX64(Addr) << ":\n"; } } // Don't print call site info if --merged-functions is not specified. Result->CallSiteFuncRegex.clear(); OS << Result.get(); } else { if (Verbose) OS << "\nLookupResult for " << HEX64(Addr) << ":\n"; OS << HEX64(Addr) << ": "; logAllUnhandledErrors(Result.takeError(), OS, "error: "); } if (Verbose) OS << "\n"; } } int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) { // Print a stack trace if we signal out. sys::PrintStackTraceOnErrorSignal(argv[0]); PrettyStackTraceProgram X(argc, argv); llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. llvm::InitializeAllTargets(); parseArgs(argc, argv); raw_ostream &OS = outs(); OutputAggregator Aggregation(&OS); if (!ConvertFilename.empty()) { // Convert DWARF to GSYM if (!InputFilenames.empty()) { OS << "error: no input files can be specified when using the --convert " "option.\n"; return 1; } // Call error() if we have an error and it will exit with a status of 1 if (auto Err = convertFileToGSYM(Aggregation)) error("DWARF conversion failed: ", std::move(Err)); // Report the errors from aggregator: Aggregation.EnumerateResults([&](StringRef category, unsigned count) { OS << category << " occurred " << count << " time(s)\n"; }); if (!JsonSummaryFile.empty()) { std::error_code EC; raw_fd_ostream JsonStream(JsonSummaryFile, EC, sys::fs::OF_Text); if (EC) { OS << "error opening aggregate error json file '" << JsonSummaryFile << "' for writing: " << EC.message() << '\n'; return 1; } llvm::json::Object Categories; uint64_t ErrorCount = 0; Aggregation.EnumerateResults([&](StringRef Category, unsigned Count) { llvm::json::Object Val; Val.try_emplace("count", Count); Categories.try_emplace(Category, std::move(Val)); ErrorCount += Count; }); llvm::json::Object RootNode; RootNode.try_emplace("error-categories", std::move(Categories)); RootNode.try_emplace("error-count", ErrorCount); JsonStream << llvm::json::Value(std::move(RootNode)); } return 0; } if (LookupAddressesFromStdin) { if (!LookupAddresses.empty() || !InputFilenames.empty()) { OS << "error: no input files or addresses can be specified when using " "the --addresses-from-stdin " "option.\n"; return 1; } std::string InputLine; std::string CurrentGSYMPath; std::optional> CurrentGsym; while (std::getline(std::cin, InputLine)) { // Strip newline characters. std::string StrippedInputLine(InputLine); llvm::erase_if(StrippedInputLine, [](char c) { return c == '\r' || c == '\n'; }); StringRef AddrStr, GSYMPath; std::tie(AddrStr, GSYMPath) = llvm::StringRef{StrippedInputLine}.split(' '); if (GSYMPath != CurrentGSYMPath) { CurrentGsym = GsymReader::openFile(GSYMPath); if (!*CurrentGsym) error(GSYMPath, CurrentGsym->takeError()); CurrentGSYMPath = GSYMPath; } uint64_t Addr; if (AddrStr.getAsInteger(0, Addr)) { OS << "error: invalid address " << AddrStr << ", expected: Address GsymFile.\n"; return 1; } doLookup(**CurrentGsym, Addr, OS); OS << "\n"; OS.flush(); } return EXIT_SUCCESS; } // Dump or access data inside GSYM files for (const auto &GSYMPath : InputFilenames) { auto Gsym = GsymReader::openFile(GSYMPath); if (!Gsym) error(GSYMPath, Gsym.takeError()); if (LookupAddresses.empty()) { Gsym->dump(outs()); continue; } // Lookup an address in a GSYM file and print any matches. OS << "Looking up addresses in \"" << GSYMPath << "\":\n"; for (auto Addr : LookupAddresses) { doLookup(*Gsym, Addr, OS); } } return EXIT_SUCCESS; }