//===-- llvm-objdump.cpp - Object file dumping utility for llvm -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This program is a utility that works like binutils "objdump", that is, it
// dumps out a plethora of information about an object file depending on the
// flags.
//
// The flags and output of this program should be near identical to those of
// binutils objdump.
//
//===----------------------------------------------------------------------===//

#include "llvm-objdump.h"
#include "COFFDump.h"
#include "ELFDump.h"
#include "MachODump.h"
#include "ObjdumpOptID.h"
#include "OffloadDump.h"
#include "SourcePrinter.h"
#include "WasmDump.h"
#include "XCOFFDump.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/DebugInfo/BTF/BTFParser.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Debuginfod/BuildIDFetcher.h"
#include "llvm/Debuginfod/Debuginfod.h"
#include "llvm/Debuginfod/HTTPClient.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/BuildID.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/FaultMapParser.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/OffloadBinary.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/LLVMDriver.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Host.h"
#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cctype>
#include <cstring>
#include <optional>
#include <set>
#include <system_error>
#include <unordered_map>
#include <utility>

using namespace llvm;
using namespace llvm::object;
using namespace llvm::objdump;
using namespace llvm::opt;

namespace {

// Option table base shared by the objdump-style and otool-style front ends.
// It turns on grouped short options and knows how to print a help banner
// built from the tool name, a usage suffix and a one-line description.
class CommonOptTable : public opt::GenericOptTable {
public:
  // \param StrTable       option string table handed to GenericOptTable.
  // \param PrefixesTable  option prefix table handed to GenericOptTable.
  // \param OptionInfos    per-option records handed to GenericOptTable.
  // \param Usage          text appended to the tool name in the help banner.
  // \param Description    overview line shown by printHelp().
  // Usage and Description are stored as raw pointers, not copied.
  CommonOptTable(const char *StrTable, ArrayRef<unsigned> PrefixesTable,
                 ArrayRef<Info> OptionInfos, const char *Usage,
                 const char *Description)
      : opt::GenericOptTable(StrTable, PrefixesTable, OptionInfos),
        Usage(Usage), Description(Description) {
    setGroupedShortOptions(true);
  }

  // Prints the help text to outs(). Only the filename component of Argv0 is
  // shown, followed by Usage. ShowHidden is forwarded for both trailing
  // boolean arguments of GenericOptTable::printHelp.
  void printHelp(StringRef Argv0, bool ShowHidden = false) const {
    Argv0 = sys::path::filename(Argv0);
    opt::GenericOptTable::printHelp(outs(), (Argv0 + Usage).str().c_str(),
                                    Description, ShowHidden, ShowHidden);
    // TODO Replace this with OptTable API once it adds extrahelp support.
    outs() << "\nPass @FILE as argument to read options from FILE.\n";
  }

private:
  const char *Usage;
  const char *Description;
};

// ObjdumpOptID is in ObjdumpOptID.h
namespace objdump_opt {
// ObjdumpOpts.inc is included several times under different selector macros.
// Presumably the first two inclusions provide OptionStrTable and
// OptionPrefixesTable, which ObjdumpOptTable references below.
#define OPTTABLE_STR_TABLE_CODE
#include "ObjdumpOpts.inc"
#undef OPTTABLE_STR_TABLE_CODE

#define OPTTABLE_PREFIXES_TABLE_CODE
#include "ObjdumpOpts.inc"
#undef OPTTABLE_PREFIXES_TABLE_CODE

// One Info record per OPTION(...) entry of ObjdumpOpts.inc, with option IDs
// prefixed by OBJDUMP_.
static constexpr opt::OptTable::Info ObjdumpInfoTable[] = {
#define OPTION(...)                                                            \
  LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX(OBJDUMP_, __VA_ARGS__),
#include "ObjdumpOpts.inc"
#undef OPTION
};
} // namespace objdump_opt

// Option table for the objdump-style command line.
class ObjdumpOptTable : public CommonOptTable {
public:
  ObjdumpOptTable()
      : CommonOptTable(
            objdump_opt::OptionStrTable, objdump_opt::OptionPrefixesTable,
            objdump_opt::ObjdumpInfoTable, " [options] <input object files>",
            "llvm object file dumper") {}
};

// Option IDs for the otool-style command line, one per OPTION(...) entry of
// OtoolOpts.inc, each prefixed by OTOOL_.
enum OtoolOptID {
  OTOOL_INVALID = 0, // This is not an option ID.
#define OPTION(...) LLVM_MAKE_OPT_ID_WITH_ID_PREFIX(OTOOL_, __VA_ARGS__),
#include "OtoolOpts.inc"
#undef OPTION
};

namespace otool {
// Same multi-inclusion scheme as objdump_opt above, applied to OtoolOpts.inc.
#define OPTTABLE_STR_TABLE_CODE
#include "OtoolOpts.inc"
#undef OPTTABLE_STR_TABLE_CODE

#define OPTTABLE_PREFIXES_TABLE_CODE
#include "OtoolOpts.inc"
#undef OPTTABLE_PREFIXES_TABLE_CODE

static constexpr opt::OptTable::Info OtoolInfoTable[] = {
#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX(OTOOL_, __VA_ARGS__),
#include "OtoolOpts.inc"
#undef OPTION
};
} // namespace otool

// Option table for the otool-style command line.
class OtoolOptTable : public CommonOptTable {
public:
  OtoolOptTable()
      : CommonOptTable(otool::OptionStrTable, otool::OptionPrefixesTable,
                       otool::OtoolInfoTable, " [option...] [file...]",
                       "Mach-O object file displaying tool") {}
};

// A basic-block label paired with the PGO annotation text to print with it.
struct BBAddrMapLabel {
  std::string BlockLabel;
  std::string PGOAnalysis;
};

// This class represents the BBAddrMap and PGOMap associated with a single
// function.
class BBAddrMapFunctionEntry {
public:
  // Takes both maps by value and moves them into the entry.
  BBAddrMapFunctionEntry(BBAddrMap AddrMap, PGOAnalysisMap PGOMap)
      : AddrMap(std::move(AddrMap)), PGOMap(std::move(PGOMap)) {}

  const BBAddrMap &getAddrMap() const { return AddrMap; }

  // Returns the PGO string associated with the entry of index `PGOBBEntryIndex`
  // in `PGOMap`. If PrettyPGOAnalysis is true, prints BFI as relative frequency
  // and BPI as percentage. Otherwise raw values are displayed.
  //
  // The result is empty when the map carries no PGO analysis at all;
  // otherwise it is a parenthesised, comma-separated list preceded by a space.
  std::string constructPGOLabelString(size_t PGOBBEntryIndex,
                                      bool PrettyPGOAnalysis) const {
    if (!PGOMap.FeatEnable.hasPGOAnalysis())
      return "";
    std::string PGOString;
    raw_string_ostream PGOSS(PGOString);

    PGOSS << " (";
    // The function entry count is only attached to the first block.
    if (PGOMap.FeatEnable.FuncEntryCount && PGOBBEntryIndex == 0) {
      PGOSS << "Entry count: " << Twine(PGOMap.FuncEntryCount);
      // Emit a separator only if per-block data follows.
      if (PGOMap.FeatEnable.hasPGOAnalysisBBData()) {
        PGOSS << ", ";
      }
    }

    if (PGOMap.FeatEnable.hasPGOAnalysisBBData()) {

      assert(PGOBBEntryIndex < PGOMap.BBEntries.size() &&
             "Expected PGOAnalysisMap and BBAddrMap to have the same entries");
      const PGOAnalysisMap::PGOBBEntry &PGOBBEntry =
          PGOMap.BBEntries[PGOBBEntryIndex];

      if (PGOMap.FeatEnable.BBFreq) {
        PGOSS << "Frequency: ";
        // Pretty mode prints the frequency relative to the first entry of
        // BBEntries; raw mode prints the stored frequency value.
        if (PrettyPGOAnalysis)
          printRelativeBlockFreq(PGOSS, PGOMap.BBEntries.front().BlockFreq,
                                 PGOBBEntry.BlockFreq);
        else
          PGOSS << Twine(PGOBBEntry.BlockFreq.getFrequency());
        // Separator is needed only when a successor list will follow.
        if (PGOMap.FeatEnable.BrProb && PGOBBEntry.Successors.size() > 0) {
          PGOSS << ", ";
        }
      }
      if (PGOMap.FeatEnable.BrProb && PGOBBEntry.Successors.size() > 0) {
        PGOSS << "Successors: ";
        interleaveComma(
            PGOBBEntry.Successors, PGOSS,
            [&](const PGOAnalysisMap::PGOBBEntry::SuccessorEntry &SE) {
              PGOSS << "BB" << SE.ID << ":";
              // Pretty mode streams the probability object in brackets; raw
              // mode writes its numerator in hex.
              if (PrettyPGOAnalysis)
                PGOSS << "[" << SE.Prob << "]";
              else
                PGOSS.write_hex(SE.Prob.getNumerator());
            });
      }
    }
    PGOSS << ")";

    return PGOString;
  }

private:
  const BBAddrMap AddrMap;
  const PGOAnalysisMap PGOMap;
};

// This class represents the BBAddrMap and PGOMap of potentially multiple
// functions in a section.
256 class BBAddrMapInfo { 257 public: 258 void clear() { 259 FunctionAddrToMap.clear(); 260 RangeBaseAddrToFunctionAddr.clear(); 261 } 262 263 bool empty() const { return FunctionAddrToMap.empty(); } 264 265 void AddFunctionEntry(BBAddrMap AddrMap, PGOAnalysisMap PGOMap) { 266 uint64_t FunctionAddr = AddrMap.getFunctionAddress(); 267 for (size_t I = 1; I < AddrMap.BBRanges.size(); ++I) 268 RangeBaseAddrToFunctionAddr.emplace(AddrMap.BBRanges[I].BaseAddress, 269 FunctionAddr); 270 [[maybe_unused]] auto R = FunctionAddrToMap.try_emplace( 271 FunctionAddr, std::move(AddrMap), std::move(PGOMap)); 272 assert(R.second && "duplicate function address"); 273 } 274 275 // Returns the BBAddrMap entry for the function associated with `BaseAddress`. 276 // `BaseAddress` could be the function address or the address of a range 277 // associated with that function. Returns `nullptr` if `BaseAddress` is not 278 // mapped to any entry. 279 const BBAddrMapFunctionEntry *getEntryForAddress(uint64_t BaseAddress) const { 280 uint64_t FunctionAddr = BaseAddress; 281 auto S = RangeBaseAddrToFunctionAddr.find(BaseAddress); 282 if (S != RangeBaseAddrToFunctionAddr.end()) 283 FunctionAddr = S->second; 284 auto R = FunctionAddrToMap.find(FunctionAddr); 285 if (R == FunctionAddrToMap.end()) 286 return nullptr; 287 return &R->second; 288 } 289 290 private: 291 std::unordered_map<uint64_t, BBAddrMapFunctionEntry> FunctionAddrToMap; 292 std::unordered_map<uint64_t, uint64_t> RangeBaseAddrToFunctionAddr; 293 }; 294 295 } // namespace 296 297 #define DEBUG_TYPE "objdump" 298 299 enum class ColorOutput { 300 Auto, 301 Enable, 302 Disable, 303 Invalid, 304 }; 305 306 static uint64_t AdjustVMA; 307 static bool AllHeaders; 308 static std::string ArchName; 309 bool objdump::ArchiveHeaders; 310 bool objdump::Demangle; 311 bool objdump::Disassemble; 312 bool objdump::DisassembleAll; 313 std::vector<std::string> objdump::DisassemblerOptions; 314 bool objdump::SymbolDescription; 315 bool 
objdump::TracebackTable; 316 static std::vector<std::string> DisassembleSymbols; 317 static bool DisassembleZeroes; 318 static ColorOutput DisassemblyColor; 319 DIDumpType objdump::DwarfDumpType; 320 static bool DynamicRelocations; 321 static bool FaultMapSection; 322 static bool FileHeaders; 323 bool objdump::SectionContents; 324 static std::vector<std::string> InputFilenames; 325 bool objdump::PrintLines; 326 static bool MachOOpt; 327 std::string objdump::MCPU; 328 std::vector<std::string> objdump::MAttrs; 329 bool objdump::ShowRawInsn; 330 bool objdump::LeadingAddr; 331 static bool Offloading; 332 static bool RawClangAST; 333 bool objdump::Relocations; 334 bool objdump::PrintImmHex; 335 bool objdump::PrivateHeaders; 336 std::vector<std::string> objdump::FilterSections; 337 bool objdump::SectionHeaders; 338 static bool ShowAllSymbols; 339 static bool ShowLMA; 340 bool objdump::PrintSource; 341 342 static uint64_t StartAddress; 343 static bool HasStartAddressFlag; 344 static uint64_t StopAddress = UINT64_MAX; 345 static bool HasStopAddressFlag; 346 347 bool objdump::SymbolTable; 348 static bool SymbolizeOperands; 349 static bool PrettyPGOAnalysisMap; 350 static bool DynamicSymbolTable; 351 std::string objdump::TripleName; 352 bool objdump::UnwindInfo; 353 static bool Wide; 354 std::string objdump::Prefix; 355 uint32_t objdump::PrefixStrip; 356 357 DebugVarsFormat objdump::DbgVariables = DVDisabled; 358 359 int objdump::DbgIndent = 52; 360 361 static StringSet<> DisasmSymbolSet; 362 StringSet<> objdump::FoundSectionSet; 363 static StringRef ToolName; 364 365 std::unique_ptr<BuildIDFetcher> BIDFetcher; 366 367 Dumper::Dumper(const object::ObjectFile &O) : O(O) { 368 WarningHandler = [this](const Twine &Msg) { 369 if (Warnings.insert(Msg.str()).second) 370 reportWarning(Msg, this->O.getFileName()); 371 return Error::success(); 372 }; 373 } 374 375 void Dumper::reportUniqueWarning(Error Err) { 376 reportUniqueWarning(toString(std::move(Err))); 377 } 378 379 void 
Dumper::reportUniqueWarning(const Twine &Msg) { 380 cantFail(WarningHandler(Msg)); 381 } 382 383 static Expected<std::unique_ptr<Dumper>> createDumper(const ObjectFile &Obj) { 384 if (const auto *O = dyn_cast<COFFObjectFile>(&Obj)) 385 return createCOFFDumper(*O); 386 if (const auto *O = dyn_cast<ELFObjectFileBase>(&Obj)) 387 return createELFDumper(*O); 388 if (const auto *O = dyn_cast<MachOObjectFile>(&Obj)) 389 return createMachODumper(*O); 390 if (const auto *O = dyn_cast<WasmObjectFile>(&Obj)) 391 return createWasmDumper(*O); 392 if (const auto *O = dyn_cast<XCOFFObjectFile>(&Obj)) 393 return createXCOFFDumper(*O); 394 395 return createStringError(errc::invalid_argument, 396 "unsupported object file format"); 397 } 398 399 namespace { 400 struct FilterResult { 401 // True if the section should not be skipped. 402 bool Keep; 403 404 // True if the index counter should be incremented, even if the section should 405 // be skipped. For example, sections may be skipped if they are not included 406 // in the --section flag, but we still want those to count toward the section 407 // count. 408 bool IncrementIndex; 409 }; 410 } // namespace 411 412 static FilterResult checkSectionFilter(object::SectionRef S) { 413 if (FilterSections.empty()) 414 return {/*Keep=*/true, /*IncrementIndex=*/true}; 415 416 Expected<StringRef> SecNameOrErr = S.getName(); 417 if (!SecNameOrErr) { 418 consumeError(SecNameOrErr.takeError()); 419 return {/*Keep=*/false, /*IncrementIndex=*/false}; 420 } 421 StringRef SecName = *SecNameOrErr; 422 423 // StringSet does not allow empty key so avoid adding sections with 424 // no name (such as the section with index 0) here. 425 if (!SecName.empty()) 426 FoundSectionSet.insert(SecName); 427 428 // Only show the section if it's in the FilterSections list, but always 429 // increment so the indexing is stable. 
430 return {/*Keep=*/is_contained(FilterSections, SecName), 431 /*IncrementIndex=*/true}; 432 } 433 434 SectionFilter objdump::ToolSectionFilter(object::ObjectFile const &O, 435 uint64_t *Idx) { 436 // Start at UINT64_MAX so that the first index returned after an increment is 437 // zero (after the unsigned wrap). 438 if (Idx) 439 *Idx = UINT64_MAX; 440 return SectionFilter( 441 [Idx](object::SectionRef S) { 442 FilterResult Result = checkSectionFilter(S); 443 if (Idx != nullptr && Result.IncrementIndex) 444 *Idx += 1; 445 return Result.Keep; 446 }, 447 O); 448 } 449 450 std::string objdump::getFileNameForError(const object::Archive::Child &C, 451 unsigned Index) { 452 Expected<StringRef> NameOrErr = C.getName(); 453 if (NameOrErr) 454 return std::string(NameOrErr.get()); 455 // If we have an error getting the name then we print the index of the archive 456 // member. Since we are already in an error state, we just ignore this error. 457 consumeError(NameOrErr.takeError()); 458 return "<file index: " + std::to_string(Index) + ">"; 459 } 460 461 void objdump::reportWarning(const Twine &Message, StringRef File) { 462 // Output order between errs() and outs() matters especially for archive 463 // files where the output is per member object. 
464 outs().flush(); 465 WithColor::warning(errs(), ToolName) 466 << "'" << File << "': " << Message << "\n"; 467 } 468 469 [[noreturn]] void objdump::reportError(StringRef File, const Twine &Message) { 470 outs().flush(); 471 WithColor::error(errs(), ToolName) << "'" << File << "': " << Message << "\n"; 472 exit(1); 473 } 474 475 [[noreturn]] void objdump::reportError(Error E, StringRef FileName, 476 StringRef ArchiveName, 477 StringRef ArchitectureName) { 478 assert(E); 479 outs().flush(); 480 WithColor::error(errs(), ToolName); 481 if (ArchiveName != "") 482 errs() << ArchiveName << "(" << FileName << ")"; 483 else 484 errs() << "'" << FileName << "'"; 485 if (!ArchitectureName.empty()) 486 errs() << " (for architecture " << ArchitectureName << ")"; 487 errs() << ": "; 488 logAllUnhandledErrors(std::move(E), errs()); 489 exit(1); 490 } 491 492 static void reportCmdLineWarning(const Twine &Message) { 493 WithColor::warning(errs(), ToolName) << Message << "\n"; 494 } 495 496 [[noreturn]] static void reportCmdLineError(const Twine &Message) { 497 WithColor::error(errs(), ToolName) << Message << "\n"; 498 exit(1); 499 } 500 501 static void warnOnNoMatchForSections() { 502 SetVector<StringRef> MissingSections; 503 for (StringRef S : FilterSections) { 504 if (FoundSectionSet.count(S)) 505 return; 506 // User may specify a unnamed section. Don't warn for it. 507 if (!S.empty()) 508 MissingSections.insert(S); 509 } 510 511 // Warn only if no section in FilterSections is matched. 512 for (StringRef S : MissingSections) 513 reportCmdLineWarning("section '" + S + 514 "' mentioned in a -j/--section option, but not " 515 "found in any input file"); 516 } 517 518 static const Target *getTarget(const ObjectFile *Obj) { 519 // Figure out the target triple. 
520 Triple TheTriple("unknown-unknown-unknown"); 521 if (TripleName.empty()) { 522 TheTriple = Obj->makeTriple(); 523 } else { 524 TheTriple.setTriple(Triple::normalize(TripleName)); 525 auto Arch = Obj->getArch(); 526 if (Arch == Triple::arm || Arch == Triple::armeb) 527 Obj->setARMSubArch(TheTriple); 528 } 529 530 // Get the target specific parser. 531 std::string Error; 532 const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple, 533 Error); 534 if (!TheTarget) 535 reportError(Obj->getFileName(), "can't find target: " + Error); 536 537 // Update the triple name and return the found target. 538 TripleName = TheTriple.getTriple(); 539 return TheTarget; 540 } 541 542 bool objdump::isRelocAddressLess(RelocationRef A, RelocationRef B) { 543 return A.getOffset() < B.getOffset(); 544 } 545 546 static Error getRelocationValueString(const RelocationRef &Rel, 547 bool SymbolDescription, 548 SmallVectorImpl<char> &Result) { 549 const ObjectFile *Obj = Rel.getObject(); 550 if (auto *ELF = dyn_cast<ELFObjectFileBase>(Obj)) 551 return getELFRelocationValueString(ELF, Rel, Result); 552 if (auto *COFF = dyn_cast<COFFObjectFile>(Obj)) 553 return getCOFFRelocationValueString(COFF, Rel, Result); 554 if (auto *Wasm = dyn_cast<WasmObjectFile>(Obj)) 555 return getWasmRelocationValueString(Wasm, Rel, Result); 556 if (auto *MachO = dyn_cast<MachOObjectFile>(Obj)) 557 return getMachORelocationValueString(MachO, Rel, Result); 558 if (auto *XCOFF = dyn_cast<XCOFFObjectFile>(Obj)) 559 return getXCOFFRelocationValueString(*XCOFF, Rel, SymbolDescription, 560 Result); 561 llvm_unreachable("unknown object file format"); 562 } 563 564 /// Indicates whether this relocation should hidden when listing 565 /// relocations, usually because it is the trailing part of a multipart 566 /// relocation that will be printed as part of the leading relocation. 
567 static bool getHidden(RelocationRef RelRef) { 568 auto *MachO = dyn_cast<MachOObjectFile>(RelRef.getObject()); 569 if (!MachO) 570 return false; 571 572 unsigned Arch = MachO->getArch(); 573 DataRefImpl Rel = RelRef.getRawDataRefImpl(); 574 uint64_t Type = MachO->getRelocationType(Rel); 575 576 // On arches that use the generic relocations, GENERIC_RELOC_PAIR 577 // is always hidden. 578 if (Arch == Triple::x86 || Arch == Triple::arm || Arch == Triple::ppc) 579 return Type == MachO::GENERIC_RELOC_PAIR; 580 581 if (Arch == Triple::x86_64) { 582 // On x86_64, X86_64_RELOC_UNSIGNED is hidden only when it follows 583 // an X86_64_RELOC_SUBTRACTOR. 584 if (Type == MachO::X86_64_RELOC_UNSIGNED && Rel.d.a > 0) { 585 DataRefImpl RelPrev = Rel; 586 RelPrev.d.a--; 587 uint64_t PrevType = MachO->getRelocationType(RelPrev); 588 if (PrevType == MachO::X86_64_RELOC_SUBTRACTOR) 589 return true; 590 } 591 } 592 593 return false; 594 } 595 596 /// Get the column at which we want to start printing the instruction 597 /// disassembly, taking into account anything which appears to the left of it. 598 unsigned objdump::getInstStartColumn(const MCSubtargetInfo &STI) { 599 return !ShowRawInsn ? 16 : STI.getTargetTriple().isX86() ? 40 : 24; 600 } 601 602 static void AlignToInstStartColumn(size_t Start, const MCSubtargetInfo &STI, 603 raw_ostream &OS) { 604 // The output of printInst starts with a tab. Print some spaces so that 605 // the tab has 1 column and advances to the target tab stop. 606 unsigned TabStop = getInstStartColumn(STI); 607 unsigned Column = OS.tell() - Start; 608 OS.indent(Column < TabStop - 1 ? 
TabStop - 1 - Column : 7 - Column % 8); 609 } 610 611 void objdump::printRawData(ArrayRef<uint8_t> Bytes, uint64_t Address, 612 formatted_raw_ostream &OS, 613 MCSubtargetInfo const &STI) { 614 size_t Start = OS.tell(); 615 if (LeadingAddr) 616 OS << format("%8" PRIx64 ":", Address); 617 if (ShowRawInsn) { 618 OS << ' '; 619 dumpBytes(Bytes, OS); 620 } 621 AlignToInstStartColumn(Start, STI, OS); 622 } 623 624 namespace { 625 626 static bool isAArch64Elf(const ObjectFile &Obj) { 627 const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj); 628 return Elf && Elf->getEMachine() == ELF::EM_AARCH64; 629 } 630 631 static bool isArmElf(const ObjectFile &Obj) { 632 const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj); 633 return Elf && Elf->getEMachine() == ELF::EM_ARM; 634 } 635 636 static bool isCSKYElf(const ObjectFile &Obj) { 637 const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj); 638 return Elf && Elf->getEMachine() == ELF::EM_CSKY; 639 } 640 641 static bool hasMappingSymbols(const ObjectFile &Obj) { 642 return isArmElf(Obj) || isAArch64Elf(Obj) || isCSKYElf(Obj) ; 643 } 644 645 static void printRelocation(formatted_raw_ostream &OS, StringRef FileName, 646 const RelocationRef &Rel, uint64_t Address, 647 bool Is64Bits) { 648 StringRef Fmt = Is64Bits ? "%016" PRIx64 ": " : "%08" PRIx64 ": "; 649 SmallString<16> Name; 650 SmallString<32> Val; 651 Rel.getTypeName(Name); 652 if (Error E = getRelocationValueString(Rel, SymbolDescription, Val)) 653 reportError(std::move(E), FileName); 654 OS << (Is64Bits || !LeadingAddr ? 
"\t\t" : "\t\t\t"); 655 if (LeadingAddr) 656 OS << format(Fmt.data(), Address); 657 OS << Name << "\t" << Val; 658 } 659 660 static void printBTFRelocation(formatted_raw_ostream &FOS, llvm::BTFParser &BTF, 661 object::SectionedAddress Address, 662 LiveVariablePrinter &LVP) { 663 const llvm::BTF::BPFFieldReloc *Reloc = BTF.findFieldReloc(Address); 664 if (!Reloc) 665 return; 666 667 SmallString<64> Val; 668 BTF.symbolize(Reloc, Val); 669 FOS << "\t\t"; 670 if (LeadingAddr) 671 FOS << format("%016" PRIx64 ": ", Address.Address + AdjustVMA); 672 FOS << "CO-RE " << Val; 673 LVP.printAfterOtherLine(FOS, true); 674 } 675 676 class PrettyPrinter { 677 public: 678 virtual ~PrettyPrinter() = default; 679 virtual void 680 printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes, 681 object::SectionedAddress Address, formatted_raw_ostream &OS, 682 StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, 683 StringRef ObjectFilename, std::vector<RelocationRef> *Rels, 684 LiveVariablePrinter &LVP) { 685 if (SP && (PrintSource || PrintLines)) 686 SP->printSourceLine(OS, Address, ObjectFilename, LVP); 687 LVP.printBetweenInsts(OS, false); 688 689 printRawData(Bytes, Address.Address, OS, STI); 690 691 if (MI) { 692 // See MCInstPrinter::printInst. On targets where a PC relative immediate 693 // is relative to the next instruction and the length of a MCInst is 694 // difficult to measure (x86), this is the address of the next 695 // instruction. 696 uint64_t Addr = 697 Address.Address + (STI.getTargetTriple().isX86() ? 
Bytes.size() : 0); 698 IP.printInst(MI, Addr, "", STI, OS); 699 } else 700 OS << "\t<unknown>"; 701 } 702 }; 703 PrettyPrinter PrettyPrinterInst; 704 705 class HexagonPrettyPrinter : public PrettyPrinter { 706 public: 707 void printLead(ArrayRef<uint8_t> Bytes, uint64_t Address, 708 formatted_raw_ostream &OS) { 709 uint32_t opcode = 710 (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | Bytes[0]; 711 if (LeadingAddr) 712 OS << format("%8" PRIx64 ":", Address); 713 if (ShowRawInsn) { 714 OS << "\t"; 715 dumpBytes(Bytes.slice(0, 4), OS); 716 OS << format("\t%08" PRIx32, opcode); 717 } 718 } 719 void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes, 720 object::SectionedAddress Address, formatted_raw_ostream &OS, 721 StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, 722 StringRef ObjectFilename, std::vector<RelocationRef> *Rels, 723 LiveVariablePrinter &LVP) override { 724 if (SP && (PrintSource || PrintLines)) 725 SP->printSourceLine(OS, Address, ObjectFilename, LVP, ""); 726 if (!MI) { 727 printLead(Bytes, Address.Address, OS); 728 OS << " <unknown>"; 729 return; 730 } 731 std::string Buffer; 732 { 733 raw_string_ostream TempStream(Buffer); 734 IP.printInst(MI, Address.Address, "", STI, TempStream); 735 } 736 StringRef Contents(Buffer); 737 // Split off bundle attributes 738 auto PacketBundle = Contents.rsplit('\n'); 739 // Split off first instruction from the rest 740 auto HeadTail = PacketBundle.first.split('\n'); 741 auto Preamble = " { "; 742 auto Separator = ""; 743 744 // Hexagon's packets require relocations to be inline rather than 745 // clustered at the end of the packet. 
746 std::vector<RelocationRef>::const_iterator RelCur = Rels->begin(); 747 std::vector<RelocationRef>::const_iterator RelEnd = Rels->end(); 748 auto PrintReloc = [&]() -> void { 749 while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address.Address)) { 750 if (RelCur->getOffset() == Address.Address) { 751 printRelocation(OS, ObjectFilename, *RelCur, Address.Address, false); 752 return; 753 } 754 ++RelCur; 755 } 756 }; 757 758 while (!HeadTail.first.empty()) { 759 OS << Separator; 760 Separator = "\n"; 761 if (SP && (PrintSource || PrintLines)) 762 SP->printSourceLine(OS, Address, ObjectFilename, LVP, ""); 763 printLead(Bytes, Address.Address, OS); 764 OS << Preamble; 765 Preamble = " "; 766 StringRef Inst; 767 auto Duplex = HeadTail.first.split('\v'); 768 if (!Duplex.second.empty()) { 769 OS << Duplex.first; 770 OS << "; "; 771 Inst = Duplex.second; 772 } 773 else 774 Inst = HeadTail.first; 775 OS << Inst; 776 HeadTail = HeadTail.second.split('\n'); 777 if (HeadTail.first.empty()) 778 OS << " } " << PacketBundle.second; 779 PrintReloc(); 780 Bytes = Bytes.slice(4); 781 Address.Address += 4; 782 } 783 } 784 }; 785 HexagonPrettyPrinter HexagonPrettyPrinterInst; 786 787 class AMDGCNPrettyPrinter : public PrettyPrinter { 788 public: 789 void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes, 790 object::SectionedAddress Address, formatted_raw_ostream &OS, 791 StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, 792 StringRef ObjectFilename, std::vector<RelocationRef> *Rels, 793 LiveVariablePrinter &LVP) override { 794 if (SP && (PrintSource || PrintLines)) 795 SP->printSourceLine(OS, Address, ObjectFilename, LVP); 796 797 if (MI) { 798 SmallString<40> InstStr; 799 raw_svector_ostream IS(InstStr); 800 801 IP.printInst(MI, Address.Address, "", STI, IS); 802 803 OS << left_justify(IS.str(), 60); 804 } else { 805 // an unrecognized encoding - this is probably data so represent it 806 // using the .long directive, or .byte directive if 
fewer than 4 bytes 807 // remaining 808 if (Bytes.size() >= 4) { 809 OS << format( 810 "\t.long 0x%08" PRIx32 " ", 811 support::endian::read32<llvm::endianness::little>(Bytes.data())); 812 OS.indent(42); 813 } else { 814 OS << format("\t.byte 0x%02" PRIx8, Bytes[0]); 815 for (unsigned int i = 1; i < Bytes.size(); i++) 816 OS << format(", 0x%02" PRIx8, Bytes[i]); 817 OS.indent(55 - (6 * Bytes.size())); 818 } 819 } 820 821 OS << format("// %012" PRIX64 ":", Address.Address); 822 if (Bytes.size() >= 4) { 823 // D should be casted to uint32_t here as it is passed by format to 824 // snprintf as vararg. 825 for (uint32_t D : 826 ArrayRef(reinterpret_cast<const support::little32_t *>(Bytes.data()), 827 Bytes.size() / 4)) 828 OS << format(" %08" PRIX32, D); 829 } else { 830 for (unsigned char B : Bytes) 831 OS << format(" %02" PRIX8, B); 832 } 833 834 if (!Annot.empty()) 835 OS << " // " << Annot; 836 } 837 }; 838 AMDGCNPrettyPrinter AMDGCNPrettyPrinterInst; 839 840 class BPFPrettyPrinter : public PrettyPrinter { 841 public: 842 void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes, 843 object::SectionedAddress Address, formatted_raw_ostream &OS, 844 StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, 845 StringRef ObjectFilename, std::vector<RelocationRef> *Rels, 846 LiveVariablePrinter &LVP) override { 847 if (SP && (PrintSource || PrintLines)) 848 SP->printSourceLine(OS, Address, ObjectFilename, LVP); 849 if (LeadingAddr) 850 OS << format("%8" PRId64 ":", Address.Address / 8); 851 if (ShowRawInsn) { 852 OS << "\t"; 853 dumpBytes(Bytes, OS); 854 } 855 if (MI) 856 IP.printInst(MI, Address.Address, "", STI, OS); 857 else 858 OS << "\t<unknown>"; 859 } 860 }; 861 BPFPrettyPrinter BPFPrettyPrinterInst; 862 863 class ARMPrettyPrinter : public PrettyPrinter { 864 public: 865 void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes, 866 object::SectionedAddress Address, formatted_raw_ostream &OS, 867 StringRef Annot, 
MCSubtargetInfo const &STI, SourcePrinter *SP, 868 StringRef ObjectFilename, std::vector<RelocationRef> *Rels, 869 LiveVariablePrinter &LVP) override { 870 if (SP && (PrintSource || PrintLines)) 871 SP->printSourceLine(OS, Address, ObjectFilename, LVP); 872 LVP.printBetweenInsts(OS, false); 873 874 size_t Start = OS.tell(); 875 if (LeadingAddr) 876 OS << format("%8" PRIx64 ":", Address.Address); 877 if (ShowRawInsn) { 878 size_t Pos = 0, End = Bytes.size(); 879 if (STI.checkFeatures("+thumb-mode")) { 880 for (; Pos + 2 <= End; Pos += 2) 881 OS << ' ' 882 << format_hex_no_prefix( 883 llvm::support::endian::read<uint16_t>( 884 Bytes.data() + Pos, InstructionEndianness), 885 4); 886 } else { 887 for (; Pos + 4 <= End; Pos += 4) 888 OS << ' ' 889 << format_hex_no_prefix( 890 llvm::support::endian::read<uint32_t>( 891 Bytes.data() + Pos, InstructionEndianness), 892 8); 893 } 894 if (Pos < End) { 895 OS << ' '; 896 dumpBytes(Bytes.slice(Pos), OS); 897 } 898 } 899 900 AlignToInstStartColumn(Start, STI, OS); 901 902 if (MI) { 903 IP.printInst(MI, Address.Address, "", STI, OS); 904 } else 905 OS << "\t<unknown>"; 906 } 907 908 void setInstructionEndianness(llvm::endianness Endianness) { 909 InstructionEndianness = Endianness; 910 } 911 912 private: 913 llvm::endianness InstructionEndianness = llvm::endianness::little; 914 }; 915 ARMPrettyPrinter ARMPrettyPrinterInst; 916 917 class AArch64PrettyPrinter : public PrettyPrinter { 918 public: 919 void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes, 920 object::SectionedAddress Address, formatted_raw_ostream &OS, 921 StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, 922 StringRef ObjectFilename, std::vector<RelocationRef> *Rels, 923 LiveVariablePrinter &LVP) override { 924 if (SP && (PrintSource || PrintLines)) 925 SP->printSourceLine(OS, Address, ObjectFilename, LVP); 926 LVP.printBetweenInsts(OS, false); 927 928 size_t Start = OS.tell(); 929 if (LeadingAddr) 930 OS << format("%8" PRIx64 
":", Address.Address); 931 if (ShowRawInsn) { 932 size_t Pos = 0, End = Bytes.size(); 933 for (; Pos + 4 <= End; Pos += 4) 934 OS << ' ' 935 << format_hex_no_prefix( 936 llvm::support::endian::read<uint32_t>( 937 Bytes.data() + Pos, llvm::endianness::little), 938 8); 939 if (Pos < End) { 940 OS << ' '; 941 dumpBytes(Bytes.slice(Pos), OS); 942 } 943 } 944 945 AlignToInstStartColumn(Start, STI, OS); 946 947 if (MI) { 948 IP.printInst(MI, Address.Address, "", STI, OS); 949 } else 950 OS << "\t<unknown>"; 951 } 952 }; 953 AArch64PrettyPrinter AArch64PrettyPrinterInst; 954 955 class RISCVPrettyPrinter : public PrettyPrinter { 956 public: 957 void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes, 958 object::SectionedAddress Address, formatted_raw_ostream &OS, 959 StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, 960 StringRef ObjectFilename, std::vector<RelocationRef> *Rels, 961 LiveVariablePrinter &LVP) override { 962 if (SP && (PrintSource || PrintLines)) 963 SP->printSourceLine(OS, Address, ObjectFilename, LVP); 964 LVP.printBetweenInsts(OS, false); 965 966 size_t Start = OS.tell(); 967 if (LeadingAddr) 968 OS << format("%8" PRIx64 ":", Address.Address); 969 if (ShowRawInsn) { 970 size_t Pos = 0, End = Bytes.size(); 971 if (End % 4 == 0) { 972 // 32-bit and 64-bit instructions. 973 for (; Pos + 4 <= End; Pos += 4) 974 OS << ' ' 975 << format_hex_no_prefix( 976 llvm::support::endian::read<uint32_t>( 977 Bytes.data() + Pos, llvm::endianness::little), 978 8); 979 } else if (End % 2 == 0) { 980 // 16-bit and 48-bits instructions. 
981 for (; Pos + 2 <= End; Pos += 2) 982 OS << ' ' 983 << format_hex_no_prefix( 984 llvm::support::endian::read<uint16_t>( 985 Bytes.data() + Pos, llvm::endianness::little), 986 4); 987 } 988 if (Pos < End) { 989 OS << ' '; 990 dumpBytes(Bytes.slice(Pos), OS); 991 } 992 } 993 994 AlignToInstStartColumn(Start, STI, OS); 995 996 if (MI) { 997 IP.printInst(MI, Address.Address, "", STI, OS); 998 } else 999 OS << "\t<unknown>"; 1000 } 1001 }; 1002 RISCVPrettyPrinter RISCVPrettyPrinterInst; 1003 1004 PrettyPrinter &selectPrettyPrinter(Triple const &Triple) { 1005 switch(Triple.getArch()) { 1006 default: 1007 return PrettyPrinterInst; 1008 case Triple::hexagon: 1009 return HexagonPrettyPrinterInst; 1010 case Triple::amdgcn: 1011 return AMDGCNPrettyPrinterInst; 1012 case Triple::bpfel: 1013 case Triple::bpfeb: 1014 return BPFPrettyPrinterInst; 1015 case Triple::arm: 1016 case Triple::armeb: 1017 case Triple::thumb: 1018 case Triple::thumbeb: 1019 return ARMPrettyPrinterInst; 1020 case Triple::aarch64: 1021 case Triple::aarch64_be: 1022 case Triple::aarch64_32: 1023 return AArch64PrettyPrinterInst; 1024 case Triple::riscv32: 1025 case Triple::riscv64: 1026 return RISCVPrettyPrinterInst; 1027 } 1028 } 1029 1030 class DisassemblerTarget { 1031 public: 1032 const Target *TheTarget; 1033 std::unique_ptr<const MCSubtargetInfo> SubtargetInfo; 1034 std::shared_ptr<MCContext> Context; 1035 std::unique_ptr<MCDisassembler> DisAsm; 1036 std::shared_ptr<MCInstrAnalysis> InstrAnalysis; 1037 std::shared_ptr<MCInstPrinter> InstPrinter; 1038 PrettyPrinter *Printer; 1039 1040 DisassemblerTarget(const Target *TheTarget, ObjectFile &Obj, 1041 StringRef TripleName, StringRef MCPU, 1042 SubtargetFeatures &Features); 1043 DisassemblerTarget(DisassemblerTarget &Other, SubtargetFeatures &Features); 1044 1045 private: 1046 MCTargetOptions Options; 1047 std::shared_ptr<const MCRegisterInfo> RegisterInfo; 1048 std::shared_ptr<const MCAsmInfo> AsmInfo; 1049 std::shared_ptr<const MCInstrInfo> 
InstrInfo; 1050 std::shared_ptr<MCObjectFileInfo> ObjectFileInfo; 1051 }; 1052 1053 DisassemblerTarget::DisassemblerTarget(const Target *TheTarget, ObjectFile &Obj, 1054 StringRef TripleName, StringRef MCPU, 1055 SubtargetFeatures &Features) 1056 : TheTarget(TheTarget), 1057 Printer(&selectPrettyPrinter(Triple(TripleName))), 1058 RegisterInfo(TheTarget->createMCRegInfo(TripleName)) { 1059 if (!RegisterInfo) 1060 reportError(Obj.getFileName(), "no register info for target " + TripleName); 1061 1062 // Set up disassembler. 1063 AsmInfo.reset(TheTarget->createMCAsmInfo(*RegisterInfo, TripleName, Options)); 1064 if (!AsmInfo) 1065 reportError(Obj.getFileName(), "no assembly info for target " + TripleName); 1066 1067 SubtargetInfo.reset( 1068 TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString())); 1069 if (!SubtargetInfo) 1070 reportError(Obj.getFileName(), 1071 "no subtarget info for target " + TripleName); 1072 InstrInfo.reset(TheTarget->createMCInstrInfo()); 1073 if (!InstrInfo) 1074 reportError(Obj.getFileName(), 1075 "no instruction info for target " + TripleName); 1076 Context = 1077 std::make_shared<MCContext>(Triple(TripleName), AsmInfo.get(), 1078 RegisterInfo.get(), SubtargetInfo.get()); 1079 1080 // FIXME: for now initialize MCObjectFileInfo with default values 1081 ObjectFileInfo.reset( 1082 TheTarget->createMCObjectFileInfo(*Context, /*PIC=*/false)); 1083 Context->setObjectFileInfo(ObjectFileInfo.get()); 1084 1085 DisAsm.reset(TheTarget->createMCDisassembler(*SubtargetInfo, *Context)); 1086 if (!DisAsm) 1087 reportError(Obj.getFileName(), "no disassembler for target " + TripleName); 1088 1089 if (auto *ELFObj = dyn_cast<ELFObjectFileBase>(&Obj)) 1090 DisAsm->setABIVersion(ELFObj->getEIdentABIVersion()); 1091 1092 InstrAnalysis.reset(TheTarget->createMCInstrAnalysis(InstrInfo.get())); 1093 1094 int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); 1095 InstPrinter.reset(TheTarget->createMCInstPrinter(Triple(TripleName), 1096 
AsmPrinterVariant, *AsmInfo, 1097 *InstrInfo, *RegisterInfo)); 1098 if (!InstPrinter) 1099 reportError(Obj.getFileName(), 1100 "no instruction printer for target " + TripleName); 1101 InstPrinter->setPrintImmHex(PrintImmHex); 1102 InstPrinter->setPrintBranchImmAsAddress(true); 1103 InstPrinter->setSymbolizeOperands(SymbolizeOperands); 1104 InstPrinter->setMCInstrAnalysis(InstrAnalysis.get()); 1105 1106 switch (DisassemblyColor) { 1107 case ColorOutput::Enable: 1108 InstPrinter->setUseColor(true); 1109 break; 1110 case ColorOutput::Auto: 1111 InstPrinter->setUseColor(outs().has_colors()); 1112 break; 1113 case ColorOutput::Disable: 1114 case ColorOutput::Invalid: 1115 InstPrinter->setUseColor(false); 1116 break; 1117 }; 1118 } 1119 1120 DisassemblerTarget::DisassemblerTarget(DisassemblerTarget &Other, 1121 SubtargetFeatures &Features) 1122 : TheTarget(Other.TheTarget), 1123 SubtargetInfo(TheTarget->createMCSubtargetInfo(TripleName, MCPU, 1124 Features.getString())), 1125 Context(Other.Context), 1126 DisAsm(TheTarget->createMCDisassembler(*SubtargetInfo, *Context)), 1127 InstrAnalysis(Other.InstrAnalysis), InstPrinter(Other.InstPrinter), 1128 Printer(Other.Printer), RegisterInfo(Other.RegisterInfo), 1129 AsmInfo(Other.AsmInfo), InstrInfo(Other.InstrInfo), 1130 ObjectFileInfo(Other.ObjectFileInfo) {} 1131 } // namespace 1132 1133 static uint8_t getElfSymbolType(const ObjectFile &Obj, const SymbolRef &Sym) { 1134 assert(Obj.isELF()); 1135 if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(&Obj)) 1136 return unwrapOrError(Elf32LEObj->getSymbol(Sym.getRawDataRefImpl()), 1137 Obj.getFileName()) 1138 ->getType(); 1139 if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(&Obj)) 1140 return unwrapOrError(Elf64LEObj->getSymbol(Sym.getRawDataRefImpl()), 1141 Obj.getFileName()) 1142 ->getType(); 1143 if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(&Obj)) 1144 return unwrapOrError(Elf32BEObj->getSymbol(Sym.getRawDataRefImpl()), 1145 Obj.getFileName()) 1146 ->getType(); 1147 if 
(auto *Elf64BEObj = cast<ELF64BEObjectFile>(&Obj)) 1148 return unwrapOrError(Elf64BEObj->getSymbol(Sym.getRawDataRefImpl()), 1149 Obj.getFileName()) 1150 ->getType(); 1151 llvm_unreachable("Unsupported binary format"); 1152 } 1153 1154 template <class ELFT> 1155 static void 1156 addDynamicElfSymbols(const ELFObjectFile<ELFT> &Obj, 1157 std::map<SectionRef, SectionSymbolsTy> &AllSymbols) { 1158 for (auto Symbol : Obj.getDynamicSymbolIterators()) { 1159 uint8_t SymbolType = Symbol.getELFType(); 1160 if (SymbolType == ELF::STT_SECTION) 1161 continue; 1162 1163 uint64_t Address = unwrapOrError(Symbol.getAddress(), Obj.getFileName()); 1164 // ELFSymbolRef::getAddress() returns size instead of value for common 1165 // symbols which is not desirable for disassembly output. Overriding. 1166 if (SymbolType == ELF::STT_COMMON) 1167 Address = unwrapOrError(Obj.getSymbol(Symbol.getRawDataRefImpl()), 1168 Obj.getFileName()) 1169 ->st_value; 1170 1171 StringRef Name = unwrapOrError(Symbol.getName(), Obj.getFileName()); 1172 if (Name.empty()) 1173 continue; 1174 1175 section_iterator SecI = 1176 unwrapOrError(Symbol.getSection(), Obj.getFileName()); 1177 if (SecI == Obj.section_end()) 1178 continue; 1179 1180 AllSymbols[*SecI].emplace_back(Address, Name, SymbolType); 1181 } 1182 } 1183 1184 static void 1185 addDynamicElfSymbols(const ELFObjectFileBase &Obj, 1186 std::map<SectionRef, SectionSymbolsTy> &AllSymbols) { 1187 if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(&Obj)) 1188 addDynamicElfSymbols(*Elf32LEObj, AllSymbols); 1189 else if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(&Obj)) 1190 addDynamicElfSymbols(*Elf64LEObj, AllSymbols); 1191 else if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(&Obj)) 1192 addDynamicElfSymbols(*Elf32BEObj, AllSymbols); 1193 else if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(&Obj)) 1194 addDynamicElfSymbols(*Elf64BEObj, AllSymbols); 1195 else 1196 llvm_unreachable("Unsupported binary format"); 1197 } 1198 1199 static 
std::optional<SectionRef> getWasmCodeSection(const WasmObjectFile &Obj) { 1200 for (auto SecI : Obj.sections()) { 1201 const WasmSection &Section = Obj.getWasmSection(SecI); 1202 if (Section.Type == wasm::WASM_SEC_CODE) 1203 return SecI; 1204 } 1205 return std::nullopt; 1206 } 1207 1208 static void 1209 addMissingWasmCodeSymbols(const WasmObjectFile &Obj, 1210 std::map<SectionRef, SectionSymbolsTy> &AllSymbols) { 1211 std::optional<SectionRef> Section = getWasmCodeSection(Obj); 1212 if (!Section) 1213 return; 1214 SectionSymbolsTy &Symbols = AllSymbols[*Section]; 1215 1216 std::set<uint64_t> SymbolAddresses; 1217 for (const auto &Sym : Symbols) 1218 SymbolAddresses.insert(Sym.Addr); 1219 1220 for (const wasm::WasmFunction &Function : Obj.functions()) { 1221 // This adjustment mirrors the one in WasmObjectFile::getSymbolAddress. 1222 uint32_t Adjustment = Obj.isRelocatableObject() || Obj.isSharedObject() 1223 ? 0 1224 : Section->getAddress(); 1225 uint64_t Address = Function.CodeSectionOffset + Adjustment; 1226 // Only add fallback symbols for functions not already present in the symbol 1227 // table. 1228 if (SymbolAddresses.count(Address)) 1229 continue; 1230 // This function has no symbol, so it should have no SymbolName. 1231 assert(Function.SymbolName.empty()); 1232 // We use DebugName for the name, though it may be empty if there is no 1233 // "name" custom section, or that section is missing a name for this 1234 // function. 
1235 StringRef Name = Function.DebugName; 1236 Symbols.emplace_back(Address, Name, ELF::STT_NOTYPE); 1237 } 1238 } 1239 1240 static void addPltEntries(const ObjectFile &Obj, 1241 std::map<SectionRef, SectionSymbolsTy> &AllSymbols, 1242 StringSaver &Saver) { 1243 auto *ElfObj = dyn_cast<ELFObjectFileBase>(&Obj); 1244 if (!ElfObj) 1245 return; 1246 DenseMap<StringRef, SectionRef> Sections; 1247 for (SectionRef Section : Obj.sections()) { 1248 Expected<StringRef> SecNameOrErr = Section.getName(); 1249 if (!SecNameOrErr) { 1250 consumeError(SecNameOrErr.takeError()); 1251 continue; 1252 } 1253 Sections[*SecNameOrErr] = Section; 1254 } 1255 for (auto Plt : ElfObj->getPltEntries()) { 1256 if (Plt.Symbol) { 1257 SymbolRef Symbol(*Plt.Symbol, ElfObj); 1258 uint8_t SymbolType = getElfSymbolType(Obj, Symbol); 1259 if (Expected<StringRef> NameOrErr = Symbol.getName()) { 1260 if (!NameOrErr->empty()) 1261 AllSymbols[Sections[Plt.Section]].emplace_back( 1262 Plt.Address, Saver.save((*NameOrErr + "@plt").str()), SymbolType); 1263 continue; 1264 } else { 1265 // The warning has been reported in disassembleObject(). 1266 consumeError(NameOrErr.takeError()); 1267 } 1268 } 1269 reportWarning("PLT entry at 0x" + Twine::utohexstr(Plt.Address) + 1270 " references an invalid symbol", 1271 Obj.getFileName()); 1272 } 1273 } 1274 1275 // Normally the disassembly output will skip blocks of zeroes. This function 1276 // returns the number of zero bytes that can be skipped when dumping the 1277 // disassembly of the instructions in Buf. 1278 static size_t countSkippableZeroBytes(ArrayRef<uint8_t> Buf) { 1279 // Find the number of leading zeroes. 1280 size_t N = 0; 1281 while (N < Buf.size() && !Buf[N]) 1282 ++N; 1283 1284 // We may want to skip blocks of zero bytes, but unless we see 1285 // at least 8 of them in a row. 1286 if (N < 8) 1287 return 0; 1288 1289 // We skip zeroes in multiples of 4 because do not want to truncate an 1290 // instruction if it starts with a zero byte. 
1291 return N & ~0x3; 1292 } 1293 1294 // Returns a map from sections to their relocations. 1295 static std::map<SectionRef, std::vector<RelocationRef>> 1296 getRelocsMap(object::ObjectFile const &Obj) { 1297 std::map<SectionRef, std::vector<RelocationRef>> Ret; 1298 uint64_t I = (uint64_t)-1; 1299 for (SectionRef Sec : Obj.sections()) { 1300 ++I; 1301 Expected<section_iterator> RelocatedOrErr = Sec.getRelocatedSection(); 1302 if (!RelocatedOrErr) 1303 reportError(Obj.getFileName(), 1304 "section (" + Twine(I) + 1305 "): failed to get a relocated section: " + 1306 toString(RelocatedOrErr.takeError())); 1307 1308 section_iterator Relocated = *RelocatedOrErr; 1309 if (Relocated == Obj.section_end() || !checkSectionFilter(*Relocated).Keep) 1310 continue; 1311 std::vector<RelocationRef> &V = Ret[*Relocated]; 1312 append_range(V, Sec.relocations()); 1313 // Sort relocations by address. 1314 llvm::stable_sort(V, isRelocAddressLess); 1315 } 1316 return Ret; 1317 } 1318 1319 // Used for --adjust-vma to check if address should be adjusted by the 1320 // specified value for a given section. 1321 // For ELF we do not adjust non-allocatable sections like debug ones, 1322 // because they are not loadable. 1323 // TODO: implement for other file formats. 1324 static bool shouldAdjustVA(const SectionRef &Section) { 1325 const ObjectFile *Obj = Section.getObject(); 1326 if (Obj->isELF()) 1327 return ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC; 1328 return false; 1329 } 1330 1331 1332 typedef std::pair<uint64_t, char> MappingSymbolPair; 1333 static char getMappingSymbolKind(ArrayRef<MappingSymbolPair> MappingSymbols, 1334 uint64_t Address) { 1335 auto It = 1336 partition_point(MappingSymbols, [Address](const MappingSymbolPair &Val) { 1337 return Val.first <= Address; 1338 }); 1339 // Return zero for any address before the first mapping symbol; this means 1340 // we should use the default disassembly mode, depending on the target. 
1341 if (It == MappingSymbols.begin()) 1342 return '\x00'; 1343 return (It - 1)->second; 1344 } 1345 1346 static uint64_t dumpARMELFData(uint64_t SectionAddr, uint64_t Index, 1347 uint64_t End, const ObjectFile &Obj, 1348 ArrayRef<uint8_t> Bytes, 1349 ArrayRef<MappingSymbolPair> MappingSymbols, 1350 const MCSubtargetInfo &STI, raw_ostream &OS) { 1351 llvm::endianness Endian = 1352 Obj.isLittleEndian() ? llvm::endianness::little : llvm::endianness::big; 1353 size_t Start = OS.tell(); 1354 OS << format("%8" PRIx64 ": ", SectionAddr + Index); 1355 if (Index + 4 <= End) { 1356 dumpBytes(Bytes.slice(Index, 4), OS); 1357 AlignToInstStartColumn(Start, STI, OS); 1358 OS << "\t.word\t" 1359 << format_hex(support::endian::read32(Bytes.data() + Index, Endian), 1360 10); 1361 return 4; 1362 } 1363 if (Index + 2 <= End) { 1364 dumpBytes(Bytes.slice(Index, 2), OS); 1365 AlignToInstStartColumn(Start, STI, OS); 1366 OS << "\t.short\t" 1367 << format_hex(support::endian::read16(Bytes.data() + Index, Endian), 6); 1368 return 2; 1369 } 1370 dumpBytes(Bytes.slice(Index, 1), OS); 1371 AlignToInstStartColumn(Start, STI, OS); 1372 OS << "\t.byte\t" << format_hex(Bytes[Index], 4); 1373 return 1; 1374 } 1375 1376 static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End, 1377 ArrayRef<uint8_t> Bytes) { 1378 // print out data up to 8 bytes at a time in hex and ascii 1379 uint8_t AsciiData[9] = {'\0'}; 1380 uint8_t Byte; 1381 int NumBytes = 0; 1382 1383 for (; Index < End; ++Index) { 1384 if (NumBytes == 0) 1385 outs() << format("%8" PRIx64 ":", SectionAddr + Index); 1386 Byte = Bytes.slice(Index)[0]; 1387 outs() << format(" %02x", Byte); 1388 AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.'; 1389 1390 uint8_t IndentOffset = 0; 1391 NumBytes++; 1392 if (Index == End - 1 || NumBytes > 8) { 1393 // Indent the space for less than 8 bytes data. 
1394 // 2 spaces for byte and one for space between bytes 1395 IndentOffset = 3 * (8 - NumBytes); 1396 for (int Excess = NumBytes; Excess < 8; Excess++) 1397 AsciiData[Excess] = '\0'; 1398 NumBytes = 8; 1399 } 1400 if (NumBytes == 8) { 1401 AsciiData[8] = '\0'; 1402 outs() << std::string(IndentOffset, ' ') << " "; 1403 outs() << reinterpret_cast<char *>(AsciiData); 1404 outs() << '\n'; 1405 NumBytes = 0; 1406 } 1407 } 1408 } 1409 1410 SymbolInfoTy objdump::createSymbolInfo(const ObjectFile &Obj, 1411 const SymbolRef &Symbol, 1412 bool IsMappingSymbol) { 1413 const StringRef FileName = Obj.getFileName(); 1414 const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); 1415 const StringRef Name = unwrapOrError(Symbol.getName(), FileName); 1416 1417 if (Obj.isXCOFF() && (SymbolDescription || TracebackTable)) { 1418 const auto &XCOFFObj = cast<XCOFFObjectFile>(Obj); 1419 DataRefImpl SymbolDRI = Symbol.getRawDataRefImpl(); 1420 1421 const uint32_t SymbolIndex = XCOFFObj.getSymbolIndex(SymbolDRI.p); 1422 std::optional<XCOFF::StorageMappingClass> Smc = 1423 getXCOFFSymbolCsectSMC(XCOFFObj, Symbol); 1424 return SymbolInfoTy(Smc, Addr, Name, SymbolIndex, 1425 isLabel(XCOFFObj, Symbol)); 1426 } else if (Obj.isXCOFF()) { 1427 const SymbolRef::Type SymType = unwrapOrError(Symbol.getType(), FileName); 1428 return SymbolInfoTy(Addr, Name, SymType, /*IsMappingSymbol=*/false, 1429 /*IsXCOFF=*/true); 1430 } else if (Obj.isWasm()) { 1431 uint8_t SymType = 1432 cast<WasmObjectFile>(&Obj)->getWasmSymbol(Symbol).Info.Kind; 1433 return SymbolInfoTy(Addr, Name, SymType, false); 1434 } else { 1435 uint8_t Type = 1436 Obj.isELF() ? 
getElfSymbolType(Obj, Symbol) : (uint8_t)ELF::STT_NOTYPE; 1437 return SymbolInfoTy(Addr, Name, Type, IsMappingSymbol); 1438 } 1439 } 1440 1441 static SymbolInfoTy createDummySymbolInfo(const ObjectFile &Obj, 1442 const uint64_t Addr, StringRef &Name, 1443 uint8_t Type) { 1444 if (Obj.isXCOFF() && (SymbolDescription || TracebackTable)) 1445 return SymbolInfoTy(std::nullopt, Addr, Name, std::nullopt, false); 1446 if (Obj.isWasm()) 1447 return SymbolInfoTy(Addr, Name, wasm::WASM_SYMBOL_TYPE_SECTION); 1448 return SymbolInfoTy(Addr, Name, Type); 1449 } 1450 1451 static void collectBBAddrMapLabels( 1452 const BBAddrMapInfo &FullAddrMap, uint64_t SectionAddr, uint64_t Start, 1453 uint64_t End, 1454 std::unordered_map<uint64_t, std::vector<BBAddrMapLabel>> &Labels) { 1455 if (FullAddrMap.empty()) 1456 return; 1457 Labels.clear(); 1458 uint64_t StartAddress = SectionAddr + Start; 1459 uint64_t EndAddress = SectionAddr + End; 1460 const BBAddrMapFunctionEntry *FunctionMap = 1461 FullAddrMap.getEntryForAddress(StartAddress); 1462 if (!FunctionMap) 1463 return; 1464 std::optional<size_t> BBRangeIndex = 1465 FunctionMap->getAddrMap().getBBRangeIndexForBaseAddress(StartAddress); 1466 if (!BBRangeIndex) 1467 return; 1468 size_t NumBBEntriesBeforeRange = 0; 1469 for (size_t I = 0; I < *BBRangeIndex; ++I) 1470 NumBBEntriesBeforeRange += 1471 FunctionMap->getAddrMap().BBRanges[I].BBEntries.size(); 1472 const auto &BBRange = FunctionMap->getAddrMap().BBRanges[*BBRangeIndex]; 1473 for (size_t I = 0; I < BBRange.BBEntries.size(); ++I) { 1474 const BBAddrMap::BBEntry &BBEntry = BBRange.BBEntries[I]; 1475 uint64_t BBAddress = BBEntry.Offset + BBRange.BaseAddress; 1476 if (BBAddress >= EndAddress) 1477 continue; 1478 1479 std::string LabelString = ("BB" + Twine(BBEntry.ID)).str(); 1480 Labels[BBAddress].push_back( 1481 {LabelString, FunctionMap->constructPGOLabelString( 1482 NumBBEntriesBeforeRange + I, PrettyPGOAnalysisMap)}); 1483 } 1484 } 1485 1486 static void 1487 
collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, MCInstrAnalysis *MIA,
                          MCDisassembler *DisAsm, MCInstPrinter *IP,
                          const MCSubtargetInfo *STI, uint64_t SectionAddr,
                          uint64_t Start, uint64_t End,
                          std::unordered_map<uint64_t, std::string> &Labels) {
  // Disassembles [Start, End) (section-relative offsets) and records an "L<n>"
  // label in Labels for every branch target that falls inside that range.
  // Supported by certain targets.
  const auto &TargetTriple = STI->getTargetTriple();
  const bool isPPC = TargetTriple.isPPC();
  const bool isX86 = TargetTriple.isX86();
  const bool isBPF = TargetTriple.isBPF();
  if (!(isPPC || isX86 || isBPF))
    return;

  if (MIA)
    MIA->resetState();

  Labels.clear();
  unsigned LabelCount = 0;
  // From here on Start/End/Index are addresses, not section offsets.
  Start += SectionAddr;
  End += SectionAddr;
  const bool isXCOFF = TargetTriple.isOSBinFormatXCOFF();
  for (uint64_t Index = Start; Index < End;) {
    // Disassemble a real instruction and record function-local branch labels.
    MCInst Inst;
    uint64_t Size;
    ArrayRef<uint8_t> ThisBytes = Bytes.slice(Index - SectionAddr);
    const bool Disassembled =
        DisAsm->getInstruction(Inst, Size, ThisBytes, Index, nulls());
    if (Size == 0)
      Size = std::min<uint64_t>(ThisBytes.size(),
                                DisAsm->suggestBytesToSkip(ThisBytes, Index));

    if (MIA && !Disassembled) {
      MIA->resetState();
    } else if (MIA) {
      uint64_t Target;
      const bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
      const bool IsLocalTarget = TargetKnown && Target >= Start && Target < End;
      if (IsLocalTarget && !Labels.count(Target)) {
        // On PowerPC and AIX, a function call is encoded as a branch to 0.
        // On other PowerPC platforms (ELF), a function call is encoded as
        // a branch to self. Do not add a label for these cases.
        const bool IsPPCCallEncoding =
            isPPC && (isXCOFF ? Target == 0 : Target == Index);
        if (!IsPPCCallEncoding)
          Labels[Target] = ("L" + Twine(LabelCount++)).str();
      }
      MIA->updateState(Inst, Index);
    }
    Index += Size;
  }
}

// Create an MCSymbolizer for the target and add it to the MCDisassembler.
1540 // This is currently only used on AMDGPU, and assumes the format of the 1541 // void * argument passed to AMDGPU's createMCSymbolizer. 1542 static void addSymbolizer( 1543 MCContext &Ctx, const Target *Target, StringRef TripleName, 1544 MCDisassembler *DisAsm, uint64_t SectionAddr, ArrayRef<uint8_t> Bytes, 1545 SectionSymbolsTy &Symbols, 1546 std::vector<std::unique_ptr<std::string>> &SynthesizedLabelNames) { 1547 1548 std::unique_ptr<MCRelocationInfo> RelInfo( 1549 Target->createMCRelocationInfo(TripleName, Ctx)); 1550 if (!RelInfo) 1551 return; 1552 std::unique_ptr<MCSymbolizer> Symbolizer(Target->createMCSymbolizer( 1553 TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo))); 1554 MCSymbolizer *SymbolizerPtr = &*Symbolizer; 1555 DisAsm->setSymbolizer(std::move(Symbolizer)); 1556 1557 if (!SymbolizeOperands) 1558 return; 1559 1560 // Synthesize labels referenced by branch instructions by 1561 // disassembling, discarding the output, and collecting the referenced 1562 // addresses from the symbolizer. 1563 for (size_t Index = 0; Index != Bytes.size();) { 1564 MCInst Inst; 1565 uint64_t Size; 1566 ArrayRef<uint8_t> ThisBytes = Bytes.slice(Index); 1567 const uint64_t ThisAddr = SectionAddr + Index; 1568 DisAsm->getInstruction(Inst, Size, ThisBytes, ThisAddr, nulls()); 1569 if (Size == 0) 1570 Size = std::min<uint64_t>(ThisBytes.size(), 1571 DisAsm->suggestBytesToSkip(ThisBytes, Index)); 1572 Index += Size; 1573 } 1574 ArrayRef<uint64_t> LabelAddrsRef = SymbolizerPtr->getReferencedAddresses(); 1575 // Copy and sort to remove duplicates. 1576 std::vector<uint64_t> LabelAddrs; 1577 LabelAddrs.insert(LabelAddrs.end(), LabelAddrsRef.begin(), 1578 LabelAddrsRef.end()); 1579 llvm::sort(LabelAddrs); 1580 LabelAddrs.resize(llvm::unique(LabelAddrs) - LabelAddrs.begin()); 1581 // Add the labels. 
1582 for (unsigned LabelNum = 0; LabelNum != LabelAddrs.size(); ++LabelNum) { 1583 auto Name = std::make_unique<std::string>(); 1584 *Name = (Twine("L") + Twine(LabelNum)).str(); 1585 SynthesizedLabelNames.push_back(std::move(Name)); 1586 Symbols.push_back(SymbolInfoTy( 1587 LabelAddrs[LabelNum], *SynthesizedLabelNames.back(), ELF::STT_NOTYPE)); 1588 } 1589 llvm::stable_sort(Symbols); 1590 // Recreate the symbolizer with the new symbols list. 1591 RelInfo.reset(Target->createMCRelocationInfo(TripleName, Ctx)); 1592 Symbolizer.reset(Target->createMCSymbolizer( 1593 TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo))); 1594 DisAsm->setSymbolizer(std::move(Symbolizer)); 1595 } 1596 1597 static StringRef getSegmentName(const MachOObjectFile *MachO, 1598 const SectionRef &Section) { 1599 if (MachO) { 1600 DataRefImpl DR = Section.getRawDataRefImpl(); 1601 StringRef SegmentName = MachO->getSectionFinalSegmentName(DR); 1602 return SegmentName; 1603 } 1604 return ""; 1605 } 1606 1607 static void emitPostInstructionInfo(formatted_raw_ostream &FOS, 1608 const MCAsmInfo &MAI, 1609 const MCSubtargetInfo &STI, 1610 StringRef Comments, 1611 LiveVariablePrinter &LVP) { 1612 do { 1613 if (!Comments.empty()) { 1614 // Emit a line of comments. 1615 StringRef Comment; 1616 std::tie(Comment, Comments) = Comments.split('\n'); 1617 // MAI.getCommentColumn() assumes that instructions are printed at the 1618 // position of 8, while getInstStartColumn() returns the actual position. 
1619 unsigned CommentColumn = 1620 MAI.getCommentColumn() - 8 + getInstStartColumn(STI); 1621 FOS.PadToColumn(CommentColumn); 1622 FOS << MAI.getCommentString() << ' ' << Comment; 1623 } 1624 LVP.printAfterInst(FOS); 1625 FOS << '\n'; 1626 } while (!Comments.empty()); 1627 FOS.flush(); 1628 } 1629 1630 static void createFakeELFSections(ObjectFile &Obj) { 1631 assert(Obj.isELF()); 1632 if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(&Obj)) 1633 Elf32LEObj->createFakeSections(); 1634 else if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(&Obj)) 1635 Elf64LEObj->createFakeSections(); 1636 else if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(&Obj)) 1637 Elf32BEObj->createFakeSections(); 1638 else if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(&Obj)) 1639 Elf64BEObj->createFakeSections(); 1640 else 1641 llvm_unreachable("Unsupported binary format"); 1642 } 1643 1644 // Tries to fetch a more complete version of the given object file using its 1645 // Build ID. Returns std::nullopt if nothing was found. 
1646 static std::optional<OwningBinary<Binary>> 1647 fetchBinaryByBuildID(const ObjectFile &Obj) { 1648 object::BuildIDRef BuildID = getBuildID(&Obj); 1649 if (BuildID.empty()) 1650 return std::nullopt; 1651 std::optional<std::string> Path = BIDFetcher->fetch(BuildID); 1652 if (!Path) 1653 return std::nullopt; 1654 Expected<OwningBinary<Binary>> DebugBinary = createBinary(*Path); 1655 if (!DebugBinary) { 1656 reportWarning(toString(DebugBinary.takeError()), *Path); 1657 return std::nullopt; 1658 } 1659 return std::move(*DebugBinary); 1660 } 1661 1662 static void 1663 disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj, 1664 DisassemblerTarget &PrimaryTarget, 1665 std::optional<DisassemblerTarget> &SecondaryTarget, 1666 SourcePrinter &SP, bool InlineRelocs) { 1667 DisassemblerTarget *DT = &PrimaryTarget; 1668 bool PrimaryIsThumb = false; 1669 SmallVector<std::pair<uint64_t, uint64_t>, 0> CHPECodeMap; 1670 1671 if (SecondaryTarget) { 1672 if (isArmElf(Obj)) { 1673 PrimaryIsThumb = 1674 PrimaryTarget.SubtargetInfo->checkFeatures("+thumb-mode"); 1675 } else if (const auto *COFFObj = dyn_cast<COFFObjectFile>(&Obj)) { 1676 const chpe_metadata *CHPEMetadata = COFFObj->getCHPEMetadata(); 1677 if (CHPEMetadata && CHPEMetadata->CodeMapCount) { 1678 uintptr_t CodeMapInt; 1679 cantFail(COFFObj->getRvaPtr(CHPEMetadata->CodeMap, CodeMapInt)); 1680 auto CodeMap = reinterpret_cast<const chpe_range_entry *>(CodeMapInt); 1681 1682 for (uint32_t i = 0; i < CHPEMetadata->CodeMapCount; ++i) { 1683 if (CodeMap[i].getType() == chpe_range_type::Amd64 && 1684 CodeMap[i].Length) { 1685 // Store x86_64 CHPE code ranges. 
1686 uint64_t Start = CodeMap[i].getStart() + COFFObj->getImageBase(); 1687 CHPECodeMap.emplace_back(Start, Start + CodeMap[i].Length); 1688 } 1689 } 1690 llvm::sort(CHPECodeMap); 1691 } 1692 } 1693 } 1694 1695 std::map<SectionRef, std::vector<RelocationRef>> RelocMap; 1696 if (InlineRelocs || Obj.isXCOFF()) 1697 RelocMap = getRelocsMap(Obj); 1698 bool Is64Bits = Obj.getBytesInAddress() > 4; 1699 1700 // Create a mapping from virtual address to symbol name. This is used to 1701 // pretty print the symbols while disassembling. 1702 std::map<SectionRef, SectionSymbolsTy> AllSymbols; 1703 std::map<SectionRef, SmallVector<MappingSymbolPair, 0>> AllMappingSymbols; 1704 SectionSymbolsTy AbsoluteSymbols; 1705 const StringRef FileName = Obj.getFileName(); 1706 const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(&Obj); 1707 for (const SymbolRef &Symbol : Obj.symbols()) { 1708 Expected<StringRef> NameOrErr = Symbol.getName(); 1709 if (!NameOrErr) { 1710 reportWarning(toString(NameOrErr.takeError()), FileName); 1711 continue; 1712 } 1713 if (NameOrErr->empty() && !(Obj.isXCOFF() && SymbolDescription)) 1714 continue; 1715 1716 if (Obj.isELF() && 1717 (cantFail(Symbol.getFlags()) & SymbolRef::SF_FormatSpecific)) { 1718 // Symbol is intended not to be displayed by default (STT_FILE, 1719 // STT_SECTION, or a mapping symbol). Ignore STT_SECTION symbols. We will 1720 // synthesize a section symbol if no symbol is defined at offset 0. 1721 // 1722 // For a mapping symbol, store it within both AllSymbols and 1723 // AllMappingSymbols. If --show-all-symbols is unspecified, its label will 1724 // not be printed in disassembly listing. 
1725 if (getElfSymbolType(Obj, Symbol) != ELF::STT_SECTION && 1726 hasMappingSymbols(Obj)) { 1727 section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName); 1728 if (SecI != Obj.section_end()) { 1729 uint64_t SectionAddr = SecI->getAddress(); 1730 uint64_t Address = cantFail(Symbol.getAddress()); 1731 StringRef Name = *NameOrErr; 1732 if (Name.consume_front("$") && Name.size() && 1733 strchr("adtx", Name[0])) { 1734 AllMappingSymbols[*SecI].emplace_back(Address - SectionAddr, 1735 Name[0]); 1736 AllSymbols[*SecI].push_back( 1737 createSymbolInfo(Obj, Symbol, /*MappingSymbol=*/true)); 1738 } 1739 } 1740 } 1741 continue; 1742 } 1743 1744 if (MachO) { 1745 // __mh_(execute|dylib|dylinker|bundle|preload|object)_header are special 1746 // symbols that support MachO header introspection. They do not bind to 1747 // code locations and are irrelevant for disassembly. 1748 if (NameOrErr->starts_with("__mh_") && NameOrErr->ends_with("_header")) 1749 continue; 1750 // Don't ask a Mach-O STAB symbol for its section unless you know that 1751 // STAB symbol's section field refers to a valid section index. Otherwise 1752 // the symbol may error trying to load a section that does not exist. 1753 DataRefImpl SymDRI = Symbol.getRawDataRefImpl(); 1754 uint8_t NType = (MachO->is64Bit() ? 
1755 MachO->getSymbol64TableEntry(SymDRI).n_type: 1756 MachO->getSymbolTableEntry(SymDRI).n_type); 1757 if (NType & MachO::N_STAB) 1758 continue; 1759 } 1760 1761 section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName); 1762 if (SecI != Obj.section_end()) 1763 AllSymbols[*SecI].push_back(createSymbolInfo(Obj, Symbol)); 1764 else 1765 AbsoluteSymbols.push_back(createSymbolInfo(Obj, Symbol)); 1766 } 1767 1768 if (AllSymbols.empty() && Obj.isELF()) 1769 addDynamicElfSymbols(cast<ELFObjectFileBase>(Obj), AllSymbols); 1770 1771 if (Obj.isWasm()) 1772 addMissingWasmCodeSymbols(cast<WasmObjectFile>(Obj), AllSymbols); 1773 1774 if (Obj.isELF() && Obj.sections().empty()) 1775 createFakeELFSections(Obj); 1776 1777 BumpPtrAllocator A; 1778 StringSaver Saver(A); 1779 addPltEntries(Obj, AllSymbols, Saver); 1780 1781 // Create a mapping from virtual address to section. An empty section can 1782 // cause more than one section at the same address. Sort such sections to be 1783 // before same-addressed non-empty sections so that symbol lookups prefer the 1784 // non-empty section. 1785 std::vector<std::pair<uint64_t, SectionRef>> SectionAddresses; 1786 for (SectionRef Sec : Obj.sections()) 1787 SectionAddresses.emplace_back(Sec.getAddress(), Sec); 1788 llvm::stable_sort(SectionAddresses, [](const auto &LHS, const auto &RHS) { 1789 if (LHS.first != RHS.first) 1790 return LHS.first < RHS.first; 1791 return LHS.second.getSize() < RHS.second.getSize(); 1792 }); 1793 1794 // Linked executables (.exe and .dll files) typically don't include a real 1795 // symbol table but they might contain an export table. 
1796 if (const auto *COFFObj = dyn_cast<COFFObjectFile>(&Obj)) { 1797 for (const auto &ExportEntry : COFFObj->export_directories()) { 1798 StringRef Name; 1799 if (Error E = ExportEntry.getSymbolName(Name)) 1800 reportError(std::move(E), Obj.getFileName()); 1801 if (Name.empty()) 1802 continue; 1803 1804 uint32_t RVA; 1805 if (Error E = ExportEntry.getExportRVA(RVA)) 1806 reportError(std::move(E), Obj.getFileName()); 1807 1808 uint64_t VA = COFFObj->getImageBase() + RVA; 1809 auto Sec = partition_point( 1810 SectionAddresses, [VA](const std::pair<uint64_t, SectionRef> &O) { 1811 return O.first <= VA; 1812 }); 1813 if (Sec != SectionAddresses.begin()) { 1814 --Sec; 1815 AllSymbols[Sec->second].emplace_back(VA, Name, ELF::STT_NOTYPE); 1816 } else 1817 AbsoluteSymbols.emplace_back(VA, Name, ELF::STT_NOTYPE); 1818 } 1819 } 1820 1821 // Sort all the symbols, this allows us to use a simple binary search to find 1822 // Multiple symbols can have the same address. Use a stable sort to stabilize 1823 // the output. 
1824 StringSet<> FoundDisasmSymbolSet; 1825 for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols) 1826 llvm::stable_sort(SecSyms.second); 1827 llvm::stable_sort(AbsoluteSymbols); 1828 1829 std::unique_ptr<DWARFContext> DICtx; 1830 LiveVariablePrinter LVP(*DT->Context->getRegisterInfo(), *DT->SubtargetInfo); 1831 1832 if (DbgVariables != DVDisabled) { 1833 DICtx = DWARFContext::create(DbgObj); 1834 for (const std::unique_ptr<DWARFUnit> &CU : DICtx->compile_units()) 1835 LVP.addCompileUnit(CU->getUnitDIE(false)); 1836 } 1837 1838 LLVM_DEBUG(LVP.dump()); 1839 1840 BBAddrMapInfo FullAddrMap; 1841 auto ReadBBAddrMap = [&](std::optional<unsigned> SectionIndex = 1842 std::nullopt) { 1843 FullAddrMap.clear(); 1844 if (const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj)) { 1845 std::vector<PGOAnalysisMap> PGOAnalyses; 1846 auto BBAddrMapsOrErr = Elf->readBBAddrMap(SectionIndex, &PGOAnalyses); 1847 if (!BBAddrMapsOrErr) { 1848 reportWarning(toString(BBAddrMapsOrErr.takeError()), Obj.getFileName()); 1849 return; 1850 } 1851 for (auto &&[FunctionBBAddrMap, FunctionPGOAnalysis] : 1852 zip_equal(*std::move(BBAddrMapsOrErr), std::move(PGOAnalyses))) { 1853 FullAddrMap.AddFunctionEntry(std::move(FunctionBBAddrMap), 1854 std::move(FunctionPGOAnalysis)); 1855 } 1856 } 1857 }; 1858 1859 // For non-relocatable objects, Read all LLVM_BB_ADDR_MAP sections into a 1860 // single mapping, since they don't have any conflicts. 
1861 if (SymbolizeOperands && !Obj.isRelocatableObject()) 1862 ReadBBAddrMap(); 1863 1864 std::optional<llvm::BTFParser> BTF; 1865 if (InlineRelocs && BTFParser::hasBTFSections(Obj)) { 1866 BTF.emplace(); 1867 BTFParser::ParseOptions Opts = {}; 1868 Opts.LoadTypes = true; 1869 Opts.LoadRelocs = true; 1870 if (Error E = BTF->parse(Obj, Opts)) 1871 WithColor::defaultErrorHandler(std::move(E)); 1872 } 1873 1874 for (const SectionRef &Section : ToolSectionFilter(Obj)) { 1875 if (FilterSections.empty() && !DisassembleAll && 1876 (!Section.isText() || Section.isVirtual())) 1877 continue; 1878 1879 uint64_t SectionAddr = Section.getAddress(); 1880 uint64_t SectSize = Section.getSize(); 1881 if (!SectSize) 1882 continue; 1883 1884 // For relocatable object files, read the LLVM_BB_ADDR_MAP section 1885 // corresponding to this section, if present. 1886 if (SymbolizeOperands && Obj.isRelocatableObject()) 1887 ReadBBAddrMap(Section.getIndex()); 1888 1889 // Get the list of all the symbols in this section. 1890 SectionSymbolsTy &Symbols = AllSymbols[Section]; 1891 auto &MappingSymbols = AllMappingSymbols[Section]; 1892 llvm::sort(MappingSymbols); 1893 1894 ArrayRef<uint8_t> Bytes = arrayRefFromStringRef( 1895 unwrapOrError(Section.getContents(), Obj.getFileName())); 1896 1897 std::vector<std::unique_ptr<std::string>> SynthesizedLabelNames; 1898 if (Obj.isELF() && Obj.getArch() == Triple::amdgcn) { 1899 // AMDGPU disassembler uses symbolizer for printing labels 1900 addSymbolizer(*DT->Context, DT->TheTarget, TripleName, DT->DisAsm.get(), 1901 SectionAddr, Bytes, Symbols, SynthesizedLabelNames); 1902 } 1903 1904 StringRef SegmentName = getSegmentName(MachO, Section); 1905 StringRef SectionName = unwrapOrError(Section.getName(), Obj.getFileName()); 1906 // If the section has no symbol at the start, just insert a dummy one. 1907 // Without --show-all-symbols, also insert one if all symbols at the start 1908 // are mapping symbols. 
1909 bool CreateDummy = Symbols.empty(); 1910 if (!CreateDummy) { 1911 CreateDummy = true; 1912 for (auto &Sym : Symbols) { 1913 if (Sym.Addr != SectionAddr) 1914 break; 1915 if (!Sym.IsMappingSymbol || ShowAllSymbols) 1916 CreateDummy = false; 1917 } 1918 } 1919 if (CreateDummy) { 1920 SymbolInfoTy Sym = createDummySymbolInfo( 1921 Obj, SectionAddr, SectionName, 1922 Section.isText() ? ELF::STT_FUNC : ELF::STT_OBJECT); 1923 if (Obj.isXCOFF()) 1924 Symbols.insert(Symbols.begin(), Sym); 1925 else 1926 Symbols.insert(llvm::lower_bound(Symbols, Sym), Sym); 1927 } 1928 1929 SmallString<40> Comments; 1930 raw_svector_ostream CommentStream(Comments); 1931 1932 uint64_t VMAAdjustment = 0; 1933 if (shouldAdjustVA(Section)) 1934 VMAAdjustment = AdjustVMA; 1935 1936 // In executable and shared objects, r_offset holds a virtual address. 1937 // Subtract SectionAddr from the r_offset field of a relocation to get 1938 // the section offset. 1939 uint64_t RelAdjustment = Obj.isRelocatableObject() ? 0 : SectionAddr; 1940 uint64_t Size; 1941 uint64_t Index; 1942 bool PrintedSection = false; 1943 std::vector<RelocationRef> Rels = RelocMap[Section]; 1944 std::vector<RelocationRef>::const_iterator RelCur = Rels.begin(); 1945 std::vector<RelocationRef>::const_iterator RelEnd = Rels.end(); 1946 1947 // Loop over each chunk of code between two points where at least 1948 // one symbol is defined. 1949 for (size_t SI = 0, SE = Symbols.size(); SI != SE;) { 1950 // Advance SI past all the symbols starting at the same address, 1951 // and make an ArrayRef of them. 1952 unsigned FirstSI = SI; 1953 uint64_t Start = Symbols[SI].Addr; 1954 ArrayRef<SymbolInfoTy> SymbolsHere; 1955 while (SI != SE && Symbols[SI].Addr == Start) 1956 ++SI; 1957 SymbolsHere = ArrayRef<SymbolInfoTy>(&Symbols[FirstSI], SI - FirstSI); 1958 1959 // Get the demangled names of all those symbols. 
We end up with a vector 1960 // of StringRef that holds the names we're going to use, and a vector of 1961 // std::string that stores the new strings returned by demangle(), if 1962 // any. If we don't call demangle() then that vector can stay empty. 1963 std::vector<StringRef> SymNamesHere; 1964 std::vector<std::string> DemangledSymNamesHere; 1965 if (Demangle) { 1966 // Fetch the demangled names and store them locally. 1967 for (const SymbolInfoTy &Symbol : SymbolsHere) 1968 DemangledSymNamesHere.push_back(demangle(Symbol.Name)); 1969 // Now we've finished modifying that vector, it's safe to make 1970 // a vector of StringRefs pointing into it. 1971 SymNamesHere.insert(SymNamesHere.begin(), DemangledSymNamesHere.begin(), 1972 DemangledSymNamesHere.end()); 1973 } else { 1974 for (const SymbolInfoTy &Symbol : SymbolsHere) 1975 SymNamesHere.push_back(Symbol.Name); 1976 } 1977 1978 // Distinguish ELF data from code symbols, which will be used later on to 1979 // decide whether to 'disassemble' this chunk as a data declaration via 1980 // dumpELFData(), or whether to treat it as code. 1981 // 1982 // If data _and_ code symbols are defined at the same address, the code 1983 // takes priority, on the grounds that disassembling code is our main 1984 // purpose here, and it would be a worse failure to _not_ interpret 1985 // something that _was_ meaningful as code than vice versa. 1986 // 1987 // Any ELF symbol type that is not clearly data will be regarded as code. 1988 // In particular, one of the uses of STT_NOTYPE is for branch targets 1989 // inside functions, for which STT_FUNC would be inaccurate. 1990 // 1991 // So here, we spot whether there's any non-data symbol present at all, 1992 // and only set the DisassembleAsELFData flag if there isn't. Also, we use 1993 // this distinction to inform the decision of which symbol to print at 1994 // the head of the section, so that if we're printing code, we print a 1995 // code-related symbol name to go with it. 
1996 bool DisassembleAsELFData = false; 1997 size_t DisplaySymIndex = SymbolsHere.size() - 1; 1998 if (Obj.isELF() && !DisassembleAll && Section.isText()) { 1999 DisassembleAsELFData = true; // unless we find a code symbol below 2000 2001 for (size_t i = 0; i < SymbolsHere.size(); ++i) { 2002 uint8_t SymTy = SymbolsHere[i].Type; 2003 if (SymTy != ELF::STT_OBJECT && SymTy != ELF::STT_COMMON) { 2004 DisassembleAsELFData = false; 2005 DisplaySymIndex = i; 2006 } 2007 } 2008 } 2009 2010 // Decide which symbol(s) from this collection we're going to print. 2011 std::vector<bool> SymsToPrint(SymbolsHere.size(), false); 2012 // If the user has given the --disassemble-symbols option, then we must 2013 // display every symbol in that set, and no others. 2014 if (!DisasmSymbolSet.empty()) { 2015 bool FoundAny = false; 2016 for (size_t i = 0; i < SymbolsHere.size(); ++i) { 2017 if (DisasmSymbolSet.count(SymNamesHere[i])) { 2018 SymsToPrint[i] = true; 2019 FoundAny = true; 2020 } 2021 } 2022 2023 // And if none of the symbols here is one that the user asked for, skip 2024 // disassembling this entire chunk of code. 2025 if (!FoundAny) 2026 continue; 2027 } else if (!SymbolsHere[DisplaySymIndex].IsMappingSymbol) { 2028 // Otherwise, print whichever symbol at this location is last in the 2029 // Symbols array, because that array is pre-sorted in a way intended to 2030 // correlate with priority of which symbol to display. 2031 SymsToPrint[DisplaySymIndex] = true; 2032 } 2033 2034 // Now that we know we're disassembling this section, override the choice 2035 // of which symbols to display by printing _all_ of them at this address 2036 // if the user asked for all symbols. 2037 // 2038 // That way, '--show-all-symbols --disassemble-symbol=foo' will print 2039 // only the chunk of code headed by 'foo', but also show any other 2040 // symbols defined at that address, such as aliases for 'foo', or the ARM 2041 // mapping symbol preceding its code. 
2042 if (ShowAllSymbols) { 2043 for (size_t i = 0; i < SymbolsHere.size(); ++i) 2044 SymsToPrint[i] = true; 2045 } 2046 2047 if (Start < SectionAddr || StopAddress <= Start) 2048 continue; 2049 2050 for (size_t i = 0; i < SymbolsHere.size(); ++i) 2051 FoundDisasmSymbolSet.insert(SymNamesHere[i]); 2052 2053 // The end is the section end, the beginning of the next symbol, or 2054 // --stop-address. 2055 uint64_t End = std::min<uint64_t>(SectionAddr + SectSize, StopAddress); 2056 if (SI < SE) 2057 End = std::min(End, Symbols[SI].Addr); 2058 if (Start >= End || End <= StartAddress) 2059 continue; 2060 Start -= SectionAddr; 2061 End -= SectionAddr; 2062 2063 if (!PrintedSection) { 2064 PrintedSection = true; 2065 outs() << "\nDisassembly of section "; 2066 if (!SegmentName.empty()) 2067 outs() << SegmentName << ","; 2068 outs() << SectionName << ":\n"; 2069 } 2070 2071 bool PrintedLabel = false; 2072 for (size_t i = 0; i < SymbolsHere.size(); ++i) { 2073 if (!SymsToPrint[i]) 2074 continue; 2075 2076 const SymbolInfoTy &Symbol = SymbolsHere[i]; 2077 const StringRef SymbolName = SymNamesHere[i]; 2078 2079 if (!PrintedLabel) { 2080 outs() << '\n'; 2081 PrintedLabel = true; 2082 } 2083 if (LeadingAddr) 2084 outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ", 2085 SectionAddr + Start + VMAAdjustment); 2086 if (Obj.isXCOFF() && SymbolDescription) { 2087 outs() << getXCOFFSymbolDescription(Symbol, SymbolName) << ":\n"; 2088 } else 2089 outs() << '<' << SymbolName << ">:\n"; 2090 } 2091 2092 // Don't print raw contents of a virtual section. A virtual section 2093 // doesn't have any contents in the file. 2094 if (Section.isVirtual()) { 2095 outs() << "...\n"; 2096 continue; 2097 } 2098 2099 // See if any of the symbols defined at this location triggers target- 2100 // specific disassembly behavior, e.g. of special descriptors or function 2101 // prelude information. 
2102 // 2103 // We stop this loop at the first symbol that triggers some kind of 2104 // interesting behavior (if any), on the assumption that if two symbols 2105 // defined at the same address trigger two conflicting symbol handlers, 2106 // the object file is probably confused anyway, and it would make even 2107 // less sense to present the output of _both_ handlers, because that 2108 // would describe the same data twice. 2109 for (size_t SHI = 0; SHI < SymbolsHere.size(); ++SHI) { 2110 SymbolInfoTy Symbol = SymbolsHere[SHI]; 2111 2112 Expected<bool> RespondedOrErr = DT->DisAsm->onSymbolStart( 2113 Symbol, Size, Bytes.slice(Start, End - Start), SectionAddr + Start); 2114 2115 if (RespondedOrErr && !*RespondedOrErr) { 2116 // This symbol didn't trigger any interesting handling. Try the other 2117 // symbols defined at this address. 2118 continue; 2119 } 2120 2121 // If onSymbolStart returned an Error, that means it identified some 2122 // kind of special data at this address, but wasn't able to disassemble 2123 // it meaningfully. So we fall back to printing the error out and 2124 // disassembling the failed region as bytes, assuming that the target 2125 // detected the failure before printing anything. 
2126 if (!RespondedOrErr) { 2127 std::string ErrMsgStr = toString(RespondedOrErr.takeError()); 2128 StringRef ErrMsg = ErrMsgStr; 2129 do { 2130 StringRef Line; 2131 std::tie(Line, ErrMsg) = ErrMsg.split('\n'); 2132 outs() << DT->Context->getAsmInfo()->getCommentString() 2133 << " error decoding " << SymNamesHere[SHI] << ": " << Line 2134 << '\n'; 2135 } while (!ErrMsg.empty()); 2136 2137 if (Size) { 2138 outs() << DT->Context->getAsmInfo()->getCommentString() 2139 << " decoding failed region as bytes\n"; 2140 for (uint64_t I = 0; I < Size; ++I) 2141 outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true) 2142 << '\n'; 2143 } 2144 } 2145 2146 // Regardless of whether onSymbolStart returned an Error or true, 'Size' 2147 // will have been set to the amount of data covered by whatever prologue 2148 // the target identified. So we advance our own position to beyond that. 2149 // Sometimes that will be the entire distance to the next symbol, and 2150 // sometimes it will be just a prologue and we should start 2151 // disassembling instructions from where it left off. 2152 Start += Size; 2153 break; 2154 } 2155 2156 Index = Start; 2157 if (SectionAddr < StartAddress) 2158 Index = std::max<uint64_t>(Index, StartAddress - SectionAddr); 2159 2160 if (DisassembleAsELFData) { 2161 dumpELFData(SectionAddr, Index, End, Bytes); 2162 Index = End; 2163 continue; 2164 } 2165 2166 // Skip relocations from symbols that are not dumped. 
2167 for (; RelCur != RelEnd; ++RelCur) { 2168 uint64_t Offset = RelCur->getOffset() - RelAdjustment; 2169 if (Index <= Offset) 2170 break; 2171 } 2172 2173 bool DumpARMELFData = false; 2174 bool DumpTracebackTableForXCOFFFunction = 2175 Obj.isXCOFF() && Section.isText() && TracebackTable && 2176 Symbols[SI - 1].XCOFFSymInfo.StorageMappingClass && 2177 (*Symbols[SI - 1].XCOFFSymInfo.StorageMappingClass == XCOFF::XMC_PR); 2178 2179 formatted_raw_ostream FOS(outs()); 2180 2181 std::unordered_map<uint64_t, std::string> AllLabels; 2182 std::unordered_map<uint64_t, std::vector<BBAddrMapLabel>> BBAddrMapLabels; 2183 if (SymbolizeOperands) { 2184 collectLocalBranchTargets(Bytes, DT->InstrAnalysis.get(), 2185 DT->DisAsm.get(), DT->InstPrinter.get(), 2186 PrimaryTarget.SubtargetInfo.get(), 2187 SectionAddr, Index, End, AllLabels); 2188 collectBBAddrMapLabels(FullAddrMap, SectionAddr, Index, End, 2189 BBAddrMapLabels); 2190 } 2191 2192 if (DT->InstrAnalysis) 2193 DT->InstrAnalysis->resetState(); 2194 2195 while (Index < End) { 2196 uint64_t RelOffset; 2197 2198 // ARM and AArch64 ELF binaries can interleave data and text in the 2199 // same section. We rely on the markers introduced to understand what 2200 // we need to dump. If the data marker is within a function, it is 2201 // denoted as a word/short etc. 2202 if (!MappingSymbols.empty()) { 2203 char Kind = getMappingSymbolKind(MappingSymbols, Index); 2204 DumpARMELFData = Kind == 'd'; 2205 if (SecondaryTarget) { 2206 if (Kind == 'a') { 2207 DT = PrimaryIsThumb ? &*SecondaryTarget : &PrimaryTarget; 2208 } else if (Kind == 't') { 2209 DT = PrimaryIsThumb ? 
&PrimaryTarget : &*SecondaryTarget; 2210 } 2211 } 2212 } else if (!CHPECodeMap.empty()) { 2213 uint64_t Address = SectionAddr + Index; 2214 auto It = partition_point( 2215 CHPECodeMap, 2216 [Address](const std::pair<uint64_t, uint64_t> &Entry) { 2217 return Entry.first <= Address; 2218 }); 2219 if (It != CHPECodeMap.begin() && Address < (It - 1)->second) { 2220 DT = &*SecondaryTarget; 2221 } else { 2222 DT = &PrimaryTarget; 2223 // X64 disassembler range may have left Index unaligned, so 2224 // make sure that it's aligned when we switch back to ARM64 2225 // code. 2226 Index = llvm::alignTo(Index, 4); 2227 if (Index >= End) 2228 break; 2229 } 2230 } 2231 2232 auto findRel = [&]() { 2233 while (RelCur != RelEnd) { 2234 RelOffset = RelCur->getOffset() - RelAdjustment; 2235 // If this relocation is hidden, skip it. 2236 if (getHidden(*RelCur) || SectionAddr + RelOffset < StartAddress) { 2237 ++RelCur; 2238 continue; 2239 } 2240 2241 // Stop when RelCur's offset is past the disassembled 2242 // instruction/data. 2243 if (RelOffset >= Index + Size) 2244 return false; 2245 if (RelOffset >= Index) 2246 return true; 2247 ++RelCur; 2248 } 2249 return false; 2250 }; 2251 2252 // When -z or --disassemble-zeroes are given we always dissasemble 2253 // them. Otherwise we might want to skip zero bytes we see. 2254 if (!DisassembleZeroes) { 2255 uint64_t MaxOffset = End - Index; 2256 // For --reloc: print zero blocks patched by relocations, so that 2257 // relocations can be shown in the dump. 2258 if (InlineRelocs && RelCur != RelEnd) 2259 MaxOffset = std::min(RelCur->getOffset() - RelAdjustment - Index, 2260 MaxOffset); 2261 2262 if (size_t N = 2263 countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) { 2264 FOS << "\t\t..." 
<< '\n'; 2265 Index += N; 2266 continue; 2267 } 2268 } 2269 2270 if (DumpARMELFData) { 2271 Size = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes, 2272 MappingSymbols, *DT->SubtargetInfo, FOS); 2273 } else { 2274 2275 if (DumpTracebackTableForXCOFFFunction && 2276 doesXCOFFTracebackTableBegin(Bytes.slice(Index, 4))) { 2277 dumpTracebackTable(Bytes.slice(Index), 2278 SectionAddr + Index + VMAAdjustment, FOS, 2279 SectionAddr + End + VMAAdjustment, 2280 *DT->SubtargetInfo, cast<XCOFFObjectFile>(&Obj)); 2281 Index = End; 2282 continue; 2283 } 2284 2285 // Print local label if there's any. 2286 auto Iter1 = BBAddrMapLabels.find(SectionAddr + Index); 2287 if (Iter1 != BBAddrMapLabels.end()) { 2288 for (const auto &BBLabel : Iter1->second) 2289 FOS << "<" << BBLabel.BlockLabel << ">" << BBLabel.PGOAnalysis 2290 << ":\n"; 2291 } else { 2292 auto Iter2 = AllLabels.find(SectionAddr + Index); 2293 if (Iter2 != AllLabels.end()) 2294 FOS << "<" << Iter2->second << ">:\n"; 2295 } 2296 2297 // Disassemble a real instruction or a data when disassemble all is 2298 // provided 2299 MCInst Inst; 2300 ArrayRef<uint8_t> ThisBytes = Bytes.slice(Index); 2301 uint64_t ThisAddr = SectionAddr + Index; 2302 bool Disassembled = DT->DisAsm->getInstruction( 2303 Inst, Size, ThisBytes, ThisAddr, CommentStream); 2304 if (Size == 0) 2305 Size = std::min<uint64_t>( 2306 ThisBytes.size(), 2307 DT->DisAsm->suggestBytesToSkip(ThisBytes, ThisAddr)); 2308 2309 LVP.update({Index, Section.getIndex()}, 2310 {Index + Size, Section.getIndex()}, Index + Size != End); 2311 2312 DT->InstPrinter->setCommentStream(CommentStream); 2313 2314 DT->Printer->printInst( 2315 *DT->InstPrinter, Disassembled ? 
&Inst : nullptr, 2316 Bytes.slice(Index, Size), 2317 {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, FOS, 2318 "", *DT->SubtargetInfo, &SP, Obj.getFileName(), &Rels, LVP); 2319 2320 DT->InstPrinter->setCommentStream(llvm::nulls()); 2321 2322 // If disassembly succeeds, we try to resolve the target address 2323 // (jump target or memory operand address) and print it to the 2324 // right of the instruction. 2325 // 2326 // Otherwise, we don't print anything else so that we avoid 2327 // analyzing invalid or incomplete instruction information. 2328 if (Disassembled && DT->InstrAnalysis) { 2329 llvm::raw_ostream *TargetOS = &FOS; 2330 uint64_t Target; 2331 bool PrintTarget = DT->InstrAnalysis->evaluateBranch( 2332 Inst, SectionAddr + Index, Size, Target); 2333 2334 if (!PrintTarget) { 2335 if (std::optional<uint64_t> MaybeTarget = 2336 DT->InstrAnalysis->evaluateMemoryOperandAddress( 2337 Inst, DT->SubtargetInfo.get(), SectionAddr + Index, 2338 Size)) { 2339 Target = *MaybeTarget; 2340 PrintTarget = true; 2341 // Do not print real address when symbolizing. 2342 if (!SymbolizeOperands) { 2343 // Memory operand addresses are printed as comments. 2344 TargetOS = &CommentStream; 2345 *TargetOS << "0x" << Twine::utohexstr(Target); 2346 } 2347 } 2348 } 2349 2350 if (PrintTarget) { 2351 // In a relocatable object, the target's section must reside in 2352 // the same section as the call instruction or it is accessed 2353 // through a relocation. 2354 // 2355 // In a non-relocatable object, the target may be in any section. 2356 // In that case, locate the section(s) containing the target 2357 // address and find the symbol in one of those, if possible. 2358 // 2359 // N.B. Except for XCOFF, we don't walk the relocations in the 2360 // relocatable case yet. 
2361 std::vector<const SectionSymbolsTy *> TargetSectionSymbols; 2362 if (!Obj.isRelocatableObject()) { 2363 auto It = llvm::partition_point( 2364 SectionAddresses, 2365 [=](const std::pair<uint64_t, SectionRef> &O) { 2366 return O.first <= Target; 2367 }); 2368 uint64_t TargetSecAddr = 0; 2369 while (It != SectionAddresses.begin()) { 2370 --It; 2371 if (TargetSecAddr == 0) 2372 TargetSecAddr = It->first; 2373 if (It->first != TargetSecAddr) 2374 break; 2375 TargetSectionSymbols.push_back(&AllSymbols[It->second]); 2376 } 2377 } else { 2378 TargetSectionSymbols.push_back(&Symbols); 2379 } 2380 TargetSectionSymbols.push_back(&AbsoluteSymbols); 2381 2382 // Find the last symbol in the first candidate section whose 2383 // offset is less than or equal to the target. If there are no 2384 // such symbols, try in the next section and so on, before finally 2385 // using the nearest preceding absolute symbol (if any), if there 2386 // are no other valid symbols. 2387 const SymbolInfoTy *TargetSym = nullptr; 2388 for (const SectionSymbolsTy *TargetSymbols : 2389 TargetSectionSymbols) { 2390 auto It = llvm::partition_point( 2391 *TargetSymbols, 2392 [=](const SymbolInfoTy &O) { return O.Addr <= Target; }); 2393 while (It != TargetSymbols->begin()) { 2394 --It; 2395 // Skip mapping symbols to avoid possible ambiguity as they 2396 // do not allow uniquely identifying the target address. 2397 if (!It->IsMappingSymbol) { 2398 TargetSym = &*It; 2399 break; 2400 } 2401 } 2402 if (TargetSym) 2403 break; 2404 } 2405 2406 // Branch targets are printed just after the instructions. 2407 // Print the labels corresponding to the target if there's any. 2408 bool BBAddrMapLabelAvailable = BBAddrMapLabels.count(Target); 2409 bool LabelAvailable = AllLabels.count(Target); 2410 2411 if (TargetSym != nullptr) { 2412 uint64_t TargetAddress = TargetSym->Addr; 2413 uint64_t Disp = Target - TargetAddress; 2414 std::string TargetName = Demangle ? 
demangle(TargetSym->Name) 2415 : TargetSym->Name.str(); 2416 bool RelFixedUp = false; 2417 SmallString<32> Val; 2418 2419 *TargetOS << " <"; 2420 // On XCOFF, we use relocations, even without -r, so we 2421 // can print the correct name for an extern function call. 2422 if (Obj.isXCOFF() && findRel()) { 2423 // Check for possible branch relocations and 2424 // branches to fixup code. 2425 bool BranchRelocationType = true; 2426 XCOFF::RelocationType RelocType; 2427 if (Obj.is64Bit()) { 2428 const XCOFFRelocation64 *Reloc = 2429 reinterpret_cast<XCOFFRelocation64 *>( 2430 RelCur->getRawDataRefImpl().p); 2431 RelFixedUp = Reloc->isFixupIndicated(); 2432 RelocType = Reloc->Type; 2433 } else { 2434 const XCOFFRelocation32 *Reloc = 2435 reinterpret_cast<XCOFFRelocation32 *>( 2436 RelCur->getRawDataRefImpl().p); 2437 RelFixedUp = Reloc->isFixupIndicated(); 2438 RelocType = Reloc->Type; 2439 } 2440 BranchRelocationType = 2441 RelocType == XCOFF::R_BA || RelocType == XCOFF::R_BR || 2442 RelocType == XCOFF::R_RBA || RelocType == XCOFF::R_RBR; 2443 2444 // If we have a valid relocation, try to print its 2445 // corresponding symbol name. Multiple relocations on the 2446 // same instruction are not handled. 2447 // Branches to fixup code will have the RelFixedUp flag set in 2448 // the RLD. For these instructions, we print the correct 2449 // branch target, but print the referenced symbol as a 2450 // comment. 2451 if (Error E = getRelocationValueString(*RelCur, false, Val)) { 2452 // If -r was used, this error will be printed later. 2453 // Otherwise, we ignore the error and print what 2454 // would have been printed without using relocations. 
2455 consumeError(std::move(E)); 2456 *TargetOS << TargetName; 2457 RelFixedUp = false; // Suppress comment for RLD sym name 2458 } else if (BranchRelocationType && !RelFixedUp) 2459 *TargetOS << Val; 2460 else 2461 *TargetOS << TargetName; 2462 if (Disp) 2463 *TargetOS << "+0x" << Twine::utohexstr(Disp); 2464 } else if (!Disp) { 2465 *TargetOS << TargetName; 2466 } else if (BBAddrMapLabelAvailable) { 2467 *TargetOS << BBAddrMapLabels[Target].front().BlockLabel; 2468 } else if (LabelAvailable) { 2469 *TargetOS << AllLabels[Target]; 2470 } else { 2471 // Always Print the binary symbol plus an offset if there's no 2472 // local label corresponding to the target address. 2473 *TargetOS << TargetName << "+0x" << Twine::utohexstr(Disp); 2474 } 2475 *TargetOS << ">"; 2476 if (RelFixedUp && !InlineRelocs) { 2477 // We have fixup code for a relocation. We print the 2478 // referenced symbol as a comment. 2479 *TargetOS << "\t# " << Val; 2480 } 2481 2482 } else if (BBAddrMapLabelAvailable) { 2483 *TargetOS << " <" << BBAddrMapLabels[Target].front().BlockLabel 2484 << ">"; 2485 } else if (LabelAvailable) { 2486 *TargetOS << " <" << AllLabels[Target] << ">"; 2487 } 2488 // By convention, each record in the comment stream should be 2489 // terminated. 2490 if (TargetOS == &CommentStream) 2491 *TargetOS << "\n"; 2492 } 2493 2494 DT->InstrAnalysis->updateState(Inst, SectionAddr + Index); 2495 } else if (!Disassembled && DT->InstrAnalysis) { 2496 DT->InstrAnalysis->resetState(); 2497 } 2498 } 2499 2500 assert(DT->Context->getAsmInfo()); 2501 emitPostInstructionInfo(FOS, *DT->Context->getAsmInfo(), 2502 *DT->SubtargetInfo, CommentStream.str(), LVP); 2503 Comments.clear(); 2504 2505 if (BTF) 2506 printBTFRelocation(FOS, *BTF, {Index, Section.getIndex()}, LVP); 2507 2508 // Hexagon handles relocs in pretty printer 2509 if (InlineRelocs && Obj.getArch() != Triple::hexagon) { 2510 while (findRel()) { 2511 // When --adjust-vma is used, update the address printed. 
2512 if (RelCur->getSymbol() != Obj.symbol_end()) { 2513 Expected<section_iterator> SymSI = 2514 RelCur->getSymbol()->getSection(); 2515 if (SymSI && *SymSI != Obj.section_end() && 2516 shouldAdjustVA(**SymSI)) 2517 RelOffset += AdjustVMA; 2518 } 2519 2520 printRelocation(FOS, Obj.getFileName(), *RelCur, 2521 SectionAddr + RelOffset, Is64Bits); 2522 LVP.printAfterOtherLine(FOS, true); 2523 ++RelCur; 2524 } 2525 } 2526 2527 Index += Size; 2528 } 2529 } 2530 } 2531 StringSet<> MissingDisasmSymbolSet = 2532 set_difference(DisasmSymbolSet, FoundDisasmSymbolSet); 2533 for (StringRef Sym : MissingDisasmSymbolSet.keys()) 2534 reportWarning("failed to disassemble missing symbol " + Sym, FileName); 2535 } 2536 2537 static void disassembleObject(ObjectFile *Obj, bool InlineRelocs) { 2538 // If information useful for showing the disassembly is missing, try to find a 2539 // more complete binary and disassemble that instead. 2540 OwningBinary<Binary> FetchedBinary; 2541 if (Obj->symbols().empty()) { 2542 if (std::optional<OwningBinary<Binary>> FetchedBinaryOpt = 2543 fetchBinaryByBuildID(*Obj)) { 2544 if (auto *O = dyn_cast<ObjectFile>(FetchedBinaryOpt->getBinary())) { 2545 if (!O->symbols().empty() || 2546 (!O->sections().empty() && Obj->sections().empty())) { 2547 FetchedBinary = std::move(*FetchedBinaryOpt); 2548 Obj = O; 2549 } 2550 } 2551 } 2552 } 2553 2554 const Target *TheTarget = getTarget(Obj); 2555 2556 // Package up features to be passed to target/subtarget 2557 Expected<SubtargetFeatures> FeaturesValue = Obj->getFeatures(); 2558 if (!FeaturesValue) 2559 reportError(FeaturesValue.takeError(), Obj->getFileName()); 2560 SubtargetFeatures Features = *FeaturesValue; 2561 if (!MAttrs.empty()) { 2562 for (unsigned I = 0; I != MAttrs.size(); ++I) 2563 Features.AddFeature(MAttrs[I]); 2564 } else if (MCPU.empty() && Obj->makeTriple().isAArch64()) { 2565 Features.AddFeature("+all"); 2566 } 2567 2568 if (MCPU.empty()) 2569 MCPU = Obj->tryGetCPUName().value_or("").str(); 2570 
2571 if (isArmElf(*Obj)) { 2572 // When disassembling big-endian Arm ELF, the instruction endianness is 2573 // determined in a complex way. In relocatable objects, AAELF32 mandates 2574 // that instruction endianness matches the ELF file endianness; in 2575 // executable images, that's true unless the file header has the EF_ARM_BE8 2576 // flag, in which case instructions are little-endian regardless of data 2577 // endianness. 2578 // 2579 // We must set the big-endian-instructions SubtargetFeature to make the 2580 // disassembler read the instructions the right way round, and also tell 2581 // our own prettyprinter to retrieve the encodings the same way to print in 2582 // hex. 2583 const auto *Elf32BE = dyn_cast<ELF32BEObjectFile>(Obj); 2584 2585 if (Elf32BE && (Elf32BE->isRelocatableObject() || 2586 !(Elf32BE->getPlatformFlags() & ELF::EF_ARM_BE8))) { 2587 Features.AddFeature("+big-endian-instructions"); 2588 ARMPrettyPrinterInst.setInstructionEndianness(llvm::endianness::big); 2589 } else { 2590 ARMPrettyPrinterInst.setInstructionEndianness(llvm::endianness::little); 2591 } 2592 } 2593 2594 DisassemblerTarget PrimaryTarget(TheTarget, *Obj, TripleName, MCPU, Features); 2595 2596 // If we have an ARM object file, we need a second disassembler, because 2597 // ARM CPUs have two different instruction sets: ARM mode, and Thumb mode. 2598 // We use mapping symbols to switch between the two assemblers, where 2599 // appropriate. 
2600 std::optional<DisassemblerTarget> SecondaryTarget; 2601 2602 if (isArmElf(*Obj)) { 2603 if (!PrimaryTarget.SubtargetInfo->checkFeatures("+mclass")) { 2604 if (PrimaryTarget.SubtargetInfo->checkFeatures("+thumb-mode")) 2605 Features.AddFeature("-thumb-mode"); 2606 else 2607 Features.AddFeature("+thumb-mode"); 2608 SecondaryTarget.emplace(PrimaryTarget, Features); 2609 } 2610 } else if (const auto *COFFObj = dyn_cast<COFFObjectFile>(Obj)) { 2611 const chpe_metadata *CHPEMetadata = COFFObj->getCHPEMetadata(); 2612 if (CHPEMetadata && CHPEMetadata->CodeMapCount) { 2613 // Set up x86_64 disassembler for ARM64EC binaries. 2614 Triple X64Triple(TripleName); 2615 X64Triple.setArch(Triple::ArchType::x86_64); 2616 2617 std::string Error; 2618 const Target *X64Target = 2619 TargetRegistry::lookupTarget("", X64Triple, Error); 2620 if (X64Target) { 2621 SubtargetFeatures X64Features; 2622 SecondaryTarget.emplace(X64Target, *Obj, X64Triple.getTriple(), "", 2623 X64Features); 2624 } else { 2625 reportWarning(Error, Obj->getFileName()); 2626 } 2627 } 2628 } 2629 2630 const ObjectFile *DbgObj = Obj; 2631 if (!FetchedBinary.getBinary() && !Obj->hasDebugInfo()) { 2632 if (std::optional<OwningBinary<Binary>> DebugBinaryOpt = 2633 fetchBinaryByBuildID(*Obj)) { 2634 if (auto *FetchedObj = 2635 dyn_cast<const ObjectFile>(DebugBinaryOpt->getBinary())) { 2636 if (FetchedObj->hasDebugInfo()) { 2637 FetchedBinary = std::move(*DebugBinaryOpt); 2638 DbgObj = FetchedObj; 2639 } 2640 } 2641 } 2642 } 2643 2644 std::unique_ptr<object::Binary> DSYMBinary; 2645 std::unique_ptr<MemoryBuffer> DSYMBuf; 2646 if (!DbgObj->hasDebugInfo()) { 2647 if (const MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&*Obj)) { 2648 DbgObj = objdump::getMachODSymObject(MachOOF, Obj->getFileName(), 2649 DSYMBinary, DSYMBuf); 2650 if (!DbgObj) 2651 return; 2652 } 2653 } 2654 2655 SourcePrinter SP(DbgObj, TheTarget->getName()); 2656 2657 for (StringRef Opt : DisassemblerOptions) 2658 if 
(!PrimaryTarget.InstPrinter->applyTargetSpecificCLOption(Opt)) 2659 reportError(Obj->getFileName(), 2660 "Unrecognized disassembler option: " + Opt); 2661 2662 disassembleObject(*Obj, *DbgObj, PrimaryTarget, SecondaryTarget, SP, 2663 InlineRelocs); 2664 } 2665 2666 void Dumper::printRelocations() { 2667 StringRef Fmt = O.getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64; 2668 2669 // Build a mapping from relocation target to a vector of relocation 2670 // sections. Usually, there is an only one relocation section for 2671 // each relocated section. 2672 MapVector<SectionRef, std::vector<SectionRef>> SecToRelSec; 2673 uint64_t Ndx; 2674 for (const SectionRef &Section : ToolSectionFilter(O, &Ndx)) { 2675 if (O.isELF() && (ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC)) 2676 continue; 2677 if (Section.relocation_begin() == Section.relocation_end()) 2678 continue; 2679 Expected<section_iterator> SecOrErr = Section.getRelocatedSection(); 2680 if (!SecOrErr) 2681 reportError(O.getFileName(), 2682 "section (" + Twine(Ndx) + 2683 "): unable to get a relocation target: " + 2684 toString(SecOrErr.takeError())); 2685 SecToRelSec[**SecOrErr].push_back(Section); 2686 } 2687 2688 for (std::pair<SectionRef, std::vector<SectionRef>> &P : SecToRelSec) { 2689 StringRef SecName = unwrapOrError(P.first.getName(), O.getFileName()); 2690 outs() << "\nRELOCATION RECORDS FOR [" << SecName << "]:\n"; 2691 uint32_t OffsetPadding = (O.getBytesInAddress() > 4 ? 16 : 8); 2692 uint32_t TypePadding = 24; 2693 outs() << left_justify("OFFSET", OffsetPadding) << " " 2694 << left_justify("TYPE", TypePadding) << " " 2695 << "VALUE\n"; 2696 2697 for (SectionRef Section : P.second) { 2698 // CREL sections require decoding, each section may have its own specific 2699 // decode problems. 
2700 if (O.isELF() && ELFSectionRef(Section).getType() == ELF::SHT_CREL) { 2701 StringRef Err = 2702 cast<const ELFObjectFileBase>(O).getCrelDecodeProblem(Section); 2703 if (!Err.empty()) { 2704 reportUniqueWarning(Err); 2705 continue; 2706 } 2707 } 2708 for (const RelocationRef &Reloc : Section.relocations()) { 2709 uint64_t Address = Reloc.getOffset(); 2710 SmallString<32> RelocName; 2711 SmallString<32> ValueStr; 2712 if (Address < StartAddress || Address > StopAddress || getHidden(Reloc)) 2713 continue; 2714 Reloc.getTypeName(RelocName); 2715 if (Error E = 2716 getRelocationValueString(Reloc, SymbolDescription, ValueStr)) 2717 reportUniqueWarning(std::move(E)); 2718 2719 outs() << format(Fmt.data(), Address) << " " 2720 << left_justify(RelocName, TypePadding) << " " << ValueStr 2721 << "\n"; 2722 } 2723 } 2724 } 2725 } 2726 2727 // Returns true if we need to show LMA column when dumping section headers. We 2728 // show it only when the platform is ELF and either we have at least one section 2729 // whose VMA and LMA are different and/or when --show-lma flag is used. 2730 static bool shouldDisplayLMA(const ObjectFile &Obj) { 2731 if (!Obj.isELF()) 2732 return false; 2733 for (const SectionRef &S : ToolSectionFilter(Obj)) 2734 if (S.getAddress() != getELFSectionLMA(S)) 2735 return true; 2736 return ShowLMA; 2737 } 2738 2739 static size_t getMaxSectionNameWidth(const ObjectFile &Obj) { 2740 // Default column width for names is 13 even if no names are that long. 
2741 size_t MaxWidth = 13; 2742 for (const SectionRef &Section : ToolSectionFilter(Obj)) { 2743 StringRef Name = unwrapOrError(Section.getName(), Obj.getFileName()); 2744 MaxWidth = std::max(MaxWidth, Name.size()); 2745 } 2746 return MaxWidth; 2747 } 2748 2749 void objdump::printSectionHeaders(ObjectFile &Obj) { 2750 if (Obj.isELF() && Obj.sections().empty()) 2751 createFakeELFSections(Obj); 2752 2753 size_t NameWidth = getMaxSectionNameWidth(Obj); 2754 size_t AddressWidth = 2 * Obj.getBytesInAddress(); 2755 bool HasLMAColumn = shouldDisplayLMA(Obj); 2756 outs() << "\nSections:\n"; 2757 if (HasLMAColumn) 2758 outs() << "Idx " << left_justify("Name", NameWidth) << " Size " 2759 << left_justify("VMA", AddressWidth) << " " 2760 << left_justify("LMA", AddressWidth) << " Type\n"; 2761 else 2762 outs() << "Idx " << left_justify("Name", NameWidth) << " Size " 2763 << left_justify("VMA", AddressWidth) << " Type\n"; 2764 2765 uint64_t Idx; 2766 for (const SectionRef &Section : ToolSectionFilter(Obj, &Idx)) { 2767 StringRef Name = unwrapOrError(Section.getName(), Obj.getFileName()); 2768 uint64_t VMA = Section.getAddress(); 2769 if (shouldAdjustVA(Section)) 2770 VMA += AdjustVMA; 2771 2772 uint64_t Size = Section.getSize(); 2773 2774 std::string Type = Section.isText() ? "TEXT" : ""; 2775 if (Section.isData()) 2776 Type += Type.empty() ? "DATA" : ", DATA"; 2777 if (Section.isBSS()) 2778 Type += Type.empty() ? "BSS" : ", BSS"; 2779 if (Section.isDebugSection()) 2780 Type += Type.empty() ? 
"DEBUG" : ", DEBUG"; 2781 2782 if (HasLMAColumn) 2783 outs() << format("%3" PRIu64 " %-*s %08" PRIx64 " ", Idx, NameWidth, 2784 Name.str().c_str(), Size) 2785 << format_hex_no_prefix(VMA, AddressWidth) << " " 2786 << format_hex_no_prefix(getELFSectionLMA(Section), AddressWidth) 2787 << " " << Type << "\n"; 2788 else 2789 outs() << format("%3" PRIu64 " %-*s %08" PRIx64 " ", Idx, NameWidth, 2790 Name.str().c_str(), Size) 2791 << format_hex_no_prefix(VMA, AddressWidth) << " " << Type << "\n"; 2792 } 2793 } 2794 2795 void objdump::printSectionContents(const ObjectFile *Obj) { 2796 const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj); 2797 2798 for (const SectionRef &Section : ToolSectionFilter(*Obj)) { 2799 StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName()); 2800 uint64_t BaseAddr = Section.getAddress(); 2801 uint64_t Size = Section.getSize(); 2802 if (!Size) 2803 continue; 2804 2805 outs() << "Contents of section "; 2806 StringRef SegmentName = getSegmentName(MachO, Section); 2807 if (!SegmentName.empty()) 2808 outs() << SegmentName << ","; 2809 outs() << Name << ":\n"; 2810 if (Section.isBSS()) { 2811 outs() << format("<skipping contents of bss section at [%04" PRIx64 2812 ", %04" PRIx64 ")>\n", 2813 BaseAddr, BaseAddr + Size); 2814 continue; 2815 } 2816 2817 StringRef Contents = unwrapOrError(Section.getContents(), Obj->getFileName()); 2818 2819 // Dump out the content as hex and printable ascii characters. 2820 for (std::size_t Addr = 0, End = Contents.size(); Addr < End; Addr += 16) { 2821 outs() << format(" %04" PRIx64 " ", BaseAddr + Addr); 2822 // Dump line of hex. 2823 for (std::size_t I = 0; I < 16; ++I) { 2824 if (I != 0 && I % 4 == 0) 2825 outs() << ' '; 2826 if (Addr + I < End) 2827 outs() << hexdigit((Contents[Addr + I] >> 4) & 0xF, true) 2828 << hexdigit(Contents[Addr + I] & 0xF, true); 2829 else 2830 outs() << " "; 2831 } 2832 // Print ascii. 
2833 outs() << " "; 2834 for (std::size_t I = 0; I < 16 && Addr + I < End; ++I) { 2835 if (isPrint(static_cast<unsigned char>(Contents[Addr + I]) & 0xFF)) 2836 outs() << Contents[Addr + I]; 2837 else 2838 outs() << "."; 2839 } 2840 outs() << "\n"; 2841 } 2842 } 2843 } 2844 2845 void Dumper::printSymbolTable(StringRef ArchiveName, StringRef ArchitectureName, 2846 bool DumpDynamic) { 2847 if (O.isCOFF() && !DumpDynamic) { 2848 outs() << "\nSYMBOL TABLE:\n"; 2849 printCOFFSymbolTable(cast<const COFFObjectFile>(O)); 2850 return; 2851 } 2852 2853 const StringRef FileName = O.getFileName(); 2854 2855 if (!DumpDynamic) { 2856 outs() << "\nSYMBOL TABLE:\n"; 2857 for (auto I = O.symbol_begin(); I != O.symbol_end(); ++I) 2858 printSymbol(*I, {}, FileName, ArchiveName, ArchitectureName, DumpDynamic); 2859 return; 2860 } 2861 2862 outs() << "\nDYNAMIC SYMBOL TABLE:\n"; 2863 if (!O.isELF()) { 2864 reportWarning( 2865 "this operation is not currently supported for this file format", 2866 FileName); 2867 return; 2868 } 2869 2870 const ELFObjectFileBase *ELF = cast<const ELFObjectFileBase>(&O); 2871 auto Symbols = ELF->getDynamicSymbolIterators(); 2872 Expected<std::vector<VersionEntry>> SymbolVersionsOrErr = 2873 ELF->readDynsymVersions(); 2874 if (!SymbolVersionsOrErr) { 2875 reportWarning(toString(SymbolVersionsOrErr.takeError()), FileName); 2876 SymbolVersionsOrErr = std::vector<VersionEntry>(); 2877 (void)!SymbolVersionsOrErr; 2878 } 2879 for (auto &Sym : Symbols) 2880 printSymbol(Sym, *SymbolVersionsOrErr, FileName, ArchiveName, 2881 ArchitectureName, DumpDynamic); 2882 } 2883 2884 void Dumper::printSymbol(const SymbolRef &Symbol, 2885 ArrayRef<VersionEntry> SymbolVersions, 2886 StringRef FileName, StringRef ArchiveName, 2887 StringRef ArchitectureName, bool DumpDynamic) { 2888 const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(&O); 2889 Expected<uint64_t> AddrOrErr = Symbol.getAddress(); 2890 if (!AddrOrErr) { 2891 reportUniqueWarning(AddrOrErr.takeError()); 
2892 return; 2893 } 2894 2895 // Don't ask a Mach-O STAB symbol for its section unless you know that 2896 // STAB symbol's section field refers to a valid section index. Otherwise 2897 // the symbol may error trying to load a section that does not exist. 2898 bool IsSTAB = false; 2899 if (MachO) { 2900 DataRefImpl SymDRI = Symbol.getRawDataRefImpl(); 2901 uint8_t NType = 2902 (MachO->is64Bit() ? MachO->getSymbol64TableEntry(SymDRI).n_type 2903 : MachO->getSymbolTableEntry(SymDRI).n_type); 2904 if (NType & MachO::N_STAB) 2905 IsSTAB = true; 2906 } 2907 section_iterator Section = IsSTAB 2908 ? O.section_end() 2909 : unwrapOrError(Symbol.getSection(), FileName, 2910 ArchiveName, ArchitectureName); 2911 2912 uint64_t Address = *AddrOrErr; 2913 if (Section != O.section_end() && shouldAdjustVA(*Section)) 2914 Address += AdjustVMA; 2915 if ((Address < StartAddress) || (Address > StopAddress)) 2916 return; 2917 SymbolRef::Type Type = 2918 unwrapOrError(Symbol.getType(), FileName, ArchiveName, ArchitectureName); 2919 uint32_t Flags = 2920 unwrapOrError(Symbol.getFlags(), FileName, ArchiveName, ArchitectureName); 2921 2922 StringRef Name; 2923 if (Type == SymbolRef::ST_Debug && Section != O.section_end()) { 2924 if (Expected<StringRef> NameOrErr = Section->getName()) 2925 Name = *NameOrErr; 2926 else 2927 consumeError(NameOrErr.takeError()); 2928 2929 } else { 2930 Name = unwrapOrError(Symbol.getName(), FileName, ArchiveName, 2931 ArchitectureName); 2932 } 2933 2934 bool Global = Flags & SymbolRef::SF_Global; 2935 bool Weak = Flags & SymbolRef::SF_Weak; 2936 bool Absolute = Flags & SymbolRef::SF_Absolute; 2937 bool Common = Flags & SymbolRef::SF_Common; 2938 bool Hidden = Flags & SymbolRef::SF_Hidden; 2939 2940 char GlobLoc = ' '; 2941 if ((Section != O.section_end() || Absolute) && !Weak) 2942 GlobLoc = Global ? 
'g' : 'l'; 2943 char IFunc = ' '; 2944 if (O.isELF()) { 2945 if (ELFSymbolRef(Symbol).getELFType() == ELF::STT_GNU_IFUNC) 2946 IFunc = 'i'; 2947 if (ELFSymbolRef(Symbol).getBinding() == ELF::STB_GNU_UNIQUE) 2948 GlobLoc = 'u'; 2949 } 2950 2951 char Debug = ' '; 2952 if (DumpDynamic) 2953 Debug = 'D'; 2954 else if (Type == SymbolRef::ST_Debug || Type == SymbolRef::ST_File) 2955 Debug = 'd'; 2956 2957 char FileFunc = ' '; 2958 if (Type == SymbolRef::ST_File) 2959 FileFunc = 'f'; 2960 else if (Type == SymbolRef::ST_Function) 2961 FileFunc = 'F'; 2962 else if (Type == SymbolRef::ST_Data) 2963 FileFunc = 'O'; 2964 2965 const char *Fmt = O.getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64; 2966 2967 outs() << format(Fmt, Address) << " " 2968 << GlobLoc // Local -> 'l', Global -> 'g', Neither -> ' ' 2969 << (Weak ? 'w' : ' ') // Weak? 2970 << ' ' // Constructor. Not supported yet. 2971 << ' ' // Warning. Not supported yet. 2972 << IFunc // Indirect reference to another symbol. 2973 << Debug // Debugging (d) or dynamic (D) symbol. 2974 << FileFunc // Name of function (F), file (f) or object (O). 
2975 << ' '; 2976 if (Absolute) { 2977 outs() << "*ABS*"; 2978 } else if (Common) { 2979 outs() << "*COM*"; 2980 } else if (Section == O.section_end()) { 2981 if (O.isXCOFF()) { 2982 XCOFFSymbolRef XCOFFSym = cast<const XCOFFObjectFile>(O).toSymbolRef( 2983 Symbol.getRawDataRefImpl()); 2984 if (XCOFF::N_DEBUG == XCOFFSym.getSectionNumber()) 2985 outs() << "*DEBUG*"; 2986 else 2987 outs() << "*UND*"; 2988 } else 2989 outs() << "*UND*"; 2990 } else { 2991 StringRef SegmentName = getSegmentName(MachO, *Section); 2992 if (!SegmentName.empty()) 2993 outs() << SegmentName << ","; 2994 StringRef SectionName = unwrapOrError(Section->getName(), FileName); 2995 outs() << SectionName; 2996 if (O.isXCOFF()) { 2997 std::optional<SymbolRef> SymRef = 2998 getXCOFFSymbolContainingSymbolRef(cast<XCOFFObjectFile>(O), Symbol); 2999 if (SymRef) { 3000 3001 Expected<StringRef> NameOrErr = SymRef->getName(); 3002 3003 if (NameOrErr) { 3004 outs() << " (csect:"; 3005 std::string SymName = 3006 Demangle ? demangle(*NameOrErr) : NameOrErr->str(); 3007 3008 if (SymbolDescription) 3009 SymName = getXCOFFSymbolDescription(createSymbolInfo(O, *SymRef), 3010 SymName); 3011 3012 outs() << ' ' << SymName; 3013 outs() << ") "; 3014 } else 3015 reportWarning(toString(NameOrErr.takeError()), FileName); 3016 } 3017 } 3018 } 3019 3020 if (Common) 3021 outs() << '\t' << format(Fmt, static_cast<uint64_t>(Symbol.getAlignment())); 3022 else if (O.isXCOFF()) 3023 outs() << '\t' 3024 << format(Fmt, cast<XCOFFObjectFile>(O).getSymbolSize( 3025 Symbol.getRawDataRefImpl())); 3026 else if (O.isELF()) 3027 outs() << '\t' << format(Fmt, ELFSymbolRef(Symbol).getSize()); 3028 else if (O.isWasm()) 3029 outs() << '\t' 3030 << format(Fmt, static_cast<uint64_t>( 3031 cast<WasmObjectFile>(O).getSymbolSize(Symbol))); 3032 3033 if (O.isELF()) { 3034 if (!SymbolVersions.empty()) { 3035 const VersionEntry &Ver = 3036 SymbolVersions[Symbol.getRawDataRefImpl().d.b - 1]; 3037 std::string Str; 3038 if (!Ver.Name.empty()) 3039 
Str = Ver.IsVerDef ? ' ' + Ver.Name : '(' + Ver.Name + ')'; 3040 outs() << ' ' << left_justify(Str, 12); 3041 } 3042 3043 uint8_t Other = ELFSymbolRef(Symbol).getOther(); 3044 switch (Other) { 3045 case ELF::STV_DEFAULT: 3046 break; 3047 case ELF::STV_INTERNAL: 3048 outs() << " .internal"; 3049 break; 3050 case ELF::STV_HIDDEN: 3051 outs() << " .hidden"; 3052 break; 3053 case ELF::STV_PROTECTED: 3054 outs() << " .protected"; 3055 break; 3056 default: 3057 outs() << format(" 0x%02x", Other); 3058 break; 3059 } 3060 } else if (Hidden) { 3061 outs() << " .hidden"; 3062 } 3063 3064 std::string SymName = Demangle ? demangle(Name) : Name.str(); 3065 if (O.isXCOFF() && SymbolDescription) 3066 SymName = getXCOFFSymbolDescription(createSymbolInfo(O, Symbol), SymName); 3067 3068 outs() << ' ' << SymName << '\n'; 3069 } 3070 3071 static void printUnwindInfo(const ObjectFile *O) { 3072 outs() << "Unwind info:\n\n"; 3073 3074 if (const COFFObjectFile *Coff = dyn_cast<COFFObjectFile>(O)) 3075 printCOFFUnwindInfo(Coff); 3076 else if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(O)) 3077 printMachOUnwindInfo(MachO); 3078 else 3079 // TODO: Extract DWARF dump tool to objdump. 3080 WithColor::error(errs(), ToolName) 3081 << "This operation is only currently supported " 3082 "for COFF and MachO object files.\n"; 3083 } 3084 3085 /// Dump the raw contents of the __clangast section so the output can be piped 3086 /// into llvm-bcanalyzer. 
3087 static void printRawClangAST(const ObjectFile *Obj) { 3088 if (outs().is_displayed()) { 3089 WithColor::error(errs(), ToolName) 3090 << "The -raw-clang-ast option will dump the raw binary contents of " 3091 "the clang ast section.\n" 3092 "Please redirect the output to a file or another program such as " 3093 "llvm-bcanalyzer.\n"; 3094 return; 3095 } 3096 3097 StringRef ClangASTSectionName("__clangast"); 3098 if (Obj->isCOFF()) { 3099 ClangASTSectionName = "clangast"; 3100 } 3101 3102 std::optional<object::SectionRef> ClangASTSection; 3103 for (auto Sec : ToolSectionFilter(*Obj)) { 3104 StringRef Name; 3105 if (Expected<StringRef> NameOrErr = Sec.getName()) 3106 Name = *NameOrErr; 3107 else 3108 consumeError(NameOrErr.takeError()); 3109 3110 if (Name == ClangASTSectionName) { 3111 ClangASTSection = Sec; 3112 break; 3113 } 3114 } 3115 if (!ClangASTSection) 3116 return; 3117 3118 StringRef ClangASTContents = 3119 unwrapOrError(ClangASTSection->getContents(), Obj->getFileName()); 3120 outs().write(ClangASTContents.data(), ClangASTContents.size()); 3121 } 3122 3123 static void printFaultMaps(const ObjectFile *Obj) { 3124 StringRef FaultMapSectionName; 3125 3126 if (Obj->isELF()) { 3127 FaultMapSectionName = ".llvm_faultmaps"; 3128 } else if (Obj->isMachO()) { 3129 FaultMapSectionName = "__llvm_faultmaps"; 3130 } else { 3131 WithColor::error(errs(), ToolName) 3132 << "This operation is only currently supported " 3133 "for ELF and Mach-O executable files.\n"; 3134 return; 3135 } 3136 3137 std::optional<object::SectionRef> FaultMapSection; 3138 3139 for (auto Sec : ToolSectionFilter(*Obj)) { 3140 StringRef Name; 3141 if (Expected<StringRef> NameOrErr = Sec.getName()) 3142 Name = *NameOrErr; 3143 else 3144 consumeError(NameOrErr.takeError()); 3145 3146 if (Name == FaultMapSectionName) { 3147 FaultMapSection = Sec; 3148 break; 3149 } 3150 } 3151 3152 outs() << "FaultMap table:\n"; 3153 3154 if (!FaultMapSection) { 3155 outs() << "<not found>\n"; 3156 return; 3157 } 
3158 3159 StringRef FaultMapContents = 3160 unwrapOrError(FaultMapSection->getContents(), Obj->getFileName()); 3161 FaultMapParser FMP(FaultMapContents.bytes_begin(), 3162 FaultMapContents.bytes_end()); 3163 3164 outs() << FMP; 3165 } 3166 3167 void Dumper::printPrivateHeaders() { 3168 reportError(O.getFileName(), "Invalid/Unsupported object file format"); 3169 } 3170 3171 static void printFileHeaders(const ObjectFile *O) { 3172 if (!O->isELF() && !O->isCOFF() && !O->isXCOFF()) 3173 reportError(O->getFileName(), "Invalid/Unsupported object file format"); 3174 3175 Triple::ArchType AT = O->getArch(); 3176 outs() << "architecture: " << Triple::getArchTypeName(AT) << "\n"; 3177 uint64_t Address = unwrapOrError(O->getStartAddress(), O->getFileName()); 3178 3179 StringRef Fmt = O->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64; 3180 outs() << "start address: " 3181 << "0x" << format(Fmt.data(), Address) << "\n"; 3182 } 3183 3184 static void printArchiveChild(StringRef Filename, const Archive::Child &C) { 3185 Expected<sys::fs::perms> ModeOrErr = C.getAccessMode(); 3186 if (!ModeOrErr) { 3187 WithColor::error(errs(), ToolName) << "ill-formed archive entry.\n"; 3188 consumeError(ModeOrErr.takeError()); 3189 return; 3190 } 3191 sys::fs::perms Mode = ModeOrErr.get(); 3192 outs() << ((Mode & sys::fs::owner_read) ? "r" : "-"); 3193 outs() << ((Mode & sys::fs::owner_write) ? "w" : "-"); 3194 outs() << ((Mode & sys::fs::owner_exe) ? "x" : "-"); 3195 outs() << ((Mode & sys::fs::group_read) ? "r" : "-"); 3196 outs() << ((Mode & sys::fs::group_write) ? "w" : "-"); 3197 outs() << ((Mode & sys::fs::group_exe) ? "x" : "-"); 3198 outs() << ((Mode & sys::fs::others_read) ? "r" : "-"); 3199 outs() << ((Mode & sys::fs::others_write) ? "w" : "-"); 3200 outs() << ((Mode & sys::fs::others_exe) ? 
"x" : "-"); 3201 3202 outs() << " "; 3203 3204 outs() << format("%d/%d %6" PRId64 " ", unwrapOrError(C.getUID(), Filename), 3205 unwrapOrError(C.getGID(), Filename), 3206 unwrapOrError(C.getRawSize(), Filename)); 3207 3208 StringRef RawLastModified = C.getRawLastModified(); 3209 unsigned Seconds; 3210 if (RawLastModified.getAsInteger(10, Seconds)) 3211 outs() << "(date: \"" << RawLastModified 3212 << "\" contains non-decimal chars) "; 3213 else { 3214 // Since ctime(3) returns a 26 character string of the form: 3215 // "Sun Sep 16 01:03:52 1973\n\0" 3216 // just print 24 characters. 3217 time_t t = Seconds; 3218 outs() << format("%.24s ", ctime(&t)); 3219 } 3220 3221 StringRef Name = ""; 3222 Expected<StringRef> NameOrErr = C.getName(); 3223 if (!NameOrErr) { 3224 consumeError(NameOrErr.takeError()); 3225 Name = unwrapOrError(C.getRawName(), Filename); 3226 } else { 3227 Name = NameOrErr.get(); 3228 } 3229 outs() << Name << "\n"; 3230 } 3231 3232 // For ELF only now. 3233 static bool shouldWarnForInvalidStartStopAddress(ObjectFile *Obj) { 3234 if (const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj)) { 3235 if (Elf->getEType() != ELF::ET_REL) 3236 return true; 3237 } 3238 return false; 3239 } 3240 3241 static void checkForInvalidStartStopAddress(ObjectFile *Obj, 3242 uint64_t Start, uint64_t Stop) { 3243 if (!shouldWarnForInvalidStartStopAddress(Obj)) 3244 return; 3245 3246 for (const SectionRef &Section : Obj->sections()) 3247 if (ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC) { 3248 uint64_t BaseAddr = Section.getAddress(); 3249 uint64_t Size = Section.getSize(); 3250 if ((Start < BaseAddr + Size) && Stop > BaseAddr) 3251 return; 3252 } 3253 3254 if (!HasStartAddressFlag) 3255 reportWarning("no section has address less than 0x" + 3256 Twine::utohexstr(Stop) + " specified by --stop-address", 3257 Obj->getFileName()); 3258 else if (!HasStopAddressFlag) 3259 reportWarning("no section has address greater than or equal to 0x" + 3260 Twine::utohexstr(Start) + " 
specified by --start-address", 3261 Obj->getFileName()); 3262 else 3263 reportWarning("no section overlaps the range [0x" + 3264 Twine::utohexstr(Start) + ",0x" + Twine::utohexstr(Stop) + 3265 ") specified by --start-address/--stop-address", 3266 Obj->getFileName()); 3267 } 3268 3269 static void dumpObject(ObjectFile *O, const Archive *A = nullptr, 3270 const Archive::Child *C = nullptr) { 3271 Expected<std::unique_ptr<Dumper>> DumperOrErr = createDumper(*O); 3272 if (!DumperOrErr) { 3273 reportError(DumperOrErr.takeError(), O->getFileName(), 3274 A ? A->getFileName() : ""); 3275 return; 3276 } 3277 Dumper &D = **DumperOrErr; 3278 3279 // Avoid other output when using a raw option. 3280 if (!RawClangAST) { 3281 outs() << '\n'; 3282 if (A) 3283 outs() << A->getFileName() << "(" << O->getFileName() << ")"; 3284 else 3285 outs() << O->getFileName(); 3286 outs() << ":\tfile format " << O->getFileFormatName().lower() << "\n"; 3287 } 3288 3289 if (HasStartAddressFlag || HasStopAddressFlag) 3290 checkForInvalidStartStopAddress(O, StartAddress, StopAddress); 3291 3292 // TODO: Change print* free functions to Dumper member functions to utilitize 3293 // stateful functions like reportUniqueWarning. 3294 3295 // Note: the order here matches GNU objdump for compatability. 3296 StringRef ArchiveName = A ? A->getFileName() : ""; 3297 if (ArchiveHeaders && !MachOOpt && C) 3298 printArchiveChild(ArchiveName, *C); 3299 if (FileHeaders) 3300 printFileHeaders(O); 3301 if (PrivateHeaders || FirstPrivateHeader) 3302 D.printPrivateHeaders(); 3303 if (SectionHeaders) 3304 printSectionHeaders(*O); 3305 if (SymbolTable) 3306 D.printSymbolTable(ArchiveName); 3307 if (DynamicSymbolTable) 3308 D.printSymbolTable(ArchiveName, /*ArchitectureName=*/"", 3309 /*DumpDynamic=*/true); 3310 if (DwarfDumpType != DIDT_Null) { 3311 std::unique_ptr<DIContext> DICtx = DWARFContext::create(*O); 3312 // Dump the complete DWARF structure. 
3313 DIDumpOptions DumpOpts; 3314 DumpOpts.DumpType = DwarfDumpType; 3315 DICtx->dump(outs(), DumpOpts); 3316 } 3317 if (Relocations && !Disassemble) 3318 D.printRelocations(); 3319 if (DynamicRelocations) 3320 D.printDynamicRelocations(); 3321 if (SectionContents) 3322 printSectionContents(O); 3323 if (Disassemble) 3324 disassembleObject(O, Relocations); 3325 if (UnwindInfo) 3326 printUnwindInfo(O); 3327 3328 // Mach-O specific options: 3329 if (ExportsTrie) 3330 printExportsTrie(O); 3331 if (Rebase) 3332 printRebaseTable(O); 3333 if (Bind) 3334 printBindTable(O); 3335 if (LazyBind) 3336 printLazyBindTable(O); 3337 if (WeakBind) 3338 printWeakBindTable(O); 3339 3340 // Other special sections: 3341 if (RawClangAST) 3342 printRawClangAST(O); 3343 if (FaultMapSection) 3344 printFaultMaps(O); 3345 if (Offloading) 3346 dumpOffloadBinary(*O); 3347 } 3348 3349 static void dumpObject(const COFFImportFile *I, const Archive *A, 3350 const Archive::Child *C = nullptr) { 3351 StringRef ArchiveName = A ? A->getFileName() : ""; 3352 3353 // Avoid other output when using a raw option. 
3354 if (!RawClangAST) 3355 outs() << '\n' 3356 << ArchiveName << "(" << I->getFileName() << ")" 3357 << ":\tfile format COFF-import-file" 3358 << "\n\n"; 3359 3360 if (ArchiveHeaders && !MachOOpt && C) 3361 printArchiveChild(ArchiveName, *C); 3362 if (SymbolTable) 3363 printCOFFSymbolTable(*I); 3364 } 3365 3366 /// Dump each object file in \a a; 3367 static void dumpArchive(const Archive *A) { 3368 Error Err = Error::success(); 3369 unsigned I = -1; 3370 for (auto &C : A->children(Err)) { 3371 ++I; 3372 Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary(); 3373 if (!ChildOrErr) { 3374 if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) 3375 reportError(std::move(E), getFileNameForError(C, I), A->getFileName()); 3376 continue; 3377 } 3378 if (ObjectFile *O = dyn_cast<ObjectFile>(&*ChildOrErr.get())) 3379 dumpObject(O, A, &C); 3380 else if (COFFImportFile *I = dyn_cast<COFFImportFile>(&*ChildOrErr.get())) 3381 dumpObject(I, A, &C); 3382 else 3383 reportError(errorCodeToError(object_error::invalid_file_type), 3384 A->getFileName()); 3385 } 3386 if (Err) 3387 reportError(std::move(Err), A->getFileName()); 3388 } 3389 3390 /// Open file and figure out how to dump it. 3391 static void dumpInput(StringRef file) { 3392 // If we are using the Mach-O specific object file parser, then let it parse 3393 // the file and process the command line options. So the -arch flags can 3394 // be used to select specific slices, etc. 3395 if (MachOOpt) { 3396 parseInputMachO(file); 3397 return; 3398 } 3399 3400 // Attempt to open the binary. 
3401 OwningBinary<Binary> OBinary = unwrapOrError(createBinary(file), file); 3402 Binary &Binary = *OBinary.getBinary(); 3403 3404 if (Archive *A = dyn_cast<Archive>(&Binary)) 3405 dumpArchive(A); 3406 else if (ObjectFile *O = dyn_cast<ObjectFile>(&Binary)) 3407 dumpObject(O); 3408 else if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Binary)) 3409 parseInputMachO(UB); 3410 else if (OffloadBinary *OB = dyn_cast<OffloadBinary>(&Binary)) 3411 dumpOffloadSections(*OB); 3412 else 3413 reportError(errorCodeToError(object_error::invalid_file_type), file); 3414 } 3415 3416 template <typename T> 3417 static void parseIntArg(const llvm::opt::InputArgList &InputArgs, int ID, 3418 T &Value) { 3419 if (const opt::Arg *A = InputArgs.getLastArg(ID)) { 3420 StringRef V(A->getValue()); 3421 if (!llvm::to_integer(V, Value, 0)) { 3422 reportCmdLineError(A->getSpelling() + 3423 ": expected a non-negative integer, but got '" + V + 3424 "'"); 3425 } 3426 } 3427 } 3428 3429 static object::BuildID parseBuildIDArg(const opt::Arg *A) { 3430 StringRef V(A->getValue()); 3431 object::BuildID BID = parseBuildID(V); 3432 if (BID.empty()) 3433 reportCmdLineError(A->getSpelling() + ": expected a build ID, but got '" + 3434 V + "'"); 3435 return BID; 3436 } 3437 3438 void objdump::invalidArgValue(const opt::Arg *A) { 3439 reportCmdLineError("'" + StringRef(A->getValue()) + 3440 "' is not a valid value for '" + A->getSpelling() + "'"); 3441 } 3442 3443 static std::vector<std::string> 3444 commaSeparatedValues(const llvm::opt::InputArgList &InputArgs, int ID) { 3445 std::vector<std::string> Values; 3446 for (StringRef Value : InputArgs.getAllArgValues(ID)) { 3447 llvm::SmallVector<StringRef, 2> SplitValues; 3448 llvm::SplitString(Value, SplitValues, ","); 3449 for (StringRef SplitValue : SplitValues) 3450 Values.push_back(SplitValue.str()); 3451 } 3452 return Values; 3453 } 3454 3455 static void parseOtoolOptions(const llvm::opt::InputArgList &InputArgs) { 3456 MachOOpt = true; 3457 
FullLeadingAddr = true; 3458 PrintImmHex = true; 3459 3460 ArchName = InputArgs.getLastArgValue(OTOOL_arch).str(); 3461 LinkOptHints = InputArgs.hasArg(OTOOL_C); 3462 if (InputArgs.hasArg(OTOOL_d)) 3463 FilterSections.push_back("__DATA,__data"); 3464 DylibId = InputArgs.hasArg(OTOOL_D); 3465 UniversalHeaders = InputArgs.hasArg(OTOOL_f); 3466 DataInCode = InputArgs.hasArg(OTOOL_G); 3467 FirstPrivateHeader = InputArgs.hasArg(OTOOL_h); 3468 IndirectSymbols = InputArgs.hasArg(OTOOL_I); 3469 ShowRawInsn = InputArgs.hasArg(OTOOL_j); 3470 PrivateHeaders = InputArgs.hasArg(OTOOL_l); 3471 DylibsUsed = InputArgs.hasArg(OTOOL_L); 3472 MCPU = InputArgs.getLastArgValue(OTOOL_mcpu_EQ).str(); 3473 ObjcMetaData = InputArgs.hasArg(OTOOL_o); 3474 DisSymName = InputArgs.getLastArgValue(OTOOL_p).str(); 3475 InfoPlist = InputArgs.hasArg(OTOOL_P); 3476 Relocations = InputArgs.hasArg(OTOOL_r); 3477 if (const Arg *A = InputArgs.getLastArg(OTOOL_s)) { 3478 auto Filter = (A->getValue(0) + StringRef(",") + A->getValue(1)).str(); 3479 FilterSections.push_back(Filter); 3480 } 3481 if (InputArgs.hasArg(OTOOL_t)) 3482 FilterSections.push_back("__TEXT,__text"); 3483 Verbose = InputArgs.hasArg(OTOOL_v) || InputArgs.hasArg(OTOOL_V) || 3484 InputArgs.hasArg(OTOOL_o); 3485 SymbolicOperands = InputArgs.hasArg(OTOOL_V); 3486 if (InputArgs.hasArg(OTOOL_x)) 3487 FilterSections.push_back(",__text"); 3488 LeadingAddr = LeadingHeaders = !InputArgs.hasArg(OTOOL_X); 3489 3490 ChainedFixups = InputArgs.hasArg(OTOOL_chained_fixups); 3491 DyldInfo = InputArgs.hasArg(OTOOL_dyld_info); 3492 3493 InputFilenames = InputArgs.getAllArgValues(OTOOL_INPUT); 3494 if (InputFilenames.empty()) 3495 reportCmdLineError("no input file"); 3496 3497 for (const Arg *A : InputArgs) { 3498 const Option &O = A->getOption(); 3499 if (O.getGroup().isValid() && O.getGroup().getID() == OTOOL_grp_obsolete) { 3500 reportCmdLineWarning(O.getPrefixedName() + 3501 " is obsolete and not implemented"); 3502 } 3503 } 3504 } 3505 3506 static 
void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) { 3507 parseIntArg(InputArgs, OBJDUMP_adjust_vma_EQ, AdjustVMA); 3508 AllHeaders = InputArgs.hasArg(OBJDUMP_all_headers); 3509 ArchName = InputArgs.getLastArgValue(OBJDUMP_arch_name_EQ).str(); 3510 ArchiveHeaders = InputArgs.hasArg(OBJDUMP_archive_headers); 3511 Demangle = InputArgs.hasArg(OBJDUMP_demangle); 3512 Disassemble = InputArgs.hasArg(OBJDUMP_disassemble); 3513 DisassembleAll = InputArgs.hasArg(OBJDUMP_disassemble_all); 3514 SymbolDescription = InputArgs.hasArg(OBJDUMP_symbol_description); 3515 TracebackTable = InputArgs.hasArg(OBJDUMP_traceback_table); 3516 DisassembleSymbols = 3517 commaSeparatedValues(InputArgs, OBJDUMP_disassemble_symbols_EQ); 3518 DisassembleZeroes = InputArgs.hasArg(OBJDUMP_disassemble_zeroes); 3519 if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_dwarf_EQ)) { 3520 DwarfDumpType = StringSwitch<DIDumpType>(A->getValue()) 3521 .Case("frames", DIDT_DebugFrame) 3522 .Default(DIDT_Null); 3523 if (DwarfDumpType == DIDT_Null) 3524 invalidArgValue(A); 3525 } 3526 DynamicRelocations = InputArgs.hasArg(OBJDUMP_dynamic_reloc); 3527 FaultMapSection = InputArgs.hasArg(OBJDUMP_fault_map_section); 3528 Offloading = InputArgs.hasArg(OBJDUMP_offloading); 3529 FileHeaders = InputArgs.hasArg(OBJDUMP_file_headers); 3530 SectionContents = InputArgs.hasArg(OBJDUMP_full_contents); 3531 PrintLines = InputArgs.hasArg(OBJDUMP_line_numbers); 3532 InputFilenames = InputArgs.getAllArgValues(OBJDUMP_INPUT); 3533 MachOOpt = InputArgs.hasArg(OBJDUMP_macho); 3534 MCPU = InputArgs.getLastArgValue(OBJDUMP_mcpu_EQ).str(); 3535 MAttrs = commaSeparatedValues(InputArgs, OBJDUMP_mattr_EQ); 3536 ShowRawInsn = !InputArgs.hasArg(OBJDUMP_no_show_raw_insn); 3537 LeadingAddr = !InputArgs.hasArg(OBJDUMP_no_leading_addr); 3538 RawClangAST = InputArgs.hasArg(OBJDUMP_raw_clang_ast); 3539 Relocations = InputArgs.hasArg(OBJDUMP_reloc); 3540 PrintImmHex = 3541 InputArgs.hasFlag(OBJDUMP_print_imm_hex, 
OBJDUMP_no_print_imm_hex, true); 3542 PrivateHeaders = InputArgs.hasArg(OBJDUMP_private_headers); 3543 FilterSections = InputArgs.getAllArgValues(OBJDUMP_section_EQ); 3544 SectionHeaders = InputArgs.hasArg(OBJDUMP_section_headers); 3545 ShowAllSymbols = InputArgs.hasArg(OBJDUMP_show_all_symbols); 3546 ShowLMA = InputArgs.hasArg(OBJDUMP_show_lma); 3547 PrintSource = InputArgs.hasArg(OBJDUMP_source); 3548 parseIntArg(InputArgs, OBJDUMP_start_address_EQ, StartAddress); 3549 HasStartAddressFlag = InputArgs.hasArg(OBJDUMP_start_address_EQ); 3550 parseIntArg(InputArgs, OBJDUMP_stop_address_EQ, StopAddress); 3551 HasStopAddressFlag = InputArgs.hasArg(OBJDUMP_stop_address_EQ); 3552 SymbolTable = InputArgs.hasArg(OBJDUMP_syms); 3553 SymbolizeOperands = InputArgs.hasArg(OBJDUMP_symbolize_operands); 3554 PrettyPGOAnalysisMap = InputArgs.hasArg(OBJDUMP_pretty_pgo_analysis_map); 3555 if (PrettyPGOAnalysisMap && !SymbolizeOperands) 3556 reportCmdLineWarning("--symbolize-operands must be enabled for " 3557 "--pretty-pgo-analysis-map to have an effect"); 3558 DynamicSymbolTable = InputArgs.hasArg(OBJDUMP_dynamic_syms); 3559 TripleName = InputArgs.getLastArgValue(OBJDUMP_triple_EQ).str(); 3560 UnwindInfo = InputArgs.hasArg(OBJDUMP_unwind_info); 3561 Wide = InputArgs.hasArg(OBJDUMP_wide); 3562 Prefix = InputArgs.getLastArgValue(OBJDUMP_prefix).str(); 3563 parseIntArg(InputArgs, OBJDUMP_prefix_strip, PrefixStrip); 3564 if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_debug_vars_EQ)) { 3565 DbgVariables = StringSwitch<DebugVarsFormat>(A->getValue()) 3566 .Case("ascii", DVASCII) 3567 .Case("unicode", DVUnicode) 3568 .Default(DVInvalid); 3569 if (DbgVariables == DVInvalid) 3570 invalidArgValue(A); 3571 } 3572 if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_disassembler_color_EQ)) { 3573 DisassemblyColor = StringSwitch<ColorOutput>(A->getValue()) 3574 .Case("on", ColorOutput::Enable) 3575 .Case("off", ColorOutput::Disable) 3576 .Case("terminal", ColorOutput::Auto) 3577 
.Default(ColorOutput::Invalid); 3578 if (DisassemblyColor == ColorOutput::Invalid) 3579 invalidArgValue(A); 3580 } 3581 3582 parseIntArg(InputArgs, OBJDUMP_debug_vars_indent_EQ, DbgIndent); 3583 3584 parseMachOOptions(InputArgs); 3585 3586 // Parse -M (--disassembler-options) and deprecated 3587 // --x86-asm-syntax={att,intel}. 3588 // 3589 // Note, for x86, the asm dialect (AssemblerDialect) is initialized when the 3590 // MCAsmInfo is constructed. MCInstPrinter::applyTargetSpecificCLOption is 3591 // called too late. For now we have to use the internal cl::opt option. 3592 const char *AsmSyntax = nullptr; 3593 for (const auto *A : InputArgs.filtered(OBJDUMP_disassembler_options_EQ, 3594 OBJDUMP_x86_asm_syntax_att, 3595 OBJDUMP_x86_asm_syntax_intel)) { 3596 switch (A->getOption().getID()) { 3597 case OBJDUMP_x86_asm_syntax_att: 3598 AsmSyntax = "--x86-asm-syntax=att"; 3599 continue; 3600 case OBJDUMP_x86_asm_syntax_intel: 3601 AsmSyntax = "--x86-asm-syntax=intel"; 3602 continue; 3603 } 3604 3605 SmallVector<StringRef, 2> Values; 3606 llvm::SplitString(A->getValue(), Values, ","); 3607 for (StringRef V : Values) { 3608 if (V == "att") 3609 AsmSyntax = "--x86-asm-syntax=att"; 3610 else if (V == "intel") 3611 AsmSyntax = "--x86-asm-syntax=intel"; 3612 else 3613 DisassemblerOptions.push_back(V.str()); 3614 } 3615 } 3616 SmallVector<const char *> Args = {"llvm-objdump"}; 3617 for (const opt::Arg *A : InputArgs.filtered(OBJDUMP_mllvm)) 3618 Args.push_back(A->getValue()); 3619 if (AsmSyntax) 3620 Args.push_back(AsmSyntax); 3621 if (Args.size() > 1) 3622 llvm::cl::ParseCommandLineOptions(Args.size(), Args.data()); 3623 3624 // Look up any provided build IDs, then append them to the input filenames. 
3625 for (const opt::Arg *A : InputArgs.filtered(OBJDUMP_build_id)) { 3626 object::BuildID BuildID = parseBuildIDArg(A); 3627 std::optional<std::string> Path = BIDFetcher->fetch(BuildID); 3628 if (!Path) { 3629 reportCmdLineError(A->getSpelling() + ": could not find build ID '" + 3630 A->getValue() + "'"); 3631 } 3632 InputFilenames.push_back(std::move(*Path)); 3633 } 3634 3635 // objdump defaults to a.out if no filenames specified. 3636 if (InputFilenames.empty()) 3637 InputFilenames.push_back("a.out"); 3638 } 3639 3640 int llvm_objdump_main(int argc, char **argv, const llvm::ToolContext &) { 3641 using namespace llvm; 3642 3643 ToolName = argv[0]; 3644 std::unique_ptr<CommonOptTable> T; 3645 OptSpecifier Unknown, HelpFlag, HelpHiddenFlag, VersionFlag; 3646 3647 StringRef Stem = sys::path::stem(ToolName); 3648 auto Is = [=](StringRef Tool) { 3649 // We need to recognize the following filenames: 3650 // 3651 // llvm-objdump -> objdump 3652 // llvm-otool-10.exe -> otool 3653 // powerpc64-unknown-freebsd13-objdump -> objdump 3654 auto I = Stem.rfind_insensitive(Tool); 3655 return I != StringRef::npos && 3656 (I + Tool.size() == Stem.size() || !isAlnum(Stem[I + Tool.size()])); 3657 }; 3658 if (Is("otool")) { 3659 T = std::make_unique<OtoolOptTable>(); 3660 Unknown = OTOOL_UNKNOWN; 3661 HelpFlag = OTOOL_help; 3662 HelpHiddenFlag = OTOOL_help_hidden; 3663 VersionFlag = OTOOL_version; 3664 } else { 3665 T = std::make_unique<ObjdumpOptTable>(); 3666 Unknown = OBJDUMP_UNKNOWN; 3667 HelpFlag = OBJDUMP_help; 3668 HelpHiddenFlag = OBJDUMP_help_hidden; 3669 VersionFlag = OBJDUMP_version; 3670 } 3671 3672 BumpPtrAllocator A; 3673 StringSaver Saver(A); 3674 opt::InputArgList InputArgs = 3675 T->parseArgs(argc, argv, Unknown, Saver, 3676 [&](StringRef Msg) { reportCmdLineError(Msg); }); 3677 3678 if (InputArgs.size() == 0 || InputArgs.hasArg(HelpFlag)) { 3679 T->printHelp(ToolName); 3680 return 0; 3681 } 3682 if (InputArgs.hasArg(HelpHiddenFlag)) { 3683 T->printHelp(ToolName, 
/*ShowHidden=*/true); 3684 return 0; 3685 } 3686 3687 // Initialize targets and assembly printers/parsers. 3688 InitializeAllTargetInfos(); 3689 InitializeAllTargetMCs(); 3690 InitializeAllDisassemblers(); 3691 3692 if (InputArgs.hasArg(VersionFlag)) { 3693 cl::PrintVersionMessage(); 3694 if (!Is("otool")) { 3695 outs() << '\n'; 3696 TargetRegistry::printRegisteredTargetsForVersion(outs()); 3697 } 3698 return 0; 3699 } 3700 3701 // Initialize debuginfod. 3702 const bool ShouldUseDebuginfodByDefault = 3703 InputArgs.hasArg(OBJDUMP_build_id) || canUseDebuginfod(); 3704 std::vector<std::string> DebugFileDirectories = 3705 InputArgs.getAllArgValues(OBJDUMP_debug_file_directory); 3706 if (InputArgs.hasFlag(OBJDUMP_debuginfod, OBJDUMP_no_debuginfod, 3707 ShouldUseDebuginfodByDefault)) { 3708 HTTPClient::initialize(); 3709 BIDFetcher = 3710 std::make_unique<DebuginfodFetcher>(std::move(DebugFileDirectories)); 3711 } else { 3712 BIDFetcher = 3713 std::make_unique<BuildIDFetcher>(std::move(DebugFileDirectories)); 3714 } 3715 3716 if (Is("otool")) 3717 parseOtoolOptions(InputArgs); 3718 else 3719 parseObjdumpOptions(InputArgs); 3720 3721 if (StartAddress >= StopAddress) 3722 reportCmdLineError("start address should be less than stop address"); 3723 3724 // Removes trailing separators from prefix. 
3725 while (!Prefix.empty() && sys::path::is_separator(Prefix.back())) 3726 Prefix.pop_back(); 3727 3728 if (AllHeaders) 3729 ArchiveHeaders = FileHeaders = PrivateHeaders = Relocations = 3730 SectionHeaders = SymbolTable = true; 3731 3732 if (DisassembleAll || PrintSource || PrintLines || TracebackTable || 3733 !DisassembleSymbols.empty()) 3734 Disassemble = true; 3735 3736 if (!ArchiveHeaders && !Disassemble && DwarfDumpType == DIDT_Null && 3737 !DynamicRelocations && !FileHeaders && !PrivateHeaders && !RawClangAST && 3738 !Relocations && !SectionHeaders && !SectionContents && !SymbolTable && 3739 !DynamicSymbolTable && !UnwindInfo && !FaultMapSection && !Offloading && 3740 !(MachOOpt && 3741 (Bind || DataInCode || ChainedFixups || DyldInfo || DylibId || 3742 DylibsUsed || ExportsTrie || FirstPrivateHeader || 3743 FunctionStartsType != FunctionStartsMode::None || IndirectSymbols || 3744 InfoPlist || LazyBind || LinkOptHints || ObjcMetaData || Rebase || 3745 Rpaths || UniversalHeaders || WeakBind || !FilterSections.empty()))) { 3746 T->printHelp(ToolName); 3747 return 2; 3748 } 3749 3750 DisasmSymbolSet.insert(DisassembleSymbols.begin(), DisassembleSymbols.end()); 3751 3752 llvm::for_each(InputFilenames, dumpInput); 3753 3754 warnOnNoMatchForSections(); 3755 3756 return EXIT_SUCCESS; 3757 } 3758