1 //===- tools/dsymutil/MachODebugMapParser.cpp - Parse STABS debug maps ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "BinaryHolder.h" 10 #include "DebugMap.h" 11 #include "MachOUtils.h" 12 #include "RelocationMap.h" 13 #include "llvm/ADT/DenseSet.h" 14 #include "llvm/ADT/SmallSet.h" 15 #include "llvm/Object/MachO.h" 16 #include "llvm/Support/Chrono.h" 17 #include "llvm/Support/Path.h" 18 #include "llvm/Support/WithColor.h" 19 #include "llvm/Support/raw_ostream.h" 20 #include <optional> 21 #include <vector> 22 23 namespace { 24 using namespace llvm; 25 using namespace llvm::dsymutil; 26 using namespace llvm::object; 27 28 class MachODebugMapParser { 29 public: 30 MachODebugMapParser(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, 31 StringRef BinaryPath, ArrayRef<std::string> Archs, 32 ArrayRef<std::string> DSYMSearchPaths, 33 StringRef PathPrefix = "", StringRef VariantSuffix = "", 34 bool Verbose = false) 35 : BinaryPath(std::string(BinaryPath)), Archs(Archs.begin(), Archs.end()), 36 DSYMSearchPaths(DSYMSearchPaths.begin(), DSYMSearchPaths.end()), 37 PathPrefix(std::string(PathPrefix)), 38 VariantSuffix(std::string(VariantSuffix)), BinHolder(VFS, Verbose), 39 CurrentDebugMapObject(nullptr), SkipDebugMapObject(false) {} 40 41 /// Parses and returns the DebugMaps of the input binary. The binary contains 42 /// multiple maps in case it is a universal binary. 43 /// \returns an error in case the provided BinaryPath doesn't exist 44 /// or isn't of a supported type. 45 ErrorOr<std::vector<std::unique_ptr<DebugMap>>> parse(); 46 47 /// Walk the symbol table and dump it. 48 bool dumpStab(); 49 50 using OSO = std::pair<llvm::StringRef, uint64_t>; 51 52 private: 53 std::string BinaryPath; 54 SmallVector<StringRef, 1> Archs; 55 SmallVector<StringRef, 1> DSYMSearchPaths; 56 std::string PathPrefix; 57 std::string VariantSuffix; 58 59 /// Owns the MemoryBuffer for the main binary. 60 BinaryHolder BinHolder; 61 /// Map of the binary symbol addresses. 62 StringMap<uint64_t> MainBinarySymbolAddresses; 63 StringRef MainBinaryStrings; 64 /// The constructed DebugMap. 65 std::unique_ptr<DebugMap> Result; 66 /// List of common symbols that need to be added to the debug map. 67 std::vector<std::string> CommonSymbols; 68 69 /// Map of the currently processed object file symbol addresses. 70 StringMap<std::optional<uint64_t>> CurrentObjectAddresses; 71 72 /// Lazily computed map of symbols aliased to the processed object file. 73 StringMap<std::optional<uint64_t>> CurrentObjectAliasMap; 74 75 /// If CurrentObjectAliasMap has been computed for a given address. 76 SmallSet<uint64_t, 4> SeenAliasValues; 77 78 /// Element of the debug map corresponding to the current object file. 79 DebugMapObject *CurrentDebugMapObject; 80 81 /// Whether we need to skip the current debug map object. 82 bool SkipDebugMapObject; 83 84 /// Holds function info while function scope processing. 85 const char *CurrentFunctionName; 86 uint64_t CurrentFunctionAddress; 87 88 std::unique_ptr<DebugMap> parseOneBinary(const MachOObjectFile &MainBinary, 89 StringRef BinaryPath); 90 void handleStabDebugMap( 91 const MachOObjectFile &MainBinary, 92 std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F); 93 94 void 95 switchToNewDebugMapObject(StringRef Filename, 96 sys::TimePoint<std::chrono::seconds> Timestamp); 97 void 98 switchToNewLibDebugMapObject(StringRef Filename, 99 sys::TimePoint<std::chrono::seconds> Timestamp); 100 void resetParserState(); 101 uint64_t getMainBinarySymbolAddress(StringRef Name); 102 std::vector<StringRef> getMainBinarySymbolNames(uint64_t Value); 103 void loadMainBinarySymbols(const MachOObjectFile &MainBinary); 104 void loadCurrentObjectFileSymbols(const object::MachOObjectFile &Obj); 105 106 void handleStabOSOEntry(uint32_t StringIndex, uint8_t Type, 107 uint8_t SectionIndex, uint16_t Flags, uint64_t Value, 108 llvm::DenseSet<OSO> &OSOs, 109 llvm::SmallSet<OSO, 4> &Duplicates); 110 void handleStabSymbolTableEntry(uint32_t StringIndex, uint8_t Type, 111 uint8_t SectionIndex, uint16_t Flags, 112 uint64_t Value, 113 const llvm::SmallSet<OSO, 4> &Duplicates); 114 115 template <typename STEType> 116 void handleStabDebugMapEntry( 117 const STEType &STE, 118 std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F) { 119 F(STE.n_strx, STE.n_type, STE.n_sect, STE.n_desc, STE.n_value); 120 } 121 122 void addCommonSymbols(); 123 124 /// Dump the symbol table output header. 125 void dumpSymTabHeader(raw_ostream &OS, StringRef Arch); 126 127 /// Dump the contents of nlist entries. 128 void dumpSymTabEntry(raw_ostream &OS, uint64_t Index, uint32_t StringIndex, 129 uint8_t Type, uint8_t SectionIndex, uint16_t Flags, 130 uint64_t Value); 131 132 template <typename STEType> 133 void dumpSymTabEntry(raw_ostream &OS, uint64_t Index, const STEType &STE) { 134 dumpSymTabEntry(OS, Index, STE.n_strx, STE.n_type, STE.n_sect, STE.n_desc, 135 STE.n_value); 136 } 137 void dumpOneBinaryStab(const MachOObjectFile &MainBinary, 138 StringRef BinaryPath); 139 140 void Warning(const Twine &Msg, StringRef File = StringRef()) { 141 assert(Result && 142 "The debug map must be initialized before calling this function"); 143 WithColor::warning() << "(" 144 << MachOUtils::getArchName( 145 Result->getTriple().getArchName()) 146 << ") " << File << " " << Msg << "\n"; 147 } 148 }; 149 150 } // anonymous namespace 151 152 /// Reset the parser state corresponding to the current object 153 /// file. This is to be called after an object file is finished 154 /// processing. 155 void MachODebugMapParser::resetParserState() { 156 CommonSymbols.clear(); 157 CurrentObjectAddresses.clear(); 158 CurrentObjectAliasMap.clear(); 159 SeenAliasValues.clear(); 160 CurrentDebugMapObject = nullptr; 161 SkipDebugMapObject = false; 162 } 163 164 /// Commons symbols won't show up in the symbol map but might need to be 165 /// relocated. We can add them to the symbol table ourselves by combining the 166 /// information in the object file (the symbol name) and the main binary (the 167 /// address). 168 void MachODebugMapParser::addCommonSymbols() { 169 for (auto &CommonSymbol : CommonSymbols) { 170 uint64_t CommonAddr = getMainBinarySymbolAddress(CommonSymbol); 171 if (CommonAddr == 0) { 172 // The main binary doesn't have an address for the given symbol. 173 continue; 174 } 175 if (!CurrentDebugMapObject->addSymbol(CommonSymbol, 176 std::nullopt /*ObjectAddress*/, 177 CommonAddr, 0 /*size*/)) { 178 // The symbol is already present. 179 continue; 180 } 181 } 182 } 183 184 /// Create a new DebugMapObject. This function resets the state of the 185 /// parser that was referring to the last object file and sets 186 /// everything up to add symbols to the new one. 187 void MachODebugMapParser::switchToNewDebugMapObject( 188 StringRef Filename, sys::TimePoint<std::chrono::seconds> Timestamp) { 189 190 SmallString<80> Path(PathPrefix); 191 sys::path::append(Path, Filename); 192 193 auto ObjectEntry = BinHolder.getObjectEntry(Path, Timestamp); 194 if (!ObjectEntry) { 195 auto Err = ObjectEntry.takeError(); 196 Warning("unable to open object file: " + toString(std::move(Err)), 197 Path.str()); 198 return; 199 } 200 201 auto Object = ObjectEntry->getObjectAs<MachOObjectFile>(Result->getTriple()); 202 if (!Object) { 203 auto Err = Object.takeError(); 204 Warning("unable to open object file: " + toString(std::move(Err)), 205 Path.str()); 206 return; 207 } 208 209 addCommonSymbols(); 210 resetParserState(); 211 212 CurrentDebugMapObject = 213 &Result->addDebugMapObject(Path, Timestamp, MachO::N_OSO); 214 215 loadCurrentObjectFileSymbols(*Object); 216 } 217 218 /// Create a new DebugMapObject of type MachO::N_LIB. 219 /// This function resets the state of the parser that was 220 /// referring to the last object file and sets everything 221 /// up to add symbols to the new one. 222 void MachODebugMapParser::switchToNewLibDebugMapObject( 223 StringRef Filename, sys::TimePoint<std::chrono::seconds> Timestamp) { 224 225 if (DSYMSearchPaths.empty()) { 226 Warning("no dSYM search path was specified"); 227 return; 228 } 229 230 StringRef LeafName = sys::path::filename(Filename); 231 SmallString<128> VariantLeafName; 232 SmallString<128> ProductName(LeafName); 233 234 // For Framework.framework/Framework and -build-variant-suffix=_debug, 235 // look in the following order: 236 // 1) Framework.framework.dSYM/Contents/Resources/DWARF/Framework_debug 237 // 2) Framework.framework.dSYM/Contents/Resources/DWARF/Framework 238 // 239 // For libName.dylib and -build-variant-suffix=_debug, 240 // look in the following order: 241 // 1) libName.dylib.dSYM/Contents/Resources/DWARF/libName_debug.dylib 242 // 2) libName.dylib.dSYM/Contents/Resources/DWARF/libName.dylib 243 244 size_t libExt = LeafName.rfind(".dylib"); 245 if (libExt != StringRef::npos) { 246 if (!VariantSuffix.empty()) { 247 VariantLeafName.append(LeafName.substr(0, libExt)); 248 VariantLeafName.append(VariantSuffix); 249 VariantLeafName.append(".dylib"); 250 } 251 } else { 252 // Expected to be a framework 253 ProductName.append(".framework"); 254 if (!VariantSuffix.empty()) { 255 VariantLeafName.append(LeafName); 256 VariantLeafName.append(VariantSuffix); 257 } 258 } 259 260 for (auto DSYMSearchPath : DSYMSearchPaths) { 261 SmallString<256> Path(DSYMSearchPath); 262 SmallString<256> FallbackPath(Path); 263 264 SmallString<256> DSYMPath(ProductName); 265 DSYMPath.append(".dSYM"); 266 sys::path::append(DSYMPath, "Contents", "Resources", "DWARF"); 267 268 if (!VariantSuffix.empty()) { 269 sys::path::append(Path, DSYMPath, VariantLeafName); 270 sys::path::append(FallbackPath, DSYMPath, LeafName); 271 } else { 272 sys::path::append(Path, DSYMPath, LeafName); 273 } 274 275 auto ObjectEntry = BinHolder.getObjectEntry(Path, Timestamp); 276 if (!ObjectEntry) { 277 auto Err = ObjectEntry.takeError(); 278 Warning("unable to open object file: " + toString(std::move(Err)), 279 Path.str()); 280 if (!VariantSuffix.empty()) { 281 ObjectEntry = BinHolder.getObjectEntry(FallbackPath, Timestamp); 282 if (!ObjectEntry) { 283 auto Err = ObjectEntry.takeError(); 284 Warning("unable to open object file: " + toString(std::move(Err)), 285 FallbackPath.str()); 286 continue; 287 } 288 Path.assign(FallbackPath); 289 } else { 290 continue; 291 } 292 } 293 294 auto Object = 295 ObjectEntry->getObjectAs<MachOObjectFile>(Result->getTriple()); 296 if (!Object) { 297 auto Err = Object.takeError(); 298 Warning("unable to open object file: " + toString(std::move(Err)), 299 Path.str()); 300 continue; 301 } 302 303 if (CurrentDebugMapObject && 304 CurrentDebugMapObject->getType() == MachO::N_LIB && 305 CurrentDebugMapObject->getObjectFilename().compare(Path.str()) == 0) { 306 return; 307 } 308 309 addCommonSymbols(); 310 resetParserState(); 311 312 CurrentDebugMapObject = 313 &Result->addDebugMapObject(Path, Timestamp, MachO::N_LIB); 314 315 CurrentDebugMapObject->setInstallName(Filename); 316 317 SmallString<256> RMPath(DSYMSearchPath); 318 sys::path::append(RMPath, ProductName); 319 RMPath.append(".dSYM"); 320 StringRef ArchName = Triple::getArchName(Result->getTriple().getArch(), 321 Result->getTriple().getSubArch()); 322 sys::path::append(RMPath, "Contents", "Resources", "Relocations", ArchName); 323 sys::path::append(RMPath, LeafName); 324 RMPath.append(".yml"); 325 const auto &RelocMapPtrOrErr = 326 RelocationMap::parseYAMLRelocationMap(RMPath, PathPrefix); 327 if (auto EC = RelocMapPtrOrErr.getError()) { 328 Warning("cannot parse relocation map file: " + EC.message(), 329 RMPath.str()); 330 return; 331 } 332 CurrentDebugMapObject->setRelocationMap(*RelocMapPtrOrErr->get()); 333 334 loadCurrentObjectFileSymbols(*Object); 335 336 // Found and loaded new dSYM file 337 return; 338 } 339 } 340 341 static std::string getArchName(const object::MachOObjectFile &Obj) { 342 Triple T = Obj.getArchTriple(); 343 return std::string(T.getArchName()); 344 } 345 346 void MachODebugMapParser::handleStabDebugMap( 347 const MachOObjectFile &MainBinary, 348 std::function<void(uint32_t, uint8_t, uint8_t, uint16_t, uint64_t)> F) { 349 for (const SymbolRef &Symbol : MainBinary.symbols()) { 350 const DataRefImpl &DRI = Symbol.getRawDataRefImpl(); 351 if (MainBinary.is64Bit()) 352 handleStabDebugMapEntry(MainBinary.getSymbol64TableEntry(DRI), F); 353 else 354 handleStabDebugMapEntry(MainBinary.getSymbolTableEntry(DRI), F); 355 } 356 } 357 358 std::unique_ptr<DebugMap> 359 MachODebugMapParser::parseOneBinary(const MachOObjectFile &MainBinary, 360 StringRef BinaryPath) { 361 Result = std::make_unique<DebugMap>(MainBinary.getArchTriple(), BinaryPath, 362 MainBinary.getUuid()); 363 loadMainBinarySymbols(MainBinary); 364 MainBinaryStrings = MainBinary.getStringTableData(); 365 366 // Static archives can contain multiple object files with identical names, in 367 // which case the timestamp is used to disambiguate. However, if both are 368 // identical, there's no way to tell them apart. Detect this and skip 369 // duplicate debug map objects. 370 llvm::DenseSet<OSO> OSOs; 371 llvm::SmallSet<OSO, 4> Duplicates; 372 373 // Iterate over all the STABS to find duplicate OSO entries. 374 handleStabDebugMap(MainBinary, 375 [&](uint32_t StringIndex, uint8_t Type, 376 uint8_t SectionIndex, uint16_t Flags, uint64_t Value) { 377 handleStabOSOEntry(StringIndex, Type, SectionIndex, 378 Flags, Value, OSOs, Duplicates); 379 }); 380 381 // Print an informative warning with the duplicate object file name and time 382 // stamp. 383 for (const auto &OSO : Duplicates) { 384 std::string Buffer; 385 llvm::raw_string_ostream OS(Buffer); 386 OS << sys::TimePoint<std::chrono::seconds>(sys::toTimePoint(OSO.second)); 387 Warning("skipping debug map object with duplicate name and timestamp: " + 388 OS.str() + Twine(" ") + Twine(OSO.first)); 389 } 390 391 // Build the debug map by iterating over the STABS again but ignore the 392 // duplicate debug objects. 393 handleStabDebugMap(MainBinary, [&](uint32_t StringIndex, uint8_t Type, 394 uint8_t SectionIndex, uint16_t Flags, 395 uint64_t Value) { 396 handleStabSymbolTableEntry(StringIndex, Type, SectionIndex, Flags, Value, 397 Duplicates); 398 }); 399 400 resetParserState(); 401 return std::move(Result); 402 } 403 404 // Table that maps Darwin's Mach-O stab constants to strings to allow printing. 405 // llvm-nm has very similar code, the strings used here are however slightly 406 // different and part of the interface of dsymutil (some project's build-systems 407 // parse the ouptut of dsymutil -s), thus they shouldn't be changed. 408 struct DarwinStabName { 409 uint8_t NType; 410 const char *Name; 411 }; 412 413 const struct DarwinStabName DarwinStabNames[] = {{MachO::N_GSYM, "N_GSYM"}, 414 {MachO::N_FNAME, "N_FNAME"}, 415 {MachO::N_FUN, "N_FUN"}, 416 {MachO::N_STSYM, "N_STSYM"}, 417 {MachO::N_LCSYM, "N_LCSYM"}, 418 {MachO::N_BNSYM, "N_BNSYM"}, 419 {MachO::N_PC, "N_PC"}, 420 {MachO::N_AST, "N_AST"}, 421 {MachO::N_OPT, "N_OPT"}, 422 {MachO::N_RSYM, "N_RSYM"}, 423 {MachO::N_SLINE, "N_SLINE"}, 424 {MachO::N_ENSYM, "N_ENSYM"}, 425 {MachO::N_SSYM, "N_SSYM"}, 426 {MachO::N_SO, "N_SO"}, 427 {MachO::N_OSO, "N_OSO"}, 428 {MachO::N_LIB, "N_LIB"}, 429 {MachO::N_LSYM, "N_LSYM"}, 430 {MachO::N_BINCL, "N_BINCL"}, 431 {MachO::N_SOL, "N_SOL"}, 432 {MachO::N_PARAMS, "N_PARAM"}, 433 {MachO::N_VERSION, "N_VERS"}, 434 {MachO::N_OLEVEL, "N_OLEV"}, 435 {MachO::N_PSYM, "N_PSYM"}, 436 {MachO::N_EINCL, "N_EINCL"}, 437 {MachO::N_ENTRY, "N_ENTRY"}, 438 {MachO::N_LBRAC, "N_LBRAC"}, 439 {MachO::N_EXCL, "N_EXCL"}, 440 {MachO::N_RBRAC, "N_RBRAC"}, 441 {MachO::N_BCOMM, "N_BCOMM"}, 442 {MachO::N_ECOMM, "N_ECOMM"}, 443 {MachO::N_ECOML, "N_ECOML"}, 444 {MachO::N_LENG, "N_LENG"}, 445 {0, nullptr}}; 446 447 static const char *getDarwinStabString(uint8_t NType) { 448 for (unsigned i = 0; DarwinStabNames[i].Name; i++) { 449 if (DarwinStabNames[i].NType == NType) 450 return DarwinStabNames[i].Name; 451 } 452 return nullptr; 453 } 454 455 void MachODebugMapParser::dumpSymTabHeader(raw_ostream &OS, StringRef Arch) { 456 OS << "-----------------------------------" 457 "-----------------------------------\n"; 458 OS << "Symbol table for: '" << BinaryPath << "' (" << Arch.data() << ")\n"; 459 OS << "-----------------------------------" 460 "-----------------------------------\n"; 461 OS << "Index n_strx n_type n_sect n_desc n_value\n"; 462 OS << "======== -------- ------------------ ------ ------ ----------------\n"; 463 } 464 465 void MachODebugMapParser::dumpSymTabEntry(raw_ostream &OS, uint64_t Index, 466 uint32_t StringIndex, uint8_t Type, 467 uint8_t SectionIndex, uint16_t Flags, 468 uint64_t Value) { 469 // Index 470 OS << '[' << format_decimal(Index, 6) 471 << "] " 472 // n_strx 473 << format_hex_no_prefix(StringIndex, 8) 474 << ' ' 475 // n_type... 476 << format_hex_no_prefix(Type, 2) << " ("; 477 478 if (Type & MachO::N_STAB) 479 OS << left_justify(getDarwinStabString(Type), 13); 480 else { 481 if (Type & MachO::N_PEXT) 482 OS << "PEXT "; 483 else 484 OS << " "; 485 switch (Type & MachO::N_TYPE) { 486 case MachO::N_UNDF: // 0x0 undefined, n_sect == NO_SECT 487 OS << "UNDF"; 488 break; 489 case MachO::N_ABS: // 0x2 absolute, n_sect == NO_SECT 490 OS << "ABS "; 491 break; 492 case MachO::N_SECT: // 0xe defined in section number n_sect 493 OS << "SECT"; 494 break; 495 case MachO::N_PBUD: // 0xc prebound undefined (defined in a dylib) 496 OS << "PBUD"; 497 break; 498 case MachO::N_INDR: // 0xa indirect 499 OS << "INDR"; 500 break; 501 default: 502 OS << format_hex_no_prefix(Type, 2) << " "; 503 break; 504 } 505 if (Type & MachO::N_EXT) 506 OS << " EXT"; 507 else 508 OS << " "; 509 } 510 511 OS << ") " 512 // n_sect 513 << format_hex_no_prefix(SectionIndex, 2) 514 << " " 515 // n_desc 516 << format_hex_no_prefix(Flags, 4) 517 << " " 518 // n_value 519 << format_hex_no_prefix(Value, 16); 520 521 const char *Name = &MainBinaryStrings.data()[StringIndex]; 522 if (Name && Name[0]) 523 OS << " '" << Name << "'"; 524 525 OS << "\n"; 526 } 527 528 void MachODebugMapParser::dumpOneBinaryStab(const MachOObjectFile &MainBinary, 529 StringRef BinaryPath) { 530 loadMainBinarySymbols(MainBinary); 531 MainBinaryStrings = MainBinary.getStringTableData(); 532 raw_ostream &OS(llvm::outs()); 533 534 dumpSymTabHeader(OS, getArchName(MainBinary)); 535 uint64_t Idx = 0; 536 for (const SymbolRef &Symbol : MainBinary.symbols()) { 537 const DataRefImpl &DRI = Symbol.getRawDataRefImpl(); 538 if (MainBinary.is64Bit()) 539 dumpSymTabEntry(OS, Idx, MainBinary.getSymbol64TableEntry(DRI)); 540 else 541 dumpSymTabEntry(OS, Idx, MainBinary.getSymbolTableEntry(DRI)); 542 Idx++; 543 } 544 545 OS << "\n\n"; 546 resetParserState(); 547 } 548 549 static bool shouldLinkArch(SmallVectorImpl<StringRef> &Archs, StringRef Arch) { 550 if (Archs.empty() || is_contained(Archs, "all") || is_contained(Archs, "*")) 551 return true; 552 553 if (Arch.startswith("arm") && Arch != "arm64" && is_contained(Archs, "arm")) 554 return true; 555 556 SmallString<16> ArchName = Arch; 557 if (Arch.startswith("thumb")) 558 ArchName = ("arm" + Arch.substr(5)).str(); 559 560 return is_contained(Archs, ArchName); 561 } 562 563 bool MachODebugMapParser::dumpStab() { 564 auto ObjectEntry = BinHolder.getObjectEntry(BinaryPath); 565 if (!ObjectEntry) { 566 auto Err = ObjectEntry.takeError(); 567 WithColor::error() << "cannot load '" << BinaryPath 568 << "': " << toString(std::move(Err)) << '\n'; 569 return false; 570 } 571 572 auto Objects = ObjectEntry->getObjectsAs<MachOObjectFile>(); 573 if (!Objects) { 574 auto Err = Objects.takeError(); 575 WithColor::error() << "cannot get '" << BinaryPath 576 << "' as MachO file: " << toString(std::move(Err)) 577 << "\n"; 578 return false; 579 } 580 581 for (const auto *Object : *Objects) 582 if (shouldLinkArch(Archs, Object->getArchTriple().getArchName())) 583 dumpOneBinaryStab(*Object, BinaryPath); 584 585 return true; 586 } 587 588 /// This main parsing routine tries to open the main binary and if 589 /// successful iterates over the STAB entries. The real parsing is 590 /// done in handleStabSymbolTableEntry. 591 ErrorOr<std::vector<std::unique_ptr<DebugMap>>> MachODebugMapParser::parse() { 592 auto ObjectEntry = BinHolder.getObjectEntry(BinaryPath); 593 if (!ObjectEntry) { 594 return errorToErrorCode(ObjectEntry.takeError()); 595 } 596 597 auto Objects = ObjectEntry->getObjectsAs<MachOObjectFile>(); 598 if (!Objects) { 599 return errorToErrorCode(Objects.takeError()); 600 } 601 602 std::vector<std::unique_ptr<DebugMap>> Results; 603 for (const auto *Object : *Objects) 604 if (shouldLinkArch(Archs, Object->getArchTriple().getArchName())) 605 Results.push_back(parseOneBinary(*Object, BinaryPath)); 606 607 return std::move(Results); 608 } 609 610 void MachODebugMapParser::handleStabOSOEntry( 611 uint32_t StringIndex, uint8_t Type, uint8_t SectionIndex, uint16_t Flags, 612 uint64_t Value, llvm::DenseSet<OSO> &OSOs, 613 llvm::SmallSet<OSO, 4> &Duplicates) { 614 if (Type != MachO::N_OSO) 615 return; 616 617 OSO O(&MainBinaryStrings.data()[StringIndex], Value); 618 if (!OSOs.insert(O).second) 619 Duplicates.insert(O); 620 } 621 622 /// Interpret the STAB entries to fill the DebugMap. 623 void MachODebugMapParser::handleStabSymbolTableEntry( 624 uint32_t StringIndex, uint8_t Type, uint8_t SectionIndex, uint16_t Flags, 625 uint64_t Value, const llvm::SmallSet<OSO, 4> &Duplicates) { 626 if (!(Type & MachO::N_STAB)) 627 return; 628 629 const char *Name = &MainBinaryStrings.data()[StringIndex]; 630 631 // An N_LIB entry represents the start of a new library file description. 632 if (Type == MachO::N_LIB) { 633 switchToNewLibDebugMapObject(Name, sys::toTimePoint(Value)); 634 return; 635 } 636 637 // An N_OSO entry represents the start of a new object file description. 638 // If an N_LIB entry was present, this is parsed only if the library 639 // dSYM file could not be found. 640 if (Type == MachO::N_OSO) { 641 if (!CurrentDebugMapObject || 642 CurrentDebugMapObject->getType() != MachO::N_LIB) { 643 if (Duplicates.count(OSO(Name, Value))) { 644 SkipDebugMapObject = true; 645 return; 646 } 647 switchToNewDebugMapObject(Name, sys::toTimePoint(Value)); 648 } 649 return; 650 } 651 652 if (SkipDebugMapObject) 653 return; 654 655 if (Type == MachO::N_AST) { 656 SmallString<80> Path(PathPrefix); 657 sys::path::append(Path, Name); 658 Result->addDebugMapObject(Path, sys::toTimePoint(Value), Type); 659 return; 660 } 661 662 // If the last N_OSO object file wasn't found, CurrentDebugMapObject will be 663 // null. Do not update anything until we find the next valid N_OSO entry. 664 if (!CurrentDebugMapObject) 665 return; 666 667 uint32_t Size = 0; 668 switch (Type) { 669 case MachO::N_GSYM: 670 // This is a global variable. We need to query the main binary 671 // symbol table to find its address as it might not be in the 672 // debug map (for common symbols). 673 Value = getMainBinarySymbolAddress(Name); 674 break; 675 case MachO::N_FUN: 676 // Functions are scopes in STABS. They have an end marker that 677 // contains the function size. 678 if (Name[0] == '\0') { 679 Size = Value; 680 Value = CurrentFunctionAddress; 681 Name = CurrentFunctionName; 682 break; 683 } else { 684 CurrentFunctionName = Name; 685 CurrentFunctionAddress = Value; 686 return; 687 } 688 case MachO::N_STSYM: 689 break; 690 default: 691 return; 692 } 693 694 auto ObjectSymIt = CurrentObjectAddresses.find(Name); 695 696 // If the name of a (non-static) symbol is not in the current object, we 697 // check all its aliases from the main binary. 698 if (ObjectSymIt == CurrentObjectAddresses.end() && Type != MachO::N_STSYM) { 699 if (SeenAliasValues.count(Value) == 0) { 700 auto Aliases = getMainBinarySymbolNames(Value); 701 for (const auto &Alias : Aliases) { 702 auto It = CurrentObjectAddresses.find(Alias); 703 if (It != CurrentObjectAddresses.end()) { 704 auto AliasValue = It->getValue(); 705 for (const auto &Alias : Aliases) 706 CurrentObjectAliasMap[Alias] = AliasValue; 707 break; 708 } 709 } 710 SeenAliasValues.insert(Value); 711 } 712 713 auto AliasIt = CurrentObjectAliasMap.find(Name); 714 if (AliasIt != CurrentObjectAliasMap.end()) 715 ObjectSymIt = AliasIt; 716 } 717 718 // ThinLTO adds a unique suffix to exported private symbols. 719 if (ObjectSymIt == CurrentObjectAddresses.end()) { 720 for (auto Iter = CurrentObjectAddresses.begin(); 721 Iter != CurrentObjectAddresses.end(); ++Iter) { 722 llvm::StringRef SymbolName = Iter->getKey(); 723 auto Pos = SymbolName.rfind(".llvm."); 724 if (Pos != llvm::StringRef::npos && SymbolName.substr(0, Pos) == Name) { 725 ObjectSymIt = Iter; 726 break; 727 } 728 } 729 } 730 731 if (ObjectSymIt == CurrentObjectAddresses.end()) { 732 Warning("could not find object file symbol for symbol " + Twine(Name)); 733 return; 734 } 735 736 if (!CurrentDebugMapObject->addSymbol(Name, ObjectSymIt->getValue(), Value, 737 Size)) { 738 Warning(Twine("failed to insert symbol '") + Name + "' in the debug map."); 739 return; 740 } 741 } 742 743 /// Load the current object file symbols into CurrentObjectAddresses. 744 void MachODebugMapParser::loadCurrentObjectFileSymbols( 745 const object::MachOObjectFile &Obj) { 746 CurrentObjectAddresses.clear(); 747 748 for (auto Sym : Obj.symbols()) { 749 uint64_t Addr = cantFail(Sym.getValue()); 750 Expected<StringRef> Name = Sym.getName(); 751 if (!Name) { 752 auto Err = Name.takeError(); 753 Warning("failed to get symbol name: " + toString(std::move(Err)), 754 Obj.getFileName()); 755 continue; 756 } 757 // The value of some categories of symbols isn't meaningful. For 758 // example common symbols store their size in the value field, not 759 // their address. Absolute symbols have a fixed address that can 760 // conflict with standard symbols. These symbols (especially the 761 // common ones), might still be referenced by relocations. These 762 // relocations will use the symbol itself, and won't need an 763 // object file address. The object file address field is optional 764 // in the DebugMap, leave it unassigned for these symbols. 765 uint32_t Flags = cantFail(Sym.getFlags()); 766 if (Flags & SymbolRef::SF_Absolute) { 767 CurrentObjectAddresses[*Name] = std::nullopt; 768 } else if (Flags & SymbolRef::SF_Common) { 769 CurrentObjectAddresses[*Name] = std::nullopt; 770 CommonSymbols.push_back(std::string(*Name)); 771 } else { 772 CurrentObjectAddresses[*Name] = Addr; 773 } 774 } 775 } 776 777 /// Lookup a symbol address in the main binary symbol table. The 778 /// parser only needs to query common symbols, thus not every symbol's 779 /// address is available through this function. 780 uint64_t MachODebugMapParser::getMainBinarySymbolAddress(StringRef Name) { 781 auto Sym = MainBinarySymbolAddresses.find(Name); 782 if (Sym == MainBinarySymbolAddresses.end()) 783 return 0; 784 return Sym->second; 785 } 786 787 /// Get all symbol names in the main binary for the given value. 788 std::vector<StringRef> 789 MachODebugMapParser::getMainBinarySymbolNames(uint64_t Value) { 790 std::vector<StringRef> Names; 791 for (const auto &Entry : MainBinarySymbolAddresses) { 792 if (Entry.second == Value) 793 Names.push_back(Entry.first()); 794 } 795 return Names; 796 } 797 798 /// Load the interesting main binary symbols' addresses into 799 /// MainBinarySymbolAddresses. 800 void MachODebugMapParser::loadMainBinarySymbols( 801 const MachOObjectFile &MainBinary) { 802 section_iterator Section = MainBinary.section_end(); 803 MainBinarySymbolAddresses.clear(); 804 for (const auto &Sym : MainBinary.symbols()) { 805 Expected<SymbolRef::Type> TypeOrErr = Sym.getType(); 806 if (!TypeOrErr) { 807 auto Err = TypeOrErr.takeError(); 808 Warning("failed to get symbol type: " + toString(std::move(Err)), 809 MainBinary.getFileName()); 810 continue; 811 } 812 SymbolRef::Type Type = *TypeOrErr; 813 // Skip undefined and STAB entries. 814 if ((Type == SymbolRef::ST_Debug) || (Type == SymbolRef::ST_Unknown)) 815 continue; 816 // In theory, the only symbols of interest are the global variables. These 817 // are the only ones that need to be queried because the address of common 818 // data won't be described in the debug map. All other addresses should be 819 // fetched for the debug map. In reality, by playing with 'ld -r' and 820 // export lists, you can get symbols described as N_GSYM in the debug map, 821 // but associated with a local symbol. Gather all the symbols, but prefer 822 // the global ones. 823 uint8_t SymType = 824 MainBinary.getSymbolTableEntry(Sym.getRawDataRefImpl()).n_type; 825 bool Extern = SymType & (MachO::N_EXT | MachO::N_PEXT); 826 Expected<section_iterator> SectionOrErr = Sym.getSection(); 827 if (!SectionOrErr) { 828 auto Err = TypeOrErr.takeError(); 829 Warning("failed to get symbol section: " + toString(std::move(Err)), 830 MainBinary.getFileName()); 831 continue; 832 } 833 Section = *SectionOrErr; 834 if ((Section == MainBinary.section_end() || Section->isText()) && !Extern) 835 continue; 836 uint64_t Addr = cantFail(Sym.getValue()); 837 Expected<StringRef> NameOrErr = Sym.getName(); 838 if (!NameOrErr) { 839 auto Err = NameOrErr.takeError(); 840 Warning("failed to get symbol name: " + toString(std::move(Err)), 841 MainBinary.getFileName()); 842 continue; 843 } 844 StringRef Name = *NameOrErr; 845 if (Name.size() == 0 || Name[0] == '\0') 846 continue; 847 // Override only if the new key is global. 848 if (Extern) 849 MainBinarySymbolAddresses[Name] = Addr; 850 else 851 MainBinarySymbolAddresses.try_emplace(Name, Addr); 852 } 853 } 854 855 namespace llvm { 856 namespace dsymutil { 857 llvm::ErrorOr<std::vector<std::unique_ptr<DebugMap>>> 858 parseDebugMap(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, 859 StringRef InputFile, ArrayRef<std::string> Archs, 860 ArrayRef<std::string> DSYMSearchPaths, StringRef PrependPath, 861 StringRef VariantSuffix, bool Verbose, bool InputIsYAML) { 862 if (InputIsYAML) 863 return DebugMap::parseYAMLDebugMap(InputFile, PrependPath, Verbose); 864 865 MachODebugMapParser Parser(VFS, InputFile, Archs, DSYMSearchPaths, 866 PrependPath, VariantSuffix, Verbose); 867 868 return Parser.parse(); 869 } 870 871 bool dumpStab(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS, 872 StringRef InputFile, ArrayRef<std::string> Archs, 873 ArrayRef<std::string> DSYMSearchPaths, StringRef PrependPath, 874 StringRef VariantSuffix) { 875 MachODebugMapParser Parser(VFS, InputFile, Archs, DSYMSearchPaths, 876 PrependPath, VariantSuffix, false); 877 return Parser.dumpStab(); 878 } 879 } // namespace dsymutil 880 } // namespace llvm 881