1 //===- tools/dsymutil/MachODebugMapParser.cpp - Parse STABS debug maps ----===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "BinaryHolder.h" 11 #include "DebugMap.h" 12 #include "ErrorReporting.h" 13 #include "MachOUtils.h" 14 #include "llvm/ADT/Optional.h" 15 #include "llvm/Object/MachO.h" 16 #include "llvm/Support/Path.h" 17 #include "llvm/Support/raw_ostream.h" 18 19 namespace { 20 using namespace llvm; 21 using namespace llvm::dsymutil; 22 using namespace llvm::object; 23 24 class MachODebugMapParser { 25 public: 26 MachODebugMapParser(StringRef BinaryPath, ArrayRef<std::string> Archs, 27 StringRef PathPrefix = "", bool Verbose = false) 28 : BinaryPath(BinaryPath), Archs(Archs.begin(), Archs.end()), 29 PathPrefix(PathPrefix), MainBinaryHolder(Verbose), 30 CurrentObjectHolder(Verbose), CurrentDebugMapObject(nullptr) {} 31 32 /// Parses and returns the DebugMaps of the input binary. The binary contains 33 /// multiple maps in case it is a universal binary. 34 /// \returns an error in case the provided BinaryPath doesn't exist 35 /// or isn't of a supported type. 36 ErrorOr<std::vector<std::unique_ptr<DebugMap>>> parse(); 37 38 /// Walk the symbol table and dump it. 39 bool dumpStab(); 40 41 private: 42 std::string BinaryPath; 43 SmallVector<StringRef, 1> Archs; 44 std::string PathPrefix; 45 46 /// Owns the MemoryBuffer for the main binary. 47 BinaryHolder MainBinaryHolder; 48 /// Map of the binary symbol addresses. 49 StringMap<uint64_t> MainBinarySymbolAddresses; 50 StringRef MainBinaryStrings; 51 /// The constructed DebugMap. 52 std::unique_ptr<DebugMap> Result; 53 54 /// Owns the MemoryBuffer for the currently handled object file. 55 BinaryHolder CurrentObjectHolder; 56 /// Map of the currently processed object file symbol addresses. 57 StringMap<Optional<uint64_t>> CurrentObjectAddresses; 58 /// Element of the debug map corresponding to the current object file. 59 DebugMapObject *CurrentDebugMapObject; 60 61 /// Holds function info while function scope processing. 62 const char *CurrentFunctionName; 63 uint64_t CurrentFunctionAddress; 64 65 std::unique_ptr<DebugMap> parseOneBinary(const MachOObjectFile &MainBinary, 66 StringRef BinaryPath); 67 68 void 69 switchToNewDebugMapObject(StringRef Filename, 70 sys::TimePoint<std::chrono::seconds> Timestamp); 71 void resetParserState(); 72 uint64_t getMainBinarySymbolAddress(StringRef Name); 73 std::vector<StringRef> getMainBinarySymbolNames(uint64_t Value); 74 void loadMainBinarySymbols(const MachOObjectFile &MainBinary); 75 void loadCurrentObjectFileSymbols(const object::MachOObjectFile &Obj); 76 void handleStabSymbolTableEntry(uint32_t StringIndex, uint8_t Type, 77 uint8_t SectionIndex, uint16_t Flags, 78 uint64_t Value); 79 80 template <typename STEType> void handleStabDebugMapEntry(const STEType &STE) { 81 handleStabSymbolTableEntry(STE.n_strx, STE.n_type, STE.n_sect, STE.n_desc, 82 STE.n_value); 83 } 84 85 /// Dump the symbol table output header. 86 void dumpSymTabHeader(raw_ostream &OS, StringRef Arch); 87 88 /// Dump the contents of nlist entries. 89 void dumpSymTabEntry(raw_ostream &OS, uint64_t Index, uint32_t StringIndex, 90 uint8_t Type, uint8_t SectionIndex, uint16_t Flags, 91 uint64_t Value); 92 93 template <typename STEType> 94 void dumpSymTabEntry(raw_ostream &OS, uint64_t Index, const STEType &STE) { 95 dumpSymTabEntry(OS, Index, STE.n_strx, STE.n_type, STE.n_sect, STE.n_desc, 96 STE.n_value); 97 } 98 void dumpOneBinaryStab(const MachOObjectFile &MainBinary, 99 StringRef BinaryPath); 100 101 void Warning(const Twine &Msg, StringRef File = StringRef()) { 102 warn_ostream() << "(" 103 << MachOUtils::getArchName(Result->getTriple().getArchName()) 104 << ") " << File << " " << Msg << "\n"; 105 } 106 }; 107 108 } // anonymous namespace 109 110 /// Reset the parser state corresponding to the current object 111 /// file. This is to be called after an object file is finished 112 /// processing. 113 void MachODebugMapParser::resetParserState() { 114 CurrentObjectAddresses.clear(); 115 CurrentDebugMapObject = nullptr; 116 } 117 118 /// Create a new DebugMapObject. This function resets the state of the 119 /// parser that was referring to the last object file and sets 120 /// everything up to add symbols to the new one. 121 void MachODebugMapParser::switchToNewDebugMapObject( 122 StringRef Filename, sys::TimePoint<std::chrono::seconds> Timestamp) { 123 resetParserState(); 124 125 SmallString<80> Path(PathPrefix); 126 sys::path::append(Path, Filename); 127 128 auto MachOOrError = 129 CurrentObjectHolder.GetFilesAs<MachOObjectFile>(Path, Timestamp); 130 if (auto Error = MachOOrError.getError()) { 131 Warning("unable to open object file: " + Error.message(), Path.str()); 132 return; 133 } 134 135 auto ErrOrAchObj = 136 CurrentObjectHolder.GetAs<MachOObjectFile>(Result->getTriple()); 137 if (auto Error = ErrOrAchObj.getError()) { 138 Warning("unable to open object file: " + Error.message(), Path.str()); 139 return; 140 } 141 142 CurrentDebugMapObject = 143 &Result->addDebugMapObject(Path, Timestamp, MachO::N_OSO); 144 loadCurrentObjectFileSymbols(*ErrOrAchObj); 145 } 146 147 static std::string getArchName(const object::MachOObjectFile &Obj) { 148 Triple T = Obj.getArchTriple(); 149 return T.getArchName(); 150 } 151 152 std::unique_ptr<DebugMap> 153 MachODebugMapParser::parseOneBinary(const MachOObjectFile &MainBinary, 154 StringRef BinaryPath) { 155 loadMainBinarySymbols(MainBinary); 156 Result = make_unique<DebugMap>(MainBinary.getArchTriple(), BinaryPath); 157 MainBinaryStrings = MainBinary.getStringTableData(); 158 for (const SymbolRef &Symbol : MainBinary.symbols()) { 159 const DataRefImpl &DRI = Symbol.getRawDataRefImpl(); 160 if (MainBinary.is64Bit()) 161 handleStabDebugMapEntry(MainBinary.getSymbol64TableEntry(DRI)); 162 else 163 handleStabDebugMapEntry(MainBinary.getSymbolTableEntry(DRI)); 164 } 165 166 resetParserState(); 167 return std::move(Result); 168 } 169 170 // Table that maps Darwin's Mach-O stab constants to strings to allow printing. 171 // llvm-nm has very similar code, the strings used here are however slightly 172 // different and part of the interface of dsymutil (some project's build-systems 173 // parse the ouptut of dsymutil -s), thus they shouldn't be changed. 174 struct DarwinStabName { 175 uint8_t NType; 176 const char *Name; 177 }; 178 179 static const struct DarwinStabName DarwinStabNames[] = { 180 {MachO::N_GSYM, "N_GSYM"}, {MachO::N_FNAME, "N_FNAME"}, 181 {MachO::N_FUN, "N_FUN"}, {MachO::N_STSYM, "N_STSYM"}, 182 {MachO::N_LCSYM, "N_LCSYM"}, {MachO::N_BNSYM, "N_BNSYM"}, 183 {MachO::N_PC, "N_PC"}, {MachO::N_AST, "N_AST"}, 184 {MachO::N_OPT, "N_OPT"}, {MachO::N_RSYM, "N_RSYM"}, 185 {MachO::N_SLINE, "N_SLINE"}, {MachO::N_ENSYM, "N_ENSYM"}, 186 {MachO::N_SSYM, "N_SSYM"}, {MachO::N_SO, "N_SO"}, 187 {MachO::N_OSO, "N_OSO"}, {MachO::N_LSYM, "N_LSYM"}, 188 {MachO::N_BINCL, "N_BINCL"}, {MachO::N_SOL, "N_SOL"}, 189 {MachO::N_PARAMS, "N_PARAM"}, {MachO::N_VERSION, "N_VERS"}, 190 {MachO::N_OLEVEL, "N_OLEV"}, {MachO::N_PSYM, "N_PSYM"}, 191 {MachO::N_EINCL, "N_EINCL"}, {MachO::N_ENTRY, "N_ENTRY"}, 192 {MachO::N_LBRAC, "N_LBRAC"}, {MachO::N_EXCL, "N_EXCL"}, 193 {MachO::N_RBRAC, "N_RBRAC"}, {MachO::N_BCOMM, "N_BCOMM"}, 194 {MachO::N_ECOMM, "N_ECOMM"}, {MachO::N_ECOML, "N_ECOML"}, 195 {MachO::N_LENG, "N_LENG"}, {0, nullptr}}; 196 197 static const char *getDarwinStabString(uint8_t NType) { 198 for (unsigned i = 0; DarwinStabNames[i].Name; i++) { 199 if (DarwinStabNames[i].NType == NType) 200 return DarwinStabNames[i].Name; 201 } 202 return nullptr; 203 } 204 205 void MachODebugMapParser::dumpSymTabHeader(raw_ostream &OS, StringRef Arch) { 206 OS << "-----------------------------------" 207 "-----------------------------------\n"; 208 OS << "Symbol table for: '" << BinaryPath << "' (" << Arch.data() << ")\n"; 209 OS << "-----------------------------------" 210 "-----------------------------------\n"; 211 OS << "Index n_strx n_type n_sect n_desc n_value\n"; 212 OS << "======== -------- ------------------ ------ ------ ----------------\n"; 213 } 214 215 void MachODebugMapParser::dumpSymTabEntry(raw_ostream &OS, uint64_t Index, 216 uint32_t StringIndex, uint8_t Type, 217 uint8_t SectionIndex, uint16_t Flags, 218 uint64_t Value) { 219 // Index 220 OS << '[' << format_decimal(Index, 6) 221 << "] " 222 // n_strx 223 << format_hex_no_prefix(StringIndex, 8) 224 << ' ' 225 // n_type... 226 << format_hex_no_prefix(Type, 2) << " ("; 227 228 if (Type & MachO::N_STAB) 229 OS << left_justify(getDarwinStabString(Type), 13); 230 else { 231 if (Type & MachO::N_PEXT) 232 OS << "PEXT "; 233 else 234 OS << " "; 235 switch (Type & MachO::N_TYPE) { 236 case MachO::N_UNDF: // 0x0 undefined, n_sect == NO_SECT 237 OS << "UNDF"; 238 break; 239 case MachO::N_ABS: // 0x2 absolute, n_sect == NO_SECT 240 OS << "ABS "; 241 break; 242 case MachO::N_SECT: // 0xe defined in section number n_sect 243 OS << "SECT"; 244 break; 245 case MachO::N_PBUD: // 0xc prebound undefined (defined in a dylib) 246 OS << "PBUD"; 247 break; 248 case MachO::N_INDR: // 0xa indirect 249 OS << "INDR"; 250 break; 251 default: 252 OS << format_hex_no_prefix(Type, 2) << " "; 253 break; 254 } 255 if (Type & MachO::N_EXT) 256 OS << " EXT"; 257 else 258 OS << " "; 259 } 260 261 OS << ") " 262 // n_sect 263 << format_hex_no_prefix(SectionIndex, 2) 264 << " " 265 // n_desc 266 << format_hex_no_prefix(Flags, 4) 267 << " " 268 // n_value 269 << format_hex_no_prefix(Value, 16); 270 271 const char *Name = &MainBinaryStrings.data()[StringIndex]; 272 if (Name && Name[0]) 273 OS << " '" << Name << "'"; 274 275 OS << "\n"; 276 } 277 278 void MachODebugMapParser::dumpOneBinaryStab(const MachOObjectFile &MainBinary, 279 StringRef BinaryPath) { 280 loadMainBinarySymbols(MainBinary); 281 MainBinaryStrings = MainBinary.getStringTableData(); 282 raw_ostream &OS(llvm::outs()); 283 284 dumpSymTabHeader(OS, getArchName(MainBinary)); 285 uint64_t Idx = 0; 286 for (const SymbolRef &Symbol : MainBinary.symbols()) { 287 const DataRefImpl &DRI = Symbol.getRawDataRefImpl(); 288 if (MainBinary.is64Bit()) 289 dumpSymTabEntry(OS, Idx, MainBinary.getSymbol64TableEntry(DRI)); 290 else 291 dumpSymTabEntry(OS, Idx, MainBinary.getSymbolTableEntry(DRI)); 292 Idx++; 293 } 294 295 OS << "\n\n"; 296 resetParserState(); 297 } 298 299 static bool shouldLinkArch(SmallVectorImpl<StringRef> &Archs, StringRef Arch) { 300 if (Archs.empty() || is_contained(Archs, "all") || is_contained(Archs, "*")) 301 return true; 302 303 if (Arch.startswith("arm") && Arch != "arm64" && is_contained(Archs, "arm")) 304 return true; 305 306 SmallString<16> ArchName = Arch; 307 if (Arch.startswith("thumb")) 308 ArchName = ("arm" + Arch.substr(5)).str(); 309 310 return is_contained(Archs, ArchName); 311 } 312 313 bool MachODebugMapParser::dumpStab() { 314 auto MainBinOrError = 315 MainBinaryHolder.GetFilesAs<MachOObjectFile>(BinaryPath); 316 if (auto Error = MainBinOrError.getError()) { 317 llvm::errs() << "Cannot get '" << BinaryPath 318 << "' as MachO file: " << Error.message() << "\n"; 319 return false; 320 } 321 322 for (const auto *Binary : *MainBinOrError) 323 if (shouldLinkArch(Archs, Binary->getArchTriple().getArchName())) 324 dumpOneBinaryStab(*Binary, BinaryPath); 325 326 return true; 327 } 328 329 /// This main parsing routine tries to open the main binary and if 330 /// successful iterates over the STAB entries. The real parsing is 331 /// done in handleStabSymbolTableEntry. 332 ErrorOr<std::vector<std::unique_ptr<DebugMap>>> MachODebugMapParser::parse() { 333 auto MainBinOrError = 334 MainBinaryHolder.GetFilesAs<MachOObjectFile>(BinaryPath); 335 if (auto Error = MainBinOrError.getError()) 336 return Error; 337 338 std::vector<std::unique_ptr<DebugMap>> Results; 339 for (const auto *Binary : *MainBinOrError) 340 if (shouldLinkArch(Archs, Binary->getArchTriple().getArchName())) 341 Results.push_back(parseOneBinary(*Binary, BinaryPath)); 342 343 return std::move(Results); 344 } 345 346 /// Interpret the STAB entries to fill the DebugMap. 347 void MachODebugMapParser::handleStabSymbolTableEntry(uint32_t StringIndex, 348 uint8_t Type, 349 uint8_t SectionIndex, 350 uint16_t Flags, 351 uint64_t Value) { 352 if (!(Type & MachO::N_STAB)) 353 return; 354 355 const char *Name = &MainBinaryStrings.data()[StringIndex]; 356 357 // An N_OSO entry represents the start of a new object file description. 358 if (Type == MachO::N_OSO) 359 return switchToNewDebugMapObject(Name, sys::toTimePoint(Value)); 360 361 if (Type == MachO::N_AST) { 362 SmallString<80> Path(PathPrefix); 363 sys::path::append(Path, Name); 364 Result->addDebugMapObject(Path, sys::toTimePoint(Value), Type); 365 return; 366 } 367 368 // If the last N_OSO object file wasn't found, CurrentDebugMapObject will be 369 // null. Do not update anything until we find the next valid N_OSO entry. 370 if (!CurrentDebugMapObject) 371 return; 372 373 uint32_t Size = 0; 374 switch (Type) { 375 case MachO::N_GSYM: 376 // This is a global variable. We need to query the main binary 377 // symbol table to find its address as it might not be in the 378 // debug map (for common symbols). 379 Value = getMainBinarySymbolAddress(Name); 380 break; 381 case MachO::N_FUN: 382 // Functions are scopes in STABS. They have an end marker that 383 // contains the function size. 384 if (Name[0] == '\0') { 385 Size = Value; 386 Value = CurrentFunctionAddress; 387 Name = CurrentFunctionName; 388 break; 389 } else { 390 CurrentFunctionName = Name; 391 CurrentFunctionAddress = Value; 392 return; 393 } 394 case MachO::N_STSYM: 395 break; 396 default: 397 return; 398 } 399 400 auto ObjectSymIt = CurrentObjectAddresses.find(Name); 401 402 // If the name of a (non-static) symbol is not in the current object, we 403 // check all its aliases from the main binary. 404 if (ObjectSymIt == CurrentObjectAddresses.end() && Type != MachO::N_STSYM) { 405 for (const auto &Alias : getMainBinarySymbolNames(Value)) { 406 ObjectSymIt = CurrentObjectAddresses.find(Alias); 407 if (ObjectSymIt != CurrentObjectAddresses.end()) 408 break; 409 } 410 } 411 412 if (ObjectSymIt == CurrentObjectAddresses.end()) { 413 Warning("could not find object file symbol for symbol " + Twine(Name)); 414 return; 415 } 416 417 if (!CurrentDebugMapObject->addSymbol(Name, ObjectSymIt->getValue(), Value, 418 Size)) { 419 Warning(Twine("failed to insert symbol '") + Name + "' in the debug map."); 420 return; 421 } 422 } 423 424 /// Load the current object file symbols into CurrentObjectAddresses. 425 void MachODebugMapParser::loadCurrentObjectFileSymbols( 426 const object::MachOObjectFile &Obj) { 427 CurrentObjectAddresses.clear(); 428 429 for (auto Sym : Obj.symbols()) { 430 uint64_t Addr = Sym.getValue(); 431 Expected<StringRef> Name = Sym.getName(); 432 if (!Name) { 433 // TODO: Actually report errors helpfully. 434 consumeError(Name.takeError()); 435 continue; 436 } 437 // The value of some categories of symbols isn't meaningful. For 438 // example common symbols store their size in the value field, not 439 // their address. Absolute symbols have a fixed address that can 440 // conflict with standard symbols. These symbols (especially the 441 // common ones), might still be referenced by relocations. These 442 // relocations will use the symbol itself, and won't need an 443 // object file address. The object file address field is optional 444 // in the DebugMap, leave it unassigned for these symbols. 445 if (Sym.getFlags() & (SymbolRef::SF_Absolute | SymbolRef::SF_Common)) 446 CurrentObjectAddresses[*Name] = None; 447 else 448 CurrentObjectAddresses[*Name] = Addr; 449 } 450 } 451 452 /// Lookup a symbol address in the main binary symbol table. The 453 /// parser only needs to query common symbols, thus not every symbol's 454 /// address is available through this function. 455 uint64_t MachODebugMapParser::getMainBinarySymbolAddress(StringRef Name) { 456 auto Sym = MainBinarySymbolAddresses.find(Name); 457 if (Sym == MainBinarySymbolAddresses.end()) 458 return 0; 459 return Sym->second; 460 } 461 462 /// Get all symbol names in the main binary for the given value. 463 std::vector<StringRef> 464 MachODebugMapParser::getMainBinarySymbolNames(uint64_t Value) { 465 std::vector<StringRef> Names; 466 for (const auto &Entry : MainBinarySymbolAddresses) { 467 if (Entry.second == Value) 468 Names.push_back(Entry.first()); 469 } 470 return Names; 471 } 472 473 /// Load the interesting main binary symbols' addresses into 474 /// MainBinarySymbolAddresses. 475 void MachODebugMapParser::loadMainBinarySymbols( 476 const MachOObjectFile &MainBinary) { 477 section_iterator Section = MainBinary.section_end(); 478 MainBinarySymbolAddresses.clear(); 479 for (const auto &Sym : MainBinary.symbols()) { 480 Expected<SymbolRef::Type> TypeOrErr = Sym.getType(); 481 if (!TypeOrErr) { 482 // TODO: Actually report errors helpfully. 483 consumeError(TypeOrErr.takeError()); 484 continue; 485 } 486 SymbolRef::Type Type = *TypeOrErr; 487 // Skip undefined and STAB entries. 488 if ((Type == SymbolRef::ST_Debug) || (Type == SymbolRef::ST_Unknown)) 489 continue; 490 // The only symbols of interest are the global variables. These 491 // are the only ones that need to be queried because the address 492 // of common data won't be described in the debug map. All other 493 // addresses should be fetched for the debug map. 494 uint8_t SymType = 495 MainBinary.getSymbolTableEntry(Sym.getRawDataRefImpl()).n_type; 496 if (!(SymType & (MachO::N_EXT | MachO::N_PEXT))) 497 continue; 498 Expected<section_iterator> SectionOrErr = Sym.getSection(); 499 if (!SectionOrErr) { 500 // TODO: Actually report errors helpfully. 501 consumeError(SectionOrErr.takeError()); 502 continue; 503 } 504 Section = *SectionOrErr; 505 if (Section == MainBinary.section_end() || Section->isText()) 506 continue; 507 uint64_t Addr = Sym.getValue(); 508 Expected<StringRef> NameOrErr = Sym.getName(); 509 if (!NameOrErr) { 510 // TODO: Actually report errors helpfully. 511 consumeError(NameOrErr.takeError()); 512 continue; 513 } 514 StringRef Name = *NameOrErr; 515 if (Name.size() == 0 || Name[0] == '\0') 516 continue; 517 MainBinarySymbolAddresses[Name] = Addr; 518 } 519 } 520 521 namespace llvm { 522 namespace dsymutil { 523 llvm::ErrorOr<std::vector<std::unique_ptr<DebugMap>>> 524 parseDebugMap(StringRef InputFile, ArrayRef<std::string> Archs, 525 StringRef PrependPath, bool Verbose, bool InputIsYAML) { 526 if (!InputIsYAML) { 527 MachODebugMapParser Parser(InputFile, Archs, PrependPath, Verbose); 528 return Parser.parse(); 529 } else { 530 return DebugMap::parseYAMLDebugMap(InputFile, PrependPath, Verbose); 531 } 532 } 533 534 bool dumpStab(StringRef InputFile, ArrayRef<std::string> Archs, 535 StringRef PrependPath) { 536 MachODebugMapParser Parser(InputFile, Archs, PrependPath, false); 537 return Parser.dumpStab(); 538 } 539 } // namespace dsymutil 540 } // namespace llvm 541