1 //===-- LLVMSymbolize.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation for LLVM symbolization library. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 14 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/DebugInfo/BTF/BTFContext.h" 17 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 18 #include "llvm/DebugInfo/PDB/PDB.h" 19 #include "llvm/DebugInfo/PDB/PDBContext.h" 20 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" 21 #include "llvm/Demangle/Demangle.h" 22 #include "llvm/Object/BuildID.h" 23 #include "llvm/Object/COFF.h" 24 #include "llvm/Object/ELFObjectFile.h" 25 #include "llvm/Object/MachO.h" 26 #include "llvm/Object/MachOUniversal.h" 27 #include "llvm/Support/CRC.h" 28 #include "llvm/Support/Casting.h" 29 #include "llvm/Support/DataExtractor.h" 30 #include "llvm/Support/Errc.h" 31 #include "llvm/Support/FileSystem.h" 32 #include "llvm/Support/MemoryBuffer.h" 33 #include "llvm/Support/Path.h" 34 #include <cassert> 35 #include <cstring> 36 37 namespace llvm { 38 namespace codeview { 39 union DebugInfo; 40 } 41 namespace symbolize { 42 43 LLVMSymbolizer::LLVMSymbolizer() = default; 44 45 LLVMSymbolizer::LLVMSymbolizer(const Options &Opts) 46 : Opts(Opts), 47 BIDFetcher(std::make_unique<BuildIDFetcher>(Opts.DebugFileDirectory)) {} 48 49 LLVMSymbolizer::~LLVMSymbolizer() = default; 50 51 template <typename T> 52 Expected<DILineInfo> 53 LLVMSymbolizer::symbolizeCodeCommon(const T &ModuleSpecifier, 54 object::SectionedAddress ModuleOffset) { 55 56 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 57 if (!InfoOrErr) 58 return InfoOrErr.takeError(); 59 60 SymbolizableModule *Info = *InfoOrErr; 61 62 // A null module means an error has already been reported. Return an empty 63 // result. 64 if (!Info) 65 return DILineInfo(); 66 67 // If the user is giving us relative addresses, add the preferred base of the 68 // object to the offset before we do the query. It's what DIContext expects. 69 if (Opts.RelativeAddresses) 70 ModuleOffset.Address += Info->getModulePreferredBase(); 71 72 DILineInfo LineInfo = Info->symbolizeCode( 73 ModuleOffset, 74 DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions, 75 Opts.SkipLineZero), 76 Opts.UseSymbolTable); 77 if (Opts.Demangle) 78 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); 79 return LineInfo; 80 } 81 82 Expected<DILineInfo> 83 LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj, 84 object::SectionedAddress ModuleOffset) { 85 return symbolizeCodeCommon(Obj, ModuleOffset); 86 } 87 88 Expected<DILineInfo> 89 LLVMSymbolizer::symbolizeCode(StringRef ModuleName, 90 object::SectionedAddress ModuleOffset) { 91 return symbolizeCodeCommon(ModuleName, ModuleOffset); 92 } 93 94 Expected<DILineInfo> 95 LLVMSymbolizer::symbolizeCode(ArrayRef<uint8_t> BuildID, 96 object::SectionedAddress ModuleOffset) { 97 return symbolizeCodeCommon(BuildID, ModuleOffset); 98 } 99 100 template <typename T> 101 Expected<DIInliningInfo> LLVMSymbolizer::symbolizeInlinedCodeCommon( 102 const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) { 103 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 104 if (!InfoOrErr) 105 return InfoOrErr.takeError(); 106 107 SymbolizableModule *Info = *InfoOrErr; 108 109 // A null module means an error has already been reported. Return an empty 110 // result. 111 if (!Info) 112 return DIInliningInfo(); 113 114 // If the user is giving us relative addresses, add the preferred base of the 115 // object to the offset before we do the query. It's what DIContext expects. 116 if (Opts.RelativeAddresses) 117 ModuleOffset.Address += Info->getModulePreferredBase(); 118 119 DIInliningInfo InlinedContext = Info->symbolizeInlinedCode( 120 ModuleOffset, 121 DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions, 122 Opts.SkipLineZero), 123 Opts.UseSymbolTable); 124 if (Opts.Demangle) { 125 for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { 126 auto *Frame = InlinedContext.getMutableFrame(i); 127 Frame->FunctionName = DemangleName(Frame->FunctionName, Info); 128 } 129 } 130 return InlinedContext; 131 } 132 133 Expected<DIInliningInfo> 134 LLVMSymbolizer::symbolizeInlinedCode(const ObjectFile &Obj, 135 object::SectionedAddress ModuleOffset) { 136 return symbolizeInlinedCodeCommon(Obj, ModuleOffset); 137 } 138 139 Expected<DIInliningInfo> 140 LLVMSymbolizer::symbolizeInlinedCode(StringRef ModuleName, 141 object::SectionedAddress ModuleOffset) { 142 return symbolizeInlinedCodeCommon(ModuleName, ModuleOffset); 143 } 144 145 Expected<DIInliningInfo> 146 LLVMSymbolizer::symbolizeInlinedCode(ArrayRef<uint8_t> BuildID, 147 object::SectionedAddress ModuleOffset) { 148 return symbolizeInlinedCodeCommon(BuildID, ModuleOffset); 149 } 150 151 template <typename T> 152 Expected<DIGlobal> 153 LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier, 154 object::SectionedAddress ModuleOffset) { 155 156 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 157 if (!InfoOrErr) 158 return InfoOrErr.takeError(); 159 160 SymbolizableModule *Info = *InfoOrErr; 161 // A null module means an error has already been reported. Return an empty 162 // result. 163 if (!Info) 164 return DIGlobal(); 165 166 // If the user is giving us relative addresses, add the preferred base of 167 // the object to the offset before we do the query. It's what DIContext 168 // expects. 169 if (Opts.RelativeAddresses) 170 ModuleOffset.Address += Info->getModulePreferredBase(); 171 172 DIGlobal Global = Info->symbolizeData(ModuleOffset); 173 if (Opts.Demangle) 174 Global.Name = DemangleName(Global.Name, Info); 175 return Global; 176 } 177 178 Expected<DIGlobal> 179 LLVMSymbolizer::symbolizeData(const ObjectFile &Obj, 180 object::SectionedAddress ModuleOffset) { 181 return symbolizeDataCommon(Obj, ModuleOffset); 182 } 183 184 Expected<DIGlobal> 185 LLVMSymbolizer::symbolizeData(StringRef ModuleName, 186 object::SectionedAddress ModuleOffset) { 187 return symbolizeDataCommon(ModuleName, ModuleOffset); 188 } 189 190 Expected<DIGlobal> 191 LLVMSymbolizer::symbolizeData(ArrayRef<uint8_t> BuildID, 192 object::SectionedAddress ModuleOffset) { 193 return symbolizeDataCommon(BuildID, ModuleOffset); 194 } 195 196 template <typename T> 197 Expected<std::vector<DILocal>> 198 LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier, 199 object::SectionedAddress ModuleOffset) { 200 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 201 if (!InfoOrErr) 202 return InfoOrErr.takeError(); 203 204 SymbolizableModule *Info = *InfoOrErr; 205 // A null module means an error has already been reported. Return an empty 206 // result. 207 if (!Info) 208 return std::vector<DILocal>(); 209 210 // If the user is giving us relative addresses, add the preferred base of 211 // the object to the offset before we do the query. It's what DIContext 212 // expects. 213 if (Opts.RelativeAddresses) 214 ModuleOffset.Address += Info->getModulePreferredBase(); 215 216 return Info->symbolizeFrame(ModuleOffset); 217 } 218 219 Expected<std::vector<DILocal>> 220 LLVMSymbolizer::symbolizeFrame(const ObjectFile &Obj, 221 object::SectionedAddress ModuleOffset) { 222 return symbolizeFrameCommon(Obj, ModuleOffset); 223 } 224 225 Expected<std::vector<DILocal>> 226 LLVMSymbolizer::symbolizeFrame(StringRef ModuleName, 227 object::SectionedAddress ModuleOffset) { 228 return symbolizeFrameCommon(ModuleName, ModuleOffset); 229 } 230 231 Expected<std::vector<DILocal>> 232 LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID, 233 object::SectionedAddress ModuleOffset) { 234 return symbolizeFrameCommon(BuildID, ModuleOffset); 235 } 236 237 template <typename T> 238 Expected<std::vector<DILineInfo>> 239 LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol, 240 uint64_t Offset) { 241 auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); 242 if (!InfoOrErr) 243 return InfoOrErr.takeError(); 244 245 SymbolizableModule *Info = *InfoOrErr; 246 std::vector<DILineInfo> Result; 247 248 // A null module means an error has already been reported. Return an empty 249 // result. 250 if (!Info) 251 return Result; 252 253 for (object::SectionedAddress A : Info->findSymbol(Symbol, Offset)) { 254 DILineInfo LineInfo = Info->symbolizeCode( 255 A, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), 256 Opts.UseSymbolTable); 257 if (LineInfo.FileName != DILineInfo::BadString) { 258 if (Opts.Demangle) 259 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); 260 Result.push_back(LineInfo); 261 } 262 } 263 264 return Result; 265 } 266 267 Expected<std::vector<DILineInfo>> 268 LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol, 269 uint64_t Offset) { 270 return findSymbolCommon(Obj, Symbol, Offset); 271 } 272 273 Expected<std::vector<DILineInfo>> 274 LLVMSymbolizer::findSymbol(StringRef ModuleName, StringRef Symbol, 275 uint64_t Offset) { 276 return findSymbolCommon(ModuleName, Symbol, Offset); 277 } 278 279 Expected<std::vector<DILineInfo>> 280 LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol, 281 uint64_t Offset) { 282 return findSymbolCommon(BuildID, Symbol, Offset); 283 } 284 285 void LLVMSymbolizer::flush() { 286 ObjectForUBPathAndArch.clear(); 287 LRUBinaries.clear(); 288 CacheSize = 0; 289 BinaryForPath.clear(); 290 ObjectPairForPathArch.clear(); 291 Modules.clear(); 292 BuildIDPaths.clear(); 293 } 294 295 namespace { 296 297 // For Path="/path/to/foo" and Basename="foo" assume that debug info is in 298 // /path/to/foo.dSYM/Contents/Resources/DWARF/foo. 299 // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in 300 // /path/to/bar.dSYM/Contents/Resources/DWARF/foo. 301 std::string getDarwinDWARFResourceForPath(const std::string &Path, 302 const std::string &Basename) { 303 SmallString<16> ResourceName = StringRef(Path); 304 if (sys::path::extension(Path) != ".dSYM") { 305 ResourceName += ".dSYM"; 306 } 307 sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); 308 sys::path::append(ResourceName, Basename); 309 return std::string(ResourceName); 310 } 311 312 bool checkFileCRC(StringRef Path, uint32_t CRCHash) { 313 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 314 MemoryBuffer::getFileOrSTDIN(Path); 315 if (!MB) 316 return false; 317 return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer())); 318 } 319 320 bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, 321 uint32_t &CRCHash) { 322 if (!Obj) 323 return false; 324 for (const SectionRef &Section : Obj->sections()) { 325 StringRef Name; 326 consumeError(Section.getName().moveInto(Name)); 327 328 Name = Name.substr(Name.find_first_not_of("._")); 329 if (Name == "gnu_debuglink") { 330 Expected<StringRef> ContentsOrErr = Section.getContents(); 331 if (!ContentsOrErr) { 332 consumeError(ContentsOrErr.takeError()); 333 return false; 334 } 335 DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0); 336 uint64_t Offset = 0; 337 if (const char *DebugNameStr = DE.getCStr(&Offset)) { 338 // 4-byte align the offset. 339 Offset = (Offset + 3) & ~0x3; 340 if (DE.isValidOffsetForDataOfSize(Offset, 4)) { 341 DebugName = DebugNameStr; 342 CRCHash = DE.getU32(&Offset); 343 return true; 344 } 345 } 346 break; 347 } 348 } 349 return false; 350 } 351 352 bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, 353 const MachOObjectFile *Obj) { 354 ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid(); 355 ArrayRef<uint8_t> bin_uuid = Obj->getUuid(); 356 if (dbg_uuid.empty() || bin_uuid.empty()) 357 return false; 358 return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size()); 359 } 360 361 } // end anonymous namespace 362 363 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, 364 const MachOObjectFile *MachExeObj, 365 const std::string &ArchName) { 366 // On Darwin we may find DWARF in separate object file in 367 // resource directory. 368 std::vector<std::string> DsymPaths; 369 StringRef Filename = sys::path::filename(ExePath); 370 DsymPaths.push_back( 371 getDarwinDWARFResourceForPath(ExePath, std::string(Filename))); 372 for (const auto &Path : Opts.DsymHints) { 373 DsymPaths.push_back( 374 getDarwinDWARFResourceForPath(Path, std::string(Filename))); 375 } 376 for (const auto &Path : DsymPaths) { 377 auto DbgObjOrErr = getOrCreateObject(Path, ArchName); 378 if (!DbgObjOrErr) { 379 // Ignore errors, the file might not exist. 380 consumeError(DbgObjOrErr.takeError()); 381 continue; 382 } 383 ObjectFile *DbgObj = DbgObjOrErr.get(); 384 if (!DbgObj) 385 continue; 386 const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj); 387 if (!MachDbgObj) 388 continue; 389 if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj)) 390 return DbgObj; 391 } 392 return nullptr; 393 } 394 395 ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path, 396 const ObjectFile *Obj, 397 const std::string &ArchName) { 398 std::string DebuglinkName; 399 uint32_t CRCHash; 400 std::string DebugBinaryPath; 401 if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash)) 402 return nullptr; 403 if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) 404 return nullptr; 405 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); 406 if (!DbgObjOrErr) { 407 // Ignore errors, the file might not exist. 408 consumeError(DbgObjOrErr.takeError()); 409 return nullptr; 410 } 411 return DbgObjOrErr.get(); 412 } 413 414 ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path, 415 const ELFObjectFileBase *Obj, 416 const std::string &ArchName) { 417 auto BuildID = getBuildID(Obj); 418 if (BuildID.size() < 2) 419 return nullptr; 420 std::string DebugBinaryPath; 421 if (!getOrFindDebugBinary(BuildID, DebugBinaryPath)) 422 return nullptr; 423 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); 424 if (!DbgObjOrErr) { 425 consumeError(DbgObjOrErr.takeError()); 426 return nullptr; 427 } 428 return DbgObjOrErr.get(); 429 } 430 431 bool LLVMSymbolizer::findDebugBinary(const std::string &OrigPath, 432 const std::string &DebuglinkName, 433 uint32_t CRCHash, std::string &Result) { 434 SmallString<16> OrigDir(OrigPath); 435 llvm::sys::path::remove_filename(OrigDir); 436 SmallString<16> DebugPath = OrigDir; 437 // Try relative/path/to/original_binary/debuglink_name 438 llvm::sys::path::append(DebugPath, DebuglinkName); 439 if (checkFileCRC(DebugPath, CRCHash)) { 440 Result = std::string(DebugPath); 441 return true; 442 } 443 // Try relative/path/to/original_binary/.debug/debuglink_name 444 DebugPath = OrigDir; 445 llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); 446 if (checkFileCRC(DebugPath, CRCHash)) { 447 Result = std::string(DebugPath); 448 return true; 449 } 450 // Make the path absolute so that lookups will go to 451 // "/usr/lib/debug/full/path/to/debug", not 452 // "/usr/lib/debug/to/debug" 453 llvm::sys::fs::make_absolute(OrigDir); 454 if (!Opts.FallbackDebugPath.empty()) { 455 // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name 456 DebugPath = Opts.FallbackDebugPath; 457 } else { 458 #if defined(__NetBSD__) 459 // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name 460 DebugPath = "/usr/libdata/debug"; 461 #else 462 // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name 463 DebugPath = "/usr/lib/debug"; 464 #endif 465 } 466 llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), 467 DebuglinkName); 468 if (checkFileCRC(DebugPath, CRCHash)) { 469 Result = std::string(DebugPath); 470 return true; 471 } 472 return false; 473 } 474 475 static StringRef getBuildIDStr(ArrayRef<uint8_t> BuildID) { 476 return StringRef(reinterpret_cast<const char *>(BuildID.data()), 477 BuildID.size()); 478 } 479 480 bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef<uint8_t> BuildID, 481 std::string &Result) { 482 StringRef BuildIDStr = getBuildIDStr(BuildID); 483 auto I = BuildIDPaths.find(BuildIDStr); 484 if (I != BuildIDPaths.end()) { 485 Result = I->second; 486 return true; 487 } 488 if (!BIDFetcher) 489 return false; 490 if (std::optional<std::string> Path = BIDFetcher->fetch(BuildID)) { 491 Result = *Path; 492 auto InsertResult = BuildIDPaths.insert({BuildIDStr, Result}); 493 assert(InsertResult.second); 494 (void)InsertResult; 495 return true; 496 } 497 498 return false; 499 } 500 501 Expected<LLVMSymbolizer::ObjectPair> 502 LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, 503 const std::string &ArchName) { 504 auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName)); 505 if (I != ObjectPairForPathArch.end()) { 506 recordAccess(BinaryForPath.find(Path)->second); 507 return I->second; 508 } 509 510 auto ObjOrErr = getOrCreateObject(Path, ArchName); 511 if (!ObjOrErr) { 512 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), 513 ObjectPair(nullptr, nullptr)); 514 return ObjOrErr.takeError(); 515 } 516 517 ObjectFile *Obj = ObjOrErr.get(); 518 assert(Obj != nullptr); 519 ObjectFile *DbgObj = nullptr; 520 521 if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj)) 522 DbgObj = lookUpDsymFile(Path, MachObj, ArchName); 523 else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Obj)) 524 DbgObj = lookUpBuildIDObject(Path, ELFObj, ArchName); 525 if (!DbgObj) 526 DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName); 527 if (!DbgObj) 528 DbgObj = Obj; 529 ObjectPair Res = std::make_pair(Obj, DbgObj); 530 std::string DbgObjPath = DbgObj->getFileName().str(); 531 auto Pair = 532 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res); 533 BinaryForPath.find(DbgObjPath)->second.pushEvictor([this, I = Pair.first]() { 534 ObjectPairForPathArch.erase(I); 535 }); 536 return Res; 537 } 538 539 Expected<ObjectFile *> 540 LLVMSymbolizer::getOrCreateObject(const std::string &Path, 541 const std::string &ArchName) { 542 Binary *Bin; 543 auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>()); 544 if (!Pair.second) { 545 Bin = Pair.first->second->getBinary(); 546 recordAccess(Pair.first->second); 547 } else { 548 Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path); 549 if (!BinOrErr) 550 return BinOrErr.takeError(); 551 552 CachedBinary &CachedBin = Pair.first->second; 553 CachedBin = std::move(BinOrErr.get()); 554 CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); }); 555 LRUBinaries.push_back(CachedBin); 556 CacheSize += CachedBin.size(); 557 Bin = CachedBin->getBinary(); 558 } 559 560 if (!Bin) 561 return static_cast<ObjectFile *>(nullptr); 562 563 if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) { 564 auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName)); 565 if (I != ObjectForUBPathAndArch.end()) 566 return I->second.get(); 567 568 Expected<std::unique_ptr<ObjectFile>> ObjOrErr = 569 UB->getMachOObjectForArch(ArchName); 570 if (!ObjOrErr) { 571 ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), 572 std::unique_ptr<ObjectFile>()); 573 return ObjOrErr.takeError(); 574 } 575 ObjectFile *Res = ObjOrErr->get(); 576 auto Pair = ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), 577 std::move(ObjOrErr.get())); 578 BinaryForPath.find(Path)->second.pushEvictor( 579 [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(Iter); }); 580 return Res; 581 } 582 if (Bin->isObject()) { 583 return cast<ObjectFile>(Bin); 584 } 585 return errorCodeToError(object_error::arch_not_found); 586 } 587 588 Expected<SymbolizableModule *> 589 LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj, 590 std::unique_ptr<DIContext> Context, 591 StringRef ModuleName) { 592 auto InfoOrErr = SymbolizableObjectFile::create(Obj, std::move(Context), 593 Opts.UntagAddresses); 594 std::unique_ptr<SymbolizableModule> SymMod; 595 if (InfoOrErr) 596 SymMod = std::move(*InfoOrErr); 597 auto InsertResult = Modules.insert( 598 std::make_pair(std::string(ModuleName), std::move(SymMod))); 599 assert(InsertResult.second); 600 if (!InfoOrErr) 601 return InfoOrErr.takeError(); 602 return InsertResult.first->second.get(); 603 } 604 605 Expected<SymbolizableModule *> 606 LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) { 607 StringRef BinaryName = ModuleName; 608 StringRef ArchName = Opts.DefaultArch; 609 size_t ColonPos = ModuleName.find_last_of(':'); 610 // Verify that substring after colon form a valid arch name. 611 if (ColonPos != std::string::npos) { 612 StringRef ArchStr = ModuleName.substr(ColonPos + 1); 613 if (Triple(ArchStr).getArch() != Triple::UnknownArch) { 614 BinaryName = ModuleName.substr(0, ColonPos); 615 ArchName = ArchStr; 616 } 617 } 618 619 auto I = Modules.find(ModuleName); 620 if (I != Modules.end()) { 621 recordAccess(BinaryForPath.find(BinaryName)->second); 622 return I->second.get(); 623 } 624 625 auto ObjectsOrErr = 626 getOrCreateObjectPair(std::string{BinaryName}, std::string{ArchName}); 627 if (!ObjectsOrErr) { 628 // Failed to find valid object file. 629 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>()); 630 return ObjectsOrErr.takeError(); 631 } 632 ObjectPair Objects = ObjectsOrErr.get(); 633 634 std::unique_ptr<DIContext> Context; 635 // If this is a COFF object containing PDB info and not containing DWARF 636 // section, use a PDBContext to symbolize. Otherwise, use DWARF. 637 if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) { 638 const codeview::DebugInfo *DebugInfo; 639 StringRef PDBFileName; 640 auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName); 641 // Use DWARF if there're DWARF sections. 642 bool HasDwarf = 643 llvm::any_of(Objects.first->sections(), [](SectionRef Section) -> bool { 644 if (Expected<StringRef> SectionName = Section.getName()) 645 return SectionName.get() == ".debug_info"; 646 return false; 647 }); 648 if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) { 649 using namespace pdb; 650 std::unique_ptr<IPDBSession> Session; 651 652 PDB_ReaderType ReaderType = 653 Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native; 654 if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(), 655 Session)) { 656 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>()); 657 // Return along the PDB filename to provide more context 658 return createFileError(PDBFileName, std::move(Err)); 659 } 660 Context.reset(new PDBContext(*CoffObject, std::move(Session))); 661 } 662 } 663 if (!Context) 664 Context = DWARFContext::create( 665 *Objects.second, DWARFContext::ProcessDebugRelocations::Process, 666 nullptr, Opts.DWPName); 667 auto ModuleOrErr = 668 createModuleInfo(Objects.first, std::move(Context), ModuleName); 669 if (ModuleOrErr) { 670 auto I = Modules.find(ModuleName); 671 BinaryForPath.find(BinaryName)->second.pushEvictor([this, I]() { 672 Modules.erase(I); 673 }); 674 } 675 return ModuleOrErr; 676 } 677 678 // For BPF programs .BTF.ext section contains line numbers information, 679 // use it if regular DWARF is not available (e.g. for stripped binary). 680 static bool useBTFContext(const ObjectFile &Obj) { 681 return Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() && 682 BTFParser::hasBTFSections(Obj); 683 } 684 685 Expected<SymbolizableModule *> 686 LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) { 687 StringRef ObjName = Obj.getFileName(); 688 auto I = Modules.find(ObjName); 689 if (I != Modules.end()) 690 return I->second.get(); 691 692 std::unique_ptr<DIContext> Context; 693 if (useBTFContext(Obj)) 694 Context = BTFContext::create(Obj); 695 else 696 Context = DWARFContext::create(Obj); 697 // FIXME: handle COFF object with PDB info to use PDBContext 698 return createModuleInfo(&Obj, std::move(Context), ObjName); 699 } 700 701 Expected<SymbolizableModule *> 702 LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) { 703 std::string Path; 704 if (!getOrFindDebugBinary(BuildID, Path)) { 705 return createStringError(errc::no_such_file_or_directory, 706 "could not find build ID"); 707 } 708 return getOrCreateModuleInfo(Path); 709 } 710 711 namespace { 712 713 // Undo these various manglings for Win32 extern "C" functions: 714 // cdecl - _foo 715 // stdcall - _foo@12 716 // fastcall - @foo@12 717 // vectorcall - foo@@12 718 // These are all different linkage names for 'foo'. 719 StringRef demanglePE32ExternCFunc(StringRef SymbolName) { 720 char Front = SymbolName.empty() ? '\0' : SymbolName[0]; 721 722 // Remove any '@[0-9]+' suffix. 723 bool HasAtNumSuffix = false; 724 if (Front != '?') { 725 size_t AtPos = SymbolName.rfind('@'); 726 if (AtPos != StringRef::npos && 727 all_of(drop_begin(SymbolName, AtPos + 1), isDigit)) { 728 SymbolName = SymbolName.substr(0, AtPos); 729 HasAtNumSuffix = true; 730 } 731 } 732 733 // Remove any ending '@' for vectorcall. 734 bool IsVectorCall = false; 735 if (HasAtNumSuffix && SymbolName.ends_with("@")) { 736 SymbolName = SymbolName.drop_back(); 737 IsVectorCall = true; 738 } 739 740 // If not vectorcall, remove any '_' or '@' prefix. 741 if (!IsVectorCall && (Front == '_' || Front == '@')) 742 SymbolName = SymbolName.drop_front(); 743 744 return SymbolName; 745 } 746 747 } // end anonymous namespace 748 749 std::string 750 LLVMSymbolizer::DemangleName(StringRef Name, 751 const SymbolizableModule *DbiModuleDescriptor) { 752 std::string Result; 753 if (nonMicrosoftDemangle(Name, Result)) 754 return Result; 755 756 if (!Name.empty() && Name.front() == '?') { 757 // Only do MSVC C++ demangling on symbols starting with '?'. 758 int status = 0; 759 char *DemangledName = microsoftDemangle( 760 Name, nullptr, &status, 761 MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention | 762 MSDF_NoMemberType | MSDF_NoReturnType)); 763 if (status != 0) 764 return std::string{Name}; 765 Result = DemangledName; 766 free(DemangledName); 767 return Result; 768 } 769 770 if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) { 771 std::string DemangledCName(demanglePE32ExternCFunc(Name)); 772 // On i386 Windows, the C name mangling for different calling conventions 773 // may also be applied on top of the Itanium or Rust name mangling. 774 if (nonMicrosoftDemangle(DemangledCName, Result)) 775 return Result; 776 return DemangledCName; 777 } 778 return std::string{Name}; 779 } 780 781 void LLVMSymbolizer::recordAccess(CachedBinary &Bin) { 782 if (Bin->getBinary()) 783 LRUBinaries.splice(LRUBinaries.end(), LRUBinaries, Bin.getIterator()); 784 } 785 786 void LLVMSymbolizer::pruneCache() { 787 // Evict the LRU binary until the max cache size is reached or there's <= 1 788 // item in the cache. The MRU binary is always kept to avoid thrashing if it's 789 // larger than the cache size. 790 while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() && 791 std::next(LRUBinaries.begin()) != LRUBinaries.end()) { 792 CachedBinary &Bin = LRUBinaries.front(); 793 CacheSize -= Bin.size(); 794 LRUBinaries.pop_front(); 795 Bin.evict(); 796 } 797 } 798 799 void CachedBinary::pushEvictor(std::function<void()> NewEvictor) { 800 if (Evictor) { 801 this->Evictor = [OldEvictor = std::move(this->Evictor), 802 NewEvictor = std::move(NewEvictor)]() { 803 NewEvictor(); 804 OldEvictor(); 805 }; 806 } else { 807 this->Evictor = std::move(NewEvictor); 808 } 809 } 810 811 } // namespace symbolize 812 } // namespace llvm 813