1 //===-- LLVMSymbolize.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Implementation for LLVM symbolization library. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 14 15 #include "SymbolizableObjectFile.h" 16 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/BinaryFormat/COFF.h" 19 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 20 #include "llvm/DebugInfo/PDB/PDB.h" 21 #include "llvm/DebugInfo/PDB/PDBContext.h" 22 #include "llvm/Demangle/Demangle.h" 23 #include "llvm/Object/COFF.h" 24 #include "llvm/Object/MachO.h" 25 #include "llvm/Object/MachOUniversal.h" 26 #include "llvm/Support/CRC.h" 27 #include "llvm/Support/Casting.h" 28 #include "llvm/Support/Compression.h" 29 #include "llvm/Support/DataExtractor.h" 30 #include "llvm/Support/Errc.h" 31 #include "llvm/Support/FileSystem.h" 32 #include "llvm/Support/MemoryBuffer.h" 33 #include "llvm/Support/Path.h" 34 #include <algorithm> 35 #include <cassert> 36 #include <cstring> 37 38 namespace llvm { 39 namespace symbolize { 40 41 Expected<DILineInfo> 42 LLVMSymbolizer::symbolizeCodeCommon(SymbolizableModule *Info, 43 object::SectionedAddress ModuleOffset) { 44 // A null module means an error has already been reported. Return an empty 45 // result. 46 if (!Info) 47 return DILineInfo(); 48 49 // If the user is giving us relative addresses, add the preferred base of the 50 // object to the offset before we do the query. It's what DIContext expects. 51 if (Opts.RelativeAddresses) 52 ModuleOffset.Address += Info->getModulePreferredBase(); 53 54 DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts.PrintFunctions, 55 Opts.UseSymbolTable); 56 if (Opts.Demangle) 57 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); 58 return LineInfo; 59 } 60 61 Expected<DILineInfo> 62 LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj, 63 object::SectionedAddress ModuleOffset) { 64 StringRef ModuleName = Obj.getFileName(); 65 auto I = Modules.find(ModuleName); 66 if (I != Modules.end()) 67 return symbolizeCodeCommon(I->second.get(), ModuleOffset); 68 69 std::unique_ptr<DIContext> Context = 70 DWARFContext::create(Obj, nullptr, DWARFContext::defaultErrorHandler); 71 Expected<SymbolizableModule *> InfoOrErr = 72 createModuleInfo(&Obj, std::move(Context), ModuleName); 73 if (!InfoOrErr) 74 return InfoOrErr.takeError(); 75 return symbolizeCodeCommon(*InfoOrErr, ModuleOffset); 76 } 77 78 Expected<DILineInfo> 79 LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, 80 object::SectionedAddress ModuleOffset) { 81 Expected<SymbolizableModule *> InfoOrErr = getOrCreateModuleInfo(ModuleName); 82 if (!InfoOrErr) 83 return InfoOrErr.takeError(); 84 return symbolizeCodeCommon(*InfoOrErr, ModuleOffset); 85 } 86 87 Expected<DIInliningInfo> 88 LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName, 89 object::SectionedAddress ModuleOffset) { 90 SymbolizableModule *Info; 91 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName)) 92 Info = InfoOrErr.get(); 93 else 94 return InfoOrErr.takeError(); 95 96 // A null module means an error has already been reported. Return an empty 97 // result. 98 if (!Info) 99 return DIInliningInfo(); 100 101 // If the user is giving us relative addresses, add the preferred base of the 102 // object to the offset before we do the query. It's what DIContext expects. 103 if (Opts.RelativeAddresses) 104 ModuleOffset.Address += Info->getModulePreferredBase(); 105 106 DIInliningInfo InlinedContext = Info->symbolizeInlinedCode( 107 ModuleOffset, Opts.PrintFunctions, Opts.UseSymbolTable); 108 if (Opts.Demangle) { 109 for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { 110 auto *Frame = InlinedContext.getMutableFrame(i); 111 Frame->FunctionName = DemangleName(Frame->FunctionName, Info); 112 } 113 } 114 return InlinedContext; 115 } 116 117 Expected<DIGlobal> 118 LLVMSymbolizer::symbolizeData(const std::string &ModuleName, 119 object::SectionedAddress ModuleOffset) { 120 SymbolizableModule *Info; 121 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName)) 122 Info = InfoOrErr.get(); 123 else 124 return InfoOrErr.takeError(); 125 126 // A null module means an error has already been reported. Return an empty 127 // result. 128 if (!Info) 129 return DIGlobal(); 130 131 // If the user is giving us relative addresses, add the preferred base of 132 // the object to the offset before we do the query. It's what DIContext 133 // expects. 134 if (Opts.RelativeAddresses) 135 ModuleOffset.Address += Info->getModulePreferredBase(); 136 137 DIGlobal Global = Info->symbolizeData(ModuleOffset); 138 if (Opts.Demangle) 139 Global.Name = DemangleName(Global.Name, Info); 140 return Global; 141 } 142 143 Expected<std::vector<DILocal>> 144 LLVMSymbolizer::symbolizeFrame(const std::string &ModuleName, 145 object::SectionedAddress ModuleOffset) { 146 SymbolizableModule *Info; 147 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName)) 148 Info = InfoOrErr.get(); 149 else 150 return InfoOrErr.takeError(); 151 152 // A null module means an error has already been reported. Return an empty 153 // result. 154 if (!Info) 155 return std::vector<DILocal>(); 156 157 // If the user is giving us relative addresses, add the preferred base of 158 // the object to the offset before we do the query. It's what DIContext 159 // expects. 160 if (Opts.RelativeAddresses) 161 ModuleOffset.Address += Info->getModulePreferredBase(); 162 163 return Info->symbolizeFrame(ModuleOffset); 164 } 165 166 void LLVMSymbolizer::flush() { 167 ObjectForUBPathAndArch.clear(); 168 BinaryForPath.clear(); 169 ObjectPairForPathArch.clear(); 170 Modules.clear(); 171 } 172 173 namespace { 174 175 // For Path="/path/to/foo" and Basename="foo" assume that debug info is in 176 // /path/to/foo.dSYM/Contents/Resources/DWARF/foo. 177 // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in 178 // /path/to/bar.dSYM/Contents/Resources/DWARF/foo. 179 std::string getDarwinDWARFResourceForPath( 180 const std::string &Path, const std::string &Basename) { 181 SmallString<16> ResourceName = StringRef(Path); 182 if (sys::path::extension(Path) != ".dSYM") { 183 ResourceName += ".dSYM"; 184 } 185 sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); 186 sys::path::append(ResourceName, Basename); 187 return ResourceName.str(); 188 } 189 190 bool checkFileCRC(StringRef Path, uint32_t CRCHash) { 191 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 192 MemoryBuffer::getFileOrSTDIN(Path); 193 if (!MB) 194 return false; 195 return CRCHash == llvm::crc32(arrayRefFromStringRef(MB.get()->getBuffer())); 196 } 197 198 bool findDebugBinary(const std::string &OrigPath, 199 const std::string &DebuglinkName, uint32_t CRCHash, 200 const std::string &FallbackDebugPath, 201 std::string &Result) { 202 SmallString<16> OrigDir(OrigPath); 203 llvm::sys::path::remove_filename(OrigDir); 204 SmallString<16> DebugPath = OrigDir; 205 // Try relative/path/to/original_binary/debuglink_name 206 llvm::sys::path::append(DebugPath, DebuglinkName); 207 if (checkFileCRC(DebugPath, CRCHash)) { 208 Result = DebugPath.str(); 209 return true; 210 } 211 // Try relative/path/to/original_binary/.debug/debuglink_name 212 DebugPath = OrigDir; 213 llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); 214 if (checkFileCRC(DebugPath, CRCHash)) { 215 Result = DebugPath.str(); 216 return true; 217 } 218 // Make the path absolute so that lookups will go to 219 // "/usr/lib/debug/full/path/to/debug", not 220 // "/usr/lib/debug/to/debug" 221 llvm::sys::fs::make_absolute(OrigDir); 222 if (!FallbackDebugPath.empty()) { 223 // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name 224 DebugPath = FallbackDebugPath; 225 } else { 226 #if defined(__NetBSD__) 227 // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name 228 DebugPath = "/usr/libdata/debug"; 229 #else 230 // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name 231 DebugPath = "/usr/lib/debug"; 232 #endif 233 } 234 llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), 235 DebuglinkName); 236 if (checkFileCRC(DebugPath, CRCHash)) { 237 Result = DebugPath.str(); 238 return true; 239 } 240 return false; 241 } 242 243 bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, 244 uint32_t &CRCHash) { 245 if (!Obj) 246 return false; 247 for (const SectionRef &Section : Obj->sections()) { 248 StringRef Name; 249 if (Expected<StringRef> NameOrErr = Section.getName()) 250 Name = *NameOrErr; 251 else 252 consumeError(NameOrErr.takeError()); 253 254 Name = Name.substr(Name.find_first_not_of("._")); 255 if (Name == "gnu_debuglink") { 256 Expected<StringRef> ContentsOrErr = Section.getContents(); 257 if (!ContentsOrErr) { 258 consumeError(ContentsOrErr.takeError()); 259 return false; 260 } 261 DataExtractor DE(*ContentsOrErr, Obj->isLittleEndian(), 0); 262 uint64_t Offset = 0; 263 if (const char *DebugNameStr = DE.getCStr(&Offset)) { 264 // 4-byte align the offset. 265 Offset = (Offset + 3) & ~0x3; 266 if (DE.isValidOffsetForDataOfSize(Offset, 4)) { 267 DebugName = DebugNameStr; 268 CRCHash = DE.getU32(&Offset); 269 return true; 270 } 271 } 272 break; 273 } 274 } 275 return false; 276 } 277 278 bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, 279 const MachOObjectFile *Obj) { 280 ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid(); 281 ArrayRef<uint8_t> bin_uuid = Obj->getUuid(); 282 if (dbg_uuid.empty() || bin_uuid.empty()) 283 return false; 284 return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size()); 285 } 286 287 } // end anonymous namespace 288 289 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, 290 const MachOObjectFile *MachExeObj, const std::string &ArchName) { 291 // On Darwin we may find DWARF in separate object file in 292 // resource directory. 293 std::vector<std::string> DsymPaths; 294 StringRef Filename = sys::path::filename(ExePath); 295 DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename)); 296 for (const auto &Path : Opts.DsymHints) { 297 DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename)); 298 } 299 for (const auto &Path : DsymPaths) { 300 auto DbgObjOrErr = getOrCreateObject(Path, ArchName); 301 if (!DbgObjOrErr) { 302 // Ignore errors, the file might not exist. 303 consumeError(DbgObjOrErr.takeError()); 304 continue; 305 } 306 ObjectFile *DbgObj = DbgObjOrErr.get(); 307 if (!DbgObj) 308 continue; 309 const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj); 310 if (!MachDbgObj) 311 continue; 312 if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj)) 313 return DbgObj; 314 } 315 return nullptr; 316 } 317 318 ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path, 319 const ObjectFile *Obj, 320 const std::string &ArchName) { 321 std::string DebuglinkName; 322 uint32_t CRCHash; 323 std::string DebugBinaryPath; 324 if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash)) 325 return nullptr; 326 if (!findDebugBinary(Path, DebuglinkName, CRCHash, Opts.FallbackDebugPath, 327 DebugBinaryPath)) 328 return nullptr; 329 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); 330 if (!DbgObjOrErr) { 331 // Ignore errors, the file might not exist. 332 consumeError(DbgObjOrErr.takeError()); 333 return nullptr; 334 } 335 return DbgObjOrErr.get(); 336 } 337 338 Expected<LLVMSymbolizer::ObjectPair> 339 LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, 340 const std::string &ArchName) { 341 auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName)); 342 if (I != ObjectPairForPathArch.end()) 343 return I->second; 344 345 auto ObjOrErr = getOrCreateObject(Path, ArchName); 346 if (!ObjOrErr) { 347 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), 348 ObjectPair(nullptr, nullptr)); 349 return ObjOrErr.takeError(); 350 } 351 352 ObjectFile *Obj = ObjOrErr.get(); 353 assert(Obj != nullptr); 354 ObjectFile *DbgObj = nullptr; 355 356 if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj)) 357 DbgObj = lookUpDsymFile(Path, MachObj, ArchName); 358 if (!DbgObj) 359 DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName); 360 if (!DbgObj) 361 DbgObj = Obj; 362 ObjectPair Res = std::make_pair(Obj, DbgObj); 363 ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res); 364 return Res; 365 } 366 367 Expected<ObjectFile *> 368 LLVMSymbolizer::getOrCreateObject(const std::string &Path, 369 const std::string &ArchName) { 370 Binary *Bin; 371 auto Pair = BinaryForPath.emplace(Path, OwningBinary<Binary>()); 372 if (!Pair.second) { 373 Bin = Pair.first->second.getBinary(); 374 } else { 375 Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path); 376 if (!BinOrErr) 377 return BinOrErr.takeError(); 378 Pair.first->second = std::move(BinOrErr.get()); 379 Bin = Pair.first->second.getBinary(); 380 } 381 382 if (!Bin) 383 return static_cast<ObjectFile *>(nullptr); 384 385 if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) { 386 auto I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName)); 387 if (I != ObjectForUBPathAndArch.end()) 388 return I->second.get(); 389 390 Expected<std::unique_ptr<ObjectFile>> ObjOrErr = 391 UB->getMachOObjectForArch(ArchName); 392 if (!ObjOrErr) { 393 ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), 394 std::unique_ptr<ObjectFile>()); 395 return ObjOrErr.takeError(); 396 } 397 ObjectFile *Res = ObjOrErr->get(); 398 ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName), 399 std::move(ObjOrErr.get())); 400 return Res; 401 } 402 if (Bin->isObject()) { 403 return cast<ObjectFile>(Bin); 404 } 405 return errorCodeToError(object_error::arch_not_found); 406 } 407 408 Expected<SymbolizableModule *> 409 LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj, 410 std::unique_ptr<DIContext> Context, 411 StringRef ModuleName) { 412 auto InfoOrErr = SymbolizableObjectFile::create(Obj, std::move(Context), 413 Opts.UntagAddresses); 414 std::unique_ptr<SymbolizableModule> SymMod; 415 if (InfoOrErr) 416 SymMod = std::move(*InfoOrErr); 417 auto InsertResult = 418 Modules.insert(std::make_pair(ModuleName, std::move(SymMod))); 419 assert(InsertResult.second); 420 if (std::error_code EC = InfoOrErr.getError()) 421 return errorCodeToError(EC); 422 return InsertResult.first->second.get(); 423 } 424 425 Expected<SymbolizableModule *> 426 LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { 427 auto I = Modules.find(ModuleName); 428 if (I != Modules.end()) 429 return I->second.get(); 430 431 std::string BinaryName = ModuleName; 432 std::string ArchName = Opts.DefaultArch; 433 size_t ColonPos = ModuleName.find_last_of(':'); 434 // Verify that substring after colon form a valid arch name. 435 if (ColonPos != std::string::npos) { 436 std::string ArchStr = ModuleName.substr(ColonPos + 1); 437 if (Triple(ArchStr).getArch() != Triple::UnknownArch) { 438 BinaryName = ModuleName.substr(0, ColonPos); 439 ArchName = ArchStr; 440 } 441 } 442 auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName); 443 if (!ObjectsOrErr) { 444 // Failed to find valid object file. 445 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>()); 446 return ObjectsOrErr.takeError(); 447 } 448 ObjectPair Objects = ObjectsOrErr.get(); 449 450 std::unique_ptr<DIContext> Context; 451 // If this is a COFF object containing PDB info, use a PDBContext to 452 // symbolize. Otherwise, use DWARF. 453 if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) { 454 const codeview::DebugInfo *DebugInfo; 455 StringRef PDBFileName; 456 auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName); 457 if (!EC && DebugInfo != nullptr && !PDBFileName.empty()) { 458 #if 0 459 using namespace pdb; 460 std::unique_ptr<IPDBSession> Session; 461 if (auto Err = loadDataForEXE(PDB_ReaderType::DIA, 462 Objects.first->getFileName(), Session)) { 463 Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>()); 464 // Return along the PDB filename to provide more context 465 return createFileError(PDBFileName, std::move(Err)); 466 } 467 Context.reset(new PDBContext(*CoffObject, std::move(Session))); 468 #else 469 return make_error<StringError>( 470 "PDB support not compiled in", 471 std::make_error_code(std::errc::not_supported)); 472 #endif 473 } 474 } 475 if (!Context) 476 Context = 477 DWARFContext::create(*Objects.second, nullptr, 478 DWARFContext::defaultErrorHandler, Opts.DWPName); 479 return createModuleInfo(Objects.first, std::move(Context), ModuleName); 480 } 481 482 namespace { 483 484 // Undo these various manglings for Win32 extern "C" functions: 485 // cdecl - _foo 486 // stdcall - _foo@12 487 // fastcall - @foo@12 488 // vectorcall - foo@@12 489 // These are all different linkage names for 'foo'. 490 StringRef demanglePE32ExternCFunc(StringRef SymbolName) { 491 // Remove any '_' or '@' prefix. 492 char Front = SymbolName.empty() ? '\0' : SymbolName[0]; 493 if (Front == '_' || Front == '@') 494 SymbolName = SymbolName.drop_front(); 495 496 // Remove any '@[0-9]+' suffix. 497 if (Front != '?') { 498 size_t AtPos = SymbolName.rfind('@'); 499 if (AtPos != StringRef::npos && 500 std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(), 501 [](char C) { return C >= '0' && C <= '9'; })) { 502 SymbolName = SymbolName.substr(0, AtPos); 503 } 504 } 505 506 // Remove any ending '@' for vectorcall. 507 if (SymbolName.endswith("@")) 508 SymbolName = SymbolName.drop_back(); 509 510 return SymbolName; 511 } 512 513 } // end anonymous namespace 514 515 std::string 516 LLVMSymbolizer::DemangleName(const std::string &Name, 517 const SymbolizableModule *DbiModuleDescriptor) { 518 // We can spoil names of symbols with C linkage, so use an heuristic 519 // approach to check if the name should be demangled. 520 if (Name.substr(0, 2) == "_Z") { 521 int status = 0; 522 char *DemangledName = itaniumDemangle(Name.c_str(), nullptr, nullptr, &status); 523 if (status != 0) 524 return Name; 525 std::string Result = DemangledName; 526 free(DemangledName); 527 return Result; 528 } 529 530 if (!Name.empty() && Name.front() == '?') { 531 // Only do MSVC C++ demangling on symbols starting with '?'. 532 int status = 0; 533 char *DemangledName = microsoftDemangle( 534 Name.c_str(), nullptr, nullptr, &status, 535 MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention | 536 MSDF_NoMemberType | MSDF_NoReturnType)); 537 if (status != 0) 538 return Name; 539 std::string Result = DemangledName; 540 free(DemangledName); 541 return Result; 542 } 543 544 if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) 545 return std::string(demanglePE32ExternCFunc(Name)); 546 return Name; 547 } 548 549 } // namespace symbolize 550 } // namespace llvm 551