1 //===- GsymReader.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/DebugInfo/GSYM/GsymReader.h" 10 11 #include <assert.h> 12 #include <inttypes.h> 13 #include <stdio.h> 14 #include <stdlib.h> 15 16 #include "llvm/DebugInfo/GSYM/InlineInfo.h" 17 #include "llvm/DebugInfo/GSYM/LineTable.h" 18 #include "llvm/Support/BinaryStreamReader.h" 19 #include "llvm/Support/DataExtractor.h" 20 #include "llvm/Support/MemoryBuffer.h" 21 22 using namespace llvm; 23 using namespace gsym; 24 25 GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer) 26 : MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {} 27 28 GsymReader::GsymReader(GsymReader &&RHS) = default; 29 30 GsymReader::~GsymReader() = default; 31 32 llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) { 33 // Open the input file and return an appropriate error if needed. 34 ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr = 35 MemoryBuffer::getFileOrSTDIN(Filename); 36 auto Err = BuffOrErr.getError(); 37 if (Err) 38 return llvm::errorCodeToError(Err); 39 return create(BuffOrErr.get()); 40 } 41 42 llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) { 43 auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes"); 44 return create(MemBuffer); 45 } 46 47 llvm::Expected<llvm::gsym::GsymReader> 48 GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) { 49 if (!MemBuffer) 50 return createStringError(std::errc::invalid_argument, 51 "invalid memory buffer"); 52 GsymReader GR(std::move(MemBuffer)); 53 llvm::Error Err = GR.parse(); 54 if (Err) 55 return std::move(Err); 56 return std::move(GR); 57 } 58 59 llvm::Error 60 GsymReader::parse() { 61 BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native); 62 // Check for the magic bytes. This file format is designed to be mmap'ed 63 // into a process and accessed as read only. This is done for performance 64 // and efficiency for symbolicating and parsing GSYM data. 65 if (FileData.readObject(Hdr)) 66 return createStringError(std::errc::invalid_argument, 67 "not enough data for a GSYM header"); 68 69 const auto HostByteOrder = llvm::endianness::native; 70 switch (Hdr->Magic) { 71 case GSYM_MAGIC: 72 Endian = HostByteOrder; 73 break; 74 case GSYM_CIGAM: 75 // This is a GSYM file, but not native endianness. 76 Endian = sys::IsBigEndianHost ? llvm::endianness::little 77 : llvm::endianness::big; 78 Swap.reset(new SwappedData); 79 break; 80 default: 81 return createStringError(std::errc::invalid_argument, 82 "not a GSYM file"); 83 } 84 85 bool DataIsLittleEndian = HostByteOrder != llvm::endianness::little; 86 // Read a correctly byte swapped header if we need to. 87 if (Swap) { 88 DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); 89 if (auto ExpectedHdr = Header::decode(Data)) 90 Swap->Hdr = ExpectedHdr.get(); 91 else 92 return ExpectedHdr.takeError(); 93 Hdr = &Swap->Hdr; 94 } 95 96 // Detect errors in the header and report any that are found. If we make it 97 // past this without errors, we know we have a good magic value, a supported 98 // version number, verified address offset size and a valid UUID size. 99 if (Error Err = Hdr->checkForError()) 100 return Err; 101 102 if (!Swap) { 103 // This is the native endianness case that is most common and optimized for 104 // efficient lookups. Here we just grab pointers to the native data and 105 // use ArrayRef objects to allow efficient read only access. 106 107 // Read the address offsets. 108 if (FileData.padToAlignment(Hdr->AddrOffSize) || 109 FileData.readArray(AddrOffsets, 110 Hdr->NumAddresses * Hdr->AddrOffSize)) 111 return createStringError(std::errc::invalid_argument, 112 "failed to read address table"); 113 114 // Read the address info offsets. 115 if (FileData.padToAlignment(4) || 116 FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses)) 117 return createStringError(std::errc::invalid_argument, 118 "failed to read address info offsets table"); 119 120 // Read the file table. 121 uint32_t NumFiles = 0; 122 if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles)) 123 return createStringError(std::errc::invalid_argument, 124 "failed to read file table"); 125 126 // Get the string table. 127 FileData.setOffset(Hdr->StrtabOffset); 128 if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize)) 129 return createStringError(std::errc::invalid_argument, 130 "failed to read string table"); 131 } else { 132 // This is the non native endianness case that is not common and not 133 // optimized for lookups. Here we decode the important tables into local 134 // storage and then set the ArrayRef objects to point to these swapped 135 // copies of the read only data so lookups can be as efficient as possible. 136 DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4); 137 138 // Read the address offsets. 139 uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize); 140 Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize); 141 switch (Hdr->AddrOffSize) { 142 case 1: 143 if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses)) 144 return createStringError(std::errc::invalid_argument, 145 "failed to read address table"); 146 break; 147 case 2: 148 if (!Data.getU16(&Offset, 149 reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()), 150 Hdr->NumAddresses)) 151 return createStringError(std::errc::invalid_argument, 152 "failed to read address table"); 153 break; 154 case 4: 155 if (!Data.getU32(&Offset, 156 reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()), 157 Hdr->NumAddresses)) 158 return createStringError(std::errc::invalid_argument, 159 "failed to read address table"); 160 break; 161 case 8: 162 if (!Data.getU64(&Offset, 163 reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()), 164 Hdr->NumAddresses)) 165 return createStringError(std::errc::invalid_argument, 166 "failed to read address table"); 167 } 168 AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets); 169 170 // Read the address info offsets. 171 Offset = alignTo(Offset, 4); 172 Swap->AddrInfoOffsets.resize(Hdr->NumAddresses); 173 if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses)) 174 AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets); 175 else 176 return createStringError(std::errc::invalid_argument, 177 "failed to read address table"); 178 // Read the file table. 179 const uint32_t NumFiles = Data.getU32(&Offset); 180 if (NumFiles > 0) { 181 Swap->Files.resize(NumFiles); 182 if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2)) 183 Files = ArrayRef<FileEntry>(Swap->Files); 184 else 185 return createStringError(std::errc::invalid_argument, 186 "failed to read file table"); 187 } 188 // Get the string table. 189 StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset, 190 Hdr->StrtabSize); 191 if (StrTab.Data.empty()) 192 return createStringError(std::errc::invalid_argument, 193 "failed to read string table"); 194 } 195 return Error::success(); 196 197 } 198 199 const Header &GsymReader::getHeader() const { 200 // The only way to get a GsymReader is from GsymReader::openFile(...) or 201 // GsymReader::copyBuffer() and the header must be valid and initialized to 202 // a valid pointer value, so the assert below should not trigger. 203 assert(Hdr); 204 return *Hdr; 205 } 206 207 std::optional<uint64_t> GsymReader::getAddress(size_t Index) const { 208 switch (Hdr->AddrOffSize) { 209 case 1: return addressForIndex<uint8_t>(Index); 210 case 2: return addressForIndex<uint16_t>(Index); 211 case 4: return addressForIndex<uint32_t>(Index); 212 case 8: return addressForIndex<uint64_t>(Index); 213 } 214 return std::nullopt; 215 } 216 217 std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const { 218 const auto NumAddrInfoOffsets = AddrInfoOffsets.size(); 219 if (Index < NumAddrInfoOffsets) 220 return AddrInfoOffsets[Index]; 221 return std::nullopt; 222 } 223 224 Expected<uint64_t> 225 GsymReader::getAddressIndex(const uint64_t Addr) const { 226 if (Addr >= Hdr->BaseAddress) { 227 const uint64_t AddrOffset = Addr - Hdr->BaseAddress; 228 std::optional<uint64_t> AddrOffsetIndex; 229 switch (Hdr->AddrOffSize) { 230 case 1: 231 AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset); 232 break; 233 case 2: 234 AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset); 235 break; 236 case 4: 237 AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset); 238 break; 239 case 8: 240 AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset); 241 break; 242 default: 243 return createStringError(std::errc::invalid_argument, 244 "unsupported address offset size %u", 245 Hdr->AddrOffSize); 246 } 247 if (AddrOffsetIndex) 248 return *AddrOffsetIndex; 249 } 250 return createStringError(std::errc::invalid_argument, 251 "address 0x%" PRIx64 " is not in GSYM", Addr); 252 253 } 254 255 llvm::Expected<DataExtractor> 256 GsymReader::getFunctionInfoDataForAddress(uint64_t Addr, 257 uint64_t &FuncStartAddr) const { 258 Expected<uint64_t> ExpectedAddrIdx = getAddressIndex(Addr); 259 if (!ExpectedAddrIdx) 260 return ExpectedAddrIdx.takeError(); 261 const uint64_t FirstAddrIdx = *ExpectedAddrIdx; 262 // The AddrIdx is the first index of the function info entries that match 263 // \a Addr. We need to iterate over all function info objects that start with 264 // the same address until we find a range that contains \a Addr. 265 std::optional<uint64_t> FirstFuncStartAddr; 266 const size_t NumAddresses = getNumAddresses(); 267 for (uint64_t AddrIdx = FirstAddrIdx; AddrIdx < NumAddresses; ++AddrIdx) { 268 auto ExpextedData = getFunctionInfoDataAtIndex(AddrIdx, FuncStartAddr); 269 // If there was an error, return the error. 270 if (!ExpextedData) 271 return ExpextedData; 272 273 // Remember the first function start address if it hasn't already been set. 274 // If it is already valid, check to see if it matches the first function 275 // start address and only continue if it matches. 276 if (FirstFuncStartAddr.has_value()) { 277 if (*FirstFuncStartAddr != FuncStartAddr) 278 break; // Done with consecutive function entries with same address. 279 } else { 280 FirstFuncStartAddr = FuncStartAddr; 281 } 282 // Make sure the current function address ranges contains \a Addr. 283 // Some symbols on Darwin don't have valid sizes, so if we run into a 284 // symbol with zero size, then we have found a match for our address. 285 286 // The first thing the encoding of a FunctionInfo object is the function 287 // size. 288 uint64_t Offset = 0; 289 uint32_t FuncSize = ExpextedData->getU32(&Offset); 290 if (FuncSize == 0 || 291 AddressRange(FuncStartAddr, FuncStartAddr + FuncSize).contains(Addr)) 292 return ExpextedData; 293 } 294 return createStringError(std::errc::invalid_argument, 295 "address 0x%" PRIx64 " is not in GSYM", Addr); 296 } 297 298 llvm::Expected<DataExtractor> 299 GsymReader::getFunctionInfoDataAtIndex(uint64_t AddrIdx, 300 uint64_t &FuncStartAddr) const { 301 if (AddrIdx >= getNumAddresses()) 302 return createStringError(std::errc::invalid_argument, 303 "invalid address index %" PRIu64, AddrIdx); 304 const uint32_t AddrInfoOffset = AddrInfoOffsets[AddrIdx]; 305 assert((Endian == endianness::big || Endian == endianness::little) && 306 "Endian must be either big or little"); 307 StringRef Bytes = MemBuffer->getBuffer().substr(AddrInfoOffset); 308 if (Bytes.empty()) 309 return createStringError(std::errc::invalid_argument, 310 "invalid address info offset 0x%" PRIx32, 311 AddrInfoOffset); 312 std::optional<uint64_t> OptFuncStartAddr = getAddress(AddrIdx); 313 if (!OptFuncStartAddr) 314 return createStringError(std::errc::invalid_argument, 315 "failed to extract address[%" PRIu64 "]", AddrIdx); 316 FuncStartAddr = *OptFuncStartAddr; 317 return DataExtractor(Bytes, Endian == llvm::endianness::little, 4); 318 } 319 320 llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const { 321 uint64_t FuncStartAddr = 0; 322 if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr)) 323 return FunctionInfo::decode(*ExpectedData, FuncStartAddr); 324 else 325 return ExpectedData.takeError(); 326 } 327 328 llvm::Expected<FunctionInfo> 329 GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const { 330 uint64_t FuncStartAddr = 0; 331 if (auto ExpectedData = getFunctionInfoDataAtIndex(Idx, FuncStartAddr)) 332 return FunctionInfo::decode(*ExpectedData, FuncStartAddr); 333 else 334 return ExpectedData.takeError(); 335 } 336 337 llvm::Expected<LookupResult> 338 GsymReader::lookup(uint64_t Addr, 339 std::optional<DataExtractor> *MergedFunctionsData) const { 340 uint64_t FuncStartAddr = 0; 341 if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr)) 342 return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr, 343 MergedFunctionsData); 344 else 345 return ExpectedData.takeError(); 346 } 347 348 llvm::Expected<std::vector<LookupResult>> 349 GsymReader::lookupAll(uint64_t Addr) const { 350 std::vector<LookupResult> Results; 351 std::optional<DataExtractor> MergedFunctionsData; 352 353 // First perform a lookup to get the primary function info result. 354 auto MainResult = lookup(Addr, &MergedFunctionsData); 355 if (!MainResult) 356 return MainResult.takeError(); 357 358 // Add the main result as the first entry. 359 Results.push_back(std::move(*MainResult)); 360 361 // Now process any merged functions data that was found during the lookup. 362 if (MergedFunctionsData) { 363 // Get data extractors for each merged function. 364 auto ExpectedMergedFuncExtractors = 365 MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData); 366 if (!ExpectedMergedFuncExtractors) 367 return ExpectedMergedFuncExtractors.takeError(); 368 369 // Process each merged function data. 370 for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) { 371 if (auto FI = FunctionInfo::lookup(MergedData, *this, 372 MainResult->FuncRange.start(), Addr)) { 373 Results.push_back(std::move(*FI)); 374 } else { 375 return FI.takeError(); 376 } 377 } 378 } 379 380 return Results; 381 } 382 383 void GsymReader::dump(raw_ostream &OS) { 384 const auto &Header = getHeader(); 385 // Dump the GSYM header. 386 OS << Header << "\n"; 387 // Dump the address table. 388 OS << "Address Table:\n"; 389 OS << "INDEX OFFSET"; 390 391 switch (Hdr->AddrOffSize) { 392 case 1: OS << "8 "; break; 393 case 2: OS << "16"; break; 394 case 4: OS << "32"; break; 395 case 8: OS << "64"; break; 396 default: OS << "??"; break; 397 } 398 OS << " (ADDRESS)\n"; 399 OS << "====== =============================== \n"; 400 for (uint32_t I = 0; I < Header.NumAddresses; ++I) { 401 OS << format("[%4u] ", I); 402 switch (Hdr->AddrOffSize) { 403 case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break; 404 case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break; 405 case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break; 406 case 8: OS << HEX32(getAddrOffsets<uint64_t>()[I]); break; 407 default: break; 408 } 409 OS << " (" << HEX64(*getAddress(I)) << ")\n"; 410 } 411 // Dump the address info offsets table. 412 OS << "\nAddress Info Offsets:\n"; 413 OS << "INDEX Offset\n"; 414 OS << "====== ==========\n"; 415 for (uint32_t I = 0; I < Header.NumAddresses; ++I) 416 OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n"; 417 // Dump the file table. 418 OS << "\nFiles:\n"; 419 OS << "INDEX DIRECTORY BASENAME PATH\n"; 420 OS << "====== ========== ========== ==============================\n"; 421 for (uint32_t I = 0; I < Files.size(); ++I) { 422 OS << format("[%4u] ", I) << HEX32(Files[I].Dir) << ' ' 423 << HEX32(Files[I].Base) << ' '; 424 dump(OS, getFile(I)); 425 OS << "\n"; 426 } 427 OS << "\n" << StrTab << "\n"; 428 429 for (uint32_t I = 0; I < Header.NumAddresses; ++I) { 430 OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": "; 431 if (auto FI = getFunctionInfoAtIndex(I)) 432 dump(OS, *FI); 433 else 434 logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:"); 435 } 436 } 437 438 void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI, 439 uint32_t Indent) { 440 OS.indent(Indent); 441 OS << FI.Range << " \"" << getString(FI.Name) << "\"\n"; 442 if (FI.OptLineTable) 443 dump(OS, *FI.OptLineTable, Indent); 444 if (FI.Inline) 445 dump(OS, *FI.Inline, Indent); 446 447 if (FI.CallSites) 448 dump(OS, *FI.CallSites, Indent); 449 450 if (FI.MergedFunctions) { 451 assert(Indent == 0 && "MergedFunctionsInfo should only exist at top level"); 452 dump(OS, *FI.MergedFunctions); 453 } 454 } 455 456 void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) { 457 for (uint32_t inx = 0; inx < MFI.MergedFunctions.size(); inx++) { 458 OS << "++ Merged FunctionInfos[" << inx << "]:\n"; 459 dump(OS, MFI.MergedFunctions[inx], 4); 460 } 461 } 462 463 void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) { 464 OS << HEX16(CSI.ReturnOffset); 465 466 std::string Flags; 467 auto addFlag = [&](const char *Flag) { 468 if (!Flags.empty()) 469 Flags += " | "; 470 Flags += Flag; 471 }; 472 473 if (CSI.Flags == CallSiteInfo::Flags::None) 474 Flags = "None"; 475 else { 476 if (CSI.Flags & CallSiteInfo::Flags::InternalCall) 477 addFlag("InternalCall"); 478 479 if (CSI.Flags & CallSiteInfo::Flags::ExternalCall) 480 addFlag("ExternalCall"); 481 } 482 OS << " Flags[" << Flags << "]"; 483 484 if (!CSI.MatchRegex.empty()) { 485 OS << " MatchRegex["; 486 for (uint32_t i = 0; i < CSI.MatchRegex.size(); ++i) { 487 if (i > 0) 488 OS << ";"; 489 OS << getString(CSI.MatchRegex[i]); 490 } 491 OS << "]"; 492 } 493 } 494 495 void GsymReader::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC, 496 uint32_t Indent) { 497 OS.indent(Indent); 498 OS << "CallSites (by relative return offset):\n"; 499 for (const auto &CS : CSIC.CallSites) { 500 OS.indent(Indent); 501 OS << " "; 502 dump(OS, CS); 503 OS << "\n"; 504 } 505 } 506 507 void GsymReader::dump(raw_ostream &OS, const LineTable <, uint32_t Indent) { 508 OS.indent(Indent); 509 OS << "LineTable:\n"; 510 for (auto &LE: LT) { 511 OS.indent(Indent); 512 OS << " " << HEX64(LE.Addr) << ' '; 513 if (LE.File) 514 dump(OS, getFile(LE.File)); 515 OS << ':' << LE.Line << '\n'; 516 } 517 } 518 519 void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) { 520 if (Indent == 0) 521 OS << "InlineInfo:\n"; 522 else 523 OS.indent(Indent); 524 OS << II.Ranges << ' ' << getString(II.Name); 525 if (II.CallFile != 0) { 526 if (auto File = getFile(II.CallFile)) { 527 OS << " called from "; 528 dump(OS, File); 529 OS << ':' << II.CallLine; 530 } 531 } 532 OS << '\n'; 533 for (const auto &ChildII: II.Children) 534 dump(OS, ChildII, Indent + 2); 535 } 536 537 void GsymReader::dump(raw_ostream &OS, std::optional<FileEntry> FE) { 538 if (FE) { 539 // IF we have the file from index 0, then don't print anything 540 if (FE->Dir == 0 && FE->Base == 0) 541 return; 542 StringRef Dir = getString(FE->Dir); 543 StringRef Base = getString(FE->Base); 544 if (!Dir.empty()) { 545 OS << Dir; 546 if (Dir.contains('\\') && !Dir.contains('/')) 547 OS << '\\'; 548 else 549 OS << '/'; 550 } 551 if (!Base.empty()) { 552 OS << Base; 553 } 554 if (!Dir.empty() || !Base.empty()) 555 return; 556 } 557 OS << "<invalid-file>"; 558 } 559