1 //===- DWARFDebugLine.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" 10 #include "llvm/ADT/Optional.h" 11 #include "llvm/ADT/SmallString.h" 12 #include "llvm/ADT/SmallVector.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/BinaryFormat/Dwarf.h" 15 #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" 16 #include "llvm/DebugInfo/DWARF/DWARFDie.h" 17 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" 18 #include "llvm/Support/Errc.h" 19 #include "llvm/Support/Format.h" 20 #include "llvm/Support/FormatVariadic.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include <algorithm> 23 #include <cassert> 24 #include <cinttypes> 25 #include <cstdint> 26 #include <cstdio> 27 #include <utility> 28 29 using namespace llvm; 30 using namespace dwarf; 31 32 using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind; 33 34 namespace { 35 36 struct ContentDescriptor { 37 dwarf::LineNumberEntryFormat Type; 38 dwarf::Form Form; 39 }; 40 41 using ContentDescriptors = SmallVector<ContentDescriptor, 4>; 42 43 } // end anonymous namespace 44 45 static bool versionIsSupported(uint16_t Version) { 46 return Version >= 2 && Version <= 5; 47 } 48 49 void DWARFDebugLine::ContentTypeTracker::trackContentType( 50 dwarf::LineNumberEntryFormat ContentType) { 51 switch (ContentType) { 52 case dwarf::DW_LNCT_timestamp: 53 HasModTime = true; 54 break; 55 case dwarf::DW_LNCT_size: 56 HasLength = true; 57 break; 58 case dwarf::DW_LNCT_MD5: 59 HasMD5 = true; 60 break; 61 case dwarf::DW_LNCT_LLVM_source: 62 HasSource = true; 63 break; 64 default: 65 // We only care about values we consider optional, and new values may be 66 // added in the vendor extension range, so we do not match exhaustively. 67 break; 68 } 69 } 70 71 DWARFDebugLine::Prologue::Prologue() { clear(); } 72 73 bool DWARFDebugLine::Prologue::hasFileAtIndex(uint64_t FileIndex) const { 74 uint16_t DwarfVersion = getVersion(); 75 assert(DwarfVersion != 0 && 76 "line table prologue has no dwarf version information"); 77 if (DwarfVersion >= 5) 78 return FileIndex < FileNames.size(); 79 return FileIndex != 0 && FileIndex <= FileNames.size(); 80 } 81 82 std::optional<uint64_t> 83 DWARFDebugLine::Prologue::getLastValidFileIndex() const { 84 if (FileNames.empty()) 85 return std::nullopt; 86 uint16_t DwarfVersion = getVersion(); 87 assert(DwarfVersion != 0 && 88 "line table prologue has no dwarf version information"); 89 // In DWARF v5 the file names are 0-indexed. 90 if (DwarfVersion >= 5) 91 return FileNames.size() - 1; 92 return FileNames.size(); 93 } 94 95 const llvm::DWARFDebugLine::FileNameEntry & 96 DWARFDebugLine::Prologue::getFileNameEntry(uint64_t Index) const { 97 uint16_t DwarfVersion = getVersion(); 98 assert(DwarfVersion != 0 && 99 "line table prologue has no dwarf version information"); 100 // In DWARF v5 the file names are 0-indexed. 101 if (DwarfVersion >= 5) 102 return FileNames[Index]; 103 return FileNames[Index - 1]; 104 } 105 106 void DWARFDebugLine::Prologue::clear() { 107 TotalLength = PrologueLength = 0; 108 SegSelectorSize = 0; 109 MinInstLength = MaxOpsPerInst = DefaultIsStmt = LineBase = LineRange = 0; 110 OpcodeBase = 0; 111 FormParams = dwarf::FormParams({0, 0, DWARF32}); 112 ContentTypes = ContentTypeTracker(); 113 StandardOpcodeLengths.clear(); 114 IncludeDirectories.clear(); 115 FileNames.clear(); 116 } 117 118 void DWARFDebugLine::Prologue::dump(raw_ostream &OS, 119 DIDumpOptions DumpOptions) const { 120 if (!totalLengthIsValid()) 121 return; 122 int OffsetDumpWidth = 2 * dwarf::getDwarfOffsetByteSize(FormParams.Format); 123 OS << "Line table prologue:\n" 124 << format(" total_length: 0x%0*" PRIx64 "\n", OffsetDumpWidth, 125 TotalLength) 126 << " format: " << dwarf::FormatString(FormParams.Format) << "\n" 127 << format(" version: %u\n", getVersion()); 128 if (!versionIsSupported(getVersion())) 129 return; 130 if (getVersion() >= 5) 131 OS << format(" address_size: %u\n", getAddressSize()) 132 << format(" seg_select_size: %u\n", SegSelectorSize); 133 OS << format(" prologue_length: 0x%0*" PRIx64 "\n", OffsetDumpWidth, 134 PrologueLength) 135 << format(" min_inst_length: %u\n", MinInstLength) 136 << format(getVersion() >= 4 ? "max_ops_per_inst: %u\n" : "", MaxOpsPerInst) 137 << format(" default_is_stmt: %u\n", DefaultIsStmt) 138 << format(" line_base: %i\n", LineBase) 139 << format(" line_range: %u\n", LineRange) 140 << format(" opcode_base: %u\n", OpcodeBase); 141 142 for (uint32_t I = 0; I != StandardOpcodeLengths.size(); ++I) 143 OS << formatv("standard_opcode_lengths[{0}] = {1}\n", 144 static_cast<dwarf::LineNumberOps>(I + 1), 145 StandardOpcodeLengths[I]); 146 147 if (!IncludeDirectories.empty()) { 148 // DWARF v5 starts directory indexes at 0. 149 uint32_t DirBase = getVersion() >= 5 ? 0 : 1; 150 for (uint32_t I = 0; I != IncludeDirectories.size(); ++I) { 151 OS << format("include_directories[%3u] = ", I + DirBase); 152 IncludeDirectories[I].dump(OS, DumpOptions); 153 OS << '\n'; 154 } 155 } 156 157 if (!FileNames.empty()) { 158 // DWARF v5 starts file indexes at 0. 159 uint32_t FileBase = getVersion() >= 5 ? 0 : 1; 160 for (uint32_t I = 0; I != FileNames.size(); ++I) { 161 const FileNameEntry &FileEntry = FileNames[I]; 162 OS << format("file_names[%3u]:\n", I + FileBase); 163 OS << " name: "; 164 FileEntry.Name.dump(OS, DumpOptions); 165 OS << '\n' 166 << format(" dir_index: %" PRIu64 "\n", FileEntry.DirIdx); 167 if (ContentTypes.HasMD5) 168 OS << " md5_checksum: " << FileEntry.Checksum.digest() << '\n'; 169 if (ContentTypes.HasModTime) 170 OS << format(" mod_time: 0x%8.8" PRIx64 "\n", FileEntry.ModTime); 171 if (ContentTypes.HasLength) 172 OS << format(" length: 0x%8.8" PRIx64 "\n", FileEntry.Length); 173 if (ContentTypes.HasSource) { 174 OS << " source: "; 175 FileEntry.Source.dump(OS, DumpOptions); 176 OS << '\n'; 177 } 178 } 179 } 180 } 181 182 // Parse v2-v4 directory and file tables. 183 static Error 184 parseV2DirFileTables(const DWARFDataExtractor &DebugLineData, 185 uint64_t *OffsetPtr, 186 DWARFDebugLine::ContentTypeTracker &ContentTypes, 187 std::vector<DWARFFormValue> &IncludeDirectories, 188 std::vector<DWARFDebugLine::FileNameEntry> &FileNames) { 189 while (true) { 190 Error Err = Error::success(); 191 StringRef S = DebugLineData.getCStrRef(OffsetPtr, &Err); 192 if (Err) { 193 consumeError(std::move(Err)); 194 return createStringError(errc::invalid_argument, 195 "include directories table was not null " 196 "terminated before the end of the prologue"); 197 } 198 if (S.empty()) 199 break; 200 DWARFFormValue Dir = 201 DWARFFormValue::createFromPValue(dwarf::DW_FORM_string, S.data()); 202 IncludeDirectories.push_back(Dir); 203 } 204 205 ContentTypes.HasModTime = true; 206 ContentTypes.HasLength = true; 207 208 while (true) { 209 Error Err = Error::success(); 210 StringRef Name = DebugLineData.getCStrRef(OffsetPtr, &Err); 211 if (!Err && Name.empty()) 212 break; 213 214 DWARFDebugLine::FileNameEntry FileEntry; 215 FileEntry.Name = 216 DWARFFormValue::createFromPValue(dwarf::DW_FORM_string, Name.data()); 217 FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr, &Err); 218 FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr, &Err); 219 FileEntry.Length = DebugLineData.getULEB128(OffsetPtr, &Err); 220 221 if (Err) { 222 consumeError(std::move(Err)); 223 return createStringError( 224 errc::invalid_argument, 225 "file names table was not null terminated before " 226 "the end of the prologue"); 227 } 228 FileNames.push_back(FileEntry); 229 } 230 231 return Error::success(); 232 } 233 234 // Parse v5 directory/file entry content descriptions. 235 // Returns the descriptors, or an error if we did not find a path or ran off 236 // the end of the prologue. 237 static llvm::Expected<ContentDescriptors> 238 parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, 239 DWARFDebugLine::ContentTypeTracker *ContentTypes) { 240 Error Err = Error::success(); 241 ContentDescriptors Descriptors; 242 int FormatCount = DebugLineData.getU8(OffsetPtr, &Err); 243 bool HasPath = false; 244 for (int I = 0; I != FormatCount && !Err; ++I) { 245 ContentDescriptor Descriptor; 246 Descriptor.Type = 247 dwarf::LineNumberEntryFormat(DebugLineData.getULEB128(OffsetPtr, &Err)); 248 Descriptor.Form = dwarf::Form(DebugLineData.getULEB128(OffsetPtr, &Err)); 249 if (Descriptor.Type == dwarf::DW_LNCT_path) 250 HasPath = true; 251 if (ContentTypes) 252 ContentTypes->trackContentType(Descriptor.Type); 253 Descriptors.push_back(Descriptor); 254 } 255 256 if (Err) 257 return createStringError(errc::invalid_argument, 258 "failed to parse entry content descriptors: %s", 259 toString(std::move(Err)).c_str()); 260 261 if (!HasPath) 262 return createStringError(errc::invalid_argument, 263 "failed to parse entry content descriptions" 264 " because no path was found"); 265 return Descriptors; 266 } 267 268 static Error 269 parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, 270 uint64_t *OffsetPtr, const dwarf::FormParams &FormParams, 271 const DWARFContext &Ctx, const DWARFUnit *U, 272 DWARFDebugLine::ContentTypeTracker &ContentTypes, 273 std::vector<DWARFFormValue> &IncludeDirectories, 274 std::vector<DWARFDebugLine::FileNameEntry> &FileNames) { 275 // Get the directory entry description. 276 llvm::Expected<ContentDescriptors> DirDescriptors = 277 parseV5EntryFormat(DebugLineData, OffsetPtr, nullptr); 278 if (!DirDescriptors) 279 return DirDescriptors.takeError(); 280 281 // Get the directory entries, according to the format described above. 282 uint64_t DirEntryCount = DebugLineData.getULEB128(OffsetPtr); 283 for (uint64_t I = 0; I != DirEntryCount; ++I) { 284 for (auto Descriptor : *DirDescriptors) { 285 DWARFFormValue Value(Descriptor.Form); 286 switch (Descriptor.Type) { 287 case DW_LNCT_path: 288 if (!Value.extractValue(DebugLineData, OffsetPtr, FormParams, &Ctx, U)) 289 return createStringError(errc::invalid_argument, 290 "failed to parse directory entry because " 291 "extracting the form value failed"); 292 IncludeDirectories.push_back(Value); 293 break; 294 default: 295 if (!Value.skipValue(DebugLineData, OffsetPtr, FormParams)) 296 return createStringError(errc::invalid_argument, 297 "failed to parse directory entry because " 298 "skipping the form value failed"); 299 } 300 } 301 } 302 303 // Get the file entry description. 304 llvm::Expected<ContentDescriptors> FileDescriptors = 305 parseV5EntryFormat(DebugLineData, OffsetPtr, &ContentTypes); 306 if (!FileDescriptors) 307 return FileDescriptors.takeError(); 308 309 // Get the file entries, according to the format described above. 310 uint64_t FileEntryCount = DebugLineData.getULEB128(OffsetPtr); 311 for (uint64_t I = 0; I != FileEntryCount; ++I) { 312 DWARFDebugLine::FileNameEntry FileEntry; 313 for (auto Descriptor : *FileDescriptors) { 314 DWARFFormValue Value(Descriptor.Form); 315 if (!Value.extractValue(DebugLineData, OffsetPtr, FormParams, &Ctx, U)) 316 return createStringError(errc::invalid_argument, 317 "failed to parse file entry because " 318 "extracting the form value failed"); 319 switch (Descriptor.Type) { 320 case DW_LNCT_path: 321 FileEntry.Name = Value; 322 break; 323 case DW_LNCT_LLVM_source: 324 FileEntry.Source = Value; 325 break; 326 case DW_LNCT_directory_index: 327 FileEntry.DirIdx = Value.getAsUnsignedConstant().value(); 328 break; 329 case DW_LNCT_timestamp: 330 FileEntry.ModTime = Value.getAsUnsignedConstant().value(); 331 break; 332 case DW_LNCT_size: 333 FileEntry.Length = Value.getAsUnsignedConstant().value(); 334 break; 335 case DW_LNCT_MD5: 336 if (!Value.getAsBlock() || Value.getAsBlock().value().size() != 16) 337 return createStringError( 338 errc::invalid_argument, 339 "failed to parse file entry because the MD5 hash is invalid"); 340 std::uninitialized_copy_n(Value.getAsBlock().value().begin(), 16, 341 FileEntry.Checksum.begin()); 342 break; 343 default: 344 break; 345 } 346 } 347 FileNames.push_back(FileEntry); 348 } 349 return Error::success(); 350 } 351 352 uint64_t DWARFDebugLine::Prologue::getLength() const { 353 uint64_t Length = PrologueLength + sizeofTotalLength() + 354 sizeof(getVersion()) + sizeofPrologueLength(); 355 if (getVersion() >= 5) 356 Length += 2; // Address + Segment selector sizes. 357 return Length; 358 } 359 360 Error DWARFDebugLine::Prologue::parse( 361 DWARFDataExtractor DebugLineData, uint64_t *OffsetPtr, 362 function_ref<void(Error)> RecoverableErrorHandler, const DWARFContext &Ctx, 363 const DWARFUnit *U) { 364 const uint64_t PrologueOffset = *OffsetPtr; 365 366 clear(); 367 DataExtractor::Cursor Cursor(*OffsetPtr); 368 std::tie(TotalLength, FormParams.Format) = 369 DebugLineData.getInitialLength(Cursor); 370 371 DebugLineData = 372 DWARFDataExtractor(DebugLineData, Cursor.tell() + TotalLength); 373 FormParams.Version = DebugLineData.getU16(Cursor); 374 if (Cursor && !versionIsSupported(getVersion())) { 375 // Treat this error as unrecoverable - we cannot be sure what any of 376 // the data represents including the length field, so cannot skip it or make 377 // any reasonable assumptions. 378 *OffsetPtr = Cursor.tell(); 379 return createStringError( 380 errc::not_supported, 381 "parsing line table prologue at offset 0x%8.8" PRIx64 382 ": unsupported version %" PRIu16, 383 PrologueOffset, getVersion()); 384 } 385 386 if (getVersion() >= 5) { 387 FormParams.AddrSize = DebugLineData.getU8(Cursor); 388 assert((!Cursor || DebugLineData.getAddressSize() == 0 || 389 DebugLineData.getAddressSize() == getAddressSize()) && 390 "Line table header and data extractor disagree"); 391 SegSelectorSize = DebugLineData.getU8(Cursor); 392 } 393 394 PrologueLength = 395 DebugLineData.getRelocatedValue(Cursor, sizeofPrologueLength()); 396 const uint64_t EndPrologueOffset = PrologueLength + Cursor.tell(); 397 DebugLineData = DWARFDataExtractor(DebugLineData, EndPrologueOffset); 398 MinInstLength = DebugLineData.getU8(Cursor); 399 if (getVersion() >= 4) 400 MaxOpsPerInst = DebugLineData.getU8(Cursor); 401 DefaultIsStmt = DebugLineData.getU8(Cursor); 402 LineBase = DebugLineData.getU8(Cursor); 403 LineRange = DebugLineData.getU8(Cursor); 404 OpcodeBase = DebugLineData.getU8(Cursor); 405 406 if (Cursor && OpcodeBase == 0) { 407 // If the opcode base is 0, we cannot read the standard opcode lengths (of 408 // which there are supposed to be one fewer than the opcode base). Assume 409 // there are no standard opcodes and continue parsing. 410 RecoverableErrorHandler(createStringError( 411 errc::invalid_argument, 412 "parsing line table prologue at offset 0x%8.8" PRIx64 413 " found opcode base of 0. Assuming no standard opcodes", 414 PrologueOffset)); 415 } else if (Cursor) { 416 StandardOpcodeLengths.reserve(OpcodeBase - 1); 417 for (uint32_t I = 1; I < OpcodeBase; ++I) { 418 uint8_t OpLen = DebugLineData.getU8(Cursor); 419 StandardOpcodeLengths.push_back(OpLen); 420 } 421 } 422 423 *OffsetPtr = Cursor.tell(); 424 // A corrupt file name or directory table does not prevent interpretation of 425 // the main line program, so check the cursor state now so that its errors can 426 // be handled separately. 427 if (!Cursor) 428 return createStringError( 429 errc::invalid_argument, 430 "parsing line table prologue at offset 0x%8.8" PRIx64 ": %s", 431 PrologueOffset, toString(Cursor.takeError()).c_str()); 432 433 Error E = 434 getVersion() >= 5 435 ? parseV5DirFileTables(DebugLineData, OffsetPtr, FormParams, Ctx, U, 436 ContentTypes, IncludeDirectories, FileNames) 437 : parseV2DirFileTables(DebugLineData, OffsetPtr, ContentTypes, 438 IncludeDirectories, FileNames); 439 if (E) { 440 RecoverableErrorHandler(joinErrors( 441 createStringError( 442 errc::invalid_argument, 443 "parsing line table prologue at 0x%8.8" PRIx64 444 " found an invalid directory or file table description at" 445 " 0x%8.8" PRIx64, 446 PrologueOffset, *OffsetPtr), 447 std::move(E))); 448 return Error::success(); 449 } 450 451 assert(*OffsetPtr <= EndPrologueOffset); 452 if (*OffsetPtr != EndPrologueOffset) { 453 RecoverableErrorHandler(createStringError( 454 errc::invalid_argument, 455 "unknown data in line table prologue at offset 0x%8.8" PRIx64 456 ": parsing ended (at offset 0x%8.8" PRIx64 457 ") before reaching the prologue end at offset 0x%8.8" PRIx64, 458 PrologueOffset, *OffsetPtr, EndPrologueOffset)); 459 } 460 return Error::success(); 461 } 462 463 DWARFDebugLine::Row::Row(bool DefaultIsStmt) { reset(DefaultIsStmt); } 464 465 void DWARFDebugLine::Row::postAppend() { 466 Discriminator = 0; 467 BasicBlock = false; 468 PrologueEnd = false; 469 EpilogueBegin = false; 470 } 471 472 void DWARFDebugLine::Row::reset(bool DefaultIsStmt) { 473 Address.Address = 0; 474 Address.SectionIndex = object::SectionedAddress::UndefSection; 475 Line = 1; 476 Column = 0; 477 File = 1; 478 Isa = 0; 479 Discriminator = 0; 480 IsStmt = DefaultIsStmt; 481 BasicBlock = false; 482 EndSequence = false; 483 PrologueEnd = false; 484 EpilogueBegin = false; 485 } 486 487 void DWARFDebugLine::Row::dumpTableHeader(raw_ostream &OS, unsigned Indent) { 488 OS.indent(Indent) 489 << "Address Line Column File ISA Discriminator Flags\n"; 490 OS.indent(Indent) 491 << "------------------ ------ ------ ------ --- ------------- " 492 "-------------\n"; 493 } 494 495 void DWARFDebugLine::Row::dump(raw_ostream &OS) const { 496 OS << format("0x%16.16" PRIx64 " %6u %6u", Address.Address, Line, Column) 497 << format(" %6u %3u %13u ", File, Isa, Discriminator) 498 << (IsStmt ? " is_stmt" : "") << (BasicBlock ? " basic_block" : "") 499 << (PrologueEnd ? " prologue_end" : "") 500 << (EpilogueBegin ? " epilogue_begin" : "") 501 << (EndSequence ? " end_sequence" : "") << '\n'; 502 } 503 504 DWARFDebugLine::Sequence::Sequence() { reset(); } 505 506 void DWARFDebugLine::Sequence::reset() { 507 LowPC = 0; 508 HighPC = 0; 509 SectionIndex = object::SectionedAddress::UndefSection; 510 FirstRowIndex = 0; 511 LastRowIndex = 0; 512 Empty = true; 513 } 514 515 DWARFDebugLine::LineTable::LineTable() { clear(); } 516 517 void DWARFDebugLine::LineTable::dump(raw_ostream &OS, 518 DIDumpOptions DumpOptions) const { 519 Prologue.dump(OS, DumpOptions); 520 521 if (!Rows.empty()) { 522 OS << '\n'; 523 Row::dumpTableHeader(OS, 0); 524 for (const Row &R : Rows) { 525 R.dump(OS); 526 } 527 } 528 529 // Terminate the table with a final blank line to clearly delineate it from 530 // later dumps. 531 OS << '\n'; 532 } 533 534 void DWARFDebugLine::LineTable::clear() { 535 Prologue.clear(); 536 Rows.clear(); 537 Sequences.clear(); 538 } 539 540 DWARFDebugLine::ParsingState::ParsingState( 541 struct LineTable *LT, uint64_t TableOffset, 542 function_ref<void(Error)> ErrorHandler) 543 : LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) { 544 resetRowAndSequence(); 545 } 546 547 void DWARFDebugLine::ParsingState::resetRowAndSequence() { 548 Row.reset(LineTable->Prologue.DefaultIsStmt); 549 Sequence.reset(); 550 } 551 552 void DWARFDebugLine::ParsingState::appendRowToMatrix() { 553 unsigned RowNumber = LineTable->Rows.size(); 554 if (Sequence.Empty) { 555 // Record the beginning of instruction sequence. 556 Sequence.Empty = false; 557 Sequence.LowPC = Row.Address.Address; 558 Sequence.FirstRowIndex = RowNumber; 559 } 560 LineTable->appendRow(Row); 561 if (Row.EndSequence) { 562 // Record the end of instruction sequence. 563 Sequence.HighPC = Row.Address.Address; 564 Sequence.LastRowIndex = RowNumber + 1; 565 Sequence.SectionIndex = Row.Address.SectionIndex; 566 if (Sequence.isValid()) 567 LineTable->appendSequence(Sequence); 568 Sequence.reset(); 569 } 570 Row.postAppend(); 571 } 572 573 const DWARFDebugLine::LineTable * 574 DWARFDebugLine::getLineTable(uint64_t Offset) const { 575 LineTableConstIter Pos = LineTableMap.find(Offset); 576 if (Pos != LineTableMap.end()) 577 return &Pos->second; 578 return nullptr; 579 } 580 581 Expected<const DWARFDebugLine::LineTable *> DWARFDebugLine::getOrParseLineTable( 582 DWARFDataExtractor &DebugLineData, uint64_t Offset, const DWARFContext &Ctx, 583 const DWARFUnit *U, function_ref<void(Error)> RecoverableErrorHandler) { 584 if (!DebugLineData.isValidOffset(Offset)) 585 return createStringError(errc::invalid_argument, "offset 0x%8.8" PRIx64 586 " is not a valid debug line section offset", 587 Offset); 588 589 std::pair<LineTableIter, bool> Pos = 590 LineTableMap.insert(LineTableMapTy::value_type(Offset, LineTable())); 591 LineTable *LT = &Pos.first->second; 592 if (Pos.second) { 593 if (Error Err = 594 LT->parse(DebugLineData, &Offset, Ctx, U, RecoverableErrorHandler)) 595 return std::move(Err); 596 return LT; 597 } 598 return LT; 599 } 600 601 void DWARFDebugLine::clearLineTable(uint64_t Offset) { 602 LineTableMap.erase(Offset); 603 } 604 605 static StringRef getOpcodeName(uint8_t Opcode, uint8_t OpcodeBase) { 606 assert(Opcode != 0); 607 if (Opcode < OpcodeBase) 608 return LNStandardString(Opcode); 609 return "special"; 610 } 611 612 uint64_t DWARFDebugLine::ParsingState::advanceAddr(uint64_t OperationAdvance, 613 uint8_t Opcode, 614 uint64_t OpcodeOffset) { 615 StringRef OpcodeName = getOpcodeName(Opcode, LineTable->Prologue.OpcodeBase); 616 // For versions less than 4, the MaxOpsPerInst member is set to 0, as the 617 // maximum_operations_per_instruction field wasn't introduced until DWARFv4. 618 // Don't warn about bad values in this situation. 619 if (ReportAdvanceAddrProblem && LineTable->Prologue.getVersion() >= 4 && 620 LineTable->Prologue.MaxOpsPerInst != 1) 621 ErrorHandler(createStringError( 622 errc::not_supported, 623 "line table program at offset 0x%8.8" PRIx64 624 " contains a %s opcode at offset 0x%8.8" PRIx64 625 ", but the prologue maximum_operations_per_instruction value is %" PRId8 626 ", which is unsupported. Assuming a value of 1 instead", 627 LineTableOffset, OpcodeName.data(), OpcodeOffset, 628 LineTable->Prologue.MaxOpsPerInst)); 629 if (ReportAdvanceAddrProblem && LineTable->Prologue.MinInstLength == 0) 630 ErrorHandler( 631 createStringError(errc::invalid_argument, 632 "line table program at offset 0x%8.8" PRIx64 633 " contains a %s opcode at offset 0x%8.8" PRIx64 634 ", but the prologue minimum_instruction_length value " 635 "is 0, which prevents any address advancing", 636 LineTableOffset, OpcodeName.data(), OpcodeOffset)); 637 ReportAdvanceAddrProblem = false; 638 uint64_t AddrOffset = OperationAdvance * LineTable->Prologue.MinInstLength; 639 Row.Address.Address += AddrOffset; 640 return AddrOffset; 641 } 642 643 DWARFDebugLine::ParsingState::AddrAndAdjustedOpcode 644 DWARFDebugLine::ParsingState::advanceAddrForOpcode(uint8_t Opcode, 645 uint64_t OpcodeOffset) { 646 assert(Opcode == DW_LNS_const_add_pc || 647 Opcode >= LineTable->Prologue.OpcodeBase); 648 if (ReportBadLineRange && LineTable->Prologue.LineRange == 0) { 649 StringRef OpcodeName = 650 getOpcodeName(Opcode, LineTable->Prologue.OpcodeBase); 651 ErrorHandler( 652 createStringError(errc::not_supported, 653 "line table program at offset 0x%8.8" PRIx64 654 " contains a %s opcode at offset 0x%8.8" PRIx64 655 ", but the prologue line_range value is 0. The " 656 "address and line will not be adjusted", 657 LineTableOffset, OpcodeName.data(), OpcodeOffset)); 658 ReportBadLineRange = false; 659 } 660 661 uint8_t OpcodeValue = Opcode; 662 if (Opcode == DW_LNS_const_add_pc) 663 OpcodeValue = 255; 664 uint8_t AdjustedOpcode = OpcodeValue - LineTable->Prologue.OpcodeBase; 665 uint64_t OperationAdvance = 666 LineTable->Prologue.LineRange != 0 667 ? AdjustedOpcode / LineTable->Prologue.LineRange 668 : 0; 669 uint64_t AddrOffset = advanceAddr(OperationAdvance, Opcode, OpcodeOffset); 670 return {AddrOffset, AdjustedOpcode}; 671 } 672 673 DWARFDebugLine::ParsingState::AddrAndLineDelta 674 DWARFDebugLine::ParsingState::handleSpecialOpcode(uint8_t Opcode, 675 uint64_t OpcodeOffset) { 676 // A special opcode value is chosen based on the amount that needs 677 // to be added to the line and address registers. The maximum line 678 // increment for a special opcode is the value of the line_base 679 // field in the header, plus the value of the line_range field, 680 // minus 1 (line base + line range - 1). If the desired line 681 // increment is greater than the maximum line increment, a standard 682 // opcode must be used instead of a special opcode. The "address 683 // advance" is calculated by dividing the desired address increment 684 // by the minimum_instruction_length field from the header. The 685 // special opcode is then calculated using the following formula: 686 // 687 // opcode = (desired line increment - line_base) + 688 // (line_range * address advance) + opcode_base 689 // 690 // If the resulting opcode is greater than 255, a standard opcode 691 // must be used instead. 692 // 693 // To decode a special opcode, subtract the opcode_base from the 694 // opcode itself to give the adjusted opcode. The amount to 695 // increment the address register is the result of the adjusted 696 // opcode divided by the line_range multiplied by the 697 // minimum_instruction_length field from the header. That is: 698 // 699 // address increment = (adjusted opcode / line_range) * 700 // minimum_instruction_length 701 // 702 // The amount to increment the line register is the line_base plus 703 // the result of the adjusted opcode modulo the line_range. That is: 704 // 705 // line increment = line_base + (adjusted opcode % line_range) 706 707 DWARFDebugLine::ParsingState::AddrAndAdjustedOpcode AddrAdvanceResult = 708 advanceAddrForOpcode(Opcode, OpcodeOffset); 709 int32_t LineOffset = 0; 710 if (LineTable->Prologue.LineRange != 0) 711 LineOffset = 712 LineTable->Prologue.LineBase + 713 (AddrAdvanceResult.AdjustedOpcode % LineTable->Prologue.LineRange); 714 Row.Line += LineOffset; 715 return {AddrAdvanceResult.AddrDelta, LineOffset}; 716 } 717 718 /// Parse a ULEB128 using the specified \p Cursor. \returns the parsed value on 719 /// success, or None if \p Cursor is in a failing state. 720 template <typename T> 721 static std::optional<T> parseULEB128(DWARFDataExtractor &Data, 722 DataExtractor::Cursor &Cursor) { 723 T Value = Data.getULEB128(Cursor); 724 if (Cursor) 725 return Value; 726 return std::nullopt; 727 } 728 729 Error DWARFDebugLine::LineTable::parse( 730 DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, 731 const DWARFContext &Ctx, const DWARFUnit *U, 732 function_ref<void(Error)> RecoverableErrorHandler, raw_ostream *OS, 733 bool Verbose) { 734 assert((OS || !Verbose) && "cannot have verbose output without stream"); 735 const uint64_t DebugLineOffset = *OffsetPtr; 736 737 clear(); 738 739 Error PrologueErr = 740 Prologue.parse(DebugLineData, OffsetPtr, RecoverableErrorHandler, Ctx, U); 741 742 if (OS) { 743 DIDumpOptions DumpOptions; 744 DumpOptions.Verbose = Verbose; 745 Prologue.dump(*OS, DumpOptions); 746 } 747 748 if (PrologueErr) { 749 // Ensure there is a blank line after the prologue to clearly delineate it 750 // from later dumps. 751 if (OS) 752 *OS << "\n"; 753 return PrologueErr; 754 } 755 756 uint64_t ProgramLength = Prologue.TotalLength + Prologue.sizeofTotalLength(); 757 if (!DebugLineData.isValidOffsetForDataOfSize(DebugLineOffset, 758 ProgramLength)) { 759 assert(DebugLineData.size() > DebugLineOffset && 760 "prologue parsing should handle invalid offset"); 761 uint64_t BytesRemaining = DebugLineData.size() - DebugLineOffset; 762 RecoverableErrorHandler( 763 createStringError(errc::invalid_argument, 764 "line table program with offset 0x%8.8" PRIx64 765 " has length 0x%8.8" PRIx64 " but only 0x%8.8" PRIx64 766 " bytes are available", 767 DebugLineOffset, ProgramLength, BytesRemaining)); 768 // Continue by capping the length at the number of remaining bytes. 769 ProgramLength = BytesRemaining; 770 } 771 772 // Create a DataExtractor which can only see the data up to the end of the 773 // table, to prevent reading past the end. 774 const uint64_t EndOffset = DebugLineOffset + ProgramLength; 775 DWARFDataExtractor TableData(DebugLineData, EndOffset); 776 777 // See if we should tell the data extractor the address size. 778 if (TableData.getAddressSize() == 0) 779 TableData.setAddressSize(Prologue.getAddressSize()); 780 else 781 assert(Prologue.getAddressSize() == 0 || 782 Prologue.getAddressSize() == TableData.getAddressSize()); 783 784 ParsingState State(this, DebugLineOffset, RecoverableErrorHandler); 785 786 *OffsetPtr = DebugLineOffset + Prologue.getLength(); 787 if (OS && *OffsetPtr < EndOffset) { 788 *OS << '\n'; 789 Row::dumpTableHeader(*OS, /*Indent=*/Verbose ? 12 : 0); 790 } 791 bool TombstonedAddress = false; 792 auto EmitRow = [&] { 793 if (!TombstonedAddress) { 794 if (Verbose) { 795 *OS << "\n"; 796 OS->indent(12); 797 } 798 if (OS) 799 State.Row.dump(*OS); 800 State.appendRowToMatrix(); 801 } 802 }; 803 while (*OffsetPtr < EndOffset) { 804 DataExtractor::Cursor Cursor(*OffsetPtr); 805 806 if (Verbose) 807 *OS << format("0x%08.08" PRIx64 ": ", *OffsetPtr); 808 809 uint64_t OpcodeOffset = *OffsetPtr; 810 uint8_t Opcode = TableData.getU8(Cursor); 811 size_t RowCount = Rows.size(); 812 813 if (Cursor && Verbose) 814 *OS << format("%02.02" PRIx8 " ", Opcode); 815 816 if (Opcode == 0) { 817 // Extended Opcodes always start with a zero opcode followed by 818 // a uleb128 length so you can skip ones you don't know about 819 uint64_t Len = TableData.getULEB128(Cursor); 820 uint64_t ExtOffset = Cursor.tell(); 821 822 // Tolerate zero-length; assume length is correct and soldier on. 823 if (Len == 0) { 824 if (Cursor && Verbose) 825 *OS << "Badly formed extended line op (length 0)\n"; 826 if (!Cursor) { 827 if (Verbose) 828 *OS << "\n"; 829 RecoverableErrorHandler(Cursor.takeError()); 830 } 831 *OffsetPtr = Cursor.tell(); 832 continue; 833 } 834 835 uint8_t SubOpcode = TableData.getU8(Cursor); 836 // OperandOffset will be the same as ExtOffset, if it was not possible to 837 // read the SubOpcode. 838 uint64_t OperandOffset = Cursor.tell(); 839 if (Verbose) 840 *OS << LNExtendedString(SubOpcode); 841 switch (SubOpcode) { 842 case DW_LNE_end_sequence: 843 // Set the end_sequence register of the state machine to true and 844 // append a row to the matrix using the current values of the 845 // state-machine registers. Then reset the registers to the initial 846 // values specified above. Every statement program sequence must end 847 // with a DW_LNE_end_sequence instruction which creates a row whose 848 // address is that of the byte after the last target machine instruction 849 // of the sequence. 850 State.Row.EndSequence = true; 851 // No need to test the Cursor is valid here, since it must be to get 852 // into this code path - if it were invalid, the default case would be 853 // followed. 854 EmitRow(); 855 State.resetRowAndSequence(); 856 break; 857 858 case DW_LNE_set_address: 859 // Takes a single relocatable address as an operand. The size of the 860 // operand is the size appropriate to hold an address on the target 861 // machine. Set the address register to the value given by the 862 // relocatable address. All of the other statement program opcodes 863 // that affect the address register add a delta to it. This instruction 864 // stores a relocatable value into it instead. 865 // 866 // Make sure the extractor knows the address size. If not, infer it 867 // from the size of the operand. 868 { 869 uint8_t ExtractorAddressSize = TableData.getAddressSize(); 870 uint64_t OpcodeAddressSize = Len - 1; 871 if (ExtractorAddressSize != OpcodeAddressSize && 872 ExtractorAddressSize != 0) 873 RecoverableErrorHandler(createStringError( 874 errc::invalid_argument, 875 "mismatching address size at offset 0x%8.8" PRIx64 876 " expected 0x%2.2" PRIx8 " found 0x%2.2" PRIx64, 877 ExtOffset, ExtractorAddressSize, Len - 1)); 878 879 // Assume that the line table is correct and temporarily override the 880 // address size. If the size is unsupported, give up trying to read 881 // the address and continue to the next opcode. 882 if (OpcodeAddressSize != 1 && OpcodeAddressSize != 2 && 883 OpcodeAddressSize != 4 && OpcodeAddressSize != 8) { 884 RecoverableErrorHandler(createStringError( 885 errc::invalid_argument, 886 "address size 0x%2.2" PRIx64 887 " of DW_LNE_set_address opcode at offset 0x%8.8" PRIx64 888 " is unsupported", 889 OpcodeAddressSize, ExtOffset)); 890 TableData.skip(Cursor, OpcodeAddressSize); 891 } else { 892 TableData.setAddressSize(OpcodeAddressSize); 893 State.Row.Address.Address = TableData.getRelocatedAddress( 894 Cursor, &State.Row.Address.SectionIndex); 895 896 uint64_t Tombstone = 897 dwarf::computeTombstoneAddress(OpcodeAddressSize); 898 TombstonedAddress = State.Row.Address.Address == Tombstone; 899 900 // Restore the address size if the extractor already had it. 901 if (ExtractorAddressSize != 0) 902 TableData.setAddressSize(ExtractorAddressSize); 903 } 904 905 if (Cursor && Verbose) { 906 *OS << " ("; 907 DWARFFormValue::dumpAddress(*OS, OpcodeAddressSize, State.Row.Address.Address); 908 *OS << ')'; 909 } 910 } 911 break; 912 913 case DW_LNE_define_file: 914 // Takes 4 arguments. The first is a null terminated string containing 915 // a source file name. The second is an unsigned LEB128 number 916 // representing the directory index of the directory in which the file 917 // was found. The third is an unsigned LEB128 number representing the 918 // time of last modification of the file. The fourth is an unsigned 919 // LEB128 number representing the length in bytes of the file. The time 920 // and length fields may contain LEB128(0) if the information is not 921 // available. 922 // 923 // The directory index represents an entry in the include_directories 924 // section of the statement program prologue. The index is LEB128(0) 925 // if the file was found in the current directory of the compilation, 926 // LEB128(1) if it was found in the first directory in the 927 // include_directories section, and so on. The directory index is 928 // ignored for file names that represent full path names. 929 // 930 // The files are numbered, starting at 1, in the order in which they 931 // appear; the names in the prologue come before names defined by 932 // the DW_LNE_define_file instruction. These numbers are used in the 933 // the file register of the state machine. 934 { 935 FileNameEntry FileEntry; 936 const char *Name = TableData.getCStr(Cursor); 937 FileEntry.Name = 938 DWARFFormValue::createFromPValue(dwarf::DW_FORM_string, Name); 939 FileEntry.DirIdx = TableData.getULEB128(Cursor); 940 FileEntry.ModTime = TableData.getULEB128(Cursor); 941 FileEntry.Length = TableData.getULEB128(Cursor); 942 Prologue.FileNames.push_back(FileEntry); 943 if (Cursor && Verbose) 944 *OS << " (" << Name << ", dir=" << FileEntry.DirIdx << ", mod_time=" 945 << format("(0x%16.16" PRIx64 ")", FileEntry.ModTime) 946 << ", length=" << FileEntry.Length << ")"; 947 } 948 break; 949 950 case DW_LNE_set_discriminator: 951 State.Row.Discriminator = TableData.getULEB128(Cursor); 952 if (Cursor && Verbose) 953 *OS << " (" << State.Row.Discriminator << ")"; 954 break; 955 956 default: 957 if (Cursor && Verbose) 958 *OS << format("Unrecognized extended op 0x%02.02" PRIx8, SubOpcode) 959 << format(" length %" PRIx64, Len); 960 // Len doesn't include the zero opcode byte or the length itself, but 961 // it does include the sub_opcode, so we have to adjust for that. 962 TableData.skip(Cursor, Len - 1); 963 break; 964 } 965 // Make sure the length as recorded in the table and the standard length 966 // for the opcode match. If they don't, continue from the end as claimed 967 // by the table. Similarly, continue from the claimed end in the event of 968 // a parsing error. 969 uint64_t End = ExtOffset + Len; 970 if (Cursor && Cursor.tell() != End) 971 RecoverableErrorHandler(createStringError( 972 errc::illegal_byte_sequence, 973 "unexpected line op length at offset 0x%8.8" PRIx64 974 " expected 0x%2.2" PRIx64 " found 0x%2.2" PRIx64, 975 ExtOffset, Len, Cursor.tell() - ExtOffset)); 976 if (!Cursor && Verbose) { 977 DWARFDataExtractor::Cursor ByteCursor(OperandOffset); 978 uint8_t Byte = TableData.getU8(ByteCursor); 979 if (ByteCursor) { 980 *OS << " (<parsing error>"; 981 do { 982 *OS << format(" %2.2" PRIx8, Byte); 983 Byte = TableData.getU8(ByteCursor); 984 } while (ByteCursor); 985 *OS << ")"; 986 } 987 988 // The only parse failure in this case should be if the end was reached. 989 // In that case, throw away the error, as the main Cursor's error will 990 // be sufficient. 991 consumeError(ByteCursor.takeError()); 992 } 993 *OffsetPtr = End; 994 } else if (Opcode < Prologue.OpcodeBase) { 995 if (Verbose) 996 *OS << LNStandardString(Opcode); 997 switch (Opcode) { 998 // Standard Opcodes 999 case DW_LNS_copy: 1000 // Takes no arguments. Append a row to the matrix using the 1001 // current values of the state-machine registers. 1002 EmitRow(); 1003 break; 1004 1005 case DW_LNS_advance_pc: 1006 // Takes a single unsigned LEB128 operand, multiplies it by the 1007 // min_inst_length field of the prologue, and adds the 1008 // result to the address register of the state machine. 1009 if (std::optional<uint64_t> Operand = 1010 parseULEB128<uint64_t>(TableData, Cursor)) { 1011 uint64_t AddrOffset = 1012 State.advanceAddr(*Operand, Opcode, OpcodeOffset); 1013 if (Verbose) 1014 *OS << " (" << AddrOffset << ")"; 1015 } 1016 break; 1017 1018 case DW_LNS_advance_line: 1019 // Takes a single signed LEB128 operand and adds that value to 1020 // the line register of the state machine. 1021 { 1022 int64_t LineDelta = TableData.getSLEB128(Cursor); 1023 if (Cursor) { 1024 State.Row.Line += LineDelta; 1025 if (Verbose) 1026 *OS << " (" << State.Row.Line << ")"; 1027 } 1028 } 1029 break; 1030 1031 case DW_LNS_set_file: 1032 // Takes a single unsigned LEB128 operand and stores it in the file 1033 // register of the state machine. 1034 if (std::optional<uint16_t> File = 1035 parseULEB128<uint16_t>(TableData, Cursor)) { 1036 State.Row.File = *File; 1037 if (Verbose) 1038 *OS << " (" << State.Row.File << ")"; 1039 } 1040 break; 1041 1042 case DW_LNS_set_column: 1043 // Takes a single unsigned LEB128 operand and stores it in the 1044 // column register of the state machine. 1045 if (std::optional<uint16_t> Column = 1046 parseULEB128<uint16_t>(TableData, Cursor)) { 1047 State.Row.Column = *Column; 1048 if (Verbose) 1049 *OS << " (" << State.Row.Column << ")"; 1050 } 1051 break; 1052 1053 case DW_LNS_negate_stmt: 1054 // Takes no arguments. Set the is_stmt register of the state 1055 // machine to the logical negation of its current value. 1056 State.Row.IsStmt = !State.Row.IsStmt; 1057 break; 1058 1059 case DW_LNS_set_basic_block: 1060 // Takes no arguments. Set the basic_block register of the 1061 // state machine to true 1062 State.Row.BasicBlock = true; 1063 break; 1064 1065 case DW_LNS_const_add_pc: 1066 // Takes no arguments. Add to the address register of the state 1067 // machine the address increment value corresponding to special 1068 // opcode 255. The motivation for DW_LNS_const_add_pc is this: 1069 // when the statement program needs to advance the address by a 1070 // small amount, it can use a single special opcode, which occupies 1071 // a single byte. When it needs to advance the address by up to 1072 // twice the range of the last special opcode, it can use 1073 // DW_LNS_const_add_pc followed by a special opcode, for a total 1074 // of two bytes. Only if it needs to advance the address by more 1075 // than twice that range will it need to use both DW_LNS_advance_pc 1076 // and a special opcode, requiring three or more bytes. 1077 { 1078 uint64_t AddrOffset = 1079 State.advanceAddrForOpcode(Opcode, OpcodeOffset).AddrDelta; 1080 if (Verbose) 1081 *OS << format(" (0x%16.16" PRIx64 ")", AddrOffset); 1082 } 1083 break; 1084 1085 case DW_LNS_fixed_advance_pc: 1086 // Takes a single uhalf operand. Add to the address register of 1087 // the state machine the value of the (unencoded) operand. This 1088 // is the only extended opcode that takes an argument that is not 1089 // a variable length number. The motivation for DW_LNS_fixed_advance_pc 1090 // is this: existing assemblers cannot emit DW_LNS_advance_pc or 1091 // special opcodes because they cannot encode LEB128 numbers or 1092 // judge when the computation of a special opcode overflows and 1093 // requires the use of DW_LNS_advance_pc. Such assemblers, however, 1094 // can use DW_LNS_fixed_advance_pc instead, sacrificing compression. 1095 { 1096 uint16_t PCOffset = 1097 TableData.getRelocatedValue(Cursor, 2); 1098 if (Cursor) { 1099 State.Row.Address.Address += PCOffset; 1100 if (Verbose) 1101 *OS << format(" (0x%4.4" PRIx16 ")", PCOffset); 1102 } 1103 } 1104 break; 1105 1106 case DW_LNS_set_prologue_end: 1107 // Takes no arguments. Set the prologue_end register of the 1108 // state machine to true 1109 State.Row.PrologueEnd = true; 1110 break; 1111 1112 case DW_LNS_set_epilogue_begin: 1113 // Takes no arguments. Set the basic_block register of the 1114 // state machine to true 1115 State.Row.EpilogueBegin = true; 1116 break; 1117 1118 case DW_LNS_set_isa: 1119 // Takes a single unsigned LEB128 operand and stores it in the 1120 // ISA register of the state machine. 1121 if (std::optional<uint8_t> Isa = 1122 parseULEB128<uint8_t>(TableData, Cursor)) { 1123 State.Row.Isa = *Isa; 1124 if (Verbose) 1125 *OS << " (" << (uint64_t)State.Row.Isa << ")"; 1126 } 1127 break; 1128 1129 default: 1130 // Handle any unknown standard opcodes here. We know the lengths 1131 // of such opcodes because they are specified in the prologue 1132 // as a multiple of LEB128 operands for each opcode. 1133 { 1134 assert(Opcode - 1U < Prologue.StandardOpcodeLengths.size()); 1135 if (Verbose) 1136 *OS << "Unrecognized standard opcode"; 1137 uint8_t OpcodeLength = Prologue.StandardOpcodeLengths[Opcode - 1]; 1138 std::vector<uint64_t> Operands; 1139 for (uint8_t I = 0; I < OpcodeLength; ++I) { 1140 if (std::optional<uint64_t> Value = 1141 parseULEB128<uint64_t>(TableData, Cursor)) 1142 Operands.push_back(*Value); 1143 else 1144 break; 1145 } 1146 if (Verbose && !Operands.empty()) { 1147 *OS << " (operands: "; 1148 bool First = true; 1149 for (uint64_t Value : Operands) { 1150 if (!First) 1151 *OS << ", "; 1152 First = false; 1153 *OS << format("0x%16.16" PRIx64, Value); 1154 } 1155 if (Verbose) 1156 *OS << ')'; 1157 } 1158 } 1159 break; 1160 } 1161 1162 *OffsetPtr = Cursor.tell(); 1163 } else { 1164 // Special Opcodes. 1165 ParsingState::AddrAndLineDelta Delta = 1166 State.handleSpecialOpcode(Opcode, OpcodeOffset); 1167 1168 if (Verbose) 1169 *OS << "address += " << Delta.Address << ", line += " << Delta.Line; 1170 EmitRow(); 1171 *OffsetPtr = Cursor.tell(); 1172 } 1173 1174 // When a row is added to the matrix, it is also dumped, which includes a 1175 // new line already, so don't add an extra one. 1176 if (Verbose && Rows.size() == RowCount) 1177 *OS << "\n"; 1178 1179 // Most parse failures other than when parsing extended opcodes are due to 1180 // failures to read ULEBs. Bail out of parsing, since we don't know where to 1181 // continue reading from as there is no stated length for such byte 1182 // sequences. Print the final trailing new line if needed before doing so. 1183 if (!Cursor && Opcode != 0) { 1184 if (Verbose) 1185 *OS << "\n"; 1186 return Cursor.takeError(); 1187 } 1188 1189 if (!Cursor) 1190 RecoverableErrorHandler(Cursor.takeError()); 1191 } 1192 1193 if (!State.Sequence.Empty) 1194 RecoverableErrorHandler(createStringError( 1195 errc::illegal_byte_sequence, 1196 "last sequence in debug line table at offset 0x%8.8" PRIx64 1197 " is not terminated", 1198 DebugLineOffset)); 1199 1200 // Sort all sequences so that address lookup will work faster. 1201 if (!Sequences.empty()) { 1202 llvm::sort(Sequences, Sequence::orderByHighPC); 1203 // Note: actually, instruction address ranges of sequences should not 1204 // overlap (in shared objects and executables). If they do, the address 1205 // lookup would still work, though, but result would be ambiguous. 1206 // We don't report warning in this case. For example, 1207 // sometimes .so compiled from multiple object files contains a few 1208 // rudimentary sequences for address ranges [0x0, 0xsomething). 1209 } 1210 1211 // Terminate the table with a final blank line to clearly delineate it from 1212 // later dumps. 1213 if (OS) 1214 *OS << "\n"; 1215 1216 return Error::success(); 1217 } 1218 1219 uint32_t DWARFDebugLine::LineTable::findRowInSeq( 1220 const DWARFDebugLine::Sequence &Seq, 1221 object::SectionedAddress Address) const { 1222 if (!Seq.containsPC(Address)) 1223 return UnknownRowIndex; 1224 assert(Seq.SectionIndex == Address.SectionIndex); 1225 // In some cases, e.g. first instruction in a function, the compiler generates 1226 // two entries, both with the same address. We want the last one. 1227 // 1228 // In general we want a non-empty range: the last row whose address is less 1229 // than or equal to Address. This can be computed as upper_bound - 1. 1230 DWARFDebugLine::Row Row; 1231 Row.Address = Address; 1232 RowIter FirstRow = Rows.begin() + Seq.FirstRowIndex; 1233 RowIter LastRow = Rows.begin() + Seq.LastRowIndex; 1234 assert(FirstRow->Address.Address <= Row.Address.Address && 1235 Row.Address.Address < LastRow[-1].Address.Address); 1236 RowIter RowPos = std::upper_bound(FirstRow + 1, LastRow - 1, Row, 1237 DWARFDebugLine::Row::orderByAddress) - 1238 1; 1239 assert(Seq.SectionIndex == RowPos->Address.SectionIndex); 1240 return RowPos - Rows.begin(); 1241 } 1242 1243 uint32_t DWARFDebugLine::LineTable::lookupAddress( 1244 object::SectionedAddress Address) const { 1245 1246 // Search for relocatable addresses 1247 uint32_t Result = lookupAddressImpl(Address); 1248 1249 if (Result != UnknownRowIndex || 1250 Address.SectionIndex == object::SectionedAddress::UndefSection) 1251 return Result; 1252 1253 // Search for absolute addresses 1254 Address.SectionIndex = object::SectionedAddress::UndefSection; 1255 return lookupAddressImpl(Address); 1256 } 1257 1258 uint32_t DWARFDebugLine::LineTable::lookupAddressImpl( 1259 object::SectionedAddress Address) const { 1260 // First, find an instruction sequence containing the given address. 1261 DWARFDebugLine::Sequence Sequence; 1262 Sequence.SectionIndex = Address.SectionIndex; 1263 Sequence.HighPC = Address.Address; 1264 SequenceIter It = llvm::upper_bound(Sequences, Sequence, 1265 DWARFDebugLine::Sequence::orderByHighPC); 1266 if (It == Sequences.end() || It->SectionIndex != Address.SectionIndex) 1267 return UnknownRowIndex; 1268 return findRowInSeq(*It, Address); 1269 } 1270 1271 bool DWARFDebugLine::LineTable::lookupAddressRange( 1272 object::SectionedAddress Address, uint64_t Size, 1273 std::vector<uint32_t> &Result) const { 1274 1275 // Search for relocatable addresses 1276 if (lookupAddressRangeImpl(Address, Size, Result)) 1277 return true; 1278 1279 if (Address.SectionIndex == object::SectionedAddress::UndefSection) 1280 return false; 1281 1282 // Search for absolute addresses 1283 Address.SectionIndex = object::SectionedAddress::UndefSection; 1284 return lookupAddressRangeImpl(Address, Size, Result); 1285 } 1286 1287 bool DWARFDebugLine::LineTable::lookupAddressRangeImpl( 1288 object::SectionedAddress Address, uint64_t Size, 1289 std::vector<uint32_t> &Result) const { 1290 if (Sequences.empty()) 1291 return false; 1292 uint64_t EndAddr = Address.Address + Size; 1293 // First, find an instruction sequence containing the given address. 1294 DWARFDebugLine::Sequence Sequence; 1295 Sequence.SectionIndex = Address.SectionIndex; 1296 Sequence.HighPC = Address.Address; 1297 SequenceIter LastSeq = Sequences.end(); 1298 SequenceIter SeqPos = llvm::upper_bound( 1299 Sequences, Sequence, DWARFDebugLine::Sequence::orderByHighPC); 1300 if (SeqPos == LastSeq || !SeqPos->containsPC(Address)) 1301 return false; 1302 1303 SequenceIter StartPos = SeqPos; 1304 1305 // Add the rows from the first sequence to the vector, starting with the 1306 // index we just calculated 1307 1308 while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) { 1309 const DWARFDebugLine::Sequence &CurSeq = *SeqPos; 1310 // For the first sequence, we need to find which row in the sequence is the 1311 // first in our range. 1312 uint32_t FirstRowIndex = CurSeq.FirstRowIndex; 1313 if (SeqPos == StartPos) 1314 FirstRowIndex = findRowInSeq(CurSeq, Address); 1315 1316 // Figure out the last row in the range. 1317 uint32_t LastRowIndex = 1318 findRowInSeq(CurSeq, {EndAddr - 1, Address.SectionIndex}); 1319 if (LastRowIndex == UnknownRowIndex) 1320 LastRowIndex = CurSeq.LastRowIndex - 1; 1321 1322 assert(FirstRowIndex != UnknownRowIndex); 1323 assert(LastRowIndex != UnknownRowIndex); 1324 1325 for (uint32_t I = FirstRowIndex; I <= LastRowIndex; ++I) { 1326 Result.push_back(I); 1327 } 1328 1329 ++SeqPos; 1330 } 1331 1332 return true; 1333 } 1334 1335 std::optional<StringRef> 1336 DWARFDebugLine::LineTable::getSourceByIndex(uint64_t FileIndex, 1337 FileLineInfoKind Kind) const { 1338 if (Kind == FileLineInfoKind::None || !Prologue.hasFileAtIndex(FileIndex)) 1339 return std::nullopt; 1340 const FileNameEntry &Entry = Prologue.getFileNameEntry(FileIndex); 1341 if (auto E = dwarf::toString(Entry.Source)) 1342 return StringRef(*E); 1343 return std::nullopt; 1344 } 1345 1346 static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) { 1347 // Debug info can contain paths from any OS, not necessarily 1348 // an OS we're currently running on. Moreover different compilation units can 1349 // be compiled on different operating systems and linked together later. 1350 return sys::path::is_absolute(Path, sys::path::Style::posix) || 1351 sys::path::is_absolute(Path, sys::path::Style::windows); 1352 } 1353 1354 bool DWARFDebugLine::Prologue::getFileNameByIndex( 1355 uint64_t FileIndex, StringRef CompDir, FileLineInfoKind Kind, 1356 std::string &Result, sys::path::Style Style) const { 1357 if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex)) 1358 return false; 1359 const FileNameEntry &Entry = getFileNameEntry(FileIndex); 1360 auto E = dwarf::toString(Entry.Name); 1361 if (!E) 1362 return false; 1363 StringRef FileName = *E; 1364 if (Kind == FileLineInfoKind::RawValue || 1365 isPathAbsoluteOnWindowsOrPosix(FileName)) { 1366 Result = std::string(FileName); 1367 return true; 1368 } 1369 if (Kind == FileLineInfoKind::BaseNameOnly) { 1370 Result = std::string(llvm::sys::path::filename(FileName)); 1371 return true; 1372 } 1373 1374 SmallString<16> FilePath; 1375 StringRef IncludeDir; 1376 // Be defensive about the contents of Entry. 1377 if (getVersion() >= 5) { 1378 // DirIdx 0 is the compilation directory, so don't include it for 1379 // relative names. 1380 if ((Entry.DirIdx != 0 || Kind != FileLineInfoKind::RelativeFilePath) && 1381 Entry.DirIdx < IncludeDirectories.size()) 1382 IncludeDir = dwarf::toStringRef(IncludeDirectories[Entry.DirIdx]); 1383 } else { 1384 if (0 < Entry.DirIdx && Entry.DirIdx <= IncludeDirectories.size()) 1385 IncludeDir = dwarf::toStringRef(IncludeDirectories[Entry.DirIdx - 1]); 1386 } 1387 1388 // For absolute paths only, include the compilation directory of compile unit, 1389 // unless v5 DirIdx == 0 (IncludeDir indicates the compilation directory). We 1390 // know that FileName is not absolute, the only way to have an absolute path 1391 // at this point would be if IncludeDir is absolute. 1392 if (Kind == FileLineInfoKind::AbsoluteFilePath && 1393 (getVersion() < 5 || Entry.DirIdx != 0) && !CompDir.empty() && 1394 !isPathAbsoluteOnWindowsOrPosix(IncludeDir)) 1395 sys::path::append(FilePath, Style, CompDir); 1396 1397 assert((Kind == FileLineInfoKind::AbsoluteFilePath || 1398 Kind == FileLineInfoKind::RelativeFilePath) && 1399 "invalid FileLineInfo Kind"); 1400 1401 // sys::path::append skips empty strings. 1402 sys::path::append(FilePath, Style, IncludeDir, FileName); 1403 Result = std::string(FilePath.str()); 1404 return true; 1405 } 1406 1407 bool DWARFDebugLine::LineTable::getFileLineInfoForAddress( 1408 object::SectionedAddress Address, const char *CompDir, 1409 FileLineInfoKind Kind, DILineInfo &Result) const { 1410 // Get the index of row we're looking for in the line table. 1411 uint32_t RowIndex = lookupAddress(Address); 1412 if (RowIndex == -1U) 1413 return false; 1414 // Take file number and line/column from the row. 1415 const auto &Row = Rows[RowIndex]; 1416 if (!getFileNameByIndex(Row.File, CompDir, Kind, Result.FileName)) 1417 return false; 1418 Result.Line = Row.Line; 1419 Result.Column = Row.Column; 1420 Result.Discriminator = Row.Discriminator; 1421 Result.Source = getSourceByIndex(Row.File, Kind); 1422 return true; 1423 } 1424 1425 bool DWARFDebugLine::LineTable::getDirectoryForEntry( 1426 const FileNameEntry &Entry, std::string &Directory) const { 1427 if (Prologue.getVersion() >= 5) { 1428 if (Entry.DirIdx < Prologue.IncludeDirectories.size()) { 1429 Directory = 1430 dwarf::toString(Prologue.IncludeDirectories[Entry.DirIdx], ""); 1431 return true; 1432 } 1433 return false; 1434 } 1435 if (0 < Entry.DirIdx && Entry.DirIdx <= Prologue.IncludeDirectories.size()) { 1436 Directory = 1437 dwarf::toString(Prologue.IncludeDirectories[Entry.DirIdx - 1], ""); 1438 return true; 1439 } 1440 return false; 1441 } 1442 1443 // We want to supply the Unit associated with a .debug_line[.dwo] table when 1444 // we dump it, if possible, but still dump the table even if there isn't a Unit. 1445 // Therefore, collect up handles on all the Units that point into the 1446 // line-table section. 1447 static DWARFDebugLine::SectionParser::LineToUnitMap 1448 buildLineToUnitMap(DWARFUnitVector::iterator_range Units) { 1449 DWARFDebugLine::SectionParser::LineToUnitMap LineToUnit; 1450 for (const auto &U : Units) 1451 if (auto CUDIE = U->getUnitDIE()) 1452 if (auto StmtOffset = toSectionOffset(CUDIE.find(DW_AT_stmt_list))) 1453 LineToUnit.insert(std::make_pair(*StmtOffset, &*U)); 1454 return LineToUnit; 1455 } 1456 1457 DWARFDebugLine::SectionParser::SectionParser( 1458 DWARFDataExtractor &Data, const DWARFContext &C, 1459 DWARFUnitVector::iterator_range Units) 1460 : DebugLineData(Data), Context(C) { 1461 LineToUnit = buildLineToUnitMap(Units); 1462 if (!DebugLineData.isValidOffset(Offset)) 1463 Done = true; 1464 } 1465 1466 bool DWARFDebugLine::Prologue::totalLengthIsValid() const { 1467 return TotalLength != 0u; 1468 } 1469 1470 DWARFDebugLine::LineTable DWARFDebugLine::SectionParser::parseNext( 1471 function_ref<void(Error)> RecoverableErrorHandler, 1472 function_ref<void(Error)> UnrecoverableErrorHandler, raw_ostream *OS, 1473 bool Verbose) { 1474 assert(DebugLineData.isValidOffset(Offset) && 1475 "parsing should have terminated"); 1476 DWARFUnit *U = prepareToParse(Offset); 1477 uint64_t OldOffset = Offset; 1478 LineTable LT; 1479 if (Error Err = LT.parse(DebugLineData, &Offset, Context, U, 1480 RecoverableErrorHandler, OS, Verbose)) 1481 UnrecoverableErrorHandler(std::move(Err)); 1482 moveToNextTable(OldOffset, LT.Prologue); 1483 return LT; 1484 } 1485 1486 void DWARFDebugLine::SectionParser::skip( 1487 function_ref<void(Error)> RecoverableErrorHandler, 1488 function_ref<void(Error)> UnrecoverableErrorHandler) { 1489 assert(DebugLineData.isValidOffset(Offset) && 1490 "parsing should have terminated"); 1491 DWARFUnit *U = prepareToParse(Offset); 1492 uint64_t OldOffset = Offset; 1493 LineTable LT; 1494 if (Error Err = LT.Prologue.parse(DebugLineData, &Offset, 1495 RecoverableErrorHandler, Context, U)) 1496 UnrecoverableErrorHandler(std::move(Err)); 1497 moveToNextTable(OldOffset, LT.Prologue); 1498 } 1499 1500 DWARFUnit *DWARFDebugLine::SectionParser::prepareToParse(uint64_t Offset) { 1501 DWARFUnit *U = nullptr; 1502 auto It = LineToUnit.find(Offset); 1503 if (It != LineToUnit.end()) 1504 U = It->second; 1505 DebugLineData.setAddressSize(U ? U->getAddressByteSize() : 0); 1506 return U; 1507 } 1508 1509 void DWARFDebugLine::SectionParser::moveToNextTable(uint64_t OldOffset, 1510 const Prologue &P) { 1511 // If the length field is not valid, we don't know where the next table is, so 1512 // cannot continue to parse. Mark the parser as done, and leave the Offset 1513 // value as it currently is. This will be the end of the bad length field. 1514 if (!P.totalLengthIsValid()) { 1515 Done = true; 1516 return; 1517 } 1518 1519 Offset = OldOffset + P.TotalLength + P.sizeofTotalLength(); 1520 if (!DebugLineData.isValidOffset(Offset)) { 1521 Done = true; 1522 } 1523 } 1524