1 // dwarf_reader.h -- parse dwarf2/3 debug information for gold -*- C++ -*- 2 3 // Copyright (C) 2007-2022 Free Software Foundation, Inc. 4 // Written by Ian Lance Taylor <iant@google.com>. 5 6 // This file is part of gold. 7 8 // This program is free software; you can redistribute it and/or modify 9 // it under the terms of the GNU General Public License as published by 10 // the Free Software Foundation; either version 3 of the License, or 11 // (at your option) any later version. 12 13 // This program is distributed in the hope that it will be useful, 14 // but WITHOUT ANY WARRANTY; without even the implied warranty of 15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 // GNU General Public License for more details. 17 18 // You should have received a copy of the GNU General Public License 19 // along with this program; if not, write to the Free Software 20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, 21 // MA 02110-1301, USA. 22 23 #ifndef GOLD_DWARF_READER_H 24 #define GOLD_DWARF_READER_H 25 26 #include <vector> 27 #include <map> 28 #include <limits.h> 29 #include <sys/types.h> 30 31 #include "elfcpp.h" 32 #include "elfcpp_swap.h" 33 #include "dwarf.h" 34 #include "reloc.h" 35 36 namespace gold 37 { 38 39 class Dwarf_info_reader; 40 struct LineStateMachine; 41 42 // This class is used to extract the section index and offset of 43 // the target of a relocation for a given offset within the section. 44 45 class Elf_reloc_mapper 46 { 47 public: Elf_reloc_mapper()48 Elf_reloc_mapper() 49 { } 50 51 virtual ~Elf_reloc_mapper()52 ~Elf_reloc_mapper() 53 { } 54 55 // Initialize the relocation tracker for section RELOC_SHNDX. 56 bool initialize(unsigned int reloc_shndx,unsigned int reloc_type)57 initialize(unsigned int reloc_shndx, unsigned int reloc_type) 58 { return this->do_initialize(reloc_shndx, reloc_type); } 59 60 // Return the next reloc_offset. 61 off_t next_offset()62 next_offset() 63 { return this->do_next_offset(); } 64 65 // Advance to the next relocation past OFFSET. 66 void advance(off_t offset)67 advance(off_t offset) 68 { this->do_advance(offset); } 69 70 // Return the section index and offset within the section of the target 71 // of the relocation for RELOC_OFFSET in the referring section. 72 unsigned int get_reloc_target(off_t reloc_offset,off_t * target_offset)73 get_reloc_target(off_t reloc_offset, off_t* target_offset) 74 { return this->do_get_reloc_target(reloc_offset, target_offset); } 75 76 // Checkpoint the current position in the reloc section. 77 uint64_t checkpoint()78 checkpoint() const 79 { return this->do_checkpoint(); } 80 81 // Reset the current position to the CHECKPOINT. 82 void reset(uint64_t checkpoint)83 reset(uint64_t checkpoint) 84 { this->do_reset(checkpoint); } 85 86 protected: 87 virtual bool 88 do_initialize(unsigned int, unsigned int) = 0; 89 90 // Return the next reloc_offset. 91 virtual off_t 92 do_next_offset() = 0; 93 94 // Advance to the next relocation past OFFSET. 95 virtual void 96 do_advance(off_t offset) = 0; 97 98 virtual unsigned int 99 do_get_reloc_target(off_t reloc_offset, off_t* target_offset) = 0; 100 101 // Checkpoint the current position in the reloc section. 102 virtual uint64_t 103 do_checkpoint() const = 0; 104 105 // Reset the current position to the CHECKPOINT. 106 virtual void 107 do_reset(uint64_t checkpoint) = 0; 108 }; 109 110 template<int size, bool big_endian> 111 class Sized_elf_reloc_mapper : public Elf_reloc_mapper 112 { 113 public: Sized_elf_reloc_mapper(Object * object,const unsigned char * symtab,off_t symtab_size)114 Sized_elf_reloc_mapper(Object* object, const unsigned char* symtab, 115 off_t symtab_size) 116 : object_(object), symtab_(symtab), symtab_size_(symtab_size), 117 reloc_type_(0), track_relocs_() 118 { } 119 120 protected: 121 bool 122 do_initialize(unsigned int reloc_shndx, unsigned int reloc_type); 123 124 // Return the next reloc_offset. 125 virtual off_t do_next_offset()126 do_next_offset() 127 { return this->track_relocs_.next_offset(); } 128 129 // Advance to the next relocation past OFFSET. 130 virtual void do_advance(off_t offset)131 do_advance(off_t offset) 132 { this->track_relocs_.advance(offset); } 133 134 unsigned int 135 do_get_reloc_target(off_t reloc_offset, off_t* target_offset); 136 137 // Checkpoint the current position in the reloc section. 138 uint64_t do_checkpoint()139 do_checkpoint() const 140 { return this->track_relocs_.checkpoint(); } 141 142 // Reset the current position to the CHECKPOINT. 143 void do_reset(uint64_t checkpoint)144 do_reset(uint64_t checkpoint) 145 { this->track_relocs_.reset(checkpoint); } 146 147 private: 148 typedef typename elfcpp::Elf_types<size>::Elf_Addr Address; 149 150 // Return the section index of symbol SYMNDX, and copy its value to *VALUE. 151 // Set *IS_ORDINARY true if the section index is an ordinary section index. 152 unsigned int 153 symbol_section(unsigned int symndx, Address* value, bool* is_ordinary); 154 155 // The object file. 156 Object* object_; 157 // The ELF symbol table. 158 const unsigned char* symtab_; 159 // The size of the ELF symbol table. 160 off_t symtab_size_; 161 // Type of the relocation section (SHT_REL or SHT_RELA). 162 unsigned int reloc_type_; 163 // Relocations for the referring section. 164 Track_relocs<size, big_endian> track_relocs_; 165 }; 166 167 // This class is used to read the abbreviations table from the 168 // .debug_abbrev section of the object file. 169 170 class Dwarf_abbrev_table 171 { 172 public: 173 // An attribute list entry. 174 struct Attribute 175 { AttributeAttribute176 Attribute(unsigned int a, unsigned int f, int c) 177 : attr(a), form(f), implicit_const(c) 178 { } 179 unsigned int attr; 180 unsigned int form; 181 int implicit_const; 182 }; 183 184 // An abbrev code entry. 185 struct Abbrev_code 186 { Abbrev_codeAbbrev_code187 Abbrev_code(unsigned int t, bool hc) 188 : tag(t), has_children(hc), has_sibling_attribute(false), attributes() 189 { 190 this->attributes.reserve(10); 191 } 192 193 void add_attributeAbbrev_code194 add_attribute(unsigned int attr, unsigned int form, int implicit_const) 195 { 196 this->attributes.push_back(Attribute(attr, form, implicit_const)); 197 } 198 199 // The DWARF tag. 200 unsigned int tag; 201 // True if the DIE has children. 202 bool has_children : 1; 203 // True if the DIE has a sibling attribute. 204 bool has_sibling_attribute : 1; 205 // The list of attributes and forms. 206 std::vector<Attribute> attributes; 207 }; 208 Dwarf_abbrev_table()209 Dwarf_abbrev_table() 210 : abbrev_shndx_(0), abbrev_offset_(0), buffer_(NULL), buffer_end_(NULL), 211 owns_buffer_(false), buffer_pos_(NULL), high_abbrev_codes_() 212 { 213 memset(this->low_abbrev_codes_, 0, sizeof(this->low_abbrev_codes_)); 214 } 215 ~Dwarf_abbrev_table()216 ~Dwarf_abbrev_table() 217 { 218 if (this->owns_buffer_ && this->buffer_ != NULL) 219 delete[] this->buffer_; 220 this->clear_abbrev_codes(); 221 } 222 223 // Read the abbrev table from an object file. 224 bool read_abbrevs(Relobj * object,unsigned int abbrev_shndx,off_t abbrev_offset)225 read_abbrevs(Relobj* object, 226 unsigned int abbrev_shndx, 227 off_t abbrev_offset) 228 { 229 // If we've already read this abbrev table, return immediately. 230 if (this->abbrev_shndx_ > 0 231 && this->abbrev_shndx_ == abbrev_shndx 232 && this->abbrev_offset_ == abbrev_offset) 233 return true; 234 return this->do_read_abbrevs(object, abbrev_shndx, abbrev_offset); 235 } 236 237 // Return the abbrev code entry for CODE. This is a fast path for 238 // abbrev codes that are in the direct lookup table. If not found 239 // there, we call do_get_abbrev() to do the hard work. 240 const Abbrev_code* get_abbrev(unsigned int code)241 get_abbrev(unsigned int code) 242 { 243 if (code < this->low_abbrev_code_max_ 244 && this->low_abbrev_codes_[code] != NULL) 245 return this->low_abbrev_codes_[code]; 246 return this->do_get_abbrev(code); 247 } 248 249 private: 250 // Read the abbrev table from an object file. 251 bool 252 do_read_abbrevs(Relobj* object, 253 unsigned int abbrev_shndx, 254 off_t abbrev_offset); 255 256 // Lookup the abbrev code entry for CODE. 257 const Abbrev_code* 258 do_get_abbrev(unsigned int code); 259 260 // Store an abbrev code entry for CODE. 261 void store_abbrev(unsigned int code,const Abbrev_code * entry)262 store_abbrev(unsigned int code, const Abbrev_code* entry) 263 { 264 if (code < this->low_abbrev_code_max_) 265 this->low_abbrev_codes_[code] = entry; 266 else 267 this->high_abbrev_codes_[code] = entry; 268 } 269 270 // Clear the abbrev code table and release the memory it uses. 271 void 272 clear_abbrev_codes(); 273 274 typedef Unordered_map<unsigned int, const Abbrev_code*> Abbrev_code_table; 275 276 // The section index of the current abbrev table. 277 unsigned int abbrev_shndx_; 278 // The offset within the section of the current abbrev table. 279 off_t abbrev_offset_; 280 // The buffer containing the .debug_abbrev section. 281 const unsigned char* buffer_; 282 const unsigned char* buffer_end_; 283 // True if this object owns the buffer and needs to delete it. 284 bool owns_buffer_; 285 // Pointer to the current position in the buffer. 286 const unsigned char* buffer_pos_; 287 // The table of abbrev codes. 288 // We use a direct-lookup array for low abbrev codes, 289 // and store the rest in a hash table. 290 static const unsigned int low_abbrev_code_max_ = 256; 291 const Abbrev_code* low_abbrev_codes_[low_abbrev_code_max_]; 292 Abbrev_code_table high_abbrev_codes_; 293 }; 294 295 // A DWARF range list. The start and end offsets are relative 296 // to the input section SHNDX. Each range must lie entirely 297 // within a single section. 298 299 class Dwarf_range_list 300 { 301 public: 302 struct Range 303 { RangeRange304 Range(unsigned int a_shndx, off_t a_start, off_t a_end) 305 : shndx(a_shndx), start(a_start), end(a_end) 306 { } 307 308 unsigned int shndx; 309 off_t start; 310 off_t end; 311 }; 312 Dwarf_range_list()313 Dwarf_range_list() 314 : range_list_() 315 { } 316 317 void add(unsigned int shndx,off_t start,off_t end)318 add(unsigned int shndx, off_t start, off_t end) 319 { this->range_list_.push_back(Range(shndx, start, end)); } 320 321 size_t size()322 size() const 323 { return this->range_list_.size(); } 324 325 const Range& 326 operator[](off_t i) const 327 { return this->range_list_[i]; } 328 329 private: 330 std::vector<Range> range_list_; 331 }; 332 333 // This class is used to read the ranges table from the 334 // .debug_ranges section of the object file. 335 336 class Dwarf_ranges_table 337 { 338 public: Dwarf_ranges_table(Dwarf_info_reader * dwinfo)339 Dwarf_ranges_table(Dwarf_info_reader* dwinfo) 340 : dwinfo_(dwinfo), ranges_shndx_(0), ranges_buffer_(NULL), 341 ranges_buffer_end_(NULL), owns_ranges_buffer_(false), 342 ranges_reloc_mapper_(NULL), reloc_type_(0), output_section_offset_(0) 343 { } 344 ~Dwarf_ranges_table()345 ~Dwarf_ranges_table() 346 { 347 if (this->owns_ranges_buffer_ && this->ranges_buffer_ != NULL) 348 delete[] this->ranges_buffer_; 349 if (this->ranges_reloc_mapper_ != NULL) 350 delete this->ranges_reloc_mapper_; 351 } 352 353 // Fetch the contents of the ranges table from an object file. 354 bool 355 read_ranges_table(Relobj* object, 356 const unsigned char* symtab, 357 off_t symtab_size, 358 unsigned int ranges_shndx, 359 unsigned int version); 360 361 // Read the DWARF 2/3/4 range table. 362 Dwarf_range_list* 363 read_range_list(Relobj* object, 364 const unsigned char* symtab, 365 off_t symtab_size, 366 unsigned int address_size, 367 unsigned int ranges_shndx, 368 off_t ranges_offset); 369 370 // Read the DWARF 5 rnglists table. 371 Dwarf_range_list* 372 read_range_list_v5(Relobj* object, 373 const unsigned char* symtab, 374 off_t symtab_size, 375 unsigned int address_size, 376 unsigned int ranges_shndx, 377 off_t ranges_offset); 378 379 // Look for a relocation at offset OFF in the range table, 380 // and return the section index and offset of the target. 381 unsigned int 382 lookup_reloc(off_t off, off_t* target_off); 383 384 private: 385 // The Dwarf_info_reader, for reading data. 386 Dwarf_info_reader* dwinfo_; 387 // The section index of the ranges table. 388 unsigned int ranges_shndx_; 389 // The buffer containing the .debug_ranges section. 390 const unsigned char* ranges_buffer_; 391 const unsigned char* ranges_buffer_end_; 392 // True if this object owns the buffer and needs to delete it. 393 bool owns_ranges_buffer_; 394 // Relocation mapper for the .debug_ranges section. 395 Elf_reloc_mapper* ranges_reloc_mapper_; 396 // Type of the relocation section (SHT_REL or SHT_RELA). 397 unsigned int reloc_type_; 398 // For incremental update links, this will hold the offset of the 399 // input section within the output section. Offsets read from 400 // relocated data will be relative to the output section, and need 401 // to be corrected before reading data from the input section. 402 uint64_t output_section_offset_; 403 }; 404 405 // This class is used to read the pubnames and pubtypes tables from the 406 // .debug_pubnames and .debug_pubtypes sections of the object file. 407 408 class Dwarf_pubnames_table 409 { 410 public: Dwarf_pubnames_table(Dwarf_info_reader * dwinfo,bool is_pubtypes)411 Dwarf_pubnames_table(Dwarf_info_reader* dwinfo, bool is_pubtypes) 412 : dwinfo_(dwinfo), buffer_(NULL), buffer_end_(NULL), owns_buffer_(false), 413 offset_size_(0), pinfo_(NULL), end_of_table_(NULL), 414 is_pubtypes_(is_pubtypes), is_gnu_style_(false), 415 unit_length_(0), cu_offset_(0) 416 { } 417 ~Dwarf_pubnames_table()418 ~Dwarf_pubnames_table() 419 { 420 if (this->owns_buffer_ && this->buffer_ != NULL) 421 delete[] this->buffer_; 422 } 423 424 // Read the pubnames section from the object file, using the symbol 425 // table for relocating it. 426 bool 427 read_section(Relobj* object, const unsigned char* symbol_table, 428 off_t symtab_size); 429 430 // Read the header for the set at OFFSET. 431 bool 432 read_header(off_t offset); 433 434 // Return the offset to the cu within the info or types section. 435 off_t cu_offset()436 cu_offset() 437 { return this->cu_offset_; } 438 439 // Return the size of this subsection of the table. The unit length 440 // doesn't include the size of its own field. 441 off_t subsection_size()442 subsection_size() 443 { return this->unit_length_; } 444 445 // Read the next name from the set. If the pubname table is gnu-style, 446 // FLAG_BYTE is set to the high-byte of a gdb_index version 7 cu_index. 447 const char* 448 next_name(uint8_t* flag_byte); 449 450 private: 451 // The Dwarf_info_reader, for reading data. 452 Dwarf_info_reader* dwinfo_; 453 // The buffer containing the .debug_ranges section. 454 const unsigned char* buffer_; 455 const unsigned char* buffer_end_; 456 // True if this object owns the buffer and needs to delete it. 457 bool owns_buffer_; 458 // The size of a DWARF offset for the current set. 459 unsigned int offset_size_; 460 // The current position within the buffer. 461 const unsigned char* pinfo_; 462 // The end of the current pubnames table. 463 const unsigned char* end_of_table_; 464 // TRUE if this is a .debug_pubtypes section. 465 bool is_pubtypes_; 466 // Gnu-style pubnames table. This style has an extra flag byte between the 467 // offset and the name, and is used for generating version 7 of gdb-index. 468 bool is_gnu_style_; 469 // Fields read from the header. 470 uint64_t unit_length_; 471 off_t cu_offset_; 472 473 // Track relocations for this table so we can find the CUs that 474 // correspond to the subsections. 475 Elf_reloc_mapper* reloc_mapper_; 476 // Type of the relocation section (SHT_REL or SHT_RELA). 477 unsigned int reloc_type_; 478 }; 479 480 // This class represents a DWARF Debug Info Entry (DIE). 481 482 class Dwarf_die 483 { 484 public: 485 // An attribute value. 486 struct Attribute_value 487 { 488 unsigned int attr; 489 unsigned int form; 490 union 491 { 492 int64_t intval; 493 uint64_t uintval; 494 const char* stringval; 495 const unsigned char* blockval; 496 off_t refval; 497 } val; 498 union 499 { 500 // Section index for reference forms. 501 unsigned int shndx; 502 // Block length for block forms. 503 unsigned int blocklen; 504 } aux; 505 }; 506 507 // A list of attribute values. 508 typedef std::vector<Attribute_value> Attributes; 509 510 Dwarf_die(Dwarf_info_reader* dwinfo, 511 off_t die_offset, 512 Dwarf_die* parent); 513 514 // Return the DWARF tag for this DIE. 515 unsigned int tag()516 tag() const 517 { 518 if (this->abbrev_code_ == NULL) 519 return 0; 520 return this->abbrev_code_->tag; 521 } 522 523 // Return true if this DIE has children. 524 bool has_children()525 has_children() const 526 { 527 gold_assert(this->abbrev_code_ != NULL); 528 return this->abbrev_code_->has_children; 529 } 530 531 // Return true if this DIE has a sibling attribute. 532 bool has_sibling_attribute()533 has_sibling_attribute() const 534 { 535 gold_assert(this->abbrev_code_ != NULL); 536 return this->abbrev_code_->has_sibling_attribute; 537 } 538 539 // Return the value of attribute ATTR. 540 const Attribute_value* 541 attribute(unsigned int attr); 542 543 // Return the value of the DW_AT_name attribute. 544 const char* name()545 name() 546 { 547 if (this->name_ == NULL) 548 this->set_name(); 549 return this->name_; 550 } 551 552 // Return the value of the DW_AT_linkage_name 553 // or DW_AT_MIPS_linkage_name attribute. 554 const char* linkage_name()555 linkage_name() 556 { 557 if (this->linkage_name_ == NULL) 558 this->set_linkage_name(); 559 return this->linkage_name_; 560 } 561 562 // Return the value of the DW_AT_specification attribute. 563 off_t specification()564 specification() 565 { 566 if (!this->attributes_read_) 567 this->read_attributes(); 568 return this->specification_; 569 } 570 571 // Return the value of the DW_AT_abstract_origin attribute. 572 off_t abstract_origin()573 abstract_origin() 574 { 575 if (!this->attributes_read_) 576 this->read_attributes(); 577 return this->abstract_origin_; 578 } 579 580 // Return the value of attribute ATTR as a string. 581 const char* 582 string_attribute(unsigned int attr); 583 584 // Return the value of attribute ATTR as an integer. 585 int64_t 586 int_attribute(unsigned int attr); 587 588 // Return the value of attribute ATTR as an unsigned integer. 589 uint64_t 590 uint_attribute(unsigned int attr); 591 592 // Return the value of attribute ATTR as a reference. 593 off_t 594 ref_attribute(unsigned int attr, unsigned int* shndx); 595 596 // Return the value of attribute ATTR as a address. 597 off_t 598 address_attribute(unsigned int attr, unsigned int* shndx); 599 600 // Return the value of attribute ATTR as a flag. 601 bool flag_attribute(unsigned int attr)602 flag_attribute(unsigned int attr) 603 { return this->int_attribute(attr) != 0; } 604 605 // Return true if this DIE is a declaration. 606 bool is_declaration()607 is_declaration() 608 { return this->flag_attribute(elfcpp::DW_AT_declaration); } 609 610 // Return the parent of this DIE. 611 Dwarf_die* parent()612 parent() const 613 { return this->parent_; } 614 615 // Return the offset of this DIE. 616 off_t offset()617 offset() const 618 { return this->die_offset_; } 619 620 // Return the offset of this DIE's first child. 621 off_t 622 child_offset(); 623 624 // Set the offset of this DIE's next sibling. 625 void set_sibling_offset(off_t sibling_offset)626 set_sibling_offset(off_t sibling_offset) 627 { this->sibling_offset_ = sibling_offset; } 628 629 // Return the offset of this DIE's next sibling. 630 off_t 631 sibling_offset(); 632 633 private: 634 typedef Dwarf_abbrev_table::Abbrev_code Abbrev_code; 635 636 // Read all the attributes of the DIE. 637 bool 638 read_attributes(); 639 640 // Set the name of the DIE if present. 641 void 642 set_name(); 643 644 // Set the linkage name if present. 645 void 646 set_linkage_name(); 647 648 // Skip all the attributes of the DIE and return the offset 649 // of the next DIE. 650 off_t 651 skip_attributes(); 652 653 // The Dwarf_info_reader, for reading attributes. 654 Dwarf_info_reader* dwinfo_; 655 // The parent of this DIE. 656 Dwarf_die* parent_; 657 // Offset of this DIE within its compilation unit. 658 off_t die_offset_; 659 // Offset of the first attribute, relative to the beginning of the DIE. 660 off_t attr_offset_; 661 // Offset of the first child, relative to the compilation unit. 662 off_t child_offset_; 663 // Offset of the next sibling, relative to the compilation unit. 664 off_t sibling_offset_; 665 // The abbreviation table entry. 666 const Abbrev_code* abbrev_code_; 667 // The list of attributes. 668 Attributes attributes_; 669 // True if the attributes have been read. 670 bool attributes_read_; 671 // The following fields hold common attributes to avoid a linear 672 // search through the attribute list. 673 // The DIE name (DW_AT_name). 674 const char* name_; 675 // Offset of the name in the string table (for DW_FORM_strp). 676 off_t name_off_; 677 // The linkage name (DW_AT_linkage_name or DW_AT_MIPS_linkage_name). 678 const char* linkage_name_; 679 // Offset of the linkage name in the string table (for DW_FORM_strp). 680 off_t linkage_name_off_; 681 // Section index of the string table (for DW_FORM_strp). 682 unsigned int string_shndx_; 683 // The value of a DW_AT_specification attribute. 684 off_t specification_; 685 // The value of a DW_AT_abstract_origin attribute. 686 off_t abstract_origin_; 687 }; 688 689 // This class is used to read the debug info from the .debug_info 690 // or .debug_types sections. This is a base class that implements 691 // the generic parsing of the compilation unit header and DIE 692 // structure. The parse() method parses the entire section, and 693 // calls the various visit_xxx() methods for each header. Clients 694 // should derive a new class from this one and implement the 695 // visit_compilation_unit() and visit_type_unit() functions. 696 // IS_TYPE_UNIT is true if we are reading from a .debug_types section, 697 // which is used only in DWARF 4. For DWARF 5, it will be false, 698 // and we will determine whether it's a type init when we parse the 699 // header. 700 701 class Dwarf_info_reader 702 { 703 public: Dwarf_info_reader(bool is_type_unit,Relobj * object,const unsigned char * symtab,off_t symtab_size,unsigned int shndx,unsigned int reloc_shndx,unsigned int reloc_type)704 Dwarf_info_reader(bool is_type_unit, 705 Relobj* object, 706 const unsigned char* symtab, 707 off_t symtab_size, 708 unsigned int shndx, 709 unsigned int reloc_shndx, 710 unsigned int reloc_type) 711 : object_(object), symtab_(symtab), 712 symtab_size_(symtab_size), shndx_(shndx), reloc_shndx_(reloc_shndx), 713 reloc_type_(reloc_type), abbrev_shndx_(0), string_shndx_(0), 714 buffer_(NULL), buffer_end_(NULL), cu_offset_(0), cu_length_(0), 715 offset_size_(0), address_size_(0), cu_version_(0), 716 abbrev_table_(), ranges_table_(this), 717 reloc_mapper_(NULL), string_buffer_(NULL), string_buffer_end_(NULL), 718 owns_string_buffer_(false), string_output_section_offset_(0) 719 { 720 // For DWARF 4, we infer the unit type from the section name. 721 // For DWARF 5, we will read this from the unit header. 722 this->unit_type_ = 723 (is_type_unit ? elfcpp::DW_UT_type : elfcpp::DW_UT_compile); 724 } 725 726 virtual ~Dwarf_info_reader()727 ~Dwarf_info_reader() 728 { 729 if (this->reloc_mapper_ != NULL) 730 delete this->reloc_mapper_; 731 if (this->owns_string_buffer_ && this->string_buffer_ != NULL) 732 delete[] this->string_buffer_; 733 } 734 735 bool is_type_unit()736 is_type_unit() const 737 { 738 return (this->unit_type_ == elfcpp::DW_UT_type 739 || this->unit_type_ == elfcpp::DW_UT_split_type); 740 } 741 742 // Begin parsing the debug info. This calls visit_compilation_unit() 743 // or visit_type_unit() for each compilation or type unit found in the 744 // section, and visit_die() for each top-level DIE. 745 void 746 parse(); 747 748 // Return the abbrev code entry for a CODE. 749 const Dwarf_abbrev_table::Abbrev_code* get_abbrev(unsigned int code)750 get_abbrev(unsigned int code) 751 { return this->abbrev_table_.get_abbrev(code); } 752 753 // Return a pointer to the DWARF info buffer at OFFSET. 754 const unsigned char* buffer_at_offset(off_t offset)755 buffer_at_offset(off_t offset) const 756 { 757 const unsigned char* p = this->buffer_ + this->cu_offset_ + offset; 758 if (this->check_buffer(p + 1)) 759 return p; 760 return NULL; 761 } 762 763 // Read a possibly unaligned integer of SIZE. 764 template <int valsize> 765 inline typename elfcpp::Valtype_base<valsize>::Valtype 766 read_from_pointer(const unsigned char* source); 767 768 // Read a possibly unaligned integer of SIZE. Update SOURCE after read. 769 template <int valsize> 770 inline typename elfcpp::Valtype_base<valsize>::Valtype 771 read_from_pointer(const unsigned char** source); 772 773 inline typename elfcpp::Valtype_base<32>::Valtype 774 read_3bytes_from_pointer(const unsigned char** source); 775 776 // Look for a relocation at offset ATTR_OFF in the dwarf info, 777 // and return the section index and offset of the target. 778 unsigned int 779 lookup_reloc(off_t attr_off, off_t* target_off); 780 781 // Return a string from the DWARF string table. 782 const char* 783 get_string(off_t str_off, unsigned int string_shndx); 784 785 // Return the size of a DWARF offset. 786 unsigned int offset_size()787 offset_size() const 788 { return this->offset_size_; } 789 790 // Return the size of an address. 791 unsigned int address_size()792 address_size() const 793 { return this->address_size_; } 794 795 // Return the size of a DW_FORM_ref_addr. 796 // In DWARF v2, this was the size of an address; in DWARF v3 and later, 797 // it is the size of an DWARF offset. 798 unsigned int ref_addr_size()799 ref_addr_size() const 800 { return this->cu_version_ > 2 ? this->offset_size_ : this->address_size_; } 801 802 // Set the section index of the .debug_abbrev section. 803 // We use this if there are no relocations for the .debug_info section. 804 // If not set, the code parse() routine will search for the section by name. 805 void set_abbrev_shndx(unsigned int abbrev_shndx)806 set_abbrev_shndx(unsigned int abbrev_shndx) 807 { this->abbrev_shndx_ = abbrev_shndx; } 808 809 // Return a pointer to the object file's ELF symbol table. 810 const unsigned char* symtab()811 symtab() const 812 { return this->symtab_; } 813 814 // Return the size of the object file's ELF symbol table. 815 off_t symtab_size()816 symtab_size() const 817 { return this->symtab_size_; } 818 819 // Return the offset of the current compilation unit. 820 off_t cu_offset()821 cu_offset() const 822 { return this->cu_offset_; } 823 824 protected: 825 // Begin parsing the debug info. This calls visit_compilation_unit() 826 // or visit_type_unit() for each compilation or type unit found in the 827 // section, and visit_die() for each top-level DIE. 828 template<bool big_endian> 829 void 830 do_parse(); 831 832 // The following methods are hooks that are meant to be implemented 833 // by a derived class. A default, do-nothing, implementation of 834 // each is provided for this base class. 835 836 // Visit a compilation unit. 837 virtual void 838 visit_compilation_unit(off_t cu_offset, off_t cu_length, Dwarf_die* root_die); 839 840 // Visit a type unit. 841 virtual void 842 visit_type_unit(off_t tu_offset, off_t tu_length, off_t type_offset, 843 uint64_t signature, Dwarf_die* root_die); 844 845 // Read the range table. 846 Dwarf_range_list* read_range_list(unsigned int ranges_shndx,off_t ranges_offset)847 read_range_list(unsigned int ranges_shndx, off_t ranges_offset) 848 { 849 if (this->cu_version_ < 5) 850 return this->ranges_table_.read_range_list(this->object_, 851 this->symtab_, 852 this->symtab_size_, 853 this->address_size_, 854 ranges_shndx, 855 ranges_offset); 856 else 857 return this->ranges_table_.read_range_list_v5(this->object_, 858 this->symtab_, 859 this->symtab_size_, 860 this->address_size_, 861 ranges_shndx, 862 ranges_offset); 863 } 864 865 // Return the object. 866 Relobj* object()867 object() const 868 { return this->object_; } 869 870 // Checkpoint the relocation tracker. 871 uint64_t get_reloc_checkpoint()872 get_reloc_checkpoint() const 873 { return this->reloc_mapper_->checkpoint(); } 874 875 // Reset the relocation tracker to the CHECKPOINT. 876 void reset_relocs(uint64_t checkpoint)877 reset_relocs(uint64_t checkpoint) 878 { this->reloc_mapper_->reset(checkpoint); } 879 880 private: 881 // Print a warning about a corrupt debug section. 882 void 883 warn_corrupt_debug_section() const; 884 885 // Check that P is within the bounds of the current section. 886 bool check_buffer(const unsigned char * p)887 check_buffer(const unsigned char* p) const 888 { 889 if (p > this->buffer_ + this->cu_offset_ + this->cu_length_) 890 { 891 this->warn_corrupt_debug_section(); 892 return false; 893 } 894 return true; 895 } 896 897 // Read the DWARF string table. 898 bool read_string_table(unsigned int string_shndx)899 read_string_table(unsigned int string_shndx) 900 { 901 // If we've already read this string table, return immediately. 902 if (this->string_shndx_ > 0 && this->string_shndx_ == string_shndx) 903 return true; 904 if (string_shndx == 0 && this->string_shndx_ > 0) 905 return true; 906 return this->do_read_string_table(string_shndx); 907 } 908 909 bool 910 do_read_string_table(unsigned int string_shndx); 911 912 // The unit type (DW_UT_xxx). 913 unsigned int unit_type_; 914 // The object containing the .debug_info or .debug_types input section. 915 Relobj* object_; 916 // The ELF symbol table. 917 const unsigned char* symtab_; 918 // The size of the ELF symbol table. 919 off_t symtab_size_; 920 // Index of the .debug_info or .debug_types section. 921 unsigned int shndx_; 922 // Index of the relocation section. 923 unsigned int reloc_shndx_; 924 // Type of the relocation section (SHT_REL or SHT_RELA). 925 unsigned int reloc_type_; 926 // Index of the .debug_abbrev section (0 if not known). 927 unsigned int abbrev_shndx_; 928 // Index of the .debug_str section. 929 unsigned int string_shndx_; 930 // The buffer for the debug info. 931 const unsigned char* buffer_; 932 const unsigned char* buffer_end_; 933 // Offset of the current compilation unit. 934 off_t cu_offset_; 935 // Length of the current compilation unit. 936 off_t cu_length_; 937 // Size of a DWARF offset for the current compilation unit. 938 unsigned int offset_size_; 939 // Size of an address for the target architecture. 940 unsigned int address_size_; 941 // Compilation unit version number. 942 unsigned int cu_version_; 943 // Abbreviations table for current compilation unit. 944 Dwarf_abbrev_table abbrev_table_; 945 // Ranges table for the current compilation unit. 946 Dwarf_ranges_table ranges_table_; 947 // Relocation mapper for the section. 948 Elf_reloc_mapper* reloc_mapper_; 949 // The buffer for the debug string table. 950 const char* string_buffer_; 951 const char* string_buffer_end_; 952 // True if this object owns the buffer and needs to delete it. 953 bool owns_string_buffer_; 954 // For incremental update links, this will hold the offset of the 955 // input .debug_str section within the output section. Offsets read 956 // from relocated data will be relative to the output section, and need 957 // to be corrected before reading data from the input section. 958 uint64_t string_output_section_offset_; 959 }; 960 961 // We can't do better than to keep the offsets in a sorted vector. 962 // Here, offset is the key, and file_num/line_num is the value. 963 struct Offset_to_lineno_entry 964 { 965 off_t offset; 966 int header_num; // which file-list to use (i.e. which .o file are we in) 967 // A pointer into files_. 968 unsigned int file_num : sizeof(int) * CHAR_BIT - 1; 969 // True if this was the last entry for the current offset, meaning 970 // it's the line that actually applies. 971 unsigned int last_line_for_offset : 1; 972 // The line number in the source file. -1 to indicate end-of-function. 973 int line_num; 974 975 // This sorts by offsets first, and then puts the correct line to 976 // report for a given offset at the beginning of the run of equal 977 // offsets (so that asking for 1 line gives the best answer). This 978 // is not a total ordering. 979 bool operator<(const Offset_to_lineno_entry& that) const 980 { 981 if (this->offset != that.offset) 982 return this->offset < that.offset; 983 // Note the '>' which makes this sort 'true' first. 984 return this->last_line_for_offset > that.last_line_for_offset; 985 } 986 }; 987 988 // This class is used to read the line information from the debugging 989 // section of an object file. 990 991 class Dwarf_line_info 992 { 993 public: Dwarf_line_info()994 Dwarf_line_info() 995 { } 996 997 virtual ~Dwarf_line_info()998 ~Dwarf_line_info() 999 { } 1000 1001 // Given a section number and an offset, returns the associated 1002 // file and line-number, as a string: "file:lineno". If unable 1003 // to do the mapping, returns the empty string. You must call 1004 // read_line_mappings() before calling this function. If 1005 // 'other_lines' is non-NULL, fills that in with other line 1006 // numbers assigned to the same offset. 1007 std::string addr2line(unsigned int shndx,off_t offset,std::vector<std::string> * other_lines)1008 addr2line(unsigned int shndx, off_t offset, 1009 std::vector<std::string>* other_lines) 1010 { return this->do_addr2line(shndx, offset, other_lines); } 1011 1012 // A helper function for a single addr2line lookup. It also keeps a 1013 // cache of the last CACHE_SIZE Dwarf_line_info objects it created; 1014 // set to 0 not to cache at all. The larger CACHE_SIZE is, the more 1015 // chance this routine won't have to re-create a Dwarf_line_info 1016 // object for its addr2line computation; such creations are slow. 1017 // NOTE: Not thread-safe, so only call from one thread at a time. 1018 static std::string 1019 one_addr2line(Object* object, unsigned int shndx, off_t offset, 1020 size_t cache_size, std::vector<std::string>* other_lines); 1021 1022 // This reclaims all the memory that one_addr2line may have cached. 1023 // Use this when you know you will not be calling one_addr2line again. 1024 static void 1025 clear_addr2line_cache(); 1026 1027 private: 1028 virtual std::string 1029 do_addr2line(unsigned int shndx, off_t offset, 1030 std::vector<std::string>* other_lines) = 0; 1031 }; 1032 1033 template<int size, bool big_endian> 1034 class Sized_dwarf_line_info : public Dwarf_line_info 1035 { 1036 public: 1037 // Initializes a .debug_line reader for a given object file. 1038 // If SHNDX is specified and non-negative, only read the debug 1039 // information that pertains to the specified section. 1040 Sized_dwarf_line_info(Object* object, unsigned int read_shndx = -1U); 1041 1042 virtual ~Sized_dwarf_line_info()1043 ~Sized_dwarf_line_info() 1044 { 1045 if (this->buffer_start_ != NULL) 1046 delete[] this->buffer_start_; 1047 if (this->str_buffer_start_ != NULL) 1048 delete[] this->str_buffer_start_; 1049 } 1050 1051 private: 1052 std::string 1053 do_addr2line(unsigned int shndx, off_t offset, 1054 std::vector<std::string>* other_lines); 1055 1056 // Formats a file and line number to a string like "dirname/filename:lineno". 1057 std::string 1058 format_file_lineno(const Offset_to_lineno_entry& lineno) const; 1059 1060 // Start processing line info, and populates the offset_map_. 1061 // If SHNDX is non-negative, only store debug information that 1062 // pertains to the specified section. 1063 void 1064 read_line_mappings(unsigned int shndx); 1065 1066 // Reads the relocation section associated with .debug_line and 1067 // stores relocation information in reloc_map_. 1068 void 1069 read_relocs(); 1070 1071 // Reads the DWARF header for this line info. Each takes as input 1072 // a starting buffer position, and returns the ending position. 1073 const unsigned char* 1074 read_header_prolog(const unsigned char* lineptr); 1075 1076 const unsigned char* 1077 read_header_tables_v2(const unsigned char* lineptr); 1078 1079 const unsigned char* 1080 read_header_tables_v5(const unsigned char* lineptr); 1081 1082 // Reads the DWARF line information. If shndx is non-negative, 1083 // discard all line information that doesn't pertain to the given 1084 // section. 1085 const unsigned char* 1086 read_lines(const unsigned char* lineptr, const unsigned char* endptr, 1087 unsigned int shndx); 1088 1089 // Process a single line info opcode at START using the state 1090 // machine at LSM. Return true if we should define a line using the 1091 // current state of the line state machine. Place the length of the 1092 // opcode in LEN. 1093 bool 1094 process_one_opcode(const unsigned char* start, 1095 struct LineStateMachine* lsm, size_t* len); 1096 1097 // Some parts of processing differ depending on whether the input 1098 // was a .o file or not. 1099 bool input_is_relobj(); 1100 1101 // If we saw anything amiss while parsing, we set this to false. 1102 // Then addr2line will always fail (rather than return possibly- 1103 // corrupt data). 1104 bool data_valid_; 1105 1106 // A DWARF2/3 line info header. This is not the same size as in the 1107 // actual file, as the one in the file may have a 32 bit or 64 bit 1108 // lengths. 1109 1110 struct Dwarf_line_infoHeader 1111 { 1112 off_t total_length; 1113 int version; 1114 int address_size; 1115 off_t prologue_length; 1116 int min_insn_length; // insn stands for instruction 1117 int max_ops_per_insn; // Added in DWARF-4. 1118 bool default_is_stmt; // stmt stands for statement 1119 signed char line_base; 1120 int line_range; 1121 unsigned char opcode_base; 1122 std::vector<unsigned char> std_opcode_lengths; 1123 int offset_size; 1124 } header_; 1125 1126 // buffer is the buffer for our line info, starting at exactly where 1127 // the line info to read is. 1128 const unsigned char* buffer_; 1129 const unsigned char* buffer_end_; 1130 // If the buffer was allocated temporarily, and therefore must be 1131 // deallocated in the dtor, this contains a pointer to the start 1132 // of the buffer. 1133 const unsigned char* buffer_start_; 1134 1135 // str_buffer is the buffer for the line table strings. 1136 const unsigned char* str_buffer_; 1137 const unsigned char* str_buffer_end_; 1138 // If the buffer was allocated temporarily, and therefore must be 1139 // deallocated in the dtor, this contains a pointer to the start 1140 // of the buffer. 1141 const unsigned char* str_buffer_start_; 1142 1143 // Pointer to the end of the header_length field (aka prologue_length). 1144 const unsigned char* end_of_header_length_; 1145 1146 // Pointer to the end of the current compilation unit. 1147 const unsigned char* end_of_unit_; 1148 1149 // This has relocations that point into buffer. 1150 Sized_elf_reloc_mapper<size, big_endian>* reloc_mapper_; 1151 // The type of the reloc section in track_relocs_--SHT_REL or SHT_RELA. 1152 unsigned int track_relocs_type_; 1153 1154 // This is used to figure out what section to apply a relocation to. 1155 const unsigned char* symtab_buffer_; 1156 section_size_type symtab_buffer_size_; 1157 1158 // Holds the directories and files as we see them. We have an array 1159 // of directory-lists, one for each .o file we're reading (usually 1160 // there will just be one, but there may be more if input is a .so). 1161 std::vector<std::vector<std::string> > directories_; 1162 // The first part is an index into directories_, the second the filename. 1163 std::vector<std::vector< std::pair<int, std::string> > > files_; 1164 1165 // An index into the current directories_ and files_ vectors. 1166 int current_header_index_; 1167 1168 // A sorted map from offset of the relocation target to the shndx 1169 // and addend for the relocation. 1170 typedef std::map<off_t, std::pair<unsigned int, off_t> > 1171 Reloc_map; 1172 Reloc_map reloc_map_; 1173 1174 // We have a vector of offset->lineno entries for every input section. 1175 typedef Unordered_map<unsigned int, std::vector<Offset_to_lineno_entry> > 1176 Lineno_map; 1177 1178 Lineno_map line_number_map_; 1179 }; 1180 1181 } // End namespace gold. 1182 1183 #endif // !defined(GOLD_DWARF_READER_H) 1184