1 //===-- HashedNameToDIE.cpp -------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "HashedNameToDIE.h" 10 #include "llvm/ADT/StringRef.h" 11 12 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array, 13 DIEArray &die_offsets) { 14 const size_t count = die_info_array.size(); 15 for (size_t i = 0; i < count; ++i) 16 die_offsets.emplace_back(die_info_array[i]); 17 } 18 19 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array, 20 const dw_tag_t tag, 21 DIEArray &die_offsets) { 22 if (tag == 0) { 23 ExtractDIEArray(die_info_array, die_offsets); 24 } else { 25 const size_t count = die_info_array.size(); 26 for (size_t i = 0; i < count; ++i) { 27 const dw_tag_t die_tag = die_info_array[i].tag; 28 bool tag_matches = die_tag == 0 || tag == die_tag; 29 if (!tag_matches) { 30 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type) 31 tag_matches = 32 tag == DW_TAG_structure_type || tag == DW_TAG_class_type; 33 } 34 if (tag_matches) 35 die_offsets.emplace_back(die_info_array[i]); 36 } 37 } 38 } 39 40 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array, 41 const dw_tag_t tag, 42 const uint32_t qualified_name_hash, 43 DIEArray &die_offsets) { 44 if (tag == 0) { 45 ExtractDIEArray(die_info_array, die_offsets); 46 } else { 47 const size_t count = die_info_array.size(); 48 for (size_t i = 0; i < count; ++i) { 49 if (qualified_name_hash != die_info_array[i].qualified_name_hash) 50 continue; 51 const dw_tag_t die_tag = die_info_array[i].tag; 52 bool tag_matches = die_tag == 0 || tag == die_tag; 53 if (!tag_matches) { 54 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type) 55 tag_matches = 56 tag == DW_TAG_structure_type || tag == DW_TAG_class_type; 57 } 58 if (tag_matches) 59 die_offsets.emplace_back(die_info_array[i]); 60 } 61 } 62 } 63 64 void DWARFMappedHash::ExtractClassOrStructDIEArray( 65 const DIEInfoArray &die_info_array, 66 bool return_implementation_only_if_available, DIEArray &die_offsets) { 67 const size_t count = die_info_array.size(); 68 for (size_t i = 0; i < count; ++i) { 69 const dw_tag_t die_tag = die_info_array[i].tag; 70 if (die_tag == 0 || die_tag == DW_TAG_class_type || 71 die_tag == DW_TAG_structure_type) { 72 if (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) { 73 if (return_implementation_only_if_available) { 74 // We found the one true definition for this class, so only return 75 // that 76 die_offsets.clear(); 77 die_offsets.emplace_back(die_info_array[i]); 78 return; 79 } else { 80 // Put the one true definition as the first entry so it matches first 81 die_offsets.emplace(die_offsets.begin(), die_info_array[i]); 82 } 83 } else { 84 die_offsets.emplace_back(die_info_array[i]); 85 } 86 } 87 } 88 } 89 90 void DWARFMappedHash::ExtractTypesFromDIEArray( 91 const DIEInfoArray &die_info_array, uint32_t type_flag_mask, 92 uint32_t type_flag_value, DIEArray &die_offsets) { 93 const size_t count = die_info_array.size(); 94 for (size_t i = 0; i < count; ++i) { 95 if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value) 96 die_offsets.emplace_back(die_info_array[i]); 97 } 98 } 99 100 const char *DWARFMappedHash::GetAtomTypeName(uint16_t atom) { 101 switch (atom) { 102 case eAtomTypeNULL: 103 return "NULL"; 104 case eAtomTypeDIEOffset: 105 return "die-offset"; 106 case eAtomTypeCUOffset: 107 return "cu-offset"; 108 case eAtomTypeTag: 109 return "die-tag"; 110 case eAtomTypeNameFlags: 111 return "name-flags"; 112 case eAtomTypeTypeFlags: 113 return "type-flags"; 114 case eAtomTypeQualNameHash: 115 return "qualified-name-hash"; 116 } 117 return "<invalid>"; 118 } 119 120 DWARFMappedHash::DIEInfo::DIEInfo(dw_offset_t o, dw_tag_t t, uint32_t f, 121 uint32_t h) 122 : die_offset(o), tag(t), type_flags(f), qualified_name_hash(h) {} 123 124 DWARFMappedHash::Prologue::Prologue(dw_offset_t _die_base_offset) 125 : die_base_offset(_die_base_offset), atoms(), atom_mask(0), 126 min_hash_data_byte_size(0), hash_data_has_fixed_byte_size(true) { 127 // Define an array of DIE offsets by first defining an array, and then define 128 // the atom type for the array, in this case we have an array of DIE offsets. 129 AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4); 130 } 131 132 void DWARFMappedHash::Prologue::ClearAtoms() { 133 hash_data_has_fixed_byte_size = true; 134 min_hash_data_byte_size = 0; 135 atom_mask = 0; 136 atoms.clear(); 137 } 138 139 bool DWARFMappedHash::Prologue::ContainsAtom(AtomType atom_type) const { 140 return (atom_mask & (1u << atom_type)) != 0; 141 } 142 143 void DWARFMappedHash::Prologue::Clear() { 144 die_base_offset = 0; 145 ClearAtoms(); 146 } 147 148 void DWARFMappedHash::Prologue::AppendAtom(AtomType type, dw_form_t form) { 149 atoms.push_back({type, form}); 150 atom_mask |= 1u << type; 151 switch (form) { 152 case DW_FORM_indirect: 153 case DW_FORM_exprloc: 154 case DW_FORM_flag_present: 155 case DW_FORM_ref_sig8: 156 llvm_unreachable("Unhandled atom form"); 157 158 case DW_FORM_addrx: 159 case DW_FORM_string: 160 case DW_FORM_block: 161 case DW_FORM_block1: 162 case DW_FORM_sdata: 163 case DW_FORM_udata: 164 case DW_FORM_ref_udata: 165 case DW_FORM_GNU_addr_index: 166 case DW_FORM_GNU_str_index: 167 hash_data_has_fixed_byte_size = false; 168 LLVM_FALLTHROUGH; 169 case DW_FORM_flag: 170 case DW_FORM_data1: 171 case DW_FORM_ref1: 172 case DW_FORM_sec_offset: 173 min_hash_data_byte_size += 1; 174 break; 175 176 case DW_FORM_block2: 177 hash_data_has_fixed_byte_size = false; 178 LLVM_FALLTHROUGH; 179 case DW_FORM_data2: 180 case DW_FORM_ref2: 181 min_hash_data_byte_size += 2; 182 break; 183 184 case DW_FORM_block4: 185 hash_data_has_fixed_byte_size = false; 186 LLVM_FALLTHROUGH; 187 case DW_FORM_data4: 188 case DW_FORM_ref4: 189 case DW_FORM_addr: 190 case DW_FORM_ref_addr: 191 case DW_FORM_strp: 192 min_hash_data_byte_size += 4; 193 break; 194 195 case DW_FORM_data8: 196 case DW_FORM_ref8: 197 min_hash_data_byte_size += 8; 198 break; 199 } 200 } 201 202 lldb::offset_t 203 DWARFMappedHash::Prologue::Read(const lldb_private::DataExtractor &data, 204 lldb::offset_t offset) { 205 ClearAtoms(); 206 207 die_base_offset = data.GetU32(&offset); 208 209 const uint32_t atom_count = data.GetU32(&offset); 210 if (atom_count == 0x00060003u) { 211 // Old format, deal with contents of old pre-release format. 212 while (data.GetU32(&offset)) { 213 /* do nothing */; 214 } 215 216 // Hardcode to the only known value for now. 217 AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4); 218 } else { 219 for (uint32_t i = 0; i < atom_count; ++i) { 220 AtomType type = (AtomType)data.GetU16(&offset); 221 dw_form_t form = (dw_form_t)data.GetU16(&offset); 222 AppendAtom(type, form); 223 } 224 } 225 return offset; 226 } 227 228 size_t DWARFMappedHash::Prologue::GetByteSize() const { 229 // Add an extra count to the atoms size for the zero termination Atom that 230 // gets written to disk. 231 return sizeof(die_base_offset) + sizeof(uint32_t) + 232 atoms.size() * sizeof(Atom); 233 } 234 235 size_t DWARFMappedHash::Prologue::GetMinimumHashDataByteSize() const { 236 return min_hash_data_byte_size; 237 } 238 239 bool DWARFMappedHash::Prologue::HashDataHasFixedByteSize() const { 240 return hash_data_has_fixed_byte_size; 241 } 242 243 size_t DWARFMappedHash::Header::GetByteSize(const HeaderData &header_data) { 244 return header_data.GetByteSize(); 245 } 246 247 lldb::offset_t DWARFMappedHash::Header::Read(lldb_private::DataExtractor &data, 248 lldb::offset_t offset) { 249 offset = MappedHash::Header<Prologue>::Read(data, offset); 250 if (offset != UINT32_MAX) { 251 offset = header_data.Read(data, offset); 252 } 253 return offset; 254 } 255 256 bool DWARFMappedHash::Header::Read(const lldb_private::DWARFDataExtractor &data, 257 lldb::offset_t *offset_ptr, 258 DIEInfo &hash_data) const { 259 const size_t num_atoms = header_data.atoms.size(); 260 if (num_atoms == 0) 261 return false; 262 263 for (size_t i = 0; i < num_atoms; ++i) { 264 DWARFFormValue form_value(nullptr, header_data.atoms[i].form); 265 266 if (!form_value.ExtractValue(data, offset_ptr)) 267 return false; 268 269 switch (header_data.atoms[i].type) { 270 case eAtomTypeDIEOffset: // DIE offset, check form for encoding 271 hash_data.die_offset = 272 DWARFFormValue::IsDataForm(form_value.Form()) 273 ? form_value.Unsigned() 274 : form_value.Reference(header_data.die_base_offset); 275 break; 276 277 case eAtomTypeTag: // DW_TAG value for the DIE 278 hash_data.tag = (dw_tag_t)form_value.Unsigned(); 279 break; 280 281 case eAtomTypeTypeFlags: // Flags from enum TypeFlags 282 hash_data.type_flags = (uint32_t)form_value.Unsigned(); 283 break; 284 285 case eAtomTypeQualNameHash: // Flags from enum TypeFlags 286 hash_data.qualified_name_hash = form_value.Unsigned(); 287 break; 288 289 default: 290 // We can always skip atoms we don't know about. 291 break; 292 } 293 } 294 return hash_data.die_offset != DW_INVALID_OFFSET; 295 } 296 297 DWARFMappedHash::MemoryTable::MemoryTable( 298 lldb_private::DWARFDataExtractor &table_data, 299 const lldb_private::DWARFDataExtractor &string_table, const char *name) 300 : MappedHash::MemoryTable<uint32_t, Header, DIEInfoArray>(table_data), 301 m_data(table_data), m_string_table(string_table), m_name(name) {} 302 303 const char * 304 DWARFMappedHash::MemoryTable::GetStringForKeyType(KeyType key) const { 305 // The key in the DWARF table is the .debug_str offset for the string 306 return m_string_table.PeekCStr(key); 307 } 308 309 bool DWARFMappedHash::MemoryTable::ReadHashData(uint32_t hash_data_offset, 310 HashData &hash_data) const { 311 lldb::offset_t offset = hash_data_offset; 312 // Skip string table offset that contains offset of hash name in .debug_str. 313 offset += 4; 314 const uint32_t count = m_data.GetU32(&offset); 315 if (count > 0) { 316 hash_data.resize(count); 317 for (uint32_t i = 0; i < count; ++i) { 318 if (!m_header.Read(m_data, &offset, hash_data[i])) 319 return false; 320 } 321 } else 322 hash_data.clear(); 323 return true; 324 } 325 326 DWARFMappedHash::MemoryTable::Result 327 DWARFMappedHash::MemoryTable::GetHashDataForName( 328 llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr, 329 Pair &pair) const { 330 pair.key = m_data.GetU32(hash_data_offset_ptr); 331 pair.value.clear(); 332 333 // If the key is zero, this terminates our chain of HashData objects for this 334 // hash value. 335 if (pair.key == 0) 336 return eResultEndOfHashData; 337 338 // There definitely should be a string for this string offset, if there 339 // isn't, there is something wrong, return and error. 340 const char *strp_cstr = m_string_table.PeekCStr(pair.key); 341 if (strp_cstr == nullptr) { 342 *hash_data_offset_ptr = UINT32_MAX; 343 return eResultError; 344 } 345 346 const uint32_t count = m_data.GetU32(hash_data_offset_ptr); 347 const size_t min_total_hash_data_size = 348 count * m_header.header_data.GetMinimumHashDataByteSize(); 349 if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr, 350 min_total_hash_data_size)) { 351 // We have at least one HashData entry, and we have enough data to parse at 352 // least "count" HashData entries. 353 354 // First make sure the entire C string matches... 355 const bool match = name == strp_cstr; 356 357 if (!match && m_header.header_data.HashDataHasFixedByteSize()) { 358 // If the string doesn't match and we have fixed size data, we can just 359 // add the total byte size of all HashData objects to the hash data 360 // offset and be done... 361 *hash_data_offset_ptr += min_total_hash_data_size; 362 } else { 363 // If the string does match, or we don't have fixed size data then we 364 // need to read the hash data as a stream. If the string matches we also 365 // append all HashData objects to the value array. 366 for (uint32_t i = 0; i < count; ++i) { 367 DIEInfo die_info; 368 if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) { 369 // Only happened if the HashData of the string matched... 370 if (match) 371 pair.value.push_back(die_info); 372 } else { 373 // Something went wrong while reading the data. 374 *hash_data_offset_ptr = UINT32_MAX; 375 return eResultError; 376 } 377 } 378 } 379 // Return the correct response depending on if the string matched or not... 380 if (match) { 381 // The key (cstring) matches and we have lookup results! 382 return eResultKeyMatch; 383 } else { 384 // The key doesn't match, this function will get called again for the 385 // next key/value or the key terminator which in our case is a zero 386 // .debug_str offset. 387 return eResultKeyMismatch; 388 } 389 } else { 390 *hash_data_offset_ptr = UINT32_MAX; 391 return eResultError; 392 } 393 } 394 395 DWARFMappedHash::MemoryTable::Result 396 DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression( 397 const lldb_private::RegularExpression ®ex, 398 lldb::offset_t *hash_data_offset_ptr, Pair &pair) const { 399 pair.key = m_data.GetU32(hash_data_offset_ptr); 400 // If the key is zero, this terminates our chain of HashData objects for this 401 // hash value. 402 if (pair.key == 0) 403 return eResultEndOfHashData; 404 405 // There definitely should be a string for this string offset, if there 406 // isn't, there is something wrong, return and error. 407 const char *strp_cstr = m_string_table.PeekCStr(pair.key); 408 if (strp_cstr == nullptr) 409 return eResultError; 410 411 const uint32_t count = m_data.GetU32(hash_data_offset_ptr); 412 const size_t min_total_hash_data_size = 413 count * m_header.header_data.GetMinimumHashDataByteSize(); 414 if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr, 415 min_total_hash_data_size)) { 416 const bool match = regex.Execute(llvm::StringRef(strp_cstr)); 417 418 if (!match && m_header.header_data.HashDataHasFixedByteSize()) { 419 // If the regex doesn't match and we have fixed size data, we can just 420 // add the total byte size of all HashData objects to the hash data 421 // offset and be done... 422 *hash_data_offset_ptr += min_total_hash_data_size; 423 } else { 424 // If the string does match, or we don't have fixed size data then we 425 // need to read the hash data as a stream. If the string matches we also 426 // append all HashData objects to the value array. 427 for (uint32_t i = 0; i < count; ++i) { 428 DIEInfo die_info; 429 if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) { 430 // Only happened if the HashData of the string matched... 431 if (match) 432 pair.value.push_back(die_info); 433 } else { 434 // Something went wrong while reading the data 435 *hash_data_offset_ptr = UINT32_MAX; 436 return eResultError; 437 } 438 } 439 } 440 // Return the correct response depending on if the string matched or not... 441 if (match) { 442 // The key (cstring) matches and we have lookup results! 443 return eResultKeyMatch; 444 } else { 445 // The key doesn't match, this function will get called again for the 446 // next key/value or the key terminator which in our case is a zero 447 // .debug_str offset. 448 return eResultKeyMismatch; 449 } 450 } else { 451 *hash_data_offset_ptr = UINT32_MAX; 452 return eResultError; 453 } 454 } 455 456 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsThatMatchingRegex( 457 const lldb_private::RegularExpression ®ex, 458 DIEInfoArray &die_info_array) const { 459 const uint32_t hash_count = m_header.hashes_count; 460 Pair pair; 461 for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) { 462 lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx); 463 while (hash_data_offset != UINT32_MAX) { 464 const lldb::offset_t prev_hash_data_offset = hash_data_offset; 465 Result hash_result = 466 AppendHashDataForRegularExpression(regex, &hash_data_offset, pair); 467 if (prev_hash_data_offset == hash_data_offset) 468 break; 469 470 // Check the result of getting our hash data. 471 switch (hash_result) { 472 case eResultKeyMatch: 473 case eResultKeyMismatch: 474 // Whether we matches or not, it doesn't matter, we keep looking. 475 break; 476 477 case eResultEndOfHashData: 478 case eResultError: 479 hash_data_offset = UINT32_MAX; 480 break; 481 } 482 } 483 } 484 die_info_array.swap(pair.value); 485 return die_info_array.size(); 486 } 487 488 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsInRange( 489 const uint32_t die_offset_start, const uint32_t die_offset_end, 490 DIEInfoArray &die_info_array) const { 491 const uint32_t hash_count = m_header.hashes_count; 492 for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) { 493 bool done = false; 494 lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx); 495 while (!done && hash_data_offset != UINT32_MAX) { 496 KeyType key = m_data.GetU32(&hash_data_offset); 497 // If the key is zero, this terminates our chain of HashData objects for 498 // this hash value. 499 if (key == 0) 500 break; 501 502 const uint32_t count = m_data.GetU32(&hash_data_offset); 503 for (uint32_t i = 0; i < count; ++i) { 504 DIEInfo die_info; 505 if (m_header.Read(m_data, &hash_data_offset, die_info)) { 506 if (die_info.die_offset == 0) 507 done = true; 508 if (die_offset_start <= die_info.die_offset && 509 die_info.die_offset < die_offset_end) 510 die_info_array.push_back(die_info); 511 } 512 } 513 } 514 } 515 return die_info_array.size(); 516 } 517 518 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name, 519 DIEArray &die_offsets) { 520 if (name.empty()) 521 return 0; 522 523 DIEInfoArray die_info_array; 524 if (FindByName(name, die_info_array)) 525 DWARFMappedHash::ExtractDIEArray(die_info_array, die_offsets); 526 return die_info_array.size(); 527 } 528 529 size_t DWARFMappedHash::MemoryTable::FindByNameAndTag(llvm::StringRef name, 530 const dw_tag_t tag, 531 DIEArray &die_offsets) { 532 DIEInfoArray die_info_array; 533 if (FindByName(name, die_info_array)) 534 DWARFMappedHash::ExtractDIEArray(die_info_array, tag, die_offsets); 535 return die_info_array.size(); 536 } 537 538 size_t DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash( 539 llvm::StringRef name, const dw_tag_t tag, 540 const uint32_t qualified_name_hash, DIEArray &die_offsets) { 541 DIEInfoArray die_info_array; 542 if (FindByName(name, die_info_array)) 543 DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash, 544 die_offsets); 545 return die_info_array.size(); 546 } 547 548 size_t DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName( 549 llvm::StringRef name, DIEArray &die_offsets, bool must_be_implementation) { 550 DIEInfoArray die_info_array; 551 if (FindByName(name, die_info_array)) { 552 if (must_be_implementation && 553 GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) { 554 // If we have two atoms, then we have the DIE offset and the type flags 555 // so we can find the objective C class efficiently. 556 DWARFMappedHash::ExtractTypesFromDIEArray(die_info_array, UINT32_MAX, 557 eTypeFlagClassIsImplementation, 558 die_offsets); 559 } else { 560 // We don't only want the one true definition, so try and see what we can 561 // find, and only return class or struct DIEs. If we do have the full 562 // implementation, then return it alone, else return all possible 563 // matches. 564 const bool return_implementation_only_if_available = true; 565 DWARFMappedHash::ExtractClassOrStructDIEArray( 566 die_info_array, return_implementation_only_if_available, die_offsets); 567 } 568 } 569 return die_offsets.size(); 570 } 571 572 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name, 573 DIEInfoArray &die_info_array) { 574 if (name.empty()) 575 return 0; 576 577 Pair kv_pair; 578 size_t old_size = die_info_array.size(); 579 if (Find(name, kv_pair)) { 580 die_info_array.swap(kv_pair.value); 581 return die_info_array.size() - old_size; 582 } 583 return 0; 584 } 585