1 //===-- HashedNameToDIE.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "HashedNameToDIE.h" 10 #include "llvm/ADT/StringRef.h" 11 12 bool DWARFMappedHash::ExtractDIEArray( 13 const DIEInfoArray &die_info_array, 14 llvm::function_ref<bool(DIERef ref)> callback) { 15 const size_t count = die_info_array.size(); 16 for (size_t i = 0; i < count; ++i) 17 if (!callback(DIERef(die_info_array[i]))) 18 return false; 19 return true; 20 } 21 22 void DWARFMappedHash::ExtractDIEArray( 23 const DIEInfoArray &die_info_array, const dw_tag_t tag, 24 llvm::function_ref<bool(DIERef ref)> callback) { 25 if (tag == 0) { 26 ExtractDIEArray(die_info_array, callback); 27 return; 28 } 29 30 const size_t count = die_info_array.size(); 31 for (size_t i = 0; i < count; ++i) { 32 const dw_tag_t die_tag = die_info_array[i].tag; 33 bool tag_matches = die_tag == 0 || tag == die_tag; 34 if (!tag_matches) { 35 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type) 36 tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type; 37 } 38 if (tag_matches) { 39 if (!callback(DIERef(die_info_array[i]))) 40 return; 41 } 42 } 43 } 44 45 void DWARFMappedHash::ExtractDIEArray( 46 const DIEInfoArray &die_info_array, const dw_tag_t tag, 47 const uint32_t qualified_name_hash, 48 llvm::function_ref<bool(DIERef ref)> callback) { 49 if (tag == 0) { 50 ExtractDIEArray(die_info_array, callback); 51 return; 52 } 53 54 const size_t count = die_info_array.size(); 55 for (size_t i = 0; i < count; ++i) { 56 if (qualified_name_hash != die_info_array[i].qualified_name_hash) 57 continue; 58 const dw_tag_t die_tag = die_info_array[i].tag; 59 bool tag_matches = die_tag == 0 || tag == die_tag; 60 if (!tag_matches) { 61 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type) 62 tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type; 63 } 64 if (tag_matches) { 65 if (!callback(DIERef(die_info_array[i]))) 66 return; 67 } 68 } 69 } 70 71 void DWARFMappedHash::ExtractClassOrStructDIEArray( 72 const DIEInfoArray &die_info_array, 73 bool return_implementation_only_if_available, 74 llvm::function_ref<bool(DIERef ref)> callback) { 75 const size_t count = die_info_array.size(); 76 for (size_t i = 0; i < count; ++i) { 77 const dw_tag_t die_tag = die_info_array[i].tag; 78 if (!(die_tag == 0 || die_tag == DW_TAG_class_type || 79 die_tag == DW_TAG_structure_type)) 80 continue; 81 bool is_implementation = 82 (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) != 0; 83 if (is_implementation != return_implementation_only_if_available) 84 continue; 85 if (return_implementation_only_if_available) { 86 // We found the one true definition for this class, so only return 87 // that 88 callback(DIERef(die_info_array[i])); 89 return; 90 } 91 if (!callback(DIERef(die_info_array[i]))) 92 return; 93 } 94 } 95 96 void DWARFMappedHash::ExtractTypesFromDIEArray( 97 const DIEInfoArray &die_info_array, uint32_t type_flag_mask, 98 uint32_t type_flag_value, llvm::function_ref<bool(DIERef ref)> callback) { 99 const size_t count = die_info_array.size(); 100 for (size_t i = 0; i < count; ++i) { 101 if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value) { 102 if (!callback(DIERef(die_info_array[i]))) 103 return; 104 } 105 } 106 } 107 108 const char *DWARFMappedHash::GetAtomTypeName(uint16_t atom) { 109 switch (atom) { 110 case eAtomTypeNULL: 111 return "NULL"; 112 case eAtomTypeDIEOffset: 113 return "die-offset"; 114 case eAtomTypeCUOffset: 115 return "cu-offset"; 116 case eAtomTypeTag: 117 return "die-tag"; 118 case eAtomTypeNameFlags: 119 return "name-flags"; 120 case eAtomTypeTypeFlags: 121 return "type-flags"; 122 case eAtomTypeQualNameHash: 123 return "qualified-name-hash"; 124 } 125 return "<invalid>"; 126 } 127 128 DWARFMappedHash::DIEInfo::DIEInfo(dw_offset_t o, dw_tag_t t, uint32_t f, 129 uint32_t h) 130 : die_offset(o), tag(t), type_flags(f), qualified_name_hash(h) {} 131 132 DWARFMappedHash::Prologue::Prologue(dw_offset_t _die_base_offset) 133 : die_base_offset(_die_base_offset), atoms(), atom_mask(0), 134 min_hash_data_byte_size(0), hash_data_has_fixed_byte_size(true) { 135 // Define an array of DIE offsets by first defining an array, and then define 136 // the atom type for the array, in this case we have an array of DIE offsets. 137 AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4); 138 } 139 140 void DWARFMappedHash::Prologue::ClearAtoms() { 141 hash_data_has_fixed_byte_size = true; 142 min_hash_data_byte_size = 0; 143 atom_mask = 0; 144 atoms.clear(); 145 } 146 147 bool DWARFMappedHash::Prologue::ContainsAtom(AtomType atom_type) const { 148 return (atom_mask & (1u << atom_type)) != 0; 149 } 150 151 void DWARFMappedHash::Prologue::Clear() { 152 die_base_offset = 0; 153 ClearAtoms(); 154 } 155 156 void DWARFMappedHash::Prologue::AppendAtom(AtomType type, dw_form_t form) { 157 atoms.push_back({type, form}); 158 atom_mask |= 1u << type; 159 switch (form) { 160 case DW_FORM_indirect: 161 case DW_FORM_exprloc: 162 case DW_FORM_flag_present: 163 case DW_FORM_ref_sig8: 164 llvm_unreachable("Unhandled atom form"); 165 166 case DW_FORM_addrx: 167 case DW_FORM_string: 168 case DW_FORM_block: 169 case DW_FORM_block1: 170 case DW_FORM_sdata: 171 case DW_FORM_udata: 172 case DW_FORM_ref_udata: 173 case DW_FORM_GNU_addr_index: 174 case DW_FORM_GNU_str_index: 175 hash_data_has_fixed_byte_size = false; 176 LLVM_FALLTHROUGH; 177 case DW_FORM_flag: 178 case DW_FORM_data1: 179 case DW_FORM_ref1: 180 case DW_FORM_sec_offset: 181 min_hash_data_byte_size += 1; 182 break; 183 184 case DW_FORM_block2: 185 hash_data_has_fixed_byte_size = false; 186 LLVM_FALLTHROUGH; 187 case DW_FORM_data2: 188 case DW_FORM_ref2: 189 min_hash_data_byte_size += 2; 190 break; 191 192 case DW_FORM_block4: 193 hash_data_has_fixed_byte_size = false; 194 LLVM_FALLTHROUGH; 195 case DW_FORM_data4: 196 case DW_FORM_ref4: 197 case DW_FORM_addr: 198 case DW_FORM_ref_addr: 199 case DW_FORM_strp: 200 min_hash_data_byte_size += 4; 201 break; 202 203 case DW_FORM_data8: 204 case DW_FORM_ref8: 205 min_hash_data_byte_size += 8; 206 break; 207 } 208 } 209 210 lldb::offset_t 211 DWARFMappedHash::Prologue::Read(const lldb_private::DataExtractor &data, 212 lldb::offset_t offset) { 213 ClearAtoms(); 214 215 die_base_offset = data.GetU32(&offset); 216 217 const uint32_t atom_count = data.GetU32(&offset); 218 if (atom_count == 0x00060003u) { 219 // Old format, deal with contents of old pre-release format. 220 while (data.GetU32(&offset)) { 221 /* do nothing */; 222 } 223 224 // Hardcode to the only known value for now. 225 AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4); 226 } else { 227 for (uint32_t i = 0; i < atom_count; ++i) { 228 AtomType type = (AtomType)data.GetU16(&offset); 229 dw_form_t form = (dw_form_t)data.GetU16(&offset); 230 AppendAtom(type, form); 231 } 232 } 233 return offset; 234 } 235 236 size_t DWARFMappedHash::Prologue::GetByteSize() const { 237 // Add an extra count to the atoms size for the zero termination Atom that 238 // gets written to disk. 239 return sizeof(die_base_offset) + sizeof(uint32_t) + 240 atoms.size() * sizeof(Atom); 241 } 242 243 size_t DWARFMappedHash::Prologue::GetMinimumHashDataByteSize() const { 244 return min_hash_data_byte_size; 245 } 246 247 bool DWARFMappedHash::Prologue::HashDataHasFixedByteSize() const { 248 return hash_data_has_fixed_byte_size; 249 } 250 251 size_t DWARFMappedHash::Header::GetByteSize(const HeaderData &header_data) { 252 return header_data.GetByteSize(); 253 } 254 255 lldb::offset_t DWARFMappedHash::Header::Read(lldb_private::DataExtractor &data, 256 lldb::offset_t offset) { 257 offset = MappedHash::Header<Prologue>::Read(data, offset); 258 if (offset != UINT32_MAX) { 259 offset = header_data.Read(data, offset); 260 } 261 return offset; 262 } 263 264 bool DWARFMappedHash::Header::Read(const lldb_private::DWARFDataExtractor &data, 265 lldb::offset_t *offset_ptr, 266 DIEInfo &hash_data) const { 267 const size_t num_atoms = header_data.atoms.size(); 268 if (num_atoms == 0) 269 return false; 270 271 for (size_t i = 0; i < num_atoms; ++i) { 272 DWARFFormValue form_value(nullptr, header_data.atoms[i].form); 273 274 if (!form_value.ExtractValue(data, offset_ptr)) 275 return false; 276 277 switch (header_data.atoms[i].type) { 278 case eAtomTypeDIEOffset: // DIE offset, check form for encoding 279 hash_data.die_offset = 280 DWARFFormValue::IsDataForm(form_value.Form()) 281 ? form_value.Unsigned() 282 : form_value.Reference(header_data.die_base_offset); 283 break; 284 285 case eAtomTypeTag: // DW_TAG value for the DIE 286 hash_data.tag = (dw_tag_t)form_value.Unsigned(); 287 break; 288 289 case eAtomTypeTypeFlags: // Flags from enum TypeFlags 290 hash_data.type_flags = (uint32_t)form_value.Unsigned(); 291 break; 292 293 case eAtomTypeQualNameHash: // Flags from enum TypeFlags 294 hash_data.qualified_name_hash = form_value.Unsigned(); 295 break; 296 297 default: 298 // We can always skip atoms we don't know about. 299 break; 300 } 301 } 302 return hash_data.die_offset != DW_INVALID_OFFSET; 303 } 304 305 DWARFMappedHash::MemoryTable::MemoryTable( 306 lldb_private::DWARFDataExtractor &table_data, 307 const lldb_private::DWARFDataExtractor &string_table, const char *name) 308 : MappedHash::MemoryTable<uint32_t, Header, DIEInfoArray>(table_data), 309 m_data(table_data), m_string_table(string_table), m_name(name) {} 310 311 const char * 312 DWARFMappedHash::MemoryTable::GetStringForKeyType(KeyType key) const { 313 // The key in the DWARF table is the .debug_str offset for the string 314 return m_string_table.PeekCStr(key); 315 } 316 317 bool DWARFMappedHash::MemoryTable::ReadHashData(uint32_t hash_data_offset, 318 HashData &hash_data) const { 319 lldb::offset_t offset = hash_data_offset; 320 // Skip string table offset that contains offset of hash name in .debug_str. 321 offset += 4; 322 const uint32_t count = m_data.GetU32(&offset); 323 if (count > 0) { 324 hash_data.resize(count); 325 for (uint32_t i = 0; i < count; ++i) { 326 if (!m_header.Read(m_data, &offset, hash_data[i])) 327 return false; 328 } 329 } else 330 hash_data.clear(); 331 return true; 332 } 333 334 DWARFMappedHash::MemoryTable::Result 335 DWARFMappedHash::MemoryTable::GetHashDataForName( 336 llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr, 337 Pair &pair) const { 338 pair.key = m_data.GetU32(hash_data_offset_ptr); 339 pair.value.clear(); 340 341 // If the key is zero, this terminates our chain of HashData objects for this 342 // hash value. 343 if (pair.key == 0) 344 return eResultEndOfHashData; 345 346 // There definitely should be a string for this string offset, if there 347 // isn't, there is something wrong, return and error. 348 const char *strp_cstr = m_string_table.PeekCStr(pair.key); 349 if (strp_cstr == nullptr) { 350 *hash_data_offset_ptr = UINT32_MAX; 351 return eResultError; 352 } 353 354 const uint32_t count = m_data.GetU32(hash_data_offset_ptr); 355 const size_t min_total_hash_data_size = 356 count * m_header.header_data.GetMinimumHashDataByteSize(); 357 if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr, 358 min_total_hash_data_size)) { 359 // We have at least one HashData entry, and we have enough data to parse at 360 // least "count" HashData entries. 361 362 // First make sure the entire C string matches... 363 const bool match = name == strp_cstr; 364 365 if (!match && m_header.header_data.HashDataHasFixedByteSize()) { 366 // If the string doesn't match and we have fixed size data, we can just 367 // add the total byte size of all HashData objects to the hash data 368 // offset and be done... 369 *hash_data_offset_ptr += min_total_hash_data_size; 370 } else { 371 // If the string does match, or we don't have fixed size data then we 372 // need to read the hash data as a stream. If the string matches we also 373 // append all HashData objects to the value array. 374 for (uint32_t i = 0; i < count; ++i) { 375 DIEInfo die_info; 376 if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) { 377 // Only happened if the HashData of the string matched... 378 if (match) 379 pair.value.push_back(die_info); 380 } else { 381 // Something went wrong while reading the data. 382 *hash_data_offset_ptr = UINT32_MAX; 383 return eResultError; 384 } 385 } 386 } 387 // Return the correct response depending on if the string matched or not... 388 if (match) { 389 // The key (cstring) matches and we have lookup results! 390 return eResultKeyMatch; 391 } else { 392 // The key doesn't match, this function will get called again for the 393 // next key/value or the key terminator which in our case is a zero 394 // .debug_str offset. 395 return eResultKeyMismatch; 396 } 397 } else { 398 *hash_data_offset_ptr = UINT32_MAX; 399 return eResultError; 400 } 401 } 402 403 DWARFMappedHash::MemoryTable::Result 404 DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression( 405 const lldb_private::RegularExpression ®ex, 406 lldb::offset_t *hash_data_offset_ptr, Pair &pair) const { 407 pair.key = m_data.GetU32(hash_data_offset_ptr); 408 // If the key is zero, this terminates our chain of HashData objects for this 409 // hash value. 410 if (pair.key == 0) 411 return eResultEndOfHashData; 412 413 // There definitely should be a string for this string offset, if there 414 // isn't, there is something wrong, return and error. 415 const char *strp_cstr = m_string_table.PeekCStr(pair.key); 416 if (strp_cstr == nullptr) 417 return eResultError; 418 419 const uint32_t count = m_data.GetU32(hash_data_offset_ptr); 420 const size_t min_total_hash_data_size = 421 count * m_header.header_data.GetMinimumHashDataByteSize(); 422 if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr, 423 min_total_hash_data_size)) { 424 const bool match = regex.Execute(llvm::StringRef(strp_cstr)); 425 426 if (!match && m_header.header_data.HashDataHasFixedByteSize()) { 427 // If the regex doesn't match and we have fixed size data, we can just 428 // add the total byte size of all HashData objects to the hash data 429 // offset and be done... 430 *hash_data_offset_ptr += min_total_hash_data_size; 431 } else { 432 // If the string does match, or we don't have fixed size data then we 433 // need to read the hash data as a stream. If the string matches we also 434 // append all HashData objects to the value array. 435 for (uint32_t i = 0; i < count; ++i) { 436 DIEInfo die_info; 437 if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) { 438 // Only happened if the HashData of the string matched... 439 if (match) 440 pair.value.push_back(die_info); 441 } else { 442 // Something went wrong while reading the data 443 *hash_data_offset_ptr = UINT32_MAX; 444 return eResultError; 445 } 446 } 447 } 448 // Return the correct response depending on if the string matched or not... 449 if (match) { 450 // The key (cstring) matches and we have lookup results! 451 return eResultKeyMatch; 452 } else { 453 // The key doesn't match, this function will get called again for the 454 // next key/value or the key terminator which in our case is a zero 455 // .debug_str offset. 456 return eResultKeyMismatch; 457 } 458 } else { 459 *hash_data_offset_ptr = UINT32_MAX; 460 return eResultError; 461 } 462 } 463 464 void DWARFMappedHash::MemoryTable::AppendAllDIEsThatMatchingRegex( 465 const lldb_private::RegularExpression ®ex, 466 DIEInfoArray &die_info_array) const { 467 const uint32_t hash_count = m_header.hashes_count; 468 Pair pair; 469 for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) { 470 lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx); 471 while (hash_data_offset != UINT32_MAX) { 472 const lldb::offset_t prev_hash_data_offset = hash_data_offset; 473 Result hash_result = 474 AppendHashDataForRegularExpression(regex, &hash_data_offset, pair); 475 if (prev_hash_data_offset == hash_data_offset) 476 break; 477 478 // Check the result of getting our hash data. 479 switch (hash_result) { 480 case eResultKeyMatch: 481 case eResultKeyMismatch: 482 // Whether we matches or not, it doesn't matter, we keep looking. 483 break; 484 485 case eResultEndOfHashData: 486 case eResultError: 487 hash_data_offset = UINT32_MAX; 488 break; 489 } 490 } 491 } 492 die_info_array.swap(pair.value); 493 } 494 495 void DWARFMappedHash::MemoryTable::AppendAllDIEsInRange( 496 const uint32_t die_offset_start, const uint32_t die_offset_end, 497 DIEInfoArray &die_info_array) const { 498 const uint32_t hash_count = m_header.hashes_count; 499 for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) { 500 bool done = false; 501 lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx); 502 while (!done && hash_data_offset != UINT32_MAX) { 503 KeyType key = m_data.GetU32(&hash_data_offset); 504 // If the key is zero, this terminates our chain of HashData objects for 505 // this hash value. 506 if (key == 0) 507 break; 508 509 const uint32_t count = m_data.GetU32(&hash_data_offset); 510 for (uint32_t i = 0; i < count; ++i) { 511 DIEInfo die_info; 512 if (m_header.Read(m_data, &hash_data_offset, die_info)) { 513 if (die_info.die_offset == 0) 514 done = true; 515 if (die_offset_start <= die_info.die_offset && 516 die_info.die_offset < die_offset_end) 517 die_info_array.push_back(die_info); 518 } 519 } 520 } 521 } 522 } 523 524 bool DWARFMappedHash::MemoryTable::FindByName( 525 llvm::StringRef name, llvm::function_ref<bool(DIERef ref)> callback) { 526 if (name.empty()) 527 return true; 528 529 DIEInfoArray die_info_array; 530 FindByName(name, die_info_array); 531 return DWARFMappedHash::ExtractDIEArray(die_info_array, callback); 532 } 533 534 void DWARFMappedHash::MemoryTable::FindByNameAndTag( 535 llvm::StringRef name, const dw_tag_t tag, 536 llvm::function_ref<bool(DIERef ref)> callback) { 537 DIEInfoArray die_info_array; 538 FindByName(name, die_info_array); 539 DWARFMappedHash::ExtractDIEArray(die_info_array, tag, callback); 540 } 541 542 void DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash( 543 llvm::StringRef name, const dw_tag_t tag, 544 const uint32_t qualified_name_hash, 545 llvm::function_ref<bool(DIERef ref)> callback) { 546 DIEInfoArray die_info_array; 547 FindByName(name, die_info_array); 548 DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash, 549 callback); 550 } 551 552 void DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName( 553 llvm::StringRef name, llvm::function_ref<bool(DIERef ref)> callback, 554 bool must_be_implementation) { 555 DIEInfoArray die_info_array; 556 FindByName(name, die_info_array); 557 if (must_be_implementation && 558 GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) { 559 // If we have two atoms, then we have the DIE offset and the type flags 560 // so we can find the objective C class efficiently. 561 DWARFMappedHash::ExtractTypesFromDIEArray( 562 die_info_array, UINT32_MAX, eTypeFlagClassIsImplementation, callback); 563 return; 564 } 565 // We don't only want the one true definition, so try and see what we can 566 // find, and only return class or struct DIEs. If we do have the full 567 // implementation, then return it alone, else return all possible 568 // matches. 569 bool found_implementation = false; 570 DWARFMappedHash::ExtractClassOrStructDIEArray( 571 die_info_array, true /*return_implementation_only_if_available*/, 572 [&](DIERef ref) { 573 found_implementation = true; 574 // Here the return value does not matter as we are called at most once. 575 return callback(ref); 576 }); 577 if (found_implementation) 578 return; 579 DWARFMappedHash::ExtractClassOrStructDIEArray( 580 die_info_array, false /*return_implementation_only_if_available*/, 581 callback); 582 } 583 584 void DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name, 585 DIEInfoArray &die_info_array) { 586 if (name.empty()) 587 return; 588 589 Pair kv_pair; 590 if (Find(name, kv_pair)) 591 die_info_array.swap(kv_pair.value); 592 } 593