1 //===-- HashedNameToDIE.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "HashedNameToDIE.h" 10 #include "llvm/ADT/StringRef.h" 11 12 bool DWARFMappedHash::ExtractDIEArray( 13 const DIEInfoArray &die_info_array, 14 llvm::function_ref<bool(DIERef ref)> callback) { 15 const size_t count = die_info_array.size(); 16 for (size_t i = 0; i < count; ++i) 17 if (!callback(DIERef(die_info_array[i]))) 18 return false; 19 return true; 20 } 21 22 void DWARFMappedHash::ExtractDIEArray( 23 const DIEInfoArray &die_info_array, const dw_tag_t tag, 24 llvm::function_ref<bool(DIERef ref)> callback) { 25 if (tag == 0) { 26 ExtractDIEArray(die_info_array, callback); 27 return; 28 } 29 30 const size_t count = die_info_array.size(); 31 for (size_t i = 0; i < count; ++i) { 32 const dw_tag_t die_tag = die_info_array[i].tag; 33 bool tag_matches = die_tag == 0 || tag == die_tag; 34 if (!tag_matches) { 35 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type) 36 tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type; 37 } 38 if (tag_matches) { 39 if (!callback(DIERef(die_info_array[i]))) 40 return; 41 } 42 } 43 } 44 45 void DWARFMappedHash::ExtractDIEArray( 46 const DIEInfoArray &die_info_array, const dw_tag_t tag, 47 const uint32_t qualified_name_hash, 48 llvm::function_ref<bool(DIERef ref)> callback) { 49 if (tag == 0) { 50 ExtractDIEArray(die_info_array, callback); 51 return; 52 } 53 54 const size_t count = die_info_array.size(); 55 for (size_t i = 0; i < count; ++i) { 56 if (qualified_name_hash != die_info_array[i].qualified_name_hash) 57 continue; 58 const dw_tag_t die_tag = die_info_array[i].tag; 59 bool tag_matches = die_tag == 0 || tag == die_tag; 60 if (!tag_matches) { 61 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type) 62 tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type; 63 } 64 if (tag_matches) { 65 if (!callback(DIERef(die_info_array[i]))) 66 return; 67 } 68 } 69 } 70 71 void DWARFMappedHash::ExtractClassOrStructDIEArray( 72 const DIEInfoArray &die_info_array, 73 bool return_implementation_only_if_available, 74 llvm::function_ref<bool(DIERef ref)> callback) { 75 const size_t count = die_info_array.size(); 76 for (size_t i = 0; i < count; ++i) { 77 const dw_tag_t die_tag = die_info_array[i].tag; 78 if (!(die_tag == 0 || die_tag == DW_TAG_class_type || 79 die_tag == DW_TAG_structure_type)) 80 continue; 81 bool is_implementation = 82 (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) != 0; 83 if (is_implementation != return_implementation_only_if_available) 84 continue; 85 if (return_implementation_only_if_available) { 86 // We found the one true definition for this class, so only return 87 // that 88 callback(DIERef(die_info_array[i])); 89 return; 90 } 91 if (!callback(DIERef(die_info_array[i]))) 92 return; 93 } 94 } 95 96 void DWARFMappedHash::ExtractTypesFromDIEArray( 97 const DIEInfoArray &die_info_array, uint32_t type_flag_mask, 98 uint32_t type_flag_value, llvm::function_ref<bool(DIERef ref)> callback) { 99 const size_t count = die_info_array.size(); 100 for (size_t i = 0; i < count; ++i) { 101 if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value) { 102 if (!callback(DIERef(die_info_array[i]))) 103 return; 104 } 105 } 106 } 107 108 const char *DWARFMappedHash::GetAtomTypeName(uint16_t atom) { 109 switch (atom) { 110 case eAtomTypeNULL: 111 return "NULL"; 112 case eAtomTypeDIEOffset: 113 return "die-offset"; 114 case eAtomTypeCUOffset: 115 return "cu-offset"; 116 case eAtomTypeTag: 117 return "die-tag"; 118 case eAtomTypeNameFlags: 119 return "name-flags"; 120 case eAtomTypeTypeFlags: 121 return "type-flags"; 122 case eAtomTypeQualNameHash: 123 return "qualified-name-hash"; 124 } 125 return "<invalid>"; 126 } 127 128 DWARFMappedHash::DIEInfo::DIEInfo(dw_offset_t o, dw_tag_t t, uint32_t f, 129 uint32_t h) 130 : die_offset(o), tag(t), type_flags(f), qualified_name_hash(h) {} 131 132 DWARFMappedHash::Prologue::Prologue(dw_offset_t _die_base_offset) 133 : die_base_offset(_die_base_offset), atoms() { 134 // Define an array of DIE offsets by first defining an array, and then define 135 // the atom type for the array, in this case we have an array of DIE offsets. 136 AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4); 137 } 138 139 void DWARFMappedHash::Prologue::ClearAtoms() { 140 hash_data_has_fixed_byte_size = true; 141 min_hash_data_byte_size = 0; 142 atom_mask = 0; 143 atoms.clear(); 144 } 145 146 bool DWARFMappedHash::Prologue::ContainsAtom(AtomType atom_type) const { 147 return (atom_mask & (1u << atom_type)) != 0; 148 } 149 150 void DWARFMappedHash::Prologue::Clear() { 151 die_base_offset = 0; 152 ClearAtoms(); 153 } 154 155 void DWARFMappedHash::Prologue::AppendAtom(AtomType type, dw_form_t form) { 156 atoms.push_back({type, form}); 157 atom_mask |= 1u << type; 158 switch (form) { 159 case DW_FORM_indirect: 160 case DW_FORM_exprloc: 161 case DW_FORM_flag_present: 162 case DW_FORM_ref_sig8: 163 llvm_unreachable("Unhandled atom form"); 164 165 case DW_FORM_addrx: 166 case DW_FORM_string: 167 case DW_FORM_block: 168 case DW_FORM_block1: 169 case DW_FORM_sdata: 170 case DW_FORM_udata: 171 case DW_FORM_ref_udata: 172 case DW_FORM_GNU_addr_index: 173 case DW_FORM_GNU_str_index: 174 hash_data_has_fixed_byte_size = false; 175 LLVM_FALLTHROUGH; 176 case DW_FORM_flag: 177 case DW_FORM_data1: 178 case DW_FORM_ref1: 179 case DW_FORM_sec_offset: 180 min_hash_data_byte_size += 1; 181 break; 182 183 case DW_FORM_block2: 184 hash_data_has_fixed_byte_size = false; 185 LLVM_FALLTHROUGH; 186 case DW_FORM_data2: 187 case DW_FORM_ref2: 188 min_hash_data_byte_size += 2; 189 break; 190 191 case DW_FORM_block4: 192 hash_data_has_fixed_byte_size = false; 193 LLVM_FALLTHROUGH; 194 case DW_FORM_data4: 195 case DW_FORM_ref4: 196 case DW_FORM_addr: 197 case DW_FORM_ref_addr: 198 case DW_FORM_strp: 199 min_hash_data_byte_size += 4; 200 break; 201 202 case DW_FORM_data8: 203 case DW_FORM_ref8: 204 min_hash_data_byte_size += 8; 205 break; 206 } 207 } 208 209 lldb::offset_t 210 DWARFMappedHash::Prologue::Read(const lldb_private::DataExtractor &data, 211 lldb::offset_t offset) { 212 ClearAtoms(); 213 214 die_base_offset = data.GetU32(&offset); 215 216 const uint32_t atom_count = data.GetU32(&offset); 217 if (atom_count == 0x00060003u) { 218 // Old format, deal with contents of old pre-release format. 219 while (data.GetU32(&offset)) { 220 /* do nothing */; 221 } 222 223 // Hardcode to the only known value for now. 224 AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4); 225 } else { 226 for (uint32_t i = 0; i < atom_count; ++i) { 227 AtomType type = (AtomType)data.GetU16(&offset); 228 dw_form_t form = (dw_form_t)data.GetU16(&offset); 229 AppendAtom(type, form); 230 } 231 } 232 return offset; 233 } 234 235 size_t DWARFMappedHash::Prologue::GetByteSize() const { 236 // Add an extra count to the atoms size for the zero termination Atom that 237 // gets written to disk. 238 return sizeof(die_base_offset) + sizeof(uint32_t) + 239 atoms.size() * sizeof(Atom); 240 } 241 242 size_t DWARFMappedHash::Prologue::GetMinimumHashDataByteSize() const { 243 return min_hash_data_byte_size; 244 } 245 246 bool DWARFMappedHash::Prologue::HashDataHasFixedByteSize() const { 247 return hash_data_has_fixed_byte_size; 248 } 249 250 size_t DWARFMappedHash::Header::GetByteSize(const HeaderData &header_data) { 251 return header_data.GetByteSize(); 252 } 253 254 lldb::offset_t DWARFMappedHash::Header::Read(lldb_private::DataExtractor &data, 255 lldb::offset_t offset) { 256 offset = MappedHash::Header<Prologue>::Read(data, offset); 257 if (offset != UINT32_MAX) { 258 offset = header_data.Read(data, offset); 259 } 260 return offset; 261 } 262 263 bool DWARFMappedHash::Header::Read(const lldb_private::DWARFDataExtractor &data, 264 lldb::offset_t *offset_ptr, 265 DIEInfo &hash_data) const { 266 const size_t num_atoms = header_data.atoms.size(); 267 if (num_atoms == 0) 268 return false; 269 270 for (size_t i = 0; i < num_atoms; ++i) { 271 DWARFFormValue form_value(nullptr, header_data.atoms[i].form); 272 273 if (!form_value.ExtractValue(data, offset_ptr)) 274 return false; 275 276 switch (header_data.atoms[i].type) { 277 case eAtomTypeDIEOffset: // DIE offset, check form for encoding 278 hash_data.die_offset = 279 DWARFFormValue::IsDataForm(form_value.Form()) 280 ? form_value.Unsigned() 281 : form_value.Reference(header_data.die_base_offset); 282 break; 283 284 case eAtomTypeTag: // DW_TAG value for the DIE 285 hash_data.tag = (dw_tag_t)form_value.Unsigned(); 286 break; 287 288 case eAtomTypeTypeFlags: // Flags from enum TypeFlags 289 hash_data.type_flags = (uint32_t)form_value.Unsigned(); 290 break; 291 292 case eAtomTypeQualNameHash: // Flags from enum TypeFlags 293 hash_data.qualified_name_hash = form_value.Unsigned(); 294 break; 295 296 default: 297 // We can always skip atoms we don't know about. 298 break; 299 } 300 } 301 return hash_data.die_offset != DW_INVALID_OFFSET; 302 } 303 304 DWARFMappedHash::MemoryTable::MemoryTable( 305 lldb_private::DWARFDataExtractor &table_data, 306 const lldb_private::DWARFDataExtractor &string_table, const char *name) 307 : MappedHash::MemoryTable<uint32_t, Header, DIEInfoArray>(table_data), 308 m_data(table_data), m_string_table(string_table), m_name(name) {} 309 310 const char * 311 DWARFMappedHash::MemoryTable::GetStringForKeyType(KeyType key) const { 312 // The key in the DWARF table is the .debug_str offset for the string 313 return m_string_table.PeekCStr(key); 314 } 315 316 bool DWARFMappedHash::MemoryTable::ReadHashData(uint32_t hash_data_offset, 317 HashData &hash_data) const { 318 lldb::offset_t offset = hash_data_offset; 319 // Skip string table offset that contains offset of hash name in .debug_str. 320 offset += 4; 321 const uint32_t count = m_data.GetU32(&offset); 322 if (count > 0) { 323 hash_data.resize(count); 324 for (uint32_t i = 0; i < count; ++i) { 325 if (!m_header.Read(m_data, &offset, hash_data[i])) 326 return false; 327 } 328 } else 329 hash_data.clear(); 330 return true; 331 } 332 333 DWARFMappedHash::MemoryTable::Result 334 DWARFMappedHash::MemoryTable::GetHashDataForName( 335 llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr, 336 Pair &pair) const { 337 pair.key = m_data.GetU32(hash_data_offset_ptr); 338 pair.value.clear(); 339 340 // If the key is zero, this terminates our chain of HashData objects for this 341 // hash value. 342 if (pair.key == 0) 343 return eResultEndOfHashData; 344 345 // There definitely should be a string for this string offset, if there 346 // isn't, there is something wrong, return and error. 347 const char *strp_cstr = m_string_table.PeekCStr(pair.key); 348 if (strp_cstr == nullptr) { 349 *hash_data_offset_ptr = UINT32_MAX; 350 return eResultError; 351 } 352 353 const uint32_t count = m_data.GetU32(hash_data_offset_ptr); 354 const size_t min_total_hash_data_size = 355 count * m_header.header_data.GetMinimumHashDataByteSize(); 356 if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr, 357 min_total_hash_data_size)) { 358 // We have at least one HashData entry, and we have enough data to parse at 359 // least "count" HashData entries. 360 361 // First make sure the entire C string matches... 362 const bool match = name == strp_cstr; 363 364 if (!match && m_header.header_data.HashDataHasFixedByteSize()) { 365 // If the string doesn't match and we have fixed size data, we can just 366 // add the total byte size of all HashData objects to the hash data 367 // offset and be done... 368 *hash_data_offset_ptr += min_total_hash_data_size; 369 } else { 370 // If the string does match, or we don't have fixed size data then we 371 // need to read the hash data as a stream. If the string matches we also 372 // append all HashData objects to the value array. 373 for (uint32_t i = 0; i < count; ++i) { 374 DIEInfo die_info; 375 if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) { 376 // Only happened if the HashData of the string matched... 377 if (match) 378 pair.value.push_back(die_info); 379 } else { 380 // Something went wrong while reading the data. 381 *hash_data_offset_ptr = UINT32_MAX; 382 return eResultError; 383 } 384 } 385 } 386 // Return the correct response depending on if the string matched or not... 387 if (match) { 388 // The key (cstring) matches and we have lookup results! 389 return eResultKeyMatch; 390 } else { 391 // The key doesn't match, this function will get called again for the 392 // next key/value or the key terminator which in our case is a zero 393 // .debug_str offset. 394 return eResultKeyMismatch; 395 } 396 } else { 397 *hash_data_offset_ptr = UINT32_MAX; 398 return eResultError; 399 } 400 } 401 402 DWARFMappedHash::MemoryTable::Result 403 DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression( 404 const lldb_private::RegularExpression ®ex, 405 lldb::offset_t *hash_data_offset_ptr, Pair &pair) const { 406 pair.key = m_data.GetU32(hash_data_offset_ptr); 407 // If the key is zero, this terminates our chain of HashData objects for this 408 // hash value. 409 if (pair.key == 0) 410 return eResultEndOfHashData; 411 412 // There definitely should be a string for this string offset, if there 413 // isn't, there is something wrong, return and error. 414 const char *strp_cstr = m_string_table.PeekCStr(pair.key); 415 if (strp_cstr == nullptr) 416 return eResultError; 417 418 const uint32_t count = m_data.GetU32(hash_data_offset_ptr); 419 const size_t min_total_hash_data_size = 420 count * m_header.header_data.GetMinimumHashDataByteSize(); 421 if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr, 422 min_total_hash_data_size)) { 423 const bool match = regex.Execute(llvm::StringRef(strp_cstr)); 424 425 if (!match && m_header.header_data.HashDataHasFixedByteSize()) { 426 // If the regex doesn't match and we have fixed size data, we can just 427 // add the total byte size of all HashData objects to the hash data 428 // offset and be done... 429 *hash_data_offset_ptr += min_total_hash_data_size; 430 } else { 431 // If the string does match, or we don't have fixed size data then we 432 // need to read the hash data as a stream. If the string matches we also 433 // append all HashData objects to the value array. 434 for (uint32_t i = 0; i < count; ++i) { 435 DIEInfo die_info; 436 if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) { 437 // Only happened if the HashData of the string matched... 438 if (match) 439 pair.value.push_back(die_info); 440 } else { 441 // Something went wrong while reading the data 442 *hash_data_offset_ptr = UINT32_MAX; 443 return eResultError; 444 } 445 } 446 } 447 // Return the correct response depending on if the string matched or not... 448 if (match) { 449 // The key (cstring) matches and we have lookup results! 450 return eResultKeyMatch; 451 } else { 452 // The key doesn't match, this function will get called again for the 453 // next key/value or the key terminator which in our case is a zero 454 // .debug_str offset. 455 return eResultKeyMismatch; 456 } 457 } else { 458 *hash_data_offset_ptr = UINT32_MAX; 459 return eResultError; 460 } 461 } 462 463 void DWARFMappedHash::MemoryTable::AppendAllDIEsThatMatchingRegex( 464 const lldb_private::RegularExpression ®ex, 465 DIEInfoArray &die_info_array) const { 466 const uint32_t hash_count = m_header.hashes_count; 467 Pair pair; 468 for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) { 469 lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx); 470 while (hash_data_offset != UINT32_MAX) { 471 const lldb::offset_t prev_hash_data_offset = hash_data_offset; 472 Result hash_result = 473 AppendHashDataForRegularExpression(regex, &hash_data_offset, pair); 474 if (prev_hash_data_offset == hash_data_offset) 475 break; 476 477 // Check the result of getting our hash data. 478 switch (hash_result) { 479 case eResultKeyMatch: 480 case eResultKeyMismatch: 481 // Whether we matches or not, it doesn't matter, we keep looking. 482 break; 483 484 case eResultEndOfHashData: 485 case eResultError: 486 hash_data_offset = UINT32_MAX; 487 break; 488 } 489 } 490 } 491 die_info_array.swap(pair.value); 492 } 493 494 void DWARFMappedHash::MemoryTable::AppendAllDIEsInRange( 495 const uint32_t die_offset_start, const uint32_t die_offset_end, 496 DIEInfoArray &die_info_array) const { 497 const uint32_t hash_count = m_header.hashes_count; 498 for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) { 499 bool done = false; 500 lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx); 501 while (!done && hash_data_offset != UINT32_MAX) { 502 KeyType key = m_data.GetU32(&hash_data_offset); 503 // If the key is zero, this terminates our chain of HashData objects for 504 // this hash value. 505 if (key == 0) 506 break; 507 508 const uint32_t count = m_data.GetU32(&hash_data_offset); 509 for (uint32_t i = 0; i < count; ++i) { 510 DIEInfo die_info; 511 if (m_header.Read(m_data, &hash_data_offset, die_info)) { 512 if (die_info.die_offset == 0) 513 done = true; 514 if (die_offset_start <= die_info.die_offset && 515 die_info.die_offset < die_offset_end) 516 die_info_array.push_back(die_info); 517 } 518 } 519 } 520 } 521 } 522 523 bool DWARFMappedHash::MemoryTable::FindByName( 524 llvm::StringRef name, llvm::function_ref<bool(DIERef ref)> callback) { 525 if (name.empty()) 526 return true; 527 528 DIEInfoArray die_info_array; 529 FindByName(name, die_info_array); 530 return DWARFMappedHash::ExtractDIEArray(die_info_array, callback); 531 } 532 533 void DWARFMappedHash::MemoryTable::FindByNameAndTag( 534 llvm::StringRef name, const dw_tag_t tag, 535 llvm::function_ref<bool(DIERef ref)> callback) { 536 DIEInfoArray die_info_array; 537 FindByName(name, die_info_array); 538 DWARFMappedHash::ExtractDIEArray(die_info_array, tag, callback); 539 } 540 541 void DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash( 542 llvm::StringRef name, const dw_tag_t tag, 543 const uint32_t qualified_name_hash, 544 llvm::function_ref<bool(DIERef ref)> callback) { 545 DIEInfoArray die_info_array; 546 FindByName(name, die_info_array); 547 DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash, 548 callback); 549 } 550 551 void DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName( 552 llvm::StringRef name, llvm::function_ref<bool(DIERef ref)> callback, 553 bool must_be_implementation) { 554 DIEInfoArray die_info_array; 555 FindByName(name, die_info_array); 556 if (must_be_implementation && 557 GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) { 558 // If we have two atoms, then we have the DIE offset and the type flags 559 // so we can find the objective C class efficiently. 560 DWARFMappedHash::ExtractTypesFromDIEArray( 561 die_info_array, UINT32_MAX, eTypeFlagClassIsImplementation, callback); 562 return; 563 } 564 // We don't only want the one true definition, so try and see what we can 565 // find, and only return class or struct DIEs. If we do have the full 566 // implementation, then return it alone, else return all possible 567 // matches. 568 bool found_implementation = false; 569 DWARFMappedHash::ExtractClassOrStructDIEArray( 570 die_info_array, true /*return_implementation_only_if_available*/, 571 [&](DIERef ref) { 572 found_implementation = true; 573 // Here the return value does not matter as we are called at most once. 574 return callback(ref); 575 }); 576 if (found_implementation) 577 return; 578 DWARFMappedHash::ExtractClassOrStructDIEArray( 579 die_info_array, false /*return_implementation_only_if_available*/, 580 callback); 581 } 582 583 void DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name, 584 DIEInfoArray &die_info_array) { 585 if (name.empty()) 586 return; 587 588 Pair kv_pair; 589 if (Find(name, kv_pair)) 590 die_info_array.swap(kv_pair.value); 591 } 592