xref: /openbsd-src/gnu/llvm/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp (revision 4e1ee0786f11cc571bd0be17d38e46f635c719fc)
1 //===-- HashedNameToDIE.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "HashedNameToDIE.h"
10 #include "llvm/ADT/StringRef.h"
11 
12 bool DWARFMappedHash::ExtractDIEArray(
13     const DIEInfoArray &die_info_array,
14     llvm::function_ref<bool(DIERef ref)> callback) {
15   const size_t count = die_info_array.size();
16   for (size_t i = 0; i < count; ++i)
17     if (!callback(DIERef(die_info_array[i])))
18       return false;
19   return true;
20 }
21 
22 void DWARFMappedHash::ExtractDIEArray(
23     const DIEInfoArray &die_info_array, const dw_tag_t tag,
24     llvm::function_ref<bool(DIERef ref)> callback) {
25   if (tag == 0) {
26     ExtractDIEArray(die_info_array, callback);
27     return;
28   }
29 
30   const size_t count = die_info_array.size();
31   for (size_t i = 0; i < count; ++i) {
32     const dw_tag_t die_tag = die_info_array[i].tag;
33     bool tag_matches = die_tag == 0 || tag == die_tag;
34     if (!tag_matches) {
35       if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
36         tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
37     }
38     if (tag_matches) {
39       if (!callback(DIERef(die_info_array[i])))
40         return;
41     }
42   }
43 }
44 
45 void DWARFMappedHash::ExtractDIEArray(
46     const DIEInfoArray &die_info_array, const dw_tag_t tag,
47     const uint32_t qualified_name_hash,
48     llvm::function_ref<bool(DIERef ref)> callback) {
49   if (tag == 0) {
50     ExtractDIEArray(die_info_array, callback);
51     return;
52   }
53 
54   const size_t count = die_info_array.size();
55   for (size_t i = 0; i < count; ++i) {
56     if (qualified_name_hash != die_info_array[i].qualified_name_hash)
57       continue;
58     const dw_tag_t die_tag = die_info_array[i].tag;
59     bool tag_matches = die_tag == 0 || tag == die_tag;
60     if (!tag_matches) {
61       if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
62         tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
63     }
64     if (tag_matches) {
65       if (!callback(DIERef(die_info_array[i])))
66         return;
67     }
68   }
69 }
70 
71 void DWARFMappedHash::ExtractClassOrStructDIEArray(
72     const DIEInfoArray &die_info_array,
73     bool return_implementation_only_if_available,
74     llvm::function_ref<bool(DIERef ref)> callback) {
75   const size_t count = die_info_array.size();
76   for (size_t i = 0; i < count; ++i) {
77     const dw_tag_t die_tag = die_info_array[i].tag;
78     if (!(die_tag == 0 || die_tag == DW_TAG_class_type ||
79           die_tag == DW_TAG_structure_type))
80       continue;
81     bool is_implementation =
82         (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) != 0;
83     if (is_implementation != return_implementation_only_if_available)
84       continue;
85     if (return_implementation_only_if_available) {
86       // We found the one true definition for this class, so only return
87       // that
88       callback(DIERef(die_info_array[i]));
89       return;
90     }
91     if (!callback(DIERef(die_info_array[i])))
92       return;
93   }
94 }
95 
96 void DWARFMappedHash::ExtractTypesFromDIEArray(
97     const DIEInfoArray &die_info_array, uint32_t type_flag_mask,
98     uint32_t type_flag_value, llvm::function_ref<bool(DIERef ref)> callback) {
99   const size_t count = die_info_array.size();
100   for (size_t i = 0; i < count; ++i) {
101     if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value) {
102       if (!callback(DIERef(die_info_array[i])))
103         return;
104     }
105   }
106 }
107 
108 const char *DWARFMappedHash::GetAtomTypeName(uint16_t atom) {
109   switch (atom) {
110   case eAtomTypeNULL:
111     return "NULL";
112   case eAtomTypeDIEOffset:
113     return "die-offset";
114   case eAtomTypeCUOffset:
115     return "cu-offset";
116   case eAtomTypeTag:
117     return "die-tag";
118   case eAtomTypeNameFlags:
119     return "name-flags";
120   case eAtomTypeTypeFlags:
121     return "type-flags";
122   case eAtomTypeQualNameHash:
123     return "qualified-name-hash";
124   }
125   return "<invalid>";
126 }
127 
128 DWARFMappedHash::DIEInfo::DIEInfo(dw_offset_t o, dw_tag_t t, uint32_t f,
129                                   uint32_t h)
130     : die_offset(o), tag(t), type_flags(f), qualified_name_hash(h) {}
131 
132 DWARFMappedHash::Prologue::Prologue(dw_offset_t _die_base_offset)
133     : die_base_offset(_die_base_offset), atoms(), atom_mask(0),
134       min_hash_data_byte_size(0), hash_data_has_fixed_byte_size(true) {
135   // Define an array of DIE offsets by first defining an array, and then define
136   // the atom type for the array, in this case we have an array of DIE offsets.
137   AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
138 }
139 
140 void DWARFMappedHash::Prologue::ClearAtoms() {
141   hash_data_has_fixed_byte_size = true;
142   min_hash_data_byte_size = 0;
143   atom_mask = 0;
144   atoms.clear();
145 }
146 
147 bool DWARFMappedHash::Prologue::ContainsAtom(AtomType atom_type) const {
148   return (atom_mask & (1u << atom_type)) != 0;
149 }
150 
151 void DWARFMappedHash::Prologue::Clear() {
152   die_base_offset = 0;
153   ClearAtoms();
154 }
155 
156 void DWARFMappedHash::Prologue::AppendAtom(AtomType type, dw_form_t form) {
157   atoms.push_back({type, form});
158   atom_mask |= 1u << type;
159   switch (form) {
160   case DW_FORM_indirect:
161   case DW_FORM_exprloc:
162   case DW_FORM_flag_present:
163   case DW_FORM_ref_sig8:
164     llvm_unreachable("Unhandled atom form");
165 
166   case DW_FORM_addrx:
167   case DW_FORM_string:
168   case DW_FORM_block:
169   case DW_FORM_block1:
170   case DW_FORM_sdata:
171   case DW_FORM_udata:
172   case DW_FORM_ref_udata:
173   case DW_FORM_GNU_addr_index:
174   case DW_FORM_GNU_str_index:
175     hash_data_has_fixed_byte_size = false;
176     LLVM_FALLTHROUGH;
177   case DW_FORM_flag:
178   case DW_FORM_data1:
179   case DW_FORM_ref1:
180   case DW_FORM_sec_offset:
181     min_hash_data_byte_size += 1;
182     break;
183 
184   case DW_FORM_block2:
185     hash_data_has_fixed_byte_size = false;
186     LLVM_FALLTHROUGH;
187   case DW_FORM_data2:
188   case DW_FORM_ref2:
189     min_hash_data_byte_size += 2;
190     break;
191 
192   case DW_FORM_block4:
193     hash_data_has_fixed_byte_size = false;
194     LLVM_FALLTHROUGH;
195   case DW_FORM_data4:
196   case DW_FORM_ref4:
197   case DW_FORM_addr:
198   case DW_FORM_ref_addr:
199   case DW_FORM_strp:
200     min_hash_data_byte_size += 4;
201     break;
202 
203   case DW_FORM_data8:
204   case DW_FORM_ref8:
205     min_hash_data_byte_size += 8;
206     break;
207   }
208 }
209 
210 lldb::offset_t
211 DWARFMappedHash::Prologue::Read(const lldb_private::DataExtractor &data,
212                                 lldb::offset_t offset) {
213   ClearAtoms();
214 
215   die_base_offset = data.GetU32(&offset);
216 
217   const uint32_t atom_count = data.GetU32(&offset);
218   if (atom_count == 0x00060003u) {
219     // Old format, deal with contents of old pre-release format.
220     while (data.GetU32(&offset)) {
221       /* do nothing */;
222     }
223 
224     // Hardcode to the only known value for now.
225     AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
226   } else {
227     for (uint32_t i = 0; i < atom_count; ++i) {
228       AtomType type = (AtomType)data.GetU16(&offset);
229       dw_form_t form = (dw_form_t)data.GetU16(&offset);
230       AppendAtom(type, form);
231     }
232   }
233   return offset;
234 }
235 
236 size_t DWARFMappedHash::Prologue::GetByteSize() const {
237   // Add an extra count to the atoms size for the zero termination Atom that
238   // gets written to disk.
239   return sizeof(die_base_offset) + sizeof(uint32_t) +
240          atoms.size() * sizeof(Atom);
241 }
242 
243 size_t DWARFMappedHash::Prologue::GetMinimumHashDataByteSize() const {
244   return min_hash_data_byte_size;
245 }
246 
247 bool DWARFMappedHash::Prologue::HashDataHasFixedByteSize() const {
248   return hash_data_has_fixed_byte_size;
249 }
250 
251 size_t DWARFMappedHash::Header::GetByteSize(const HeaderData &header_data) {
252   return header_data.GetByteSize();
253 }
254 
255 lldb::offset_t DWARFMappedHash::Header::Read(lldb_private::DataExtractor &data,
256                                              lldb::offset_t offset) {
257   offset = MappedHash::Header<Prologue>::Read(data, offset);
258   if (offset != UINT32_MAX) {
259     offset = header_data.Read(data, offset);
260   }
261   return offset;
262 }
263 
264 bool DWARFMappedHash::Header::Read(const lldb_private::DWARFDataExtractor &data,
265                                    lldb::offset_t *offset_ptr,
266                                    DIEInfo &hash_data) const {
267   const size_t num_atoms = header_data.atoms.size();
268   if (num_atoms == 0)
269     return false;
270 
271   for (size_t i = 0; i < num_atoms; ++i) {
272     DWARFFormValue form_value(nullptr, header_data.atoms[i].form);
273 
274     if (!form_value.ExtractValue(data, offset_ptr))
275       return false;
276 
277     switch (header_data.atoms[i].type) {
278     case eAtomTypeDIEOffset: // DIE offset, check form for encoding
279       hash_data.die_offset =
280           DWARFFormValue::IsDataForm(form_value.Form())
281               ? form_value.Unsigned()
282               : form_value.Reference(header_data.die_base_offset);
283       break;
284 
285     case eAtomTypeTag: // DW_TAG value for the DIE
286       hash_data.tag = (dw_tag_t)form_value.Unsigned();
287       break;
288 
289     case eAtomTypeTypeFlags: // Flags from enum TypeFlags
290       hash_data.type_flags = (uint32_t)form_value.Unsigned();
291       break;
292 
293     case eAtomTypeQualNameHash: // Flags from enum TypeFlags
294       hash_data.qualified_name_hash = form_value.Unsigned();
295       break;
296 
297     default:
298       // We can always skip atoms we don't know about.
299       break;
300     }
301   }
302   return hash_data.die_offset != DW_INVALID_OFFSET;
303 }
304 
305 DWARFMappedHash::MemoryTable::MemoryTable(
306     lldb_private::DWARFDataExtractor &table_data,
307     const lldb_private::DWARFDataExtractor &string_table, const char *name)
308     : MappedHash::MemoryTable<uint32_t, Header, DIEInfoArray>(table_data),
309       m_data(table_data), m_string_table(string_table), m_name(name) {}
310 
311 const char *
312 DWARFMappedHash::MemoryTable::GetStringForKeyType(KeyType key) const {
313   // The key in the DWARF table is the .debug_str offset for the string
314   return m_string_table.PeekCStr(key);
315 }
316 
317 bool DWARFMappedHash::MemoryTable::ReadHashData(uint32_t hash_data_offset,
318                                                 HashData &hash_data) const {
319   lldb::offset_t offset = hash_data_offset;
320   // Skip string table offset that contains offset of hash name in .debug_str.
321   offset += 4;
322   const uint32_t count = m_data.GetU32(&offset);
323   if (count > 0) {
324     hash_data.resize(count);
325     for (uint32_t i = 0; i < count; ++i) {
326       if (!m_header.Read(m_data, &offset, hash_data[i]))
327         return false;
328     }
329   } else
330     hash_data.clear();
331   return true;
332 }
333 
334 DWARFMappedHash::MemoryTable::Result
335 DWARFMappedHash::MemoryTable::GetHashDataForName(
336     llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr,
337     Pair &pair) const {
338   pair.key = m_data.GetU32(hash_data_offset_ptr);
339   pair.value.clear();
340 
341   // If the key is zero, this terminates our chain of HashData objects for this
342   // hash value.
343   if (pair.key == 0)
344     return eResultEndOfHashData;
345 
346   // There definitely should be a string for this string offset, if there
347   // isn't, there is something wrong, return and error.
348   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
349   if (strp_cstr == nullptr) {
350     *hash_data_offset_ptr = UINT32_MAX;
351     return eResultError;
352   }
353 
354   const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
355   const size_t min_total_hash_data_size =
356       count * m_header.header_data.GetMinimumHashDataByteSize();
357   if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
358                                                    min_total_hash_data_size)) {
359     // We have at least one HashData entry, and we have enough data to parse at
360     // least "count" HashData entries.
361 
362     // First make sure the entire C string matches...
363     const bool match = name == strp_cstr;
364 
365     if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
366       // If the string doesn't match and we have fixed size data, we can just
367       // add the total byte size of all HashData objects to the hash data
368       // offset and be done...
369       *hash_data_offset_ptr += min_total_hash_data_size;
370     } else {
371       // If the string does match, or we don't have fixed size data then we
372       // need to read the hash data as a stream. If the string matches we also
373       // append all HashData objects to the value array.
374       for (uint32_t i = 0; i < count; ++i) {
375         DIEInfo die_info;
376         if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
377           // Only happened if the HashData of the string matched...
378           if (match)
379             pair.value.push_back(die_info);
380         } else {
381           // Something went wrong while reading the data.
382           *hash_data_offset_ptr = UINT32_MAX;
383           return eResultError;
384         }
385       }
386     }
387     // Return the correct response depending on if the string matched or not...
388     if (match) {
389       // The key (cstring) matches and we have lookup results!
390       return eResultKeyMatch;
391     } else {
392       // The key doesn't match, this function will get called again for the
393       // next key/value or the key terminator which in our case is a zero
394       // .debug_str offset.
395       return eResultKeyMismatch;
396     }
397   } else {
398     *hash_data_offset_ptr = UINT32_MAX;
399     return eResultError;
400   }
401 }
402 
403 DWARFMappedHash::MemoryTable::Result
404 DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression(
405     const lldb_private::RegularExpression &regex,
406     lldb::offset_t *hash_data_offset_ptr, Pair &pair) const {
407   pair.key = m_data.GetU32(hash_data_offset_ptr);
408   // If the key is zero, this terminates our chain of HashData objects for this
409   // hash value.
410   if (pair.key == 0)
411     return eResultEndOfHashData;
412 
413   // There definitely should be a string for this string offset, if there
414   // isn't, there is something wrong, return and error.
415   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
416   if (strp_cstr == nullptr)
417     return eResultError;
418 
419   const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
420   const size_t min_total_hash_data_size =
421       count * m_header.header_data.GetMinimumHashDataByteSize();
422   if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
423                                                    min_total_hash_data_size)) {
424     const bool match = regex.Execute(llvm::StringRef(strp_cstr));
425 
426     if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
427       // If the regex doesn't match and we have fixed size data, we can just
428       // add the total byte size of all HashData objects to the hash data
429       // offset and be done...
430       *hash_data_offset_ptr += min_total_hash_data_size;
431     } else {
432       // If the string does match, or we don't have fixed size data then we
433       // need to read the hash data as a stream. If the string matches we also
434       // append all HashData objects to the value array.
435       for (uint32_t i = 0; i < count; ++i) {
436         DIEInfo die_info;
437         if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
438           // Only happened if the HashData of the string matched...
439           if (match)
440             pair.value.push_back(die_info);
441         } else {
442           // Something went wrong while reading the data
443           *hash_data_offset_ptr = UINT32_MAX;
444           return eResultError;
445         }
446       }
447     }
448     // Return the correct response depending on if the string matched or not...
449     if (match) {
450       // The key (cstring) matches and we have lookup results!
451       return eResultKeyMatch;
452     } else {
453       // The key doesn't match, this function will get called again for the
454       // next key/value or the key terminator which in our case is a zero
455       // .debug_str offset.
456       return eResultKeyMismatch;
457     }
458   } else {
459     *hash_data_offset_ptr = UINT32_MAX;
460     return eResultError;
461   }
462 }
463 
464 void DWARFMappedHash::MemoryTable::AppendAllDIEsThatMatchingRegex(
465     const lldb_private::RegularExpression &regex,
466     DIEInfoArray &die_info_array) const {
467   const uint32_t hash_count = m_header.hashes_count;
468   Pair pair;
469   for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
470     lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
471     while (hash_data_offset != UINT32_MAX) {
472       const lldb::offset_t prev_hash_data_offset = hash_data_offset;
473       Result hash_result =
474           AppendHashDataForRegularExpression(regex, &hash_data_offset, pair);
475       if (prev_hash_data_offset == hash_data_offset)
476         break;
477 
478       // Check the result of getting our hash data.
479       switch (hash_result) {
480       case eResultKeyMatch:
481       case eResultKeyMismatch:
482         // Whether we matches or not, it doesn't matter, we keep looking.
483         break;
484 
485       case eResultEndOfHashData:
486       case eResultError:
487         hash_data_offset = UINT32_MAX;
488         break;
489       }
490     }
491   }
492   die_info_array.swap(pair.value);
493 }
494 
495 void DWARFMappedHash::MemoryTable::AppendAllDIEsInRange(
496     const uint32_t die_offset_start, const uint32_t die_offset_end,
497     DIEInfoArray &die_info_array) const {
498   const uint32_t hash_count = m_header.hashes_count;
499   for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
500     bool done = false;
501     lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
502     while (!done && hash_data_offset != UINT32_MAX) {
503       KeyType key = m_data.GetU32(&hash_data_offset);
504       // If the key is zero, this terminates our chain of HashData objects for
505       // this hash value.
506       if (key == 0)
507         break;
508 
509       const uint32_t count = m_data.GetU32(&hash_data_offset);
510       for (uint32_t i = 0; i < count; ++i) {
511         DIEInfo die_info;
512         if (m_header.Read(m_data, &hash_data_offset, die_info)) {
513           if (die_info.die_offset == 0)
514             done = true;
515           if (die_offset_start <= die_info.die_offset &&
516               die_info.die_offset < die_offset_end)
517             die_info_array.push_back(die_info);
518         }
519       }
520     }
521   }
522 }
523 
524 bool DWARFMappedHash::MemoryTable::FindByName(
525     llvm::StringRef name, llvm::function_ref<bool(DIERef ref)> callback) {
526   if (name.empty())
527     return true;
528 
529   DIEInfoArray die_info_array;
530   FindByName(name, die_info_array);
531   return DWARFMappedHash::ExtractDIEArray(die_info_array, callback);
532 }
533 
534 void DWARFMappedHash::MemoryTable::FindByNameAndTag(
535     llvm::StringRef name, const dw_tag_t tag,
536     llvm::function_ref<bool(DIERef ref)> callback) {
537   DIEInfoArray die_info_array;
538   FindByName(name, die_info_array);
539   DWARFMappedHash::ExtractDIEArray(die_info_array, tag, callback);
540 }
541 
542 void DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash(
543     llvm::StringRef name, const dw_tag_t tag,
544     const uint32_t qualified_name_hash,
545     llvm::function_ref<bool(DIERef ref)> callback) {
546   DIEInfoArray die_info_array;
547   FindByName(name, die_info_array);
548   DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash,
549                                    callback);
550 }
551 
552 void DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName(
553     llvm::StringRef name, llvm::function_ref<bool(DIERef ref)> callback,
554     bool must_be_implementation) {
555   DIEInfoArray die_info_array;
556   FindByName(name, die_info_array);
557   if (must_be_implementation &&
558       GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) {
559     // If we have two atoms, then we have the DIE offset and the type flags
560     // so we can find the objective C class efficiently.
561     DWARFMappedHash::ExtractTypesFromDIEArray(
562         die_info_array, UINT32_MAX, eTypeFlagClassIsImplementation, callback);
563     return;
564   }
565   // We don't only want the one true definition, so try and see what we can
566   // find, and only return class or struct DIEs. If we do have the full
567   // implementation, then return it alone, else return all possible
568   // matches.
569   bool found_implementation = false;
570   DWARFMappedHash::ExtractClassOrStructDIEArray(
571       die_info_array, true /*return_implementation_only_if_available*/,
572       [&](DIERef ref) {
573         found_implementation = true;
574         // Here the return value does not matter as we are called at most once.
575         return callback(ref);
576       });
577   if (found_implementation)
578     return;
579   DWARFMappedHash::ExtractClassOrStructDIEArray(
580       die_info_array, false /*return_implementation_only_if_available*/,
581       callback);
582 }
583 
584 void DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name,
585                                               DIEInfoArray &die_info_array) {
586   if (name.empty())
587     return;
588 
589   Pair kv_pair;
590   if (Find(name, kv_pair))
591     die_info_array.swap(kv_pair.value);
592 }
593