xref: /openbsd-src/gnu/llvm/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp (revision 5a38ef86d0b61900239c7913d24a05e7b88a58f0)
1 //===-- HashedNameToDIE.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "HashedNameToDIE.h"
10 #include "llvm/ADT/StringRef.h"
11 
12 bool DWARFMappedHash::ExtractDIEArray(
13     const DIEInfoArray &die_info_array,
14     llvm::function_ref<bool(DIERef ref)> callback) {
15   const size_t count = die_info_array.size();
16   for (size_t i = 0; i < count; ++i)
17     if (!callback(DIERef(die_info_array[i])))
18       return false;
19   return true;
20 }
21 
22 void DWARFMappedHash::ExtractDIEArray(
23     const DIEInfoArray &die_info_array, const dw_tag_t tag,
24     llvm::function_ref<bool(DIERef ref)> callback) {
25   if (tag == 0) {
26     ExtractDIEArray(die_info_array, callback);
27     return;
28   }
29 
30   const size_t count = die_info_array.size();
31   for (size_t i = 0; i < count; ++i) {
32     const dw_tag_t die_tag = die_info_array[i].tag;
33     bool tag_matches = die_tag == 0 || tag == die_tag;
34     if (!tag_matches) {
35       if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
36         tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
37     }
38     if (tag_matches) {
39       if (!callback(DIERef(die_info_array[i])))
40         return;
41     }
42   }
43 }
44 
45 void DWARFMappedHash::ExtractDIEArray(
46     const DIEInfoArray &die_info_array, const dw_tag_t tag,
47     const uint32_t qualified_name_hash,
48     llvm::function_ref<bool(DIERef ref)> callback) {
49   if (tag == 0) {
50     ExtractDIEArray(die_info_array, callback);
51     return;
52   }
53 
54   const size_t count = die_info_array.size();
55   for (size_t i = 0; i < count; ++i) {
56     if (qualified_name_hash != die_info_array[i].qualified_name_hash)
57       continue;
58     const dw_tag_t die_tag = die_info_array[i].tag;
59     bool tag_matches = die_tag == 0 || tag == die_tag;
60     if (!tag_matches) {
61       if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
62         tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
63     }
64     if (tag_matches) {
65       if (!callback(DIERef(die_info_array[i])))
66         return;
67     }
68   }
69 }
70 
71 void DWARFMappedHash::ExtractClassOrStructDIEArray(
72     const DIEInfoArray &die_info_array,
73     bool return_implementation_only_if_available,
74     llvm::function_ref<bool(DIERef ref)> callback) {
75   const size_t count = die_info_array.size();
76   for (size_t i = 0; i < count; ++i) {
77     const dw_tag_t die_tag = die_info_array[i].tag;
78     if (!(die_tag == 0 || die_tag == DW_TAG_class_type ||
79           die_tag == DW_TAG_structure_type))
80       continue;
81     bool is_implementation =
82         (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) != 0;
83     if (is_implementation != return_implementation_only_if_available)
84       continue;
85     if (return_implementation_only_if_available) {
86       // We found the one true definition for this class, so only return
87       // that
88       callback(DIERef(die_info_array[i]));
89       return;
90     }
91     if (!callback(DIERef(die_info_array[i])))
92       return;
93   }
94 }
95 
96 void DWARFMappedHash::ExtractTypesFromDIEArray(
97     const DIEInfoArray &die_info_array, uint32_t type_flag_mask,
98     uint32_t type_flag_value, llvm::function_ref<bool(DIERef ref)> callback) {
99   const size_t count = die_info_array.size();
100   for (size_t i = 0; i < count; ++i) {
101     if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value) {
102       if (!callback(DIERef(die_info_array[i])))
103         return;
104     }
105   }
106 }
107 
108 const char *DWARFMappedHash::GetAtomTypeName(uint16_t atom) {
109   switch (atom) {
110   case eAtomTypeNULL:
111     return "NULL";
112   case eAtomTypeDIEOffset:
113     return "die-offset";
114   case eAtomTypeCUOffset:
115     return "cu-offset";
116   case eAtomTypeTag:
117     return "die-tag";
118   case eAtomTypeNameFlags:
119     return "name-flags";
120   case eAtomTypeTypeFlags:
121     return "type-flags";
122   case eAtomTypeQualNameHash:
123     return "qualified-name-hash";
124   }
125   return "<invalid>";
126 }
127 
128 DWARFMappedHash::DIEInfo::DIEInfo(dw_offset_t o, dw_tag_t t, uint32_t f,
129                                   uint32_t h)
130     : die_offset(o), tag(t), type_flags(f), qualified_name_hash(h) {}
131 
132 DWARFMappedHash::Prologue::Prologue(dw_offset_t _die_base_offset)
133     : die_base_offset(_die_base_offset), atoms() {
134   // Define an array of DIE offsets by first defining an array, and then define
135   // the atom type for the array, in this case we have an array of DIE offsets.
136   AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
137 }
138 
139 void DWARFMappedHash::Prologue::ClearAtoms() {
140   hash_data_has_fixed_byte_size = true;
141   min_hash_data_byte_size = 0;
142   atom_mask = 0;
143   atoms.clear();
144 }
145 
146 bool DWARFMappedHash::Prologue::ContainsAtom(AtomType atom_type) const {
147   return (atom_mask & (1u << atom_type)) != 0;
148 }
149 
150 void DWARFMappedHash::Prologue::Clear() {
151   die_base_offset = 0;
152   ClearAtoms();
153 }
154 
155 void DWARFMappedHash::Prologue::AppendAtom(AtomType type, dw_form_t form) {
156   atoms.push_back({type, form});
157   atom_mask |= 1u << type;
158   switch (form) {
159   case DW_FORM_indirect:
160   case DW_FORM_exprloc:
161   case DW_FORM_flag_present:
162   case DW_FORM_ref_sig8:
163     llvm_unreachable("Unhandled atom form");
164 
165   case DW_FORM_addrx:
166   case DW_FORM_string:
167   case DW_FORM_block:
168   case DW_FORM_block1:
169   case DW_FORM_sdata:
170   case DW_FORM_udata:
171   case DW_FORM_ref_udata:
172   case DW_FORM_GNU_addr_index:
173   case DW_FORM_GNU_str_index:
174     hash_data_has_fixed_byte_size = false;
175     LLVM_FALLTHROUGH;
176   case DW_FORM_flag:
177   case DW_FORM_data1:
178   case DW_FORM_ref1:
179   case DW_FORM_sec_offset:
180     min_hash_data_byte_size += 1;
181     break;
182 
183   case DW_FORM_block2:
184     hash_data_has_fixed_byte_size = false;
185     LLVM_FALLTHROUGH;
186   case DW_FORM_data2:
187   case DW_FORM_ref2:
188     min_hash_data_byte_size += 2;
189     break;
190 
191   case DW_FORM_block4:
192     hash_data_has_fixed_byte_size = false;
193     LLVM_FALLTHROUGH;
194   case DW_FORM_data4:
195   case DW_FORM_ref4:
196   case DW_FORM_addr:
197   case DW_FORM_ref_addr:
198   case DW_FORM_strp:
199     min_hash_data_byte_size += 4;
200     break;
201 
202   case DW_FORM_data8:
203   case DW_FORM_ref8:
204     min_hash_data_byte_size += 8;
205     break;
206   }
207 }
208 
209 lldb::offset_t
210 DWARFMappedHash::Prologue::Read(const lldb_private::DataExtractor &data,
211                                 lldb::offset_t offset) {
212   ClearAtoms();
213 
214   die_base_offset = data.GetU32(&offset);
215 
216   const uint32_t atom_count = data.GetU32(&offset);
217   if (atom_count == 0x00060003u) {
218     // Old format, deal with contents of old pre-release format.
219     while (data.GetU32(&offset)) {
220       /* do nothing */;
221     }
222 
223     // Hardcode to the only known value for now.
224     AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
225   } else {
226     for (uint32_t i = 0; i < atom_count; ++i) {
227       AtomType type = (AtomType)data.GetU16(&offset);
228       dw_form_t form = (dw_form_t)data.GetU16(&offset);
229       AppendAtom(type, form);
230     }
231   }
232   return offset;
233 }
234 
235 size_t DWARFMappedHash::Prologue::GetByteSize() const {
236   // Add an extra count to the atoms size for the zero termination Atom that
237   // gets written to disk.
238   return sizeof(die_base_offset) + sizeof(uint32_t) +
239          atoms.size() * sizeof(Atom);
240 }
241 
242 size_t DWARFMappedHash::Prologue::GetMinimumHashDataByteSize() const {
243   return min_hash_data_byte_size;
244 }
245 
246 bool DWARFMappedHash::Prologue::HashDataHasFixedByteSize() const {
247   return hash_data_has_fixed_byte_size;
248 }
249 
250 size_t DWARFMappedHash::Header::GetByteSize(const HeaderData &header_data) {
251   return header_data.GetByteSize();
252 }
253 
254 lldb::offset_t DWARFMappedHash::Header::Read(lldb_private::DataExtractor &data,
255                                              lldb::offset_t offset) {
256   offset = MappedHash::Header<Prologue>::Read(data, offset);
257   if (offset != UINT32_MAX) {
258     offset = header_data.Read(data, offset);
259   }
260   return offset;
261 }
262 
263 bool DWARFMappedHash::Header::Read(const lldb_private::DWARFDataExtractor &data,
264                                    lldb::offset_t *offset_ptr,
265                                    DIEInfo &hash_data) const {
266   const size_t num_atoms = header_data.atoms.size();
267   if (num_atoms == 0)
268     return false;
269 
270   for (size_t i = 0; i < num_atoms; ++i) {
271     DWARFFormValue form_value(nullptr, header_data.atoms[i].form);
272 
273     if (!form_value.ExtractValue(data, offset_ptr))
274       return false;
275 
276     switch (header_data.atoms[i].type) {
277     case eAtomTypeDIEOffset: // DIE offset, check form for encoding
278       hash_data.die_offset =
279           DWARFFormValue::IsDataForm(form_value.Form())
280               ? form_value.Unsigned()
281               : form_value.Reference(header_data.die_base_offset);
282       break;
283 
284     case eAtomTypeTag: // DW_TAG value for the DIE
285       hash_data.tag = (dw_tag_t)form_value.Unsigned();
286       break;
287 
288     case eAtomTypeTypeFlags: // Flags from enum TypeFlags
289       hash_data.type_flags = (uint32_t)form_value.Unsigned();
290       break;
291 
292     case eAtomTypeQualNameHash: // Flags from enum TypeFlags
293       hash_data.qualified_name_hash = form_value.Unsigned();
294       break;
295 
296     default:
297       // We can always skip atoms we don't know about.
298       break;
299     }
300   }
301   return hash_data.die_offset != DW_INVALID_OFFSET;
302 }
303 
304 DWARFMappedHash::MemoryTable::MemoryTable(
305     lldb_private::DWARFDataExtractor &table_data,
306     const lldb_private::DWARFDataExtractor &string_table, const char *name)
307     : MappedHash::MemoryTable<uint32_t, Header, DIEInfoArray>(table_data),
308       m_data(table_data), m_string_table(string_table), m_name(name) {}
309 
310 const char *
311 DWARFMappedHash::MemoryTable::GetStringForKeyType(KeyType key) const {
312   // The key in the DWARF table is the .debug_str offset for the string
313   return m_string_table.PeekCStr(key);
314 }
315 
316 bool DWARFMappedHash::MemoryTable::ReadHashData(uint32_t hash_data_offset,
317                                                 HashData &hash_data) const {
318   lldb::offset_t offset = hash_data_offset;
319   // Skip string table offset that contains offset of hash name in .debug_str.
320   offset += 4;
321   const uint32_t count = m_data.GetU32(&offset);
322   if (count > 0) {
323     hash_data.resize(count);
324     for (uint32_t i = 0; i < count; ++i) {
325       if (!m_header.Read(m_data, &offset, hash_data[i]))
326         return false;
327     }
328   } else
329     hash_data.clear();
330   return true;
331 }
332 
333 DWARFMappedHash::MemoryTable::Result
334 DWARFMappedHash::MemoryTable::GetHashDataForName(
335     llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr,
336     Pair &pair) const {
337   pair.key = m_data.GetU32(hash_data_offset_ptr);
338   pair.value.clear();
339 
340   // If the key is zero, this terminates our chain of HashData objects for this
341   // hash value.
342   if (pair.key == 0)
343     return eResultEndOfHashData;
344 
345   // There definitely should be a string for this string offset, if there
346   // isn't, there is something wrong, return and error.
347   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
348   if (strp_cstr == nullptr) {
349     *hash_data_offset_ptr = UINT32_MAX;
350     return eResultError;
351   }
352 
353   const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
354   const size_t min_total_hash_data_size =
355       count * m_header.header_data.GetMinimumHashDataByteSize();
356   if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
357                                                    min_total_hash_data_size)) {
358     // We have at least one HashData entry, and we have enough data to parse at
359     // least "count" HashData entries.
360 
361     // First make sure the entire C string matches...
362     const bool match = name == strp_cstr;
363 
364     if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
365       // If the string doesn't match and we have fixed size data, we can just
366       // add the total byte size of all HashData objects to the hash data
367       // offset and be done...
368       *hash_data_offset_ptr += min_total_hash_data_size;
369     } else {
370       // If the string does match, or we don't have fixed size data then we
371       // need to read the hash data as a stream. If the string matches we also
372       // append all HashData objects to the value array.
373       for (uint32_t i = 0; i < count; ++i) {
374         DIEInfo die_info;
375         if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
376           // Only happened if the HashData of the string matched...
377           if (match)
378             pair.value.push_back(die_info);
379         } else {
380           // Something went wrong while reading the data.
381           *hash_data_offset_ptr = UINT32_MAX;
382           return eResultError;
383         }
384       }
385     }
386     // Return the correct response depending on if the string matched or not...
387     if (match) {
388       // The key (cstring) matches and we have lookup results!
389       return eResultKeyMatch;
390     } else {
391       // The key doesn't match, this function will get called again for the
392       // next key/value or the key terminator which in our case is a zero
393       // .debug_str offset.
394       return eResultKeyMismatch;
395     }
396   } else {
397     *hash_data_offset_ptr = UINT32_MAX;
398     return eResultError;
399   }
400 }
401 
402 DWARFMappedHash::MemoryTable::Result
403 DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression(
404     const lldb_private::RegularExpression &regex,
405     lldb::offset_t *hash_data_offset_ptr, Pair &pair) const {
406   pair.key = m_data.GetU32(hash_data_offset_ptr);
407   // If the key is zero, this terminates our chain of HashData objects for this
408   // hash value.
409   if (pair.key == 0)
410     return eResultEndOfHashData;
411 
412   // There definitely should be a string for this string offset, if there
413   // isn't, there is something wrong, return and error.
414   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
415   if (strp_cstr == nullptr)
416     return eResultError;
417 
418   const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
419   const size_t min_total_hash_data_size =
420       count * m_header.header_data.GetMinimumHashDataByteSize();
421   if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
422                                                    min_total_hash_data_size)) {
423     const bool match = regex.Execute(llvm::StringRef(strp_cstr));
424 
425     if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
426       // If the regex doesn't match and we have fixed size data, we can just
427       // add the total byte size of all HashData objects to the hash data
428       // offset and be done...
429       *hash_data_offset_ptr += min_total_hash_data_size;
430     } else {
431       // If the string does match, or we don't have fixed size data then we
432       // need to read the hash data as a stream. If the string matches we also
433       // append all HashData objects to the value array.
434       for (uint32_t i = 0; i < count; ++i) {
435         DIEInfo die_info;
436         if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
437           // Only happened if the HashData of the string matched...
438           if (match)
439             pair.value.push_back(die_info);
440         } else {
441           // Something went wrong while reading the data
442           *hash_data_offset_ptr = UINT32_MAX;
443           return eResultError;
444         }
445       }
446     }
447     // Return the correct response depending on if the string matched or not...
448     if (match) {
449       // The key (cstring) matches and we have lookup results!
450       return eResultKeyMatch;
451     } else {
452       // The key doesn't match, this function will get called again for the
453       // next key/value or the key terminator which in our case is a zero
454       // .debug_str offset.
455       return eResultKeyMismatch;
456     }
457   } else {
458     *hash_data_offset_ptr = UINT32_MAX;
459     return eResultError;
460   }
461 }
462 
463 void DWARFMappedHash::MemoryTable::AppendAllDIEsThatMatchingRegex(
464     const lldb_private::RegularExpression &regex,
465     DIEInfoArray &die_info_array) const {
466   const uint32_t hash_count = m_header.hashes_count;
467   Pair pair;
468   for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
469     lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
470     while (hash_data_offset != UINT32_MAX) {
471       const lldb::offset_t prev_hash_data_offset = hash_data_offset;
472       Result hash_result =
473           AppendHashDataForRegularExpression(regex, &hash_data_offset, pair);
474       if (prev_hash_data_offset == hash_data_offset)
475         break;
476 
477       // Check the result of getting our hash data.
478       switch (hash_result) {
479       case eResultKeyMatch:
480       case eResultKeyMismatch:
481         // Whether we matches or not, it doesn't matter, we keep looking.
482         break;
483 
484       case eResultEndOfHashData:
485       case eResultError:
486         hash_data_offset = UINT32_MAX;
487         break;
488       }
489     }
490   }
491   die_info_array.swap(pair.value);
492 }
493 
494 void DWARFMappedHash::MemoryTable::AppendAllDIEsInRange(
495     const uint32_t die_offset_start, const uint32_t die_offset_end,
496     DIEInfoArray &die_info_array) const {
497   const uint32_t hash_count = m_header.hashes_count;
498   for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
499     bool done = false;
500     lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
501     while (!done && hash_data_offset != UINT32_MAX) {
502       KeyType key = m_data.GetU32(&hash_data_offset);
503       // If the key is zero, this terminates our chain of HashData objects for
504       // this hash value.
505       if (key == 0)
506         break;
507 
508       const uint32_t count = m_data.GetU32(&hash_data_offset);
509       for (uint32_t i = 0; i < count; ++i) {
510         DIEInfo die_info;
511         if (m_header.Read(m_data, &hash_data_offset, die_info)) {
512           if (die_info.die_offset == 0)
513             done = true;
514           if (die_offset_start <= die_info.die_offset &&
515               die_info.die_offset < die_offset_end)
516             die_info_array.push_back(die_info);
517         }
518       }
519     }
520   }
521 }
522 
523 bool DWARFMappedHash::MemoryTable::FindByName(
524     llvm::StringRef name, llvm::function_ref<bool(DIERef ref)> callback) {
525   if (name.empty())
526     return true;
527 
528   DIEInfoArray die_info_array;
529   FindByName(name, die_info_array);
530   return DWARFMappedHash::ExtractDIEArray(die_info_array, callback);
531 }
532 
533 void DWARFMappedHash::MemoryTable::FindByNameAndTag(
534     llvm::StringRef name, const dw_tag_t tag,
535     llvm::function_ref<bool(DIERef ref)> callback) {
536   DIEInfoArray die_info_array;
537   FindByName(name, die_info_array);
538   DWARFMappedHash::ExtractDIEArray(die_info_array, tag, callback);
539 }
540 
541 void DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash(
542     llvm::StringRef name, const dw_tag_t tag,
543     const uint32_t qualified_name_hash,
544     llvm::function_ref<bool(DIERef ref)> callback) {
545   DIEInfoArray die_info_array;
546   FindByName(name, die_info_array);
547   DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash,
548                                    callback);
549 }
550 
551 void DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName(
552     llvm::StringRef name, llvm::function_ref<bool(DIERef ref)> callback,
553     bool must_be_implementation) {
554   DIEInfoArray die_info_array;
555   FindByName(name, die_info_array);
556   if (must_be_implementation &&
557       GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) {
558     // If we have two atoms, then we have the DIE offset and the type flags
559     // so we can find the objective C class efficiently.
560     DWARFMappedHash::ExtractTypesFromDIEArray(
561         die_info_array, UINT32_MAX, eTypeFlagClassIsImplementation, callback);
562     return;
563   }
564   // We don't only want the one true definition, so try and see what we can
565   // find, and only return class or struct DIEs. If we do have the full
566   // implementation, then return it alone, else return all possible
567   // matches.
568   bool found_implementation = false;
569   DWARFMappedHash::ExtractClassOrStructDIEArray(
570       die_info_array, true /*return_implementation_only_if_available*/,
571       [&](DIERef ref) {
572         found_implementation = true;
573         // Here the return value does not matter as we are called at most once.
574         return callback(ref);
575       });
576   if (found_implementation)
577     return;
578   DWARFMappedHash::ExtractClassOrStructDIEArray(
579       die_info_array, false /*return_implementation_only_if_available*/,
580       callback);
581 }
582 
583 void DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name,
584                                               DIEInfoArray &die_info_array) {
585   if (name.empty())
586     return;
587 
588   Pair kv_pair;
589   if (Find(name, kv_pair))
590     die_info_array.swap(kv_pair.value);
591 }
592