xref: /openbsd-src/gnu/llvm/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 //===-- HashedNameToDIE.cpp -------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "HashedNameToDIE.h"
10 #include "llvm/ADT/StringRef.h"
11 
12 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array,
13                                       DIEArray &die_offsets) {
14   const size_t count = die_info_array.size();
15   for (size_t i = 0; i < count; ++i)
16     die_offsets.emplace_back(die_info_array[i]);
17 }
18 
19 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array,
20                                       const dw_tag_t tag,
21                                       DIEArray &die_offsets) {
22   if (tag == 0) {
23     ExtractDIEArray(die_info_array, die_offsets);
24   } else {
25     const size_t count = die_info_array.size();
26     for (size_t i = 0; i < count; ++i) {
27       const dw_tag_t die_tag = die_info_array[i].tag;
28       bool tag_matches = die_tag == 0 || tag == die_tag;
29       if (!tag_matches) {
30         if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
31           tag_matches =
32               tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
33       }
34       if (tag_matches)
35         die_offsets.emplace_back(die_info_array[i]);
36     }
37   }
38 }
39 
40 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array,
41                                       const dw_tag_t tag,
42                                       const uint32_t qualified_name_hash,
43                                       DIEArray &die_offsets) {
44   if (tag == 0) {
45     ExtractDIEArray(die_info_array, die_offsets);
46   } else {
47     const size_t count = die_info_array.size();
48     for (size_t i = 0; i < count; ++i) {
49       if (qualified_name_hash != die_info_array[i].qualified_name_hash)
50         continue;
51       const dw_tag_t die_tag = die_info_array[i].tag;
52       bool tag_matches = die_tag == 0 || tag == die_tag;
53       if (!tag_matches) {
54         if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
55           tag_matches =
56               tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
57       }
58       if (tag_matches)
59         die_offsets.emplace_back(die_info_array[i]);
60     }
61   }
62 }
63 
64 void DWARFMappedHash::ExtractClassOrStructDIEArray(
65     const DIEInfoArray &die_info_array,
66     bool return_implementation_only_if_available, DIEArray &die_offsets) {
67   const size_t count = die_info_array.size();
68   for (size_t i = 0; i < count; ++i) {
69     const dw_tag_t die_tag = die_info_array[i].tag;
70     if (die_tag == 0 || die_tag == DW_TAG_class_type ||
71         die_tag == DW_TAG_structure_type) {
72       if (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) {
73         if (return_implementation_only_if_available) {
74           // We found the one true definition for this class, so only return
75           // that
76           die_offsets.clear();
77           die_offsets.emplace_back(die_info_array[i]);
78           return;
79         } else {
80           // Put the one true definition as the first entry so it matches first
81           die_offsets.emplace(die_offsets.begin(), die_info_array[i]);
82         }
83       } else {
84         die_offsets.emplace_back(die_info_array[i]);
85       }
86     }
87   }
88 }
89 
90 void DWARFMappedHash::ExtractTypesFromDIEArray(
91     const DIEInfoArray &die_info_array, uint32_t type_flag_mask,
92     uint32_t type_flag_value, DIEArray &die_offsets) {
93   const size_t count = die_info_array.size();
94   for (size_t i = 0; i < count; ++i) {
95     if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value)
96       die_offsets.emplace_back(die_info_array[i]);
97   }
98 }
99 
100 const char *DWARFMappedHash::GetAtomTypeName(uint16_t atom) {
101   switch (atom) {
102   case eAtomTypeNULL:
103     return "NULL";
104   case eAtomTypeDIEOffset:
105     return "die-offset";
106   case eAtomTypeCUOffset:
107     return "cu-offset";
108   case eAtomTypeTag:
109     return "die-tag";
110   case eAtomTypeNameFlags:
111     return "name-flags";
112   case eAtomTypeTypeFlags:
113     return "type-flags";
114   case eAtomTypeQualNameHash:
115     return "qualified-name-hash";
116   }
117   return "<invalid>";
118 }
119 
120 DWARFMappedHash::DIEInfo::DIEInfo(dw_offset_t o, dw_tag_t t, uint32_t f,
121                                   uint32_t h)
122     : die_offset(o), tag(t), type_flags(f), qualified_name_hash(h) {}
123 
124 DWARFMappedHash::Prologue::Prologue(dw_offset_t _die_base_offset)
125     : die_base_offset(_die_base_offset), atoms(), atom_mask(0),
126       min_hash_data_byte_size(0), hash_data_has_fixed_byte_size(true) {
127   // Define an array of DIE offsets by first defining an array, and then define
128   // the atom type for the array, in this case we have an array of DIE offsets.
129   AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
130 }
131 
132 void DWARFMappedHash::Prologue::ClearAtoms() {
133   hash_data_has_fixed_byte_size = true;
134   min_hash_data_byte_size = 0;
135   atom_mask = 0;
136   atoms.clear();
137 }
138 
139 bool DWARFMappedHash::Prologue::ContainsAtom(AtomType atom_type) const {
140   return (atom_mask & (1u << atom_type)) != 0;
141 }
142 
143 void DWARFMappedHash::Prologue::Clear() {
144   die_base_offset = 0;
145   ClearAtoms();
146 }
147 
148 void DWARFMappedHash::Prologue::AppendAtom(AtomType type, dw_form_t form) {
149   atoms.push_back({type, form});
150   atom_mask |= 1u << type;
151   switch (form) {
152   case DW_FORM_indirect:
153   case DW_FORM_exprloc:
154   case DW_FORM_flag_present:
155   case DW_FORM_ref_sig8:
156     llvm_unreachable("Unhandled atom form");
157 
158   case DW_FORM_addrx:
159   case DW_FORM_string:
160   case DW_FORM_block:
161   case DW_FORM_block1:
162   case DW_FORM_sdata:
163   case DW_FORM_udata:
164   case DW_FORM_ref_udata:
165   case DW_FORM_GNU_addr_index:
166   case DW_FORM_GNU_str_index:
167     hash_data_has_fixed_byte_size = false;
168     LLVM_FALLTHROUGH;
169   case DW_FORM_flag:
170   case DW_FORM_data1:
171   case DW_FORM_ref1:
172   case DW_FORM_sec_offset:
173     min_hash_data_byte_size += 1;
174     break;
175 
176   case DW_FORM_block2:
177     hash_data_has_fixed_byte_size = false;
178     LLVM_FALLTHROUGH;
179   case DW_FORM_data2:
180   case DW_FORM_ref2:
181     min_hash_data_byte_size += 2;
182     break;
183 
184   case DW_FORM_block4:
185     hash_data_has_fixed_byte_size = false;
186     LLVM_FALLTHROUGH;
187   case DW_FORM_data4:
188   case DW_FORM_ref4:
189   case DW_FORM_addr:
190   case DW_FORM_ref_addr:
191   case DW_FORM_strp:
192     min_hash_data_byte_size += 4;
193     break;
194 
195   case DW_FORM_data8:
196   case DW_FORM_ref8:
197     min_hash_data_byte_size += 8;
198     break;
199   }
200 }
201 
202 lldb::offset_t
203 DWARFMappedHash::Prologue::Read(const lldb_private::DataExtractor &data,
204                                 lldb::offset_t offset) {
205   ClearAtoms();
206 
207   die_base_offset = data.GetU32(&offset);
208 
209   const uint32_t atom_count = data.GetU32(&offset);
210   if (atom_count == 0x00060003u) {
211     // Old format, deal with contents of old pre-release format.
212     while (data.GetU32(&offset)) {
213       /* do nothing */;
214     }
215 
216     // Hardcode to the only known value for now.
217     AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
218   } else {
219     for (uint32_t i = 0; i < atom_count; ++i) {
220       AtomType type = (AtomType)data.GetU16(&offset);
221       dw_form_t form = (dw_form_t)data.GetU16(&offset);
222       AppendAtom(type, form);
223     }
224   }
225   return offset;
226 }
227 
228 size_t DWARFMappedHash::Prologue::GetByteSize() const {
229   // Add an extra count to the atoms size for the zero termination Atom that
230   // gets written to disk.
231   return sizeof(die_base_offset) + sizeof(uint32_t) +
232          atoms.size() * sizeof(Atom);
233 }
234 
235 size_t DWARFMappedHash::Prologue::GetMinimumHashDataByteSize() const {
236   return min_hash_data_byte_size;
237 }
238 
239 bool DWARFMappedHash::Prologue::HashDataHasFixedByteSize() const {
240   return hash_data_has_fixed_byte_size;
241 }
242 
243 size_t DWARFMappedHash::Header::GetByteSize(const HeaderData &header_data) {
244   return header_data.GetByteSize();
245 }
246 
247 lldb::offset_t DWARFMappedHash::Header::Read(lldb_private::DataExtractor &data,
248                                              lldb::offset_t offset) {
249   offset = MappedHash::Header<Prologue>::Read(data, offset);
250   if (offset != UINT32_MAX) {
251     offset = header_data.Read(data, offset);
252   }
253   return offset;
254 }
255 
256 bool DWARFMappedHash::Header::Read(const lldb_private::DWARFDataExtractor &data,
257                                    lldb::offset_t *offset_ptr,
258                                    DIEInfo &hash_data) const {
259   const size_t num_atoms = header_data.atoms.size();
260   if (num_atoms == 0)
261     return false;
262 
263   for (size_t i = 0; i < num_atoms; ++i) {
264     DWARFFormValue form_value(nullptr, header_data.atoms[i].form);
265 
266     if (!form_value.ExtractValue(data, offset_ptr))
267       return false;
268 
269     switch (header_data.atoms[i].type) {
270     case eAtomTypeDIEOffset: // DIE offset, check form for encoding
271       hash_data.die_offset =
272           DWARFFormValue::IsDataForm(form_value.Form())
273               ? form_value.Unsigned()
274               : form_value.Reference(header_data.die_base_offset);
275       break;
276 
277     case eAtomTypeTag: // DW_TAG value for the DIE
278       hash_data.tag = (dw_tag_t)form_value.Unsigned();
279       break;
280 
281     case eAtomTypeTypeFlags: // Flags from enum TypeFlags
282       hash_data.type_flags = (uint32_t)form_value.Unsigned();
283       break;
284 
285     case eAtomTypeQualNameHash: // Flags from enum TypeFlags
286       hash_data.qualified_name_hash = form_value.Unsigned();
287       break;
288 
289     default:
290       // We can always skip atoms we don't know about.
291       break;
292     }
293   }
294   return hash_data.die_offset != DW_INVALID_OFFSET;
295 }
296 
297 DWARFMappedHash::MemoryTable::MemoryTable(
298     lldb_private::DWARFDataExtractor &table_data,
299     const lldb_private::DWARFDataExtractor &string_table, const char *name)
300     : MappedHash::MemoryTable<uint32_t, Header, DIEInfoArray>(table_data),
301       m_data(table_data), m_string_table(string_table), m_name(name) {}
302 
303 const char *
304 DWARFMappedHash::MemoryTable::GetStringForKeyType(KeyType key) const {
305   // The key in the DWARF table is the .debug_str offset for the string
306   return m_string_table.PeekCStr(key);
307 }
308 
309 bool DWARFMappedHash::MemoryTable::ReadHashData(uint32_t hash_data_offset,
310                                                 HashData &hash_data) const {
311   lldb::offset_t offset = hash_data_offset;
312   // Skip string table offset that contains offset of hash name in .debug_str.
313   offset += 4;
314   const uint32_t count = m_data.GetU32(&offset);
315   if (count > 0) {
316     hash_data.resize(count);
317     for (uint32_t i = 0; i < count; ++i) {
318       if (!m_header.Read(m_data, &offset, hash_data[i]))
319         return false;
320     }
321   } else
322     hash_data.clear();
323   return true;
324 }
325 
326 DWARFMappedHash::MemoryTable::Result
327 DWARFMappedHash::MemoryTable::GetHashDataForName(
328     llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr,
329     Pair &pair) const {
330   pair.key = m_data.GetU32(hash_data_offset_ptr);
331   pair.value.clear();
332 
333   // If the key is zero, this terminates our chain of HashData objects for this
334   // hash value.
335   if (pair.key == 0)
336     return eResultEndOfHashData;
337 
338   // There definitely should be a string for this string offset, if there
339   // isn't, there is something wrong, return and error.
340   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
341   if (strp_cstr == nullptr) {
342     *hash_data_offset_ptr = UINT32_MAX;
343     return eResultError;
344   }
345 
346   const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
347   const size_t min_total_hash_data_size =
348       count * m_header.header_data.GetMinimumHashDataByteSize();
349   if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
350                                                    min_total_hash_data_size)) {
351     // We have at least one HashData entry, and we have enough data to parse at
352     // least "count" HashData entries.
353 
354     // First make sure the entire C string matches...
355     const bool match = name == strp_cstr;
356 
357     if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
358       // If the string doesn't match and we have fixed size data, we can just
359       // add the total byte size of all HashData objects to the hash data
360       // offset and be done...
361       *hash_data_offset_ptr += min_total_hash_data_size;
362     } else {
363       // If the string does match, or we don't have fixed size data then we
364       // need to read the hash data as a stream. If the string matches we also
365       // append all HashData objects to the value array.
366       for (uint32_t i = 0; i < count; ++i) {
367         DIEInfo die_info;
368         if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
369           // Only happened if the HashData of the string matched...
370           if (match)
371             pair.value.push_back(die_info);
372         } else {
373           // Something went wrong while reading the data.
374           *hash_data_offset_ptr = UINT32_MAX;
375           return eResultError;
376         }
377       }
378     }
379     // Return the correct response depending on if the string matched or not...
380     if (match) {
381       // The key (cstring) matches and we have lookup results!
382       return eResultKeyMatch;
383     } else {
384       // The key doesn't match, this function will get called again for the
385       // next key/value or the key terminator which in our case is a zero
386       // .debug_str offset.
387       return eResultKeyMismatch;
388     }
389   } else {
390     *hash_data_offset_ptr = UINT32_MAX;
391     return eResultError;
392   }
393 }
394 
395 DWARFMappedHash::MemoryTable::Result
396 DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression(
397     const lldb_private::RegularExpression &regex,
398     lldb::offset_t *hash_data_offset_ptr, Pair &pair) const {
399   pair.key = m_data.GetU32(hash_data_offset_ptr);
400   // If the key is zero, this terminates our chain of HashData objects for this
401   // hash value.
402   if (pair.key == 0)
403     return eResultEndOfHashData;
404 
405   // There definitely should be a string for this string offset, if there
406   // isn't, there is something wrong, return and error.
407   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
408   if (strp_cstr == nullptr)
409     return eResultError;
410 
411   const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
412   const size_t min_total_hash_data_size =
413       count * m_header.header_data.GetMinimumHashDataByteSize();
414   if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
415                                                    min_total_hash_data_size)) {
416     const bool match = regex.Execute(llvm::StringRef(strp_cstr));
417 
418     if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
419       // If the regex doesn't match and we have fixed size data, we can just
420       // add the total byte size of all HashData objects to the hash data
421       // offset and be done...
422       *hash_data_offset_ptr += min_total_hash_data_size;
423     } else {
424       // If the string does match, or we don't have fixed size data then we
425       // need to read the hash data as a stream. If the string matches we also
426       // append all HashData objects to the value array.
427       for (uint32_t i = 0; i < count; ++i) {
428         DIEInfo die_info;
429         if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
430           // Only happened if the HashData of the string matched...
431           if (match)
432             pair.value.push_back(die_info);
433         } else {
434           // Something went wrong while reading the data
435           *hash_data_offset_ptr = UINT32_MAX;
436           return eResultError;
437         }
438       }
439     }
440     // Return the correct response depending on if the string matched or not...
441     if (match) {
442       // The key (cstring) matches and we have lookup results!
443       return eResultKeyMatch;
444     } else {
445       // The key doesn't match, this function will get called again for the
446       // next key/value or the key terminator which in our case is a zero
447       // .debug_str offset.
448       return eResultKeyMismatch;
449     }
450   } else {
451     *hash_data_offset_ptr = UINT32_MAX;
452     return eResultError;
453   }
454 }
455 
456 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsThatMatchingRegex(
457     const lldb_private::RegularExpression &regex,
458     DIEInfoArray &die_info_array) const {
459   const uint32_t hash_count = m_header.hashes_count;
460   Pair pair;
461   for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
462     lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
463     while (hash_data_offset != UINT32_MAX) {
464       const lldb::offset_t prev_hash_data_offset = hash_data_offset;
465       Result hash_result =
466           AppendHashDataForRegularExpression(regex, &hash_data_offset, pair);
467       if (prev_hash_data_offset == hash_data_offset)
468         break;
469 
470       // Check the result of getting our hash data.
471       switch (hash_result) {
472       case eResultKeyMatch:
473       case eResultKeyMismatch:
474         // Whether we matches or not, it doesn't matter, we keep looking.
475         break;
476 
477       case eResultEndOfHashData:
478       case eResultError:
479         hash_data_offset = UINT32_MAX;
480         break;
481       }
482     }
483   }
484   die_info_array.swap(pair.value);
485   return die_info_array.size();
486 }
487 
488 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsInRange(
489     const uint32_t die_offset_start, const uint32_t die_offset_end,
490     DIEInfoArray &die_info_array) const {
491   const uint32_t hash_count = m_header.hashes_count;
492   for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
493     bool done = false;
494     lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
495     while (!done && hash_data_offset != UINT32_MAX) {
496       KeyType key = m_data.GetU32(&hash_data_offset);
497       // If the key is zero, this terminates our chain of HashData objects for
498       // this hash value.
499       if (key == 0)
500         break;
501 
502       const uint32_t count = m_data.GetU32(&hash_data_offset);
503       for (uint32_t i = 0; i < count; ++i) {
504         DIEInfo die_info;
505         if (m_header.Read(m_data, &hash_data_offset, die_info)) {
506           if (die_info.die_offset == 0)
507             done = true;
508           if (die_offset_start <= die_info.die_offset &&
509               die_info.die_offset < die_offset_end)
510             die_info_array.push_back(die_info);
511         }
512       }
513     }
514   }
515   return die_info_array.size();
516 }
517 
518 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name,
519                                                 DIEArray &die_offsets) {
520   if (name.empty())
521     return 0;
522 
523   DIEInfoArray die_info_array;
524   if (FindByName(name, die_info_array))
525     DWARFMappedHash::ExtractDIEArray(die_info_array, die_offsets);
526   return die_info_array.size();
527 }
528 
529 size_t DWARFMappedHash::MemoryTable::FindByNameAndTag(llvm::StringRef name,
530                                                       const dw_tag_t tag,
531                                                       DIEArray &die_offsets) {
532   DIEInfoArray die_info_array;
533   if (FindByName(name, die_info_array))
534     DWARFMappedHash::ExtractDIEArray(die_info_array, tag, die_offsets);
535   return die_info_array.size();
536 }
537 
538 size_t DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash(
539     llvm::StringRef name, const dw_tag_t tag,
540     const uint32_t qualified_name_hash, DIEArray &die_offsets) {
541   DIEInfoArray die_info_array;
542   if (FindByName(name, die_info_array))
543     DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash,
544                                      die_offsets);
545   return die_info_array.size();
546 }
547 
548 size_t DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName(
549     llvm::StringRef name, DIEArray &die_offsets, bool must_be_implementation) {
550   DIEInfoArray die_info_array;
551   if (FindByName(name, die_info_array)) {
552     if (must_be_implementation &&
553         GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) {
554       // If we have two atoms, then we have the DIE offset and the type flags
555       // so we can find the objective C class efficiently.
556       DWARFMappedHash::ExtractTypesFromDIEArray(die_info_array, UINT32_MAX,
557                                                 eTypeFlagClassIsImplementation,
558                                                 die_offsets);
559     } else {
560       // We don't only want the one true definition, so try and see what we can
561       // find, and only return class or struct DIEs. If we do have the full
562       // implementation, then return it alone, else return all possible
563       // matches.
564       const bool return_implementation_only_if_available = true;
565       DWARFMappedHash::ExtractClassOrStructDIEArray(
566           die_info_array, return_implementation_only_if_available, die_offsets);
567     }
568   }
569   return die_offsets.size();
570 }
571 
572 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name,
573                                                 DIEInfoArray &die_info_array) {
574   if (name.empty())
575     return 0;
576 
577   Pair kv_pair;
578   size_t old_size = die_info_array.size();
579   if (Find(name, kv_pair)) {
580     die_info_array.swap(kv_pair.value);
581     return die_info_array.size() - old_size;
582   }
583   return 0;
584 }
585