xref: /llvm-project/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp (revision 0dbdc23e78ac1f34a5b563f2db73f9ca64714fac)
1 //===-- ManualDWARFIndex.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Plugins/SymbolFile/DWARF/ManualDWARFIndex.h"
10 #include "Plugins/Language/ObjC/ObjCLanguage.h"
11 #include "Plugins/SymbolFile/DWARF/DWARFDebugInfo.h"
12 #include "Plugins/SymbolFile/DWARF/DWARFDeclContext.h"
13 #include "Plugins/SymbolFile/DWARF/LogChannelDWARF.h"
14 #include "Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h"
15 #include "lldb/Core/DataFileCache.h"
16 #include "lldb/Core/Debugger.h"
17 #include "lldb/Core/Module.h"
18 #include "lldb/Core/Progress.h"
19 #include "lldb/Symbol/ObjectFile.h"
20 #include "lldb/Utility/DataEncoder.h"
21 #include "lldb/Utility/DataExtractor.h"
22 #include "lldb/Utility/Stream.h"
23 #include "lldb/Utility/Timer.h"
24 #include "llvm/Support/FormatVariadic.h"
25 #include "llvm/Support/ThreadPool.h"
26 #include <atomic>
27 #include <optional>
28 
29 using namespace lldb_private;
30 using namespace lldb;
31 using namespace lldb_private::dwarf;
32 using namespace lldb_private::plugin::dwarf;
33 
34 void ManualDWARFIndex::Index() {
35   if (m_indexed)
36     return;
37   m_indexed = true;
38 
39   ElapsedTime elapsed(m_index_time);
40   LLDB_SCOPED_TIMERF("%p", static_cast<void *>(m_dwarf));
41   if (LoadFromCache()) {
42     m_dwarf->SetDebugInfoIndexWasLoadedFromCache();
43     return;
44   }
45 
46   DWARFDebugInfo &main_info = m_dwarf->DebugInfo();
47   SymbolFileDWARFDwo *dwp_dwarf = m_dwarf->GetDwpSymbolFile().get();
48   DWARFDebugInfo *dwp_info = dwp_dwarf ? &dwp_dwarf->DebugInfo() : nullptr;
49 
50   std::vector<DWARFUnit *> units_to_index;
51   units_to_index.reserve(main_info.GetNumUnits() +
52                          (dwp_info ? dwp_info->GetNumUnits() : 0));
53 
54   // Process all units in the main file, as well as any type units in the dwp
55   // file. Type units in dwo files are handled when we reach the dwo file in
56   // IndexUnit.
57   for (size_t U = 0; U < main_info.GetNumUnits(); ++U) {
58     DWARFUnit *unit = main_info.GetUnitAtIndex(U);
59     if (unit && m_units_to_avoid.count(unit->GetOffset()) == 0)
60       units_to_index.push_back(unit);
61   }
62   if (dwp_info && dwp_info->ContainsTypeUnits()) {
63     for (size_t U = 0; U < dwp_info->GetNumUnits(); ++U) {
64       if (auto *tu =
65               llvm::dyn_cast<DWARFTypeUnit>(dwp_info->GetUnitAtIndex(U))) {
66         if (!m_type_sigs_to_avoid.contains(tu->GetTypeHash()))
67           units_to_index.push_back(tu);
68       }
69     }
70   }
71 
72   if (units_to_index.empty())
73     return;
74 
75   StreamString module_desc;
76   m_module.GetDescription(module_desc.AsRawOstream(),
77                           lldb::eDescriptionLevelBrief);
78 
79   // Include 2 passes per unit to index for extracting DIEs from the unit and
80   // indexing the unit, and then 8 extra entries for finalizing each index set.
81   const uint64_t total_progress = units_to_index.size() * 2 + 8;
82   Progress progress("Manually indexing DWARF", module_desc.GetData(),
83                     total_progress, /*debugger=*/nullptr,
84                     /*minimum_report_time=*/std::chrono::milliseconds(20));
85 
86   // Share one thread pool across operations to avoid the overhead of
87   // recreating the threads.
88   llvm::ThreadPoolTaskGroup task_group(Debugger::GetThreadPool());
89   const size_t num_threads = Debugger::GetThreadPool().getMaxConcurrency();
90 
91   // Run a function for each compile unit in parallel using as many threads as
92   // are available. This is significantly faster than submiting a new task for
93   // each unit.
94   auto for_each_unit = [&](auto &&fn) {
95     std::atomic<size_t> next_cu_idx = 0;
96     auto wrapper = [&fn, &next_cu_idx, &units_to_index,
97                     &progress](size_t worker_id) {
98       size_t cu_idx;
99       while ((cu_idx = next_cu_idx.fetch_add(1, std::memory_order_relaxed)) <
100              units_to_index.size()) {
101         fn(worker_id, cu_idx, units_to_index[cu_idx]);
102         progress.Increment();
103       }
104     };
105 
106     for (size_t i = 0; i < num_threads; ++i)
107       task_group.async(wrapper, i);
108 
109     task_group.wait();
110   };
111 
112   // Extract dies for all DWARFs unit in parallel.  Figure out which units
113   // didn't have their DIEs already parsed and remember this.  If no DIEs were
114   // parsed prior to this index function call, we are going to want to clear the
115   // CU dies after we are done indexing to make sure we don't pull in all DWARF
116   // dies, but we need to wait until all units have been indexed in case a DIE
117   // in one unit refers to another and the indexes accesses those DIEs.
118   std::vector<std::optional<DWARFUnit::ScopedExtractDIEs>> clear_cu_dies(
119       units_to_index.size());
120   for_each_unit([&clear_cu_dies](size_t, size_t idx, DWARFUnit *unit) {
121     clear_cu_dies[idx] = unit->ExtractDIEsScoped();
122   });
123 
124   // Now index all DWARF unit in parallel.
125   std::vector<IndexSet> sets(num_threads);
126   for_each_unit(
127       [this, dwp_dwarf, &sets](size_t worker_id, size_t, DWARFUnit *unit) {
128         IndexUnit(*unit, dwp_dwarf, sets[worker_id]);
129       });
130 
131   // Merge partial indexes into a single index. Process each index in a set in
132   // parallel.
133   auto finalize_fn = [this, &sets, &progress](NameToDIE(IndexSet::*index)) {
134     NameToDIE &result = m_set.*index;
135     for (auto &set : sets)
136       result.Append(set.*index);
137     result.Finalize();
138     progress.Increment();
139   };
140 
141   task_group.async(finalize_fn, &IndexSet::function_basenames);
142   task_group.async(finalize_fn, &IndexSet::function_fullnames);
143   task_group.async(finalize_fn, &IndexSet::function_methods);
144   task_group.async(finalize_fn, &IndexSet::function_selectors);
145   task_group.async(finalize_fn, &IndexSet::objc_class_selectors);
146   task_group.async(finalize_fn, &IndexSet::globals);
147   task_group.async(finalize_fn, &IndexSet::types);
148   task_group.async(finalize_fn, &IndexSet::namespaces);
149   task_group.wait();
150 
151   SaveToCache();
152 }
153 
154 void ManualDWARFIndex::IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp,
155                                  IndexSet &set) {
156   Log *log = GetLog(DWARFLog::Lookups);
157 
158   if (log) {
159     m_module.LogMessage(
160         log, "ManualDWARFIndex::IndexUnit for unit at .debug_info[{0:x16}]",
161         unit.GetOffset());
162   }
163 
164   const LanguageType cu_language = SymbolFileDWARF::GetLanguage(unit);
165 
166   // First check if the unit has a DWO ID. If it does then we only want to index
167   // the .dwo file or nothing at all. If we have a compile unit where we can't
168   // locate the .dwo/.dwp file we don't want to index anything from the skeleton
169   // compile unit because it is usally has no children unless
170   // -fsplit-dwarf-inlining was used at compile time. This option will add a
171   // copy of all DW_TAG_subprogram and any contained DW_TAG_inline_subroutine
172   // DIEs so that symbolication will still work in the absence of the .dwo/.dwp
173   // file, but the functions have no return types and all arguments and locals
174   // have been removed. So we don't want to index any of these hacked up
175   // function types. Types can still exist in the skeleton compile unit DWARF
176   // though as some functions have template parameter types and other things
177   // that cause extra copies of types to be included, but we should find these
178   // types in the .dwo file only as methods could have return types removed and
179   // we don't have to index incomplete types from the skeleton compile unit.
180   if (unit.GetDWOId()) {
181     // Index the .dwo or dwp instead of the skeleton unit.
182     if (SymbolFileDWARFDwo *dwo_symbol_file = unit.GetDwoSymbolFile()) {
183       // Type units in a dwp file are indexed separately, so we just need to
184       // process the split unit here. However, if the split unit is in a dwo
185       // file, then we need to process type units here.
186       if (dwo_symbol_file == dwp) {
187         IndexUnitImpl(unit.GetNonSkeletonUnit(), cu_language, set);
188       } else {
189         DWARFDebugInfo &dwo_info = dwo_symbol_file->DebugInfo();
190         for (size_t i = 0; i < dwo_info.GetNumUnits(); ++i)
191           IndexUnitImpl(*dwo_info.GetUnitAtIndex(i), cu_language, set);
192       }
193       return;
194     }
195     // This was a DWARF5 skeleton CU and the .dwo file couldn't be located.
196     if (unit.GetVersion() >= 5 && unit.IsSkeletonUnit())
197       return;
198 
199     // Either this is a DWARF 4 + fission CU with the .dwo file
200     // missing, or it's a -gmodules pch or pcm. Try to detect the
201     // latter by checking whether the first DIE is a DW_TAG_module.
202     // If it's a pch/pcm, continue indexing it.
203     if (unit.GetDIE(unit.GetFirstDIEOffset()).GetFirstChild().Tag() !=
204         llvm::dwarf::DW_TAG_module)
205       return;
206   }
207   // We have a normal compile unit which we want to index.
208   IndexUnitImpl(unit, cu_language, set);
209 }
210 
211 void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit,
212                                      const LanguageType cu_language,
213                                      IndexSet &set) {
214   for (const DWARFDebugInfoEntry &die : unit.dies()) {
215     const dw_tag_t tag = die.Tag();
216 
217     switch (tag) {
218     case DW_TAG_array_type:
219     case DW_TAG_base_type:
220     case DW_TAG_class_type:
221     case DW_TAG_constant:
222     case DW_TAG_enumeration_type:
223     case DW_TAG_inlined_subroutine:
224     case DW_TAG_namespace:
225     case DW_TAG_imported_declaration:
226     case DW_TAG_string_type:
227     case DW_TAG_structure_type:
228     case DW_TAG_subprogram:
229     case DW_TAG_subroutine_type:
230     case DW_TAG_typedef:
231     case DW_TAG_union_type:
232     case DW_TAG_unspecified_type:
233     case DW_TAG_variable:
234       break;
235 
236     case DW_TAG_member:
237       // Only in DWARF 4 and earlier `static const` members of a struct, a class
238       // or a union have an entry tag `DW_TAG_member`
239       if (unit.GetVersion() >= 5)
240         continue;
241       break;
242 
243     default:
244       continue;
245     }
246 
247     const char *name = nullptr;
248     const char *mangled_cstr = nullptr;
249     bool is_declaration = false;
250     bool has_address = false;
251     bool has_location_or_const_value = false;
252     bool is_global_or_static_variable = false;
253 
254     DWARFFormValue specification_die_form;
255     DWARFAttributes attributes = die.GetAttributes(&unit);
256     for (size_t i = 0; i < attributes.Size(); ++i) {
257       dw_attr_t attr = attributes.AttributeAtIndex(i);
258       DWARFFormValue form_value;
259       switch (attr) {
260       default:
261         break;
262       case DW_AT_name:
263         if (attributes.ExtractFormValueAtIndex(i, form_value))
264           name = form_value.AsCString();
265         break;
266 
267       case DW_AT_declaration:
268         if (attributes.ExtractFormValueAtIndex(i, form_value))
269           is_declaration = form_value.Unsigned() != 0;
270         break;
271 
272       case DW_AT_MIPS_linkage_name:
273       case DW_AT_linkage_name:
274         if (attributes.ExtractFormValueAtIndex(i, form_value))
275           mangled_cstr = form_value.AsCString();
276         break;
277 
278       case DW_AT_low_pc:
279       case DW_AT_high_pc:
280       case DW_AT_ranges:
281         has_address = true;
282         break;
283 
284       case DW_AT_entry_pc:
285         has_address = true;
286         break;
287 
288       case DW_AT_location:
289       case DW_AT_const_value:
290         has_location_or_const_value = true;
291         is_global_or_static_variable = die.IsGlobalOrStaticScopeVariable();
292 
293         break;
294 
295       case DW_AT_specification:
296         if (attributes.ExtractFormValueAtIndex(i, form_value))
297           specification_die_form = form_value;
298         break;
299       }
300     }
301 
302     DIERef ref = *DWARFDIE(&unit, &die).GetDIERef();
303     switch (tag) {
304     case DW_TAG_inlined_subroutine:
305     case DW_TAG_subprogram:
306       if (has_address) {
307         if (name) {
308           bool is_objc_method = false;
309           if (cu_language == eLanguageTypeObjC ||
310               cu_language == eLanguageTypeObjC_plus_plus) {
311             std::optional<const ObjCLanguage::MethodName> objc_method =
312                 ObjCLanguage::MethodName::Create(name, true);
313             if (objc_method) {
314               is_objc_method = true;
315               ConstString class_name_with_category(
316                   objc_method->GetClassNameWithCategory());
317               ConstString objc_selector_name(objc_method->GetSelector());
318               ConstString objc_fullname_no_category_name(
319                   objc_method->GetFullNameWithoutCategory().c_str());
320               ConstString class_name_no_category(objc_method->GetClassName());
321               set.function_fullnames.Insert(ConstString(name), ref);
322               if (class_name_with_category)
323                 set.objc_class_selectors.Insert(class_name_with_category, ref);
324               if (class_name_no_category &&
325                   class_name_no_category != class_name_with_category)
326                 set.objc_class_selectors.Insert(class_name_no_category, ref);
327               if (objc_selector_name)
328                 set.function_selectors.Insert(objc_selector_name, ref);
329               if (objc_fullname_no_category_name)
330                 set.function_fullnames.Insert(objc_fullname_no_category_name,
331                                               ref);
332             }
333           }
334           // If we have a mangled name, then the DW_AT_name attribute is
335           // usually the method name without the class or any parameters
336           bool is_method = DWARFDIE(&unit, &die).IsMethod();
337 
338           if (is_method)
339             set.function_methods.Insert(ConstString(name), ref);
340           else
341             set.function_basenames.Insert(ConstString(name), ref);
342 
343           if (!is_method && !mangled_cstr && !is_objc_method)
344             set.function_fullnames.Insert(ConstString(name), ref);
345         }
346         if (mangled_cstr) {
347           // Make sure our mangled name isn't the same string table entry as
348           // our name. If it starts with '_', then it is ok, else compare the
349           // string to make sure it isn't the same and we don't end up with
350           // duplicate entries
351           if (name && name != mangled_cstr &&
352               ((mangled_cstr[0] == '_') ||
353                (::strcmp(name, mangled_cstr) != 0))) {
354             set.function_fullnames.Insert(ConstString(mangled_cstr), ref);
355           }
356         }
357       }
358       break;
359 
360     case DW_TAG_array_type:
361     case DW_TAG_base_type:
362     case DW_TAG_class_type:
363     case DW_TAG_constant:
364     case DW_TAG_enumeration_type:
365     case DW_TAG_string_type:
366     case DW_TAG_structure_type:
367     case DW_TAG_subroutine_type:
368     case DW_TAG_typedef:
369     case DW_TAG_union_type:
370     case DW_TAG_unspecified_type:
371       if (name && !is_declaration)
372         set.types.Insert(ConstString(name), ref);
373       if (mangled_cstr && !is_declaration)
374         set.types.Insert(ConstString(mangled_cstr), ref);
375       break;
376 
377     case DW_TAG_namespace:
378     case DW_TAG_imported_declaration:
379       if (name)
380         set.namespaces.Insert(ConstString(name), ref);
381       break;
382 
383     case DW_TAG_member: {
384       // In DWARF 4 and earlier `static const` members of a struct, a class or a
385       // union have an entry tag `DW_TAG_member`, and are also tagged as
386       // `DW_AT_declaration`, but otherwise follow the same rules as
387       // `DW_TAG_variable`.
388       bool parent_is_class_type = false;
389       if (auto parent = die.GetParent())
390         parent_is_class_type = DWARFDIE(&unit, parent).IsStructUnionOrClass();
391       if (!parent_is_class_type || !is_declaration)
392         break;
393       [[fallthrough]];
394     }
395     case DW_TAG_variable:
396       if (name && has_location_or_const_value && is_global_or_static_variable) {
397         set.globals.Insert(ConstString(name), ref);
398         // Be sure to include variables by their mangled and demangled names if
399         // they have any since a variable can have a basename "i", a mangled
400         // named "_ZN12_GLOBAL__N_11iE" and a demangled mangled name
401         // "(anonymous namespace)::i"...
402 
403         // Make sure our mangled name isn't the same string table entry as our
404         // name. If it starts with '_', then it is ok, else compare the string
405         // to make sure it isn't the same and we don't end up with duplicate
406         // entries
407         if (mangled_cstr && name != mangled_cstr &&
408             ((mangled_cstr[0] == '_') || (::strcmp(name, mangled_cstr) != 0))) {
409           set.globals.Insert(ConstString(mangled_cstr), ref);
410         }
411       }
412       break;
413 
414     default:
415       continue;
416     }
417   }
418 }
419 
420 void ManualDWARFIndex::GetGlobalVariables(
421     ConstString basename, llvm::function_ref<bool(DWARFDIE die)> callback) {
422   Index();
423   m_set.globals.Find(basename,
424                      DIERefCallback(callback, basename.GetStringRef()));
425 }
426 
427 void ManualDWARFIndex::GetGlobalVariables(
428     const RegularExpression &regex,
429     llvm::function_ref<bool(DWARFDIE die)> callback) {
430   Index();
431   m_set.globals.Find(regex, DIERefCallback(callback, regex.GetText()));
432 }
433 
434 void ManualDWARFIndex::GetGlobalVariables(
435     DWARFUnit &unit, llvm::function_ref<bool(DWARFDIE die)> callback) {
436   Index();
437   m_set.globals.FindAllEntriesForUnit(unit, DIERefCallback(callback));
438 }
439 
440 void ManualDWARFIndex::GetObjCMethods(
441     ConstString class_name, llvm::function_ref<bool(DWARFDIE die)> callback) {
442   Index();
443   m_set.objc_class_selectors.Find(
444       class_name, DIERefCallback(callback, class_name.GetStringRef()));
445 }
446 
447 void ManualDWARFIndex::GetCompleteObjCClass(
448     ConstString class_name, bool must_be_implementation,
449     llvm::function_ref<bool(DWARFDIE die)> callback) {
450   Index();
451   m_set.types.Find(class_name,
452                    DIERefCallback(callback, class_name.GetStringRef()));
453 }
454 
455 void ManualDWARFIndex::GetTypes(
456     ConstString name, llvm::function_ref<bool(DWARFDIE die)> callback) {
457   Index();
458   m_set.types.Find(name, DIERefCallback(callback, name.GetStringRef()));
459 }
460 
461 void ManualDWARFIndex::GetTypes(
462     const DWARFDeclContext &context,
463     llvm::function_ref<bool(DWARFDIE die)> callback) {
464   Index();
465   auto name = context[0].name;
466   m_set.types.Find(ConstString(name),
467                    DIERefCallback(callback, llvm::StringRef(name)));
468 }
469 
470 void ManualDWARFIndex::GetNamespaces(
471     ConstString name, llvm::function_ref<bool(DWARFDIE die)> callback) {
472   Index();
473   m_set.namespaces.Find(name, DIERefCallback(callback, name.GetStringRef()));
474 }
475 
476 void ManualDWARFIndex::GetFunctions(
477     const Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf,
478     const CompilerDeclContext &parent_decl_ctx,
479     llvm::function_ref<bool(DWARFDIE die)> callback) {
480   Index();
481   ConstString name = lookup_info.GetLookupName();
482   FunctionNameType name_type_mask = lookup_info.GetNameTypeMask();
483 
484   if (name_type_mask & eFunctionNameTypeFull) {
485     if (!m_set.function_fullnames.Find(
486             name, DIERefCallback(
487                       [&](DWARFDIE die) {
488                         if (!SymbolFileDWARF::DIEInDeclContext(parent_decl_ctx,
489                                                                die))
490                           return true;
491                         return callback(die);
492                       },
493                       name.GetStringRef())))
494       return;
495   }
496   if (name_type_mask & eFunctionNameTypeBase) {
497     if (!m_set.function_basenames.Find(
498             name, DIERefCallback(
499                       [&](DWARFDIE die) {
500                         if (!SymbolFileDWARF::DIEInDeclContext(parent_decl_ctx,
501                                                                die))
502                           return true;
503                         return callback(die);
504                       },
505                       name.GetStringRef())))
506       return;
507   }
508 
509   if (name_type_mask & eFunctionNameTypeMethod && !parent_decl_ctx.IsValid()) {
510     if (!m_set.function_methods.Find(
511             name, DIERefCallback(callback, name.GetStringRef())))
512       return;
513   }
514 
515   if (name_type_mask & eFunctionNameTypeSelector &&
516       !parent_decl_ctx.IsValid()) {
517     if (!m_set.function_selectors.Find(
518             name, DIERefCallback(callback, name.GetStringRef())))
519       return;
520   }
521 }
522 
523 void ManualDWARFIndex::GetFunctions(
524     const RegularExpression &regex,
525     llvm::function_ref<bool(DWARFDIE die)> callback) {
526   Index();
527 
528   if (!m_set.function_basenames.Find(regex,
529                                      DIERefCallback(callback, regex.GetText())))
530     return;
531   if (!m_set.function_fullnames.Find(regex,
532                                      DIERefCallback(callback, regex.GetText())))
533     return;
534 }
535 
536 void ManualDWARFIndex::Dump(Stream &s) {
537   s.Format("Manual DWARF index for ({0}) '{1:F}':",
538            m_module.GetArchitecture().GetArchitectureName(),
539            m_module.GetObjectFile()->GetFileSpec());
540   s.Printf("\nFunction basenames:\n");
541   m_set.function_basenames.Dump(&s);
542   s.Printf("\nFunction fullnames:\n");
543   m_set.function_fullnames.Dump(&s);
544   s.Printf("\nFunction methods:\n");
545   m_set.function_methods.Dump(&s);
546   s.Printf("\nFunction selectors:\n");
547   m_set.function_selectors.Dump(&s);
548   s.Printf("\nObjective-C class selectors:\n");
549   m_set.objc_class_selectors.Dump(&s);
550   s.Printf("\nGlobals and statics:\n");
551   m_set.globals.Dump(&s);
552   s.Printf("\nTypes:\n");
553   m_set.types.Dump(&s);
554   s.Printf("\nNamespaces:\n");
555   m_set.namespaces.Dump(&s);
556 }
557 
558 constexpr llvm::StringLiteral kIdentifierManualDWARFIndex("DIDX");
559 // Define IDs for the different tables when encoding and decoding the
560 // ManualDWARFIndex NameToDIE objects so we can avoid saving any empty maps.
561 enum DataID {
562   kDataIDFunctionBasenames = 1u,
563   kDataIDFunctionFullnames,
564   kDataIDFunctionMethods,
565   kDataIDFunctionSelectors,
566   kDataIDFunctionObjcClassSelectors,
567   kDataIDGlobals,
568   kDataIDTypes,
569   kDataIDNamespaces,
570   kDataIDEnd = 255u,
571 
572 };
573 
574 // Version 2 changes the encoding of DIERef objects used in the DWARF manual
575 // index name tables. See DIERef class for details.
576 constexpr uint32_t CURRENT_CACHE_VERSION = 2;
577 
578 bool ManualDWARFIndex::IndexSet::Decode(const DataExtractor &data,
579                                         lldb::offset_t *offset_ptr) {
580   StringTableReader strtab;
581   // We now decode the string table for all strings in the data cache file.
582   if (!strtab.Decode(data, offset_ptr))
583     return false;
584 
585   llvm::StringRef identifier((const char *)data.GetData(offset_ptr, 4), 4);
586   if (identifier != kIdentifierManualDWARFIndex)
587     return false;
588   const uint32_t version = data.GetU32(offset_ptr);
589   if (version != CURRENT_CACHE_VERSION)
590     return false;
591 
592   bool done = false;
593   while (!done) {
594     switch (data.GetU8(offset_ptr)) {
595     default:
596       // If we got here, this is not expected, we expect the data IDs to match
597       // one of the values from the DataID enumeration.
598       return false;
599     case kDataIDFunctionBasenames:
600       if (!function_basenames.Decode(data, offset_ptr, strtab))
601         return false;
602       break;
603     case kDataIDFunctionFullnames:
604       if (!function_fullnames.Decode(data, offset_ptr, strtab))
605         return false;
606       break;
607     case kDataIDFunctionMethods:
608       if (!function_methods.Decode(data, offset_ptr, strtab))
609         return false;
610       break;
611     case kDataIDFunctionSelectors:
612       if (!function_selectors.Decode(data, offset_ptr, strtab))
613         return false;
614       break;
615     case kDataIDFunctionObjcClassSelectors:
616       if (!objc_class_selectors.Decode(data, offset_ptr, strtab))
617         return false;
618       break;
619     case kDataIDGlobals:
620       if (!globals.Decode(data, offset_ptr, strtab))
621         return false;
622       break;
623     case kDataIDTypes:
624       if (!types.Decode(data, offset_ptr, strtab))
625         return false;
626       break;
627     case kDataIDNamespaces:
628       if (!namespaces.Decode(data, offset_ptr, strtab))
629         return false;
630       break;
631     case kDataIDEnd:
632       // We got to the end of our NameToDIE encodings.
633       done = true;
634       break;
635     }
636   }
637   // Success!
638   return true;
639 }
640 
641 void ManualDWARFIndex::IndexSet::Encode(DataEncoder &encoder) const {
642   ConstStringTable strtab;
643 
644   // Encoder the DWARF index into a separate encoder first. This allows us
645   // gather all of the strings we willl need in "strtab" as we will need to
646   // write the string table out before the symbol table.
647   DataEncoder index_encoder(encoder.GetByteOrder(),
648                             encoder.GetAddressByteSize());
649 
650   index_encoder.AppendData(kIdentifierManualDWARFIndex);
651   // Encode the data version.
652   index_encoder.AppendU32(CURRENT_CACHE_VERSION);
653 
654   if (!function_basenames.IsEmpty()) {
655     index_encoder.AppendU8(kDataIDFunctionBasenames);
656     function_basenames.Encode(index_encoder, strtab);
657   }
658   if (!function_fullnames.IsEmpty()) {
659     index_encoder.AppendU8(kDataIDFunctionFullnames);
660     function_fullnames.Encode(index_encoder, strtab);
661   }
662   if (!function_methods.IsEmpty()) {
663     index_encoder.AppendU8(kDataIDFunctionMethods);
664     function_methods.Encode(index_encoder, strtab);
665   }
666   if (!function_selectors.IsEmpty()) {
667     index_encoder.AppendU8(kDataIDFunctionSelectors);
668     function_selectors.Encode(index_encoder, strtab);
669   }
670   if (!objc_class_selectors.IsEmpty()) {
671     index_encoder.AppendU8(kDataIDFunctionObjcClassSelectors);
672     objc_class_selectors.Encode(index_encoder, strtab);
673   }
674   if (!globals.IsEmpty()) {
675     index_encoder.AppendU8(kDataIDGlobals);
676     globals.Encode(index_encoder, strtab);
677   }
678   if (!types.IsEmpty()) {
679     index_encoder.AppendU8(kDataIDTypes);
680     types.Encode(index_encoder, strtab);
681   }
682   if (!namespaces.IsEmpty()) {
683     index_encoder.AppendU8(kDataIDNamespaces);
684     namespaces.Encode(index_encoder, strtab);
685   }
686   index_encoder.AppendU8(kDataIDEnd);
687 
688   // Now that all strings have been gathered, we will emit the string table.
689   strtab.Encode(encoder);
690   // Followed by the symbol table data.
691   encoder.AppendData(index_encoder.GetData());
692 }
693 
694 bool ManualDWARFIndex::Decode(const DataExtractor &data,
695                               lldb::offset_t *offset_ptr,
696                               bool &signature_mismatch) {
697   signature_mismatch = false;
698   CacheSignature signature;
699   if (!signature.Decode(data, offset_ptr))
700     return false;
701   if (CacheSignature(m_dwarf->GetObjectFile()) != signature) {
702     signature_mismatch = true;
703     return false;
704   }
705   IndexSet set;
706   if (!set.Decode(data, offset_ptr))
707     return false;
708   m_set = std::move(set);
709   return true;
710 }
711 
712 bool ManualDWARFIndex::Encode(DataEncoder &encoder) const {
713   CacheSignature signature(m_dwarf->GetObjectFile());
714   if (!signature.Encode(encoder))
715     return false;
716   m_set.Encode(encoder);
717   return true;
718 }
719 
720 bool ManualDWARFIndex::IsPartial() const {
721   // If we have units or type units to skip, then this index is partial.
722   return !m_units_to_avoid.empty() || !m_type_sigs_to_avoid.empty();
723 }
724 
725 std::string ManualDWARFIndex::GetCacheKey() {
726   std::string key;
727   llvm::raw_string_ostream strm(key);
728   // DWARF Index can come from different object files for the same module. A
729   // module can have one object file as the main executable and might have
730   // another object file in a separate symbol file, or we might have a .dwo file
731   // that claims its module is the main executable.
732 
733   // This class can be used to index all of the DWARF, or part of the DWARF
734   // when there is a .debug_names index where some compile or type units were
735   // built without .debug_names. So we need to know when we have a full manual
736   // DWARF index or a partial manual DWARF index and save them to different
737   // cache files. Before this fix we might end up debugging a binary with
738   // .debug_names where some of the compile or type units weren't indexed, and
739   // find an issue with the .debug_names tables (bugs or being incomplete), and
740   // then we disable loading the .debug_names by setting a setting in LLDB by
741   // running "settings set plugin.symbol-file.dwarf.ignore-file-indexes 0" in
742   // another LLDB instance. The problem arose when there was an index cache from
743   // a previous run where .debug_names was enabled and it had saved a cache file
744   // that only covered the missing compile and type units from the .debug_names,
745   // and with the setting that disables the loading of the cache files we would
746   // load partial cache index cache. So we need to pick a unique cache suffix
747   // name that indicates if the cache is partial or full to avoid this problem.
748   llvm::StringRef dwarf_index_suffix(IsPartial() ? "partial-" : "full-");
749   ObjectFile *objfile = m_dwarf->GetObjectFile();
750   strm << objfile->GetModule()->GetCacheKey() << "-dwarf-index-"
751        << dwarf_index_suffix << llvm::format_hex(objfile->GetCacheHash(), 10);
752   return key;
753 }
754 
755 bool ManualDWARFIndex::LoadFromCache() {
756   DataFileCache *cache = Module::GetIndexCache();
757   if (!cache)
758     return false;
759   ObjectFile *objfile = m_dwarf->GetObjectFile();
760   if (!objfile)
761     return false;
762   std::unique_ptr<llvm::MemoryBuffer> mem_buffer_up =
763       cache->GetCachedData(GetCacheKey());
764   if (!mem_buffer_up)
765     return false;
766   DataExtractor data(mem_buffer_up->getBufferStart(),
767                      mem_buffer_up->getBufferSize(),
768                      endian::InlHostByteOrder(),
769                      objfile->GetAddressByteSize());
770   bool signature_mismatch = false;
771   lldb::offset_t offset = 0;
772   const bool result = Decode(data, &offset, signature_mismatch);
773   if (signature_mismatch)
774     cache->RemoveCacheFile(GetCacheKey());
775   return result;
776 }
777 
778 void ManualDWARFIndex::SaveToCache() {
779   DataFileCache *cache = Module::GetIndexCache();
780   if (!cache)
781     return; // Caching is not enabled.
782   ObjectFile *objfile = m_dwarf->GetObjectFile();
783   if (!objfile)
784     return;
785   DataEncoder file(endian::InlHostByteOrder(), objfile->GetAddressByteSize());
786   // Encode will return false if the object file doesn't have anything to make
787   // a signature from.
788   if (Encode(file)) {
789     if (cache->SetCachedData(GetCacheKey(), file.GetData()))
790       m_dwarf->SetDebugInfoIndexWasSavedToCache();
791   }
792 }
793