xref: /llvm-project/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp (revision f1763888bb96c9c4069d8d069083371965561111)
1 //===-- DWARFUnit.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "DWARFUnit.h"
10 
11 #include "lldb/Core/Module.h"
12 #include "lldb/Symbol/ObjectFile.h"
13 #include "lldb/Utility/LLDBAssert.h"
14 #include "lldb/Utility/StreamString.h"
15 #include "lldb/Utility/Timer.h"
16 #include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
17 #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
18 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
19 #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
20 #include "llvm/Object/Error.h"
21 
22 #include "DWARFCompileUnit.h"
23 #include "DWARFDebugAranges.h"
24 #include "DWARFDebugInfo.h"
25 #include "DWARFTypeUnit.h"
26 #include "LogChannelDWARF.h"
27 #include "SymbolFileDWARFDwo.h"
28 #include <optional>
29 
30 using namespace lldb;
31 using namespace lldb_private;
32 using namespace lldb_private::dwarf;
33 using namespace lldb_private::plugin::dwarf;
34 
35 extern int g_verbose;
36 
37 DWARFUnit::DWARFUnit(SymbolFileDWARF &dwarf, lldb::user_id_t uid,
38                      const llvm::DWARFUnitHeader &header,
39                      const llvm::DWARFAbbreviationDeclarationSet &abbrevs,
40                      DIERef::Section section, bool is_dwo)
41     : UserID(uid), m_dwarf(dwarf), m_header(header), m_abbrevs(&abbrevs),
42       m_cancel_scopes(false), m_section(section), m_is_dwo(is_dwo),
43       m_has_parsed_non_skeleton_unit(false), m_dwo_id(header.getDWOId()) {}
44 
45 DWARFUnit::~DWARFUnit() = default;
46 
47 // Parses first DIE of a compile unit, excluding DWO.
48 void DWARFUnit::ExtractUnitDIENoDwoIfNeeded() {
49   {
50     llvm::sys::ScopedReader lock(m_first_die_mutex);
51     if (m_first_die)
52       return; // Already parsed
53   }
54   llvm::sys::ScopedWriter lock(m_first_die_mutex);
55   if (m_first_die)
56     return; // Already parsed
57 
58   ElapsedTime elapsed(m_dwarf.GetDebugInfoParseTimeRef());
59 
60   // Set the offset to that of the first DIE and calculate the start of the
61   // next compilation unit header.
62   lldb::offset_t offset = GetFirstDIEOffset();
63 
64   // We are in our compile unit, parse starting at the offset we were told to
65   // parse
66   const DWARFDataExtractor &data = GetData();
67   if (offset < GetNextUnitOffset() &&
68       m_first_die.Extract(data, *this, &offset)) {
69     AddUnitDIE(m_first_die);
70     return;
71   }
72 }
73 
74 // Parses first DIE of a compile unit including DWO.
75 void DWARFUnit::ExtractUnitDIEIfNeeded() {
76   ExtractUnitDIENoDwoIfNeeded();
77 
78   if (m_has_parsed_non_skeleton_unit)
79     return;
80 
81   m_has_parsed_non_skeleton_unit = true;
82   m_dwo_error.Clear();
83 
84   if (!m_dwo_id)
85     return; // No DWO file.
86 
87   std::shared_ptr<SymbolFileDWARFDwo> dwo_symbol_file =
88       m_dwarf.GetDwoSymbolFileForCompileUnit(*this, m_first_die);
89   if (!dwo_symbol_file)
90     return;
91 
92   DWARFUnit *dwo_cu = dwo_symbol_file->GetDWOCompileUnitForHash(*m_dwo_id);
93 
94   if (!dwo_cu) {
95     SetDwoError(Status::FromErrorStringWithFormatv(
96         "unable to load .dwo file from \"{0}\" due to ID ({1:x16}) mismatch "
97         "for skeleton DIE at {2:x8}",
98         dwo_symbol_file->GetObjectFile()->GetFileSpec().GetPath(), *m_dwo_id,
99         m_first_die.GetOffset()));
100     return; // Can't fetch the compile unit from the dwo file.
101   }
102 
103   // Link the DWO unit to this object, if it hasn't been linked already (this
104   // can happen when we have an index, and the DWO unit is parsed first).
105   if (!dwo_cu->LinkToSkeletonUnit(*this)) {
106     SetDwoError(Status::FromErrorStringWithFormatv(
107         "multiple compile units with Dwo ID {0:x16}", *m_dwo_id));
108     return;
109   }
110 
111   DWARFBaseDIE dwo_cu_die = dwo_cu->GetUnitDIEOnly();
112   if (!dwo_cu_die.IsValid()) {
113     // Can't fetch the compile unit DIE from the dwo file.
114     SetDwoError(Status::FromErrorStringWithFormatv(
115         "unable to extract compile unit DIE from .dwo file for skeleton "
116         "DIE at {0:x16}",
117         m_first_die.GetOffset()));
118     return;
119   }
120 
121   // Here for DWO CU we want to use the address base set in the skeleton unit
122   // (DW_AT_addr_base) if it is available and use the DW_AT_GNU_addr_base
123   // otherwise. We do that because pre-DWARF v5 could use the DW_AT_GNU_*
124   // attributes which were applicable to the DWO units. The corresponding
125   // DW_AT_* attributes standardized in DWARF v5 are also applicable to the
126   // main unit in contrast.
127   if (m_addr_base)
128     dwo_cu->SetAddrBase(*m_addr_base);
129   else if (m_gnu_addr_base)
130     dwo_cu->SetAddrBase(*m_gnu_addr_base);
131 
132   if (GetVersion() <= 4 && m_gnu_ranges_base)
133     dwo_cu->SetRangesBase(*m_gnu_ranges_base);
134   else if (dwo_symbol_file->GetDWARFContext()
135                .getOrLoadRngListsData()
136                .GetByteSize() > 0)
137     dwo_cu->SetRangesBase(llvm::DWARFListTableHeader::getHeaderSize(DWARF32));
138 
139   if (GetVersion() >= 5 &&
140       dwo_symbol_file->GetDWARFContext().getOrLoadLocListsData().GetByteSize() >
141           0)
142     dwo_cu->SetLoclistsBase(llvm::DWARFListTableHeader::getHeaderSize(DWARF32));
143 
144   dwo_cu->SetBaseAddress(GetBaseAddress());
145 
146   m_dwo = std::shared_ptr<DWARFUnit>(std::move(dwo_symbol_file), dwo_cu);
147 }
148 
149 // Parses a compile unit and indexes its DIEs if it hasn't already been done.
150 // It will leave this compile unit extracted forever.
151 void DWARFUnit::ExtractDIEsIfNeeded() {
152   m_cancel_scopes = true;
153 
154   {
155     llvm::sys::ScopedReader lock(m_die_array_mutex);
156     if (!m_die_array.empty())
157       return; // Already parsed
158   }
159   llvm::sys::ScopedWriter lock(m_die_array_mutex);
160   if (!m_die_array.empty())
161     return; // Already parsed
162 
163   ExtractDIEsRWLocked();
164 }
165 
166 // Parses a compile unit and indexes its DIEs if it hasn't already been done.
167 // It will clear this compile unit after returned instance gets out of scope,
168 // no other ScopedExtractDIEs instance is running for this compile unit
169 // and no ExtractDIEsIfNeeded() has been executed during this ScopedExtractDIEs
170 // lifetime.
171 DWARFUnit::ScopedExtractDIEs DWARFUnit::ExtractDIEsScoped() {
172   ScopedExtractDIEs scoped(*this);
173 
174   {
175     llvm::sys::ScopedReader lock(m_die_array_mutex);
176     if (!m_die_array.empty())
177       return scoped; // Already parsed
178   }
179   llvm::sys::ScopedWriter lock(m_die_array_mutex);
180   if (!m_die_array.empty())
181     return scoped; // Already parsed
182 
183   // Otherwise m_die_array would be already populated.
184   lldbassert(!m_cancel_scopes);
185 
186   ExtractDIEsRWLocked();
187   scoped.m_clear_dies = true;
188   return scoped;
189 }
190 
191 DWARFUnit::ScopedExtractDIEs::ScopedExtractDIEs(DWARFUnit &cu) : m_cu(&cu) {
192   m_cu->m_die_array_scoped_mutex.lock_shared();
193 }
194 
195 DWARFUnit::ScopedExtractDIEs::~ScopedExtractDIEs() {
196   if (!m_cu)
197     return;
198   m_cu->m_die_array_scoped_mutex.unlock_shared();
199   if (!m_clear_dies || m_cu->m_cancel_scopes)
200     return;
201   // Be sure no other ScopedExtractDIEs is running anymore.
202   llvm::sys::ScopedWriter lock_scoped(m_cu->m_die_array_scoped_mutex);
203   llvm::sys::ScopedWriter lock(m_cu->m_die_array_mutex);
204   if (m_cu->m_cancel_scopes)
205     return;
206   m_cu->ClearDIEsRWLocked();
207 }
208 
209 DWARFUnit::ScopedExtractDIEs::ScopedExtractDIEs(ScopedExtractDIEs &&rhs)
210     : m_cu(rhs.m_cu), m_clear_dies(rhs.m_clear_dies) {
211   rhs.m_cu = nullptr;
212 }
213 
214 DWARFUnit::ScopedExtractDIEs &
215 DWARFUnit::ScopedExtractDIEs::operator=(DWARFUnit::ScopedExtractDIEs &&rhs) {
216   m_cu = rhs.m_cu;
217   rhs.m_cu = nullptr;
218   m_clear_dies = rhs.m_clear_dies;
219   return *this;
220 }
221 
222 // Parses a compile unit and indexes its DIEs, m_die_array_mutex must be
223 // held R/W and m_die_array must be empty.
224 void DWARFUnit::ExtractDIEsRWLocked() {
225   llvm::sys::ScopedWriter first_die_lock(m_first_die_mutex);
226 
227   ElapsedTime elapsed(m_dwarf.GetDebugInfoParseTimeRef());
228   LLDB_SCOPED_TIMERF(
229       "%s",
230       llvm::formatv("{0:x16}: DWARFUnit::ExtractDIEsIfNeeded()", GetOffset())
231           .str()
232           .c_str());
233 
234   // Set the offset to that of the first DIE and calculate the start of the
235   // next compilation unit header.
236   lldb::offset_t offset = GetFirstDIEOffset();
237   lldb::offset_t next_cu_offset = GetNextUnitOffset();
238 
239   DWARFDebugInfoEntry die;
240 
241   uint32_t depth = 0;
242   // We are in our compile unit, parse starting at the offset we were told to
243   // parse
244   const DWARFDataExtractor &data = GetData();
245   std::vector<uint32_t> die_index_stack;
246   die_index_stack.reserve(32);
247   die_index_stack.push_back(0);
248   bool prev_die_had_children = false;
249   while (offset < next_cu_offset && die.Extract(data, *this, &offset)) {
250     const bool null_die = die.IsNULL();
251     if (depth == 0) {
252       assert(m_die_array.empty() && "Compile unit DIE already added");
253 
254       // The average bytes per DIE entry has been seen to be around 14-20 so
255       // lets pre-reserve half of that since we are now stripping the NULL
256       // tags.
257 
258       // Only reserve the memory if we are adding children of the main
259       // compile unit DIE. The compile unit DIE is always the first entry, so
260       // if our size is 1, then we are adding the first compile unit child
261       // DIE and should reserve the memory.
262       m_die_array.reserve(GetDebugInfoSize() / 24);
263       m_die_array.push_back(die);
264 
265       if (!m_first_die)
266         AddUnitDIE(m_die_array.front());
267 
268       // With -fsplit-dwarf-inlining, clang will emit non-empty skeleton compile
269       // units. We are not able to access these DIE *and* the dwo file
270       // simultaneously. We also don't need to do that as the dwo file will
271       // contain a superset of information. So, we don't even attempt to parse
272       // any remaining DIEs.
273       if (m_dwo) {
274         m_die_array.front().SetHasChildren(false);
275         break;
276       }
277 
278     } else {
279       if (null_die) {
280         if (prev_die_had_children) {
281           // This will only happen if a DIE says is has children but all it
282           // contains is a NULL tag. Since we are removing the NULL DIEs from
283           // the list (saves up to 25% in C++ code), we need a way to let the
284           // DIE know that it actually doesn't have children.
285           if (!m_die_array.empty())
286             m_die_array.back().SetHasChildren(false);
287         }
288       } else {
289         die.SetParentIndex(m_die_array.size() - die_index_stack[depth - 1]);
290 
291         if (die_index_stack.back())
292           m_die_array[die_index_stack.back()].SetSiblingIndex(
293               m_die_array.size() - die_index_stack.back());
294 
295         // Only push the DIE if it isn't a NULL DIE
296         m_die_array.push_back(die);
297       }
298     }
299 
300     if (null_die) {
301       // NULL DIE.
302       if (!die_index_stack.empty())
303         die_index_stack.pop_back();
304 
305       if (depth > 0)
306         --depth;
307       prev_die_had_children = false;
308     } else {
309       die_index_stack.back() = m_die_array.size() - 1;
310       // Normal DIE
311       const bool die_has_children = die.HasChildren();
312       if (die_has_children) {
313         die_index_stack.push_back(0);
314         ++depth;
315       }
316       prev_die_had_children = die_has_children;
317     }
318 
319     if (depth == 0)
320       break; // We are done with this compile unit!
321   }
322 
323   if (!m_die_array.empty()) {
324     // The last die cannot have children (if it did, it wouldn't be the last
325     // one). This only makes a difference for malformed dwarf that does not have
326     // a terminating null die.
327     m_die_array.back().SetHasChildren(false);
328 
329     if (m_first_die) {
330       // Only needed for the assertion.
331       m_first_die.SetHasChildren(m_die_array.front().HasChildren());
332       lldbassert(m_first_die == m_die_array.front());
333     }
334     m_first_die = m_die_array.front();
335   }
336 
337   m_die_array.shrink_to_fit();
338 
339   if (m_dwo)
340     m_dwo->ExtractDIEsIfNeeded();
341 }
342 
343 // This is used when a split dwarf is enabled.
344 // A skeleton compilation unit may contain the DW_AT_str_offsets_base attribute
345 // that points to the first string offset of the CU contribution to the
346 // .debug_str_offsets. At the same time, the corresponding split debug unit also
347 // may use DW_FORM_strx* forms pointing to its own .debug_str_offsets.dwo and
348 // for that case, we should find the offset (skip the section header).
349 void DWARFUnit::SetDwoStrOffsetsBase() {
350   lldb::offset_t baseOffset = 0;
351 
352   if (const llvm::DWARFUnitIndex::Entry *entry = m_header.getIndexEntry()) {
353     if (const auto *contribution =
354             entry->getContribution(llvm::DW_SECT_STR_OFFSETS))
355       baseOffset = contribution->getOffset();
356     else
357       return;
358   }
359 
360   if (GetVersion() >= 5) {
361     const DWARFDataExtractor &strOffsets =
362         GetSymbolFileDWARF().GetDWARFContext().getOrLoadStrOffsetsData();
363     uint64_t length = strOffsets.GetU32(&baseOffset);
364     if (length == 0xffffffff)
365       length = strOffsets.GetU64(&baseOffset);
366 
367     // Check version.
368     if (strOffsets.GetU16(&baseOffset) < 5)
369       return;
370 
371     // Skip padding.
372     baseOffset += 2;
373   }
374 
375   SetStrOffsetsBase(baseOffset);
376 }
377 
378 std::optional<uint64_t> DWARFUnit::GetDWOId() {
379   ExtractUnitDIENoDwoIfNeeded();
380   return m_dwo_id;
381 }
382 
383 // m_die_array_mutex must be already held as read/write.
384 void DWARFUnit::AddUnitDIE(const DWARFDebugInfoEntry &cu_die) {
385   DWARFAttributes attributes = cu_die.GetAttributes(this);
386 
387   // Extract DW_AT_addr_base first, as other attributes may need it.
388   for (size_t i = 0; i < attributes.Size(); ++i) {
389     if (attributes.AttributeAtIndex(i) != DW_AT_addr_base)
390       continue;
391     DWARFFormValue form_value;
392     if (attributes.ExtractFormValueAtIndex(i, form_value)) {
393       SetAddrBase(form_value.Unsigned());
394       break;
395     }
396   }
397 
398   for (size_t i = 0; i < attributes.Size(); ++i) {
399     dw_attr_t attr = attributes.AttributeAtIndex(i);
400     DWARFFormValue form_value;
401     if (!attributes.ExtractFormValueAtIndex(i, form_value))
402       continue;
403     switch (attr) {
404     default:
405       break;
406     case DW_AT_loclists_base:
407       SetLoclistsBase(form_value.Unsigned());
408       break;
409     case DW_AT_rnglists_base:
410       SetRangesBase(form_value.Unsigned());
411       break;
412     case DW_AT_str_offsets_base:
413       SetStrOffsetsBase(form_value.Unsigned());
414       break;
415     case DW_AT_low_pc:
416       SetBaseAddress(form_value.Address());
417       break;
418     case DW_AT_entry_pc:
419       // If the value was already set by DW_AT_low_pc, don't update it.
420       if (m_base_addr == LLDB_INVALID_ADDRESS)
421         SetBaseAddress(form_value.Address());
422       break;
423     case DW_AT_stmt_list:
424       m_line_table_offset = form_value.Unsigned();
425       break;
426     case DW_AT_GNU_addr_base:
427       m_gnu_addr_base = form_value.Unsigned();
428       break;
429     case DW_AT_GNU_ranges_base:
430       m_gnu_ranges_base = form_value.Unsigned();
431       break;
432     case DW_AT_GNU_dwo_id:
433       m_dwo_id = form_value.Unsigned();
434       break;
435     }
436   }
437 
438   if (m_is_dwo) {
439     m_has_parsed_non_skeleton_unit = true;
440     SetDwoStrOffsetsBase();
441     return;
442   }
443 }
444 
445 size_t DWARFUnit::GetDebugInfoSize() const {
446   return GetLengthByteSize() + GetLength() - GetHeaderByteSize();
447 }
448 
449 const llvm::DWARFAbbreviationDeclarationSet *
450 DWARFUnit::GetAbbreviations() const {
451   return m_abbrevs;
452 }
453 
454 dw_offset_t DWARFUnit::GetAbbrevOffset() const {
455   return m_abbrevs ? m_abbrevs->getOffset() : DW_INVALID_OFFSET;
456 }
457 
458 dw_offset_t DWARFUnit::GetLineTableOffset() {
459   ExtractUnitDIENoDwoIfNeeded();
460   return m_line_table_offset;
461 }
462 
463 void DWARFUnit::SetAddrBase(dw_addr_t addr_base) { m_addr_base = addr_base; }
464 
465 // Parse the rangelist table header, including the optional array of offsets
466 // following it (DWARF v5 and later).
467 template <typename ListTableType>
468 static llvm::Expected<ListTableType>
469 ParseListTableHeader(const llvm::DWARFDataExtractor &data, uint64_t offset,
470                      DwarfFormat format) {
471   // We are expected to be called with Offset 0 or pointing just past the table
472   // header. Correct Offset in the latter case so that it points to the start
473   // of the header.
474   if (offset == 0) {
475     // This means DW_AT_rnglists_base is missing and therefore DW_FORM_rnglistx
476     // cannot be handled. Returning a default-constructed ListTableType allows
477     // DW_FORM_sec_offset to be supported.
478     return ListTableType();
479   }
480 
481   uint64_t HeaderSize = llvm::DWARFListTableHeader::getHeaderSize(format);
482   if (offset < HeaderSize)
483     return llvm::createStringError(std::errc::invalid_argument,
484                                    "did not detect a valid"
485                                    " list table with base = 0x%" PRIx64 "\n",
486                                    offset);
487   offset -= HeaderSize;
488   ListTableType Table;
489   if (llvm::Error E = Table.extractHeaderAndOffsets(data, &offset))
490     return std::move(E);
491   return Table;
492 }
493 
494 void DWARFUnit::SetLoclistsBase(dw_addr_t loclists_base) {
495   uint64_t offset = 0;
496   if (const llvm::DWARFUnitIndex::Entry *entry = m_header.getIndexEntry()) {
497     const auto *contribution = entry->getContribution(llvm::DW_SECT_LOCLISTS);
498     if (!contribution) {
499       GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
500           "Failed to find location list contribution for CU with DWO Id "
501           "{0:x16}",
502           *GetDWOId());
503       return;
504     }
505     offset += contribution->getOffset();
506   }
507   m_loclists_base = loclists_base;
508 
509   uint64_t header_size = llvm::DWARFListTableHeader::getHeaderSize(DWARF32);
510   if (loclists_base < header_size)
511     return;
512 
513   m_loclist_table_header.emplace(".debug_loclists", "locations");
514   offset += loclists_base - header_size;
515   if (llvm::Error E = m_loclist_table_header->extract(
516           m_dwarf.GetDWARFContext().getOrLoadLocListsData().GetAsLLVMDWARF(),
517           &offset)) {
518     GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
519         "Failed to extract location list table at offset {0:x16} (location "
520         "list base: {1:x16}): {2}",
521         offset, loclists_base, toString(std::move(E)).c_str());
522   }
523 }
524 
525 std::unique_ptr<llvm::DWARFLocationTable>
526 DWARFUnit::GetLocationTable(const DataExtractor &data) const {
527   llvm::DWARFDataExtractor llvm_data(
528       data.GetData(), data.GetByteOrder() == lldb::eByteOrderLittle,
529       data.GetAddressByteSize());
530 
531   if (m_is_dwo || GetVersion() >= 5)
532     return std::make_unique<llvm::DWARFDebugLoclists>(llvm_data, GetVersion());
533   return std::make_unique<llvm::DWARFDebugLoc>(llvm_data);
534 }
535 
536 DWARFDataExtractor DWARFUnit::GetLocationData() const {
537   DWARFContext &Ctx = GetSymbolFileDWARF().GetDWARFContext();
538   const DWARFDataExtractor &data =
539       GetVersion() >= 5 ? Ctx.getOrLoadLocListsData() : Ctx.getOrLoadLocData();
540   if (const llvm::DWARFUnitIndex::Entry *entry = m_header.getIndexEntry()) {
541     if (const auto *contribution = entry->getContribution(
542             GetVersion() >= 5 ? llvm::DW_SECT_LOCLISTS : llvm::DW_SECT_EXT_LOC))
543       return DWARFDataExtractor(data, contribution->getOffset(),
544                                 contribution->getLength32());
545     return DWARFDataExtractor();
546   }
547   return data;
548 }
549 
550 DWARFDataExtractor DWARFUnit::GetRnglistData() const {
551   DWARFContext &Ctx = GetSymbolFileDWARF().GetDWARFContext();
552   const DWARFDataExtractor &data = Ctx.getOrLoadRngListsData();
553   if (const llvm::DWARFUnitIndex::Entry *entry = m_header.getIndexEntry()) {
554     if (const auto *contribution =
555             entry->getContribution(llvm::DW_SECT_RNGLISTS))
556       return DWARFDataExtractor(data, contribution->getOffset(),
557                                 contribution->getLength32());
558     GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
559         "Failed to find range list contribution for CU with signature {0:x16}",
560         entry->getSignature());
561 
562     return DWARFDataExtractor();
563   }
564   return data;
565 }
566 
567 void DWARFUnit::SetRangesBase(dw_addr_t ranges_base) {
568   lldbassert(!m_rnglist_table_done);
569 
570   m_ranges_base = ranges_base;
571 }
572 
573 const std::optional<llvm::DWARFDebugRnglistTable> &
574 DWARFUnit::GetRnglistTable() {
575   if (GetVersion() >= 5 && !m_rnglist_table_done) {
576     m_rnglist_table_done = true;
577     if (auto table_or_error =
578             ParseListTableHeader<llvm::DWARFDebugRnglistTable>(
579                 GetRnglistData().GetAsLLVMDWARF(), m_ranges_base, DWARF32))
580       m_rnglist_table = std::move(table_or_error.get());
581     else
582       GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
583           "Failed to extract range list table at offset {0:x16}: {1}",
584           m_ranges_base, toString(table_or_error.takeError()).c_str());
585   }
586   return m_rnglist_table;
587 }
588 
589 // This function is called only for DW_FORM_rnglistx.
590 llvm::Expected<uint64_t> DWARFUnit::GetRnglistOffset(uint32_t Index) {
591   if (!GetRnglistTable())
592     return llvm::createStringError(std::errc::invalid_argument,
593                                    "missing or invalid range list table");
594   if (!m_ranges_base)
595     return llvm::createStringError(
596         std::errc::invalid_argument,
597         llvm::formatv("DW_FORM_rnglistx cannot be used without "
598                       "DW_AT_rnglists_base for CU at {0:x16}",
599                       GetOffset())
600             .str()
601             .c_str());
602   if (std::optional<uint64_t> off = GetRnglistTable()->getOffsetEntry(
603           GetRnglistData().GetAsLLVM(), Index))
604     return *off + m_ranges_base;
605   return llvm::createStringError(
606       std::errc::invalid_argument,
607       "invalid range list table index %u; OffsetEntryCount is %u, "
608       "DW_AT_rnglists_base is %" PRIu64,
609       Index, GetRnglistTable()->getOffsetEntryCount(), m_ranges_base);
610 }
611 
612 void DWARFUnit::SetStrOffsetsBase(dw_offset_t str_offsets_base) {
613   m_str_offsets_base = str_offsets_base;
614 }
615 
616 dw_addr_t DWARFUnit::ReadAddressFromDebugAddrSection(uint32_t index) const {
617   uint32_t index_size = GetAddressByteSize();
618   dw_offset_t addr_base = GetAddrBase();
619   dw_addr_t offset = addr_base + static_cast<dw_addr_t>(index) * index_size;
620   const DWARFDataExtractor &data =
621       m_dwarf.GetDWARFContext().getOrLoadAddrData();
622   if (data.ValidOffsetForDataOfSize(offset, index_size))
623     return data.GetMaxU64_unchecked(&offset, index_size);
624   return LLDB_INVALID_ADDRESS;
625 }
626 
627 // It may be called only with m_die_array_mutex held R/W.
628 void DWARFUnit::ClearDIEsRWLocked() {
629   m_die_array.clear();
630   m_die_array.shrink_to_fit();
631 
632   if (m_dwo && !m_dwo->m_cancel_scopes)
633     m_dwo->ClearDIEsRWLocked();
634 }
635 
636 lldb::ByteOrder DWARFUnit::GetByteOrder() const {
637   return m_dwarf.GetObjectFile()->GetByteOrder();
638 }
639 
640 void DWARFUnit::SetBaseAddress(dw_addr_t base_addr) { m_base_addr = base_addr; }
641 
642 // Compare function DWARFDebugAranges::Range structures
643 static bool CompareDIEOffset(const DWARFDebugInfoEntry &die,
644                              const dw_offset_t die_offset) {
645   return die.GetOffset() < die_offset;
646 }
647 
648 // GetDIE()
649 //
650 // Get the DIE (Debug Information Entry) with the specified offset by first
651 // checking if the DIE is contained within this compile unit and grabbing the
652 // DIE from this compile unit. Otherwise we grab the DIE from the DWARF file.
653 DWARFDIE
654 DWARFUnit::GetDIE(dw_offset_t die_offset) {
655   if (die_offset == DW_INVALID_OFFSET)
656     return DWARFDIE(); // Not found
657 
658   if (!ContainsDIEOffset(die_offset)) {
659     GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
660         "GetDIE for DIE {0:x16} is outside of its CU {1:x16}", die_offset,
661         GetOffset());
662     return DWARFDIE(); // Not found
663   }
664 
665   ExtractDIEsIfNeeded();
666   DWARFDebugInfoEntry::const_iterator end = m_die_array.cend();
667   DWARFDebugInfoEntry::const_iterator pos =
668       lower_bound(m_die_array.cbegin(), end, die_offset, CompareDIEOffset);
669 
670   if (pos != end && die_offset == (*pos).GetOffset())
671     return DWARFDIE(this, &(*pos));
672   return DWARFDIE(); // Not found
673 }
674 
675 llvm::StringRef DWARFUnit::PeekDIEName(dw_offset_t die_offset) {
676   DWARFDebugInfoEntry die;
677   if (!die.Extract(GetData(), *this, &die_offset))
678     return llvm::StringRef();
679 
680   // Does die contain a DW_AT_Name?
681   if (const char *name =
682           die.GetAttributeValueAsString(this, DW_AT_name, nullptr))
683     return name;
684 
685   // Does its DW_AT_specification or DW_AT_abstract_origin contain an AT_Name?
686   for (auto attr : {DW_AT_specification, DW_AT_abstract_origin}) {
687     DWARFFormValue form_value;
688     if (!die.GetAttributeValue(this, attr, form_value))
689       continue;
690     auto [unit, offset] = form_value.ReferencedUnitAndOffset();
691     if (unit)
692       if (auto name = unit->PeekDIEName(offset); !name.empty())
693         return name;
694   }
695 
696   return llvm::StringRef();
697 }
698 
699 DWARFUnit &DWARFUnit::GetNonSkeletonUnit() {
700   ExtractUnitDIEIfNeeded();
701   if (m_dwo)
702     return *m_dwo;
703   return *this;
704 }
705 
706 uint8_t DWARFUnit::GetAddressByteSize(const DWARFUnit *cu) {
707   if (cu)
708     return cu->GetAddressByteSize();
709   return DWARFUnit::GetDefaultAddressSize();
710 }
711 
712 uint8_t DWARFUnit::GetDefaultAddressSize() { return 4; }
713 
714 DWARFCompileUnit *DWARFUnit::GetSkeletonUnit() {
715   if (m_skeleton_unit.load() == nullptr && IsDWOUnit()) {
716     SymbolFileDWARFDwo *dwo =
717         llvm::dyn_cast_or_null<SymbolFileDWARFDwo>(&GetSymbolFileDWARF());
718     // Do a reverse lookup if the skeleton compile unit wasn't set.
719     DWARFUnit *candidate_skeleton_unit =
720         dwo ? dwo->GetBaseSymbolFile().GetSkeletonUnit(this) : nullptr;
721     if (candidate_skeleton_unit)
722       (void)LinkToSkeletonUnit(*candidate_skeleton_unit);
723     // Linking may fail due to a race, so be sure to return the actual value.
724   }
725   return llvm::dyn_cast_or_null<DWARFCompileUnit>(m_skeleton_unit.load());
726 }
727 
728 bool DWARFUnit::LinkToSkeletonUnit(DWARFUnit &skeleton_unit) {
729   DWARFUnit *expected_unit = nullptr;
730   if (m_skeleton_unit.compare_exchange_strong(expected_unit, &skeleton_unit))
731     return true;
732   if (expected_unit == &skeleton_unit) {
733     // Exchange failed because it already contained the right  value.
734     return true;
735   }
736   return false; // Already linked to a different unit.
737 }
738 
739 bool DWARFUnit::Supports_unnamed_objc_bitfields() {
740   if (GetProducer() == eProducerClang)
741     return GetProducerVersion() >= llvm::VersionTuple(425, 0, 13);
742   // Assume all other compilers didn't have incorrect ObjC bitfield info.
743   return true;
744 }
745 
746 void DWARFUnit::ParseProducerInfo() {
747   m_producer = eProducerOther;
748   const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
749   if (!die)
750     return;
751 
752   llvm::StringRef producer(
753       die->GetAttributeValueAsString(this, DW_AT_producer, nullptr));
754   if (producer.empty())
755     return;
756 
757   static const RegularExpression g_swiftlang_version_regex(
758       llvm::StringRef(R"(swiftlang-([0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?))"));
759   static const RegularExpression g_clang_version_regex(
760       llvm::StringRef(R"(clang-([0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+)?))"));
761 
762   llvm::SmallVector<llvm::StringRef, 3> matches;
763   if (g_swiftlang_version_regex.Execute(producer, &matches)) {
764     m_producer_version.tryParse(matches[1]);
765     m_producer = eProducerSwift;
766   } else if (producer.contains("clang")) {
767     if (g_clang_version_regex.Execute(producer, &matches))
768       m_producer_version.tryParse(matches[1]);
769     m_producer = eProducerClang;
770   } else if (producer.contains("GNU")) {
771     m_producer = eProducerGCC;
772   }
773 }
774 
775 DWARFProducer DWARFUnit::GetProducer() {
776   if (m_producer == eProducerInvalid)
777     ParseProducerInfo();
778   return m_producer;
779 }
780 
781 llvm::VersionTuple DWARFUnit::GetProducerVersion() {
782   if (m_producer_version.empty())
783     ParseProducerInfo();
784   return m_producer_version;
785 }
786 
787 uint64_t DWARFUnit::GetDWARFLanguageType() {
788   if (m_language_type)
789     return *m_language_type;
790 
791   const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
792   if (!die)
793     m_language_type = 0;
794   else
795     m_language_type = die->GetAttributeValueAsUnsigned(this, DW_AT_language, 0);
796   return *m_language_type;
797 }
798 
799 bool DWARFUnit::GetIsOptimized() {
800   if (m_is_optimized == eLazyBoolCalculate) {
801     const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
802     if (die) {
803       m_is_optimized = eLazyBoolNo;
804       if (die->GetAttributeValueAsUnsigned(this, DW_AT_APPLE_optimized, 0) ==
805           1) {
806         m_is_optimized = eLazyBoolYes;
807       }
808     }
809   }
810   return m_is_optimized == eLazyBoolYes;
811 }
812 
813 FileSpec::Style DWARFUnit::GetPathStyle() {
814   if (!m_comp_dir)
815     ComputeCompDirAndGuessPathStyle();
816   return m_comp_dir->GetPathStyle();
817 }
818 
819 const FileSpec &DWARFUnit::GetCompilationDirectory() {
820   if (!m_comp_dir)
821     ComputeCompDirAndGuessPathStyle();
822   return *m_comp_dir;
823 }
824 
825 const FileSpec &DWARFUnit::GetAbsolutePath() {
826   if (!m_file_spec)
827     ComputeAbsolutePath();
828   return *m_file_spec;
829 }
830 
831 FileSpec DWARFUnit::GetFile(size_t file_idx) {
832   return m_dwarf.GetFile(*this, file_idx);
833 }
834 
835 // DWARF2/3 suggests the form hostname:pathname for compilation directory.
836 // Remove the host part if present.
837 static llvm::StringRef
838 removeHostnameFromPathname(llvm::StringRef path_from_dwarf) {
839   if (!path_from_dwarf.contains(':'))
840     return path_from_dwarf;
841   llvm::StringRef host, path;
842   std::tie(host, path) = path_from_dwarf.split(':');
843 
844   if (host.contains('/'))
845     return path_from_dwarf;
846 
847   // check whether we have a windows path, and so the first character is a
848   // drive-letter not a hostname.
849   if (host.size() == 1 && llvm::isAlpha(host[0]) &&
850       (path.starts_with("\\") || path.starts_with("/")))
851     return path_from_dwarf;
852 
853   return path;
854 }
855 
856 void DWARFUnit::ComputeCompDirAndGuessPathStyle() {
857   m_comp_dir = FileSpec();
858   const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
859   if (!die)
860     return;
861 
862   llvm::StringRef comp_dir = removeHostnameFromPathname(
863       die->GetAttributeValueAsString(this, DW_AT_comp_dir, nullptr));
864   if (!comp_dir.empty()) {
865     FileSpec::Style comp_dir_style =
866         FileSpec::GuessPathStyle(comp_dir).value_or(FileSpec::Style::native);
867     m_comp_dir = FileSpec(comp_dir, comp_dir_style);
868   } else {
869     // Try to detect the style based on the DW_AT_name attribute, but just store
870     // the detected style in the m_comp_dir field.
871     const char *name =
872         die->GetAttributeValueAsString(this, DW_AT_name, nullptr);
873     m_comp_dir = FileSpec(
874         "", FileSpec::GuessPathStyle(name).value_or(FileSpec::Style::native));
875   }
876 }
877 
878 void DWARFUnit::ComputeAbsolutePath() {
879   m_file_spec = FileSpec();
880   const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
881   if (!die)
882     return;
883 
884   m_file_spec =
885       FileSpec(die->GetAttributeValueAsString(this, DW_AT_name, nullptr),
886                GetPathStyle());
887 
888   if (m_file_spec->IsRelative())
889     m_file_spec->MakeAbsolute(GetCompilationDirectory());
890 }
891 
892 SymbolFileDWARFDwo *DWARFUnit::GetDwoSymbolFile(bool load_all_debug_info) {
893   if (load_all_debug_info)
894     ExtractUnitDIEIfNeeded();
895   if (m_dwo)
896     return &llvm::cast<SymbolFileDWARFDwo>(m_dwo->GetSymbolFileDWARF());
897   return nullptr;
898 }
899 
900 const DWARFDebugAranges &DWARFUnit::GetFunctionAranges() {
901   if (m_func_aranges_up == nullptr) {
902     m_func_aranges_up = std::make_unique<DWARFDebugAranges>();
903     const DWARFDebugInfoEntry *die = DIEPtr();
904     if (die)
905       die->BuildFunctionAddressRangeTable(this, m_func_aranges_up.get());
906 
907     if (m_dwo) {
908       const DWARFDebugInfoEntry *dwo_die = m_dwo->DIEPtr();
909       if (dwo_die)
910         dwo_die->BuildFunctionAddressRangeTable(m_dwo.get(),
911                                                 m_func_aranges_up.get());
912     }
913 
914     const bool minimize = false;
915     m_func_aranges_up->Sort(minimize);
916   }
917   return *m_func_aranges_up;
918 }
919 
920 llvm::Expected<DWARFUnitSP>
921 DWARFUnit::extract(SymbolFileDWARF &dwarf, user_id_t uid,
922                    const DWARFDataExtractor &debug_info,
923                    DIERef::Section section, lldb::offset_t *offset_ptr) {
924   assert(debug_info.ValidOffset(*offset_ptr));
925 
926   DWARFContext &context = dwarf.GetDWARFContext();
927 
928   // FIXME: Either properly map between DIERef::Section and
929   // llvm::DWARFSectionKind or switch to llvm's definition entirely.
930   llvm::DWARFSectionKind section_kind_llvm =
931       section == DIERef::Section::DebugInfo
932           ? llvm::DWARFSectionKind::DW_SECT_INFO
933           : llvm::DWARFSectionKind::DW_SECT_EXT_TYPES;
934 
935   llvm::DWARFDataExtractor debug_info_llvm = debug_info.GetAsLLVMDWARF();
936   llvm::DWARFUnitHeader header;
937   if (llvm::Error extract_err = header.extract(
938           context.GetAsLLVM(), debug_info_llvm, offset_ptr, section_kind_llvm))
939     return std::move(extract_err);
940 
941   if (context.isDwo()) {
942     const llvm::DWARFUnitIndex::Entry *entry = nullptr;
943     const llvm::DWARFUnitIndex &index = header.isTypeUnit()
944                                             ? context.GetAsLLVM().getTUIndex()
945                                             : context.GetAsLLVM().getCUIndex();
946     if (index) {
947       if (header.isTypeUnit())
948         entry = index.getFromHash(header.getTypeHash());
949       else if (auto dwo_id = header.getDWOId())
950         entry = index.getFromHash(*dwo_id);
951     }
952     if (!entry)
953       entry = index.getFromOffset(header.getOffset());
954     if (entry)
955       if (llvm::Error err = header.applyIndexEntry(entry))
956         return std::move(err);
957   }
958 
959   const llvm::DWARFDebugAbbrev *abbr = dwarf.DebugAbbrev();
960   if (!abbr)
961     return llvm::make_error<llvm::object::GenericBinaryError>(
962         "No debug_abbrev data");
963 
964   bool abbr_offset_OK =
965       dwarf.GetDWARFContext().getOrLoadAbbrevData().ValidOffset(
966           header.getAbbrOffset());
967   if (!abbr_offset_OK)
968     return llvm::make_error<llvm::object::GenericBinaryError>(
969         "Abbreviation offset for unit is not valid");
970 
971   llvm::Expected<const llvm::DWARFAbbreviationDeclarationSet *> abbrevs_or_err =
972       abbr->getAbbreviationDeclarationSet(header.getAbbrOffset());
973   if (!abbrevs_or_err)
974     return abbrevs_or_err.takeError();
975 
976   const llvm::DWARFAbbreviationDeclarationSet *abbrevs = *abbrevs_or_err;
977   if (!abbrevs)
978     return llvm::make_error<llvm::object::GenericBinaryError>(
979         "No abbrev exists at the specified offset.");
980 
981   bool is_dwo = dwarf.GetDWARFContext().isDwo();
982   if (header.isTypeUnit())
983     return DWARFUnitSP(
984         new DWARFTypeUnit(dwarf, uid, header, *abbrevs, section, is_dwo));
985   return DWARFUnitSP(
986       new DWARFCompileUnit(dwarf, uid, header, *abbrevs, section, is_dwo));
987 }
988 
989 const lldb_private::DWARFDataExtractor &DWARFUnit::GetData() const {
990   return m_section == DIERef::Section::DebugTypes
991              ? m_dwarf.GetDWARFContext().getOrLoadDebugTypesData()
992              : m_dwarf.GetDWARFContext().getOrLoadDebugInfoData();
993 }
994 
995 uint32_t DWARFUnit::GetHeaderByteSize() const {
996   switch (m_header.getUnitType()) {
997   case llvm::dwarf::DW_UT_compile:
998   case llvm::dwarf::DW_UT_partial:
999     return GetVersion() < 5 ? 11 : 12;
1000   case llvm::dwarf::DW_UT_skeleton:
1001   case llvm::dwarf::DW_UT_split_compile:
1002     return 20;
1003   case llvm::dwarf::DW_UT_type:
1004   case llvm::dwarf::DW_UT_split_type:
1005     return GetVersion() < 5 ? 23 : 24;
1006   }
1007   llvm_unreachable("invalid UnitType.");
1008 }
1009 
1010 std::optional<uint64_t>
1011 DWARFUnit::GetStringOffsetSectionItem(uint32_t index) const {
1012   lldb::offset_t offset = GetStrOffsetsBase() + index * 4;
1013   return m_dwarf.GetDWARFContext().getOrLoadStrOffsetsData().GetU32(&offset);
1014 }
1015 
1016 llvm::Expected<llvm::DWARFAddressRangesVector>
1017 DWARFUnit::FindRnglistFromOffset(dw_offset_t offset) {
1018   if (GetVersion() <= 4) {
1019     llvm::DWARFDataExtractor data =
1020         m_dwarf.GetDWARFContext().getOrLoadRangesData().GetAsLLVMDWARF();
1021     data.setAddressSize(m_header.getAddressByteSize());
1022 
1023     llvm::DWARFDebugRangeList list;
1024     if (llvm::Error e = list.extract(data, &offset))
1025       return e;
1026     return list.getAbsoluteRanges(
1027         llvm::object::SectionedAddress{GetBaseAddress()});
1028   }
1029 
1030   // DWARF >= v5
1031   if (!GetRnglistTable())
1032     return llvm::createStringError(std::errc::invalid_argument,
1033                                    "missing or invalid range list table");
1034 
1035   llvm::DWARFDataExtractor data = GetRnglistData().GetAsLLVMDWARF();
1036 
1037   // As DW_AT_rnglists_base may be missing we need to call setAddressSize.
1038   data.setAddressSize(m_header.getAddressByteSize());
1039   auto range_list_or_error = GetRnglistTable()->findList(data, offset);
1040   if (!range_list_or_error)
1041     return range_list_or_error.takeError();
1042 
1043   return range_list_or_error->getAbsoluteRanges(
1044       llvm::object::SectionedAddress{GetBaseAddress()}, GetAddressByteSize(),
1045       [&](uint32_t index) {
1046         uint32_t index_size = GetAddressByteSize();
1047         dw_offset_t addr_base = GetAddrBase();
1048         lldb::offset_t offset =
1049             addr_base + static_cast<lldb::offset_t>(index) * index_size;
1050         return llvm::object::SectionedAddress{
1051             m_dwarf.GetDWARFContext().getOrLoadAddrData().GetMaxU64(
1052                 &offset, index_size)};
1053       });
1054 }
1055 
1056 llvm::Expected<llvm::DWARFAddressRangesVector>
1057 DWARFUnit::FindRnglistFromIndex(uint32_t index) {
1058   llvm::Expected<uint64_t> maybe_offset = GetRnglistOffset(index);
1059   if (!maybe_offset)
1060     return maybe_offset.takeError();
1061   return FindRnglistFromOffset(*maybe_offset);
1062 }
1063 
1064 bool DWARFUnit::HasAny(llvm::ArrayRef<dw_tag_t> tags) {
1065   ExtractUnitDIEIfNeeded();
1066   if (m_dwo)
1067     return m_dwo->HasAny(tags);
1068 
1069   for (const auto &die : m_die_array) {
1070     for (const auto tag : tags) {
1071       if (tag == die.Tag())
1072         return true;
1073     }
1074   }
1075   return false;
1076 }
1077