xref: /llvm-project/llvm/tools/obj2yaml/dwarf2yaml.cpp (revision 8a1846dbdcc62675b51d245caabfe3c6ec6fd209)
1 //===------ dwarf2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/BinaryFormat/Dwarf.h"
10 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
11 #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
12 #include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
13 #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
14 #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
15 #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
16 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
17 #include "llvm/DebugInfo/DWARF/DWARFSection.h"
18 #include "llvm/ObjectYAML/DWARFYAML.h"
19 
20 #include <algorithm>
21 #include <optional>
22 
23 using namespace llvm;
24 
25 Error dumpDebugAbbrev(DWARFContext &DCtx, DWARFYAML::Data &Y) {
26   auto AbbrevSetPtr = DCtx.getDebugAbbrev();
27   if (AbbrevSetPtr) {
28     uint64_t AbbrevTableID = 0;
29     if (Error Err = AbbrevSetPtr->parse())
30       return Err;
31     for (const auto &AbbrvDeclSet : *AbbrevSetPtr) {
32       Y.DebugAbbrev.emplace_back();
33       Y.DebugAbbrev.back().ID = AbbrevTableID++;
34       for (const DWARFAbbreviationDeclaration &AbbrvDecl :
35            AbbrvDeclSet.second) {
36         DWARFYAML::Abbrev Abbrv;
37         Abbrv.Code = AbbrvDecl.getCode();
38         Abbrv.Tag = AbbrvDecl.getTag();
39         Abbrv.Children = AbbrvDecl.hasChildren() ? dwarf::DW_CHILDREN_yes
40                                                  : dwarf::DW_CHILDREN_no;
41         for (auto Attribute : AbbrvDecl.attributes()) {
42           DWARFYAML::AttributeAbbrev AttAbrv;
43           AttAbrv.Attribute = Attribute.Attr;
44           AttAbrv.Form = Attribute.Form;
45           if (AttAbrv.Form == dwarf::DW_FORM_implicit_const)
46             AttAbrv.Value = Attribute.getImplicitConstValue();
47           Abbrv.Attributes.push_back(AttAbrv);
48         }
49         Y.DebugAbbrev.back().Table.push_back(Abbrv);
50       }
51     }
52   }
53   return Error::success();
54 }
55 
56 Error dumpDebugAddr(DWARFContext &DCtx, DWARFYAML::Data &Y) {
57   DWARFDebugAddrTable AddrTable;
58   DWARFDataExtractor AddrData(DCtx.getDWARFObj(),
59                               DCtx.getDWARFObj().getAddrSection(),
60                               DCtx.isLittleEndian(), /*AddressSize=*/0);
61   std::vector<DWARFYAML::AddrTableEntry> AddrTables;
62   uint64_t Offset = 0;
63   while (AddrData.isValidOffset(Offset)) {
64     // We ignore any errors that don't prevent parsing the section, since we can
65     // still represent such sections.
66     if (Error Err = AddrTable.extractV5(AddrData, &Offset, /*CUAddrSize=*/0,
67                                         consumeError))
68       return Err;
69     AddrTables.emplace_back();
70     for (uint64_t Addr : AddrTable.getAddressEntries()) {
71       // Currently, the parser doesn't support parsing an address table with non
72       // linear addresses (segment_selector_size != 0). The segment selectors
73       // are specified to be zero.
74       AddrTables.back().SegAddrPairs.push_back(
75           {/*SegmentSelector=*/0, /*Address=*/Addr});
76     }
77 
78     AddrTables.back().Format = AddrTable.getFormat();
79     AddrTables.back().Length = AddrTable.getLength();
80     AddrTables.back().Version = AddrTable.getVersion();
81     AddrTables.back().AddrSize = AddrTable.getAddressSize();
82     AddrTables.back().SegSelectorSize = AddrTable.getSegmentSelectorSize();
83   }
84   Y.DebugAddr = std::move(AddrTables);
85   return Error::success();
86 }
87 
88 Error dumpDebugStrings(DWARFContext &DCtx, DWARFYAML::Data &Y) {
89   DataExtractor StrData = DCtx.getStringExtractor();
90   uint64_t Offset = 0;
91   std::vector<StringRef> DebugStr;
92   Error Err = Error::success();
93   while (StrData.isValidOffset(Offset)) {
94     const char *CStr = StrData.getCStr(&Offset, &Err);
95     if (Err)
96       return Err;
97     DebugStr.push_back(CStr);
98   }
99 
100   Y.DebugStrings = DebugStr;
101   return Err;
102 }
103 
104 Error dumpDebugARanges(DWARFContext &DCtx, DWARFYAML::Data &Y) {
105   DWARFDataExtractor ArangesData(DCtx.getDWARFObj().getArangesSection(),
106                                  DCtx.isLittleEndian(), 0);
107   uint64_t Offset = 0;
108   DWARFDebugArangeSet Set;
109   std::vector<DWARFYAML::ARange> DebugAranges;
110 
111   // We ignore any errors that don't prevent parsing the section, since we can
112   // still represent such sections. These errors are recorded via the
113   // WarningHandler parameter of Set.extract().
114   auto DiscardError = [](Error Err) { consumeError(std::move(Err)); };
115 
116   while (ArangesData.isValidOffset(Offset)) {
117     if (Error E = Set.extract(ArangesData, &Offset, DiscardError))
118       return E;
119     DWARFYAML::ARange Range;
120     Range.Format = Set.getHeader().Format;
121     Range.Length = Set.getHeader().Length;
122     Range.Version = Set.getHeader().Version;
123     Range.CuOffset = Set.getHeader().CuOffset;
124     Range.AddrSize = Set.getHeader().AddrSize;
125     Range.SegSize = Set.getHeader().SegSize;
126     for (auto Descriptor : Set.descriptors()) {
127       DWARFYAML::ARangeDescriptor Desc;
128       Desc.Address = Descriptor.Address;
129       Desc.Length = Descriptor.Length;
130       Range.Descriptors.push_back(Desc);
131     }
132     DebugAranges.push_back(Range);
133   }
134 
135   Y.DebugAranges = DebugAranges;
136   return ErrorSuccess();
137 }
138 
139 Error dumpDebugRanges(DWARFContext &DCtx, DWARFYAML::Data &Y) {
140   // We are assuming all address byte sizes will be consistent across all
141   // compile units.
142   uint8_t AddrSize = 0;
143   for (const auto &CU : DCtx.compile_units()) {
144     const uint8_t CUAddrSize = CU->getAddressByteSize();
145     if (AddrSize == 0)
146       AddrSize = CUAddrSize;
147     else if (CUAddrSize != AddrSize)
148       return createStringError(std::errc::invalid_argument,
149                                "address sizes vary in different compile units");
150   }
151 
152   DWARFDataExtractor Data(DCtx.getDWARFObj().getRangesSection().Data,
153                           DCtx.isLittleEndian(), AddrSize);
154   uint64_t Offset = 0;
155   DWARFDebugRangeList DwarfRanges;
156   std::vector<DWARFYAML::Ranges> DebugRanges;
157 
158   while (Data.isValidOffset(Offset)) {
159     DWARFYAML::Ranges YamlRanges;
160     YamlRanges.Offset = Offset;
161     YamlRanges.AddrSize = AddrSize;
162     if (Error E = DwarfRanges.extract(Data, &Offset))
163       return E;
164     for (const auto &RLE : DwarfRanges.getEntries())
165       YamlRanges.Entries.push_back({RLE.StartAddress, RLE.EndAddress});
166     DebugRanges.push_back(std::move(YamlRanges));
167   }
168 
169   Y.DebugRanges = DebugRanges;
170   return ErrorSuccess();
171 }
172 
173 static std::optional<DWARFYAML::PubSection>
174 dumpPubSection(const DWARFContext &DCtx, const DWARFSection &Section,
175                bool IsGNUStyle) {
176   DWARFYAML::PubSection Y;
177   DWARFDataExtractor PubSectionData(DCtx.getDWARFObj(), Section,
178                                     DCtx.isLittleEndian(), 0);
179   DWARFDebugPubTable Table;
180   // We ignore any errors that don't prevent parsing the section, since we can
181   // still represent such sections.
182   Table.extract(PubSectionData, IsGNUStyle,
183                 [](Error Err) { consumeError(std::move(Err)); });
184   ArrayRef<DWARFDebugPubTable::Set> Sets = Table.getData();
185   if (Sets.empty())
186     return std::nullopt;
187 
188   // FIXME: Currently, obj2yaml only supports dumping the first pubtable.
189   Y.Format = Sets[0].Format;
190   Y.Length = Sets[0].Length;
191   Y.Version = Sets[0].Version;
192   Y.UnitOffset = Sets[0].Offset;
193   Y.UnitSize = Sets[0].Size;
194 
195   for (const DWARFDebugPubTable::Entry &E : Sets[0].Entries)
196     Y.Entries.push_back(DWARFYAML::PubEntry{(uint32_t)E.SecOffset,
197                                             E.Descriptor.toBits(), E.Name});
198 
199   return Y;
200 }
201 
202 void dumpDebugPubSections(DWARFContext &DCtx, DWARFYAML::Data &Y) {
203   const DWARFObject &D = DCtx.getDWARFObj();
204 
205   Y.PubNames =
206       dumpPubSection(DCtx, D.getPubnamesSection(), /*IsGNUStyle=*/false);
207   Y.PubTypes =
208       dumpPubSection(DCtx, D.getPubtypesSection(), /*IsGNUStyle=*/false);
209   // TODO: Test dumping .debug_gnu_pubnames section.
210   Y.GNUPubNames =
211       dumpPubSection(DCtx, D.getGnuPubnamesSection(), /*IsGNUStyle=*/true);
212   // TODO: Test dumping .debug_gnu_pubtypes section.
213   Y.GNUPubTypes =
214       dumpPubSection(DCtx, D.getGnuPubtypesSection(), /*IsGNUStyle=*/true);
215 }
216 
217 void dumpDebugInfo(DWARFContext &DCtx, DWARFYAML::Data &Y) {
218   for (const auto &CU : DCtx.compile_units()) {
219     DWARFYAML::Unit NewUnit;
220     NewUnit.Format = CU->getFormat();
221     NewUnit.Length = CU->getLength();
222     NewUnit.Version = CU->getVersion();
223     if (NewUnit.Version >= 5)
224       NewUnit.Type = (dwarf::UnitType)CU->getUnitType();
225     const DWARFDebugAbbrev *DebugAbbrev = DCtx.getDebugAbbrev();
226     // FIXME: Ideally we would propagate this error upwards, but that would
227     // prevent us from displaying any debug info at all. For now we just consume
228     // the error and display everything that was parsed successfully.
229     if (Error Err = DebugAbbrev->parse())
230       llvm::consumeError(std::move(Err));
231 
232     NewUnit.AbbrevTableID = std::distance(
233         DebugAbbrev->begin(),
234         llvm::find_if(
235             *DebugAbbrev,
236             [&](const std::pair<uint64_t, DWARFAbbreviationDeclarationSet> &P) {
237               return P.first == CU->getAbbreviations()->getOffset();
238             }));
239     NewUnit.AbbrOffset = CU->getAbbreviations()->getOffset();
240     NewUnit.AddrSize = CU->getAddressByteSize();
241     for (auto DIE : CU->dies()) {
242       DWARFYAML::Entry NewEntry;
243       DataExtractor EntryData = CU->getDebugInfoExtractor();
244       uint64_t offset = DIE.getOffset();
245 
246       assert(EntryData.isValidOffset(offset) && "Invalid DIE Offset");
247       if (!EntryData.isValidOffset(offset))
248         continue;
249 
250       NewEntry.AbbrCode = EntryData.getULEB128(&offset);
251 
252       auto AbbrevDecl = DIE.getAbbreviationDeclarationPtr();
253       if (AbbrevDecl) {
254         for (const auto &AttrSpec : AbbrevDecl->attributes()) {
255           DWARFYAML::FormValue NewValue;
256           NewValue.Value = 0xDEADBEEFDEADBEEF;
257           DWARFDie DIEWrapper(CU.get(), &DIE);
258           auto FormValue = DIEWrapper.find(AttrSpec.Attr);
259           if (!FormValue)
260             return;
261           auto Form = FormValue->getForm();
262           bool indirect = false;
263           do {
264             indirect = false;
265             switch (Form) {
266             case dwarf::DW_FORM_addr:
267             case dwarf::DW_FORM_GNU_addr_index:
268               if (auto Val = FormValue->getAsAddress())
269                 NewValue.Value = *Val;
270               break;
271             case dwarf::DW_FORM_ref_addr:
272             case dwarf::DW_FORM_ref1:
273             case dwarf::DW_FORM_ref2:
274             case dwarf::DW_FORM_ref4:
275             case dwarf::DW_FORM_ref8:
276             case dwarf::DW_FORM_ref_udata:
277             case dwarf::DW_FORM_ref_sig8:
278               if (auto Val = FormValue->getAsReferenceUVal())
279                 NewValue.Value = *Val;
280               break;
281             case dwarf::DW_FORM_exprloc:
282             case dwarf::DW_FORM_block:
283             case dwarf::DW_FORM_block1:
284             case dwarf::DW_FORM_block2:
285             case dwarf::DW_FORM_block4:
286               if (auto Val = FormValue->getAsBlock()) {
287                 auto BlockData = *Val;
288                 std::copy(BlockData.begin(), BlockData.end(),
289                           std::back_inserter(NewValue.BlockData));
290               }
291               NewValue.Value = NewValue.BlockData.size();
292               break;
293             case dwarf::DW_FORM_data1:
294             case dwarf::DW_FORM_flag:
295             case dwarf::DW_FORM_data2:
296             case dwarf::DW_FORM_data4:
297             case dwarf::DW_FORM_data8:
298             case dwarf::DW_FORM_sdata:
299             case dwarf::DW_FORM_udata:
300             case dwarf::DW_FORM_ref_sup4:
301             case dwarf::DW_FORM_ref_sup8:
302               if (auto Val = FormValue->getAsUnsignedConstant())
303                 NewValue.Value = *Val;
304               break;
305             case dwarf::DW_FORM_string:
306               if (auto Val = dwarf::toString(FormValue))
307                 NewValue.CStr = *Val;
308               break;
309             case dwarf::DW_FORM_indirect:
310               indirect = true;
311               if (auto Val = FormValue->getAsUnsignedConstant()) {
312                 NewValue.Value = *Val;
313                 NewEntry.Values.push_back(NewValue);
314                 Form = static_cast<dwarf::Form>(*Val);
315               }
316               break;
317             case dwarf::DW_FORM_strp:
318             case dwarf::DW_FORM_sec_offset:
319             case dwarf::DW_FORM_GNU_ref_alt:
320             case dwarf::DW_FORM_GNU_strp_alt:
321             case dwarf::DW_FORM_line_strp:
322             case dwarf::DW_FORM_strp_sup:
323             case dwarf::DW_FORM_GNU_str_index:
324             case dwarf::DW_FORM_strx:
325               if (auto Val = FormValue->getAsCStringOffset())
326                 NewValue.Value = *Val;
327               break;
328             case dwarf::DW_FORM_flag_present:
329               NewValue.Value = 1;
330               break;
331             default:
332               break;
333             }
334           } while (indirect);
335           NewEntry.Values.push_back(NewValue);
336         }
337       }
338 
339       NewUnit.Entries.push_back(NewEntry);
340     }
341     Y.Units.push_back(NewUnit);
342   }
343 }
344 
345 bool dumpFileEntry(DataExtractor &Data, uint64_t &Offset,
346                    DWARFYAML::File &File) {
347   File.Name = Data.getCStr(&Offset);
348   if (File.Name.empty())
349     return false;
350   File.DirIdx = Data.getULEB128(&Offset);
351   File.ModTime = Data.getULEB128(&Offset);
352   File.Length = Data.getULEB128(&Offset);
353   return true;
354 }
355 
356 void dumpDebugLines(DWARFContext &DCtx, DWARFYAML::Data &Y) {
357   for (const auto &CU : DCtx.compile_units()) {
358     auto CUDIE = CU->getUnitDIE();
359     if (!CUDIE)
360       continue;
361     if (auto StmtOffset =
362             dwarf::toSectionOffset(CUDIE.find(dwarf::DW_AT_stmt_list))) {
363       DWARFYAML::LineTable DebugLines;
364       DataExtractor LineData(DCtx.getDWARFObj().getLineSection().Data,
365                              DCtx.isLittleEndian(), CU->getAddressByteSize());
366       uint64_t Offset = *StmtOffset;
367       uint64_t LengthOrDWARF64Prefix = LineData.getU32(&Offset);
368       if (LengthOrDWARF64Prefix == dwarf::DW_LENGTH_DWARF64) {
369         DebugLines.Format = dwarf::DWARF64;
370         DebugLines.Length = LineData.getU64(&Offset);
371       } else {
372         DebugLines.Format = dwarf::DWARF32;
373         DebugLines.Length = LengthOrDWARF64Prefix;
374       }
375       assert(DebugLines.Length);
376       uint64_t LineTableLength = *DebugLines.Length;
377       uint64_t SizeOfPrologueLength =
378           DebugLines.Format == dwarf::DWARF64 ? 8 : 4;
379       DebugLines.Version = LineData.getU16(&Offset);
380       DebugLines.PrologueLength =
381           LineData.getUnsigned(&Offset, SizeOfPrologueLength);
382       assert(DebugLines.PrologueLength);
383       const uint64_t EndPrologue = *DebugLines.PrologueLength + Offset;
384 
385       DebugLines.MinInstLength = LineData.getU8(&Offset);
386       if (DebugLines.Version >= 4)
387         DebugLines.MaxOpsPerInst = LineData.getU8(&Offset);
388       DebugLines.DefaultIsStmt = LineData.getU8(&Offset);
389       DebugLines.LineBase = LineData.getU8(&Offset);
390       DebugLines.LineRange = LineData.getU8(&Offset);
391       DebugLines.OpcodeBase = LineData.getU8(&Offset);
392 
393       DebugLines.StandardOpcodeLengths.emplace();
394       for (uint8_t i = 1; i < DebugLines.OpcodeBase; ++i)
395         DebugLines.StandardOpcodeLengths->push_back(LineData.getU8(&Offset));
396 
397       while (Offset < EndPrologue) {
398         StringRef Dir = LineData.getCStr(&Offset);
399         if (!Dir.empty())
400           DebugLines.IncludeDirs.push_back(Dir);
401         else
402           break;
403       }
404 
405       while (Offset < EndPrologue) {
406         DWARFYAML::File TmpFile;
407         if (dumpFileEntry(LineData, Offset, TmpFile))
408           DebugLines.Files.push_back(TmpFile);
409         else
410           break;
411       }
412 
413       const uint64_t LineEnd =
414           LineTableLength + *StmtOffset + SizeOfPrologueLength;
415       while (Offset < LineEnd) {
416         DWARFYAML::LineTableOpcode NewOp = {};
417         NewOp.Opcode = (dwarf::LineNumberOps)LineData.getU8(&Offset);
418         if (NewOp.Opcode == 0) {
419           auto StartExt = Offset;
420           NewOp.ExtLen = LineData.getULEB128(&Offset);
421           NewOp.SubOpcode =
422               (dwarf::LineNumberExtendedOps)LineData.getU8(&Offset);
423           switch (NewOp.SubOpcode) {
424           case dwarf::DW_LNE_set_address:
425           case dwarf::DW_LNE_set_discriminator:
426             NewOp.Data = LineData.getAddress(&Offset);
427             break;
428           case dwarf::DW_LNE_define_file:
429             dumpFileEntry(LineData, Offset, NewOp.FileEntry);
430             break;
431           case dwarf::DW_LNE_end_sequence:
432             break;
433           default:
434             while (Offset < StartExt + *NewOp.ExtLen)
435               NewOp.UnknownOpcodeData.push_back(LineData.getU8(&Offset));
436           }
437         } else if (NewOp.Opcode < *DebugLines.OpcodeBase) {
438           switch (NewOp.Opcode) {
439           case dwarf::DW_LNS_copy:
440           case dwarf::DW_LNS_negate_stmt:
441           case dwarf::DW_LNS_set_basic_block:
442           case dwarf::DW_LNS_const_add_pc:
443           case dwarf::DW_LNS_set_prologue_end:
444           case dwarf::DW_LNS_set_epilogue_begin:
445             break;
446 
447           case dwarf::DW_LNS_advance_pc:
448           case dwarf::DW_LNS_set_file:
449           case dwarf::DW_LNS_set_column:
450           case dwarf::DW_LNS_set_isa:
451             NewOp.Data = LineData.getULEB128(&Offset);
452             break;
453 
454           case dwarf::DW_LNS_advance_line:
455             NewOp.SData = LineData.getSLEB128(&Offset);
456             break;
457 
458           case dwarf::DW_LNS_fixed_advance_pc:
459             NewOp.Data = LineData.getU16(&Offset);
460             break;
461 
462           default:
463             for (uint8_t i = 0;
464                  i < (*DebugLines.StandardOpcodeLengths)[NewOp.Opcode - 1]; ++i)
465               NewOp.StandardOpcodeData.push_back(LineData.getULEB128(&Offset));
466           }
467         }
468         DebugLines.Opcodes.push_back(NewOp);
469       }
470       Y.DebugLines.push_back(DebugLines);
471     }
472   }
473 }
474