xref: /freebsd-src/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp (revision 5ffd83dbcc34f10e07f6d3e968ae6365869615f4)
1*5ffd83dbSDimitry Andric //===- DwarfTransformer.cpp -----------------------------------------------===//
2*5ffd83dbSDimitry Andric //
3*5ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*5ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*5ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*5ffd83dbSDimitry Andric //
7*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
8*5ffd83dbSDimitry Andric 
9*5ffd83dbSDimitry Andric #include <thread>
10*5ffd83dbSDimitry Andric #include <unordered_set>
11*5ffd83dbSDimitry Andric 
12*5ffd83dbSDimitry Andric #include "llvm/DebugInfo/DIContext.h"
13*5ffd83dbSDimitry Andric #include "llvm/DebugInfo/DWARF/DWARFContext.h"
14*5ffd83dbSDimitry Andric #include "llvm/Support/Error.h"
15*5ffd83dbSDimitry Andric #include "llvm/Support/ThreadPool.h"
16*5ffd83dbSDimitry Andric #include "llvm/Support/raw_ostream.h"
17*5ffd83dbSDimitry Andric 
18*5ffd83dbSDimitry Andric #include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
19*5ffd83dbSDimitry Andric #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
20*5ffd83dbSDimitry Andric #include "llvm/DebugInfo/GSYM/GsymCreator.h"
21*5ffd83dbSDimitry Andric #include "llvm/DebugInfo/GSYM/GsymReader.h"
22*5ffd83dbSDimitry Andric #include "llvm/DebugInfo/GSYM/InlineInfo.h"
23*5ffd83dbSDimitry Andric 
24*5ffd83dbSDimitry Andric using namespace llvm;
25*5ffd83dbSDimitry Andric using namespace gsym;
26*5ffd83dbSDimitry Andric 
27*5ffd83dbSDimitry Andric struct llvm::gsym::CUInfo {
28*5ffd83dbSDimitry Andric   const DWARFDebugLine::LineTable *LineTable;
29*5ffd83dbSDimitry Andric   const char *CompDir;
30*5ffd83dbSDimitry Andric   std::vector<uint32_t> FileCache;
31*5ffd83dbSDimitry Andric   uint64_t Language = 0;
32*5ffd83dbSDimitry Andric   uint8_t AddrSize = 0;
33*5ffd83dbSDimitry Andric 
34*5ffd83dbSDimitry Andric   CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) {
35*5ffd83dbSDimitry Andric     LineTable = DICtx.getLineTableForUnit(CU);
36*5ffd83dbSDimitry Andric     CompDir = CU->getCompilationDir();
37*5ffd83dbSDimitry Andric     FileCache.clear();
38*5ffd83dbSDimitry Andric     if (LineTable)
39*5ffd83dbSDimitry Andric       FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX);
40*5ffd83dbSDimitry Andric     DWARFDie Die = CU->getUnitDIE();
41*5ffd83dbSDimitry Andric     Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0);
42*5ffd83dbSDimitry Andric     AddrSize = CU->getAddressByteSize();
43*5ffd83dbSDimitry Andric   }
44*5ffd83dbSDimitry Andric 
45*5ffd83dbSDimitry Andric   /// Return true if Addr is the highest address for a given compile unit. The
46*5ffd83dbSDimitry Andric   /// highest address is encoded as -1, of all ones in the address. These high
47*5ffd83dbSDimitry Andric   /// addresses are used by some linkers to indicate that a function has been
48*5ffd83dbSDimitry Andric   /// dead stripped or didn't end up in the linked executable.
49*5ffd83dbSDimitry Andric   bool isHighestAddress(uint64_t Addr) const {
50*5ffd83dbSDimitry Andric     if (AddrSize == 4)
51*5ffd83dbSDimitry Andric       return Addr == UINT32_MAX;
52*5ffd83dbSDimitry Andric     else if (AddrSize == 8)
53*5ffd83dbSDimitry Andric       return Addr == UINT64_MAX;
54*5ffd83dbSDimitry Andric     return false;
55*5ffd83dbSDimitry Andric   }
56*5ffd83dbSDimitry Andric 
57*5ffd83dbSDimitry Andric   /// Convert a DWARF compile unit file index into a GSYM global file index.
58*5ffd83dbSDimitry Andric   ///
59*5ffd83dbSDimitry Andric   /// Each compile unit in DWARF has its own file table in the line table
60*5ffd83dbSDimitry Andric   /// prologue. GSYM has a single large file table that applies to all files
61*5ffd83dbSDimitry Andric   /// from all of the info in a GSYM file. This function converts between the
62*5ffd83dbSDimitry Andric   /// two and caches and DWARF CU file index that has already been converted so
63*5ffd83dbSDimitry Andric   /// the first client that asks for a compile unit file index will end up
64*5ffd83dbSDimitry Andric   /// doing the conversion, and subsequent clients will get the cached GSYM
65*5ffd83dbSDimitry Andric   /// index.
66*5ffd83dbSDimitry Andric   uint32_t DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx) {
67*5ffd83dbSDimitry Andric     if (!LineTable)
68*5ffd83dbSDimitry Andric       return 0;
69*5ffd83dbSDimitry Andric     assert(DwarfFileIdx < FileCache.size());
70*5ffd83dbSDimitry Andric     uint32_t &GsymFileIdx = FileCache[DwarfFileIdx];
71*5ffd83dbSDimitry Andric     if (GsymFileIdx != UINT32_MAX)
72*5ffd83dbSDimitry Andric       return GsymFileIdx;
73*5ffd83dbSDimitry Andric     std::string File;
74*5ffd83dbSDimitry Andric     if (LineTable->getFileNameByIndex(
75*5ffd83dbSDimitry Andric             DwarfFileIdx, CompDir,
76*5ffd83dbSDimitry Andric             DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File))
77*5ffd83dbSDimitry Andric       GsymFileIdx = Gsym.insertFile(File);
78*5ffd83dbSDimitry Andric     else
79*5ffd83dbSDimitry Andric       GsymFileIdx = 0;
80*5ffd83dbSDimitry Andric     return GsymFileIdx;
81*5ffd83dbSDimitry Andric   }
82*5ffd83dbSDimitry Andric };
83*5ffd83dbSDimitry Andric 
84*5ffd83dbSDimitry Andric 
85*5ffd83dbSDimitry Andric static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) {
86*5ffd83dbSDimitry Andric   if (DWARFDie SpecDie =
87*5ffd83dbSDimitry Andric           Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) {
88*5ffd83dbSDimitry Andric     if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie))
89*5ffd83dbSDimitry Andric       return SpecParent;
90*5ffd83dbSDimitry Andric   }
91*5ffd83dbSDimitry Andric   if (DWARFDie AbstDie =
92*5ffd83dbSDimitry Andric           Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) {
93*5ffd83dbSDimitry Andric     if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie))
94*5ffd83dbSDimitry Andric       return AbstParent;
95*5ffd83dbSDimitry Andric   }
96*5ffd83dbSDimitry Andric 
97*5ffd83dbSDimitry Andric   // We never want to follow parent for inlined subroutine - that would
98*5ffd83dbSDimitry Andric   // give us information about where the function is inlined, not what
99*5ffd83dbSDimitry Andric   // function is inlined
100*5ffd83dbSDimitry Andric   if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine)
101*5ffd83dbSDimitry Andric     return DWARFDie();
102*5ffd83dbSDimitry Andric 
103*5ffd83dbSDimitry Andric   DWARFDie ParentDie = Die.getParent();
104*5ffd83dbSDimitry Andric   if (!ParentDie)
105*5ffd83dbSDimitry Andric     return DWARFDie();
106*5ffd83dbSDimitry Andric 
107*5ffd83dbSDimitry Andric   switch (ParentDie.getTag()) {
108*5ffd83dbSDimitry Andric   case dwarf::DW_TAG_namespace:
109*5ffd83dbSDimitry Andric   case dwarf::DW_TAG_structure_type:
110*5ffd83dbSDimitry Andric   case dwarf::DW_TAG_union_type:
111*5ffd83dbSDimitry Andric   case dwarf::DW_TAG_class_type:
112*5ffd83dbSDimitry Andric   case dwarf::DW_TAG_subprogram:
113*5ffd83dbSDimitry Andric     return ParentDie; // Found parent decl context DIE
114*5ffd83dbSDimitry Andric   case dwarf::DW_TAG_lexical_block:
115*5ffd83dbSDimitry Andric     return GetParentDeclContextDIE(ParentDie);
116*5ffd83dbSDimitry Andric   default:
117*5ffd83dbSDimitry Andric     break;
118*5ffd83dbSDimitry Andric   }
119*5ffd83dbSDimitry Andric 
120*5ffd83dbSDimitry Andric   return DWARFDie();
121*5ffd83dbSDimitry Andric }
122*5ffd83dbSDimitry Andric 
123*5ffd83dbSDimitry Andric /// Get the GsymCreator string table offset for the qualified name for the
124*5ffd83dbSDimitry Andric /// DIE passed in. This function will avoid making copies of any strings in
125*5ffd83dbSDimitry Andric /// the GsymCreator when possible. We don't need to copy a string when the
126*5ffd83dbSDimitry Andric /// string comes from our .debug_str section or is an inlined string in the
127*5ffd83dbSDimitry Andric /// .debug_info. If we create a qualified name string in this function by
128*5ffd83dbSDimitry Andric /// combining multiple strings in the DWARF string table or info, we will make
129*5ffd83dbSDimitry Andric /// a copy of the string when we add it to the string table.
130*5ffd83dbSDimitry Andric static Optional<uint32_t> getQualifiedNameIndex(DWARFDie &Die,
131*5ffd83dbSDimitry Andric                                                 uint64_t Language,
132*5ffd83dbSDimitry Andric                                                 GsymCreator &Gsym) {
133*5ffd83dbSDimitry Andric   // If the dwarf has mangled name, use mangled name
134*5ffd83dbSDimitry Andric   if (auto LinkageName =
135*5ffd83dbSDimitry Andric           dwarf::toString(Die.findRecursively({dwarf::DW_AT_MIPS_linkage_name,
136*5ffd83dbSDimitry Andric                                                dwarf::DW_AT_linkage_name}),
137*5ffd83dbSDimitry Andric                           nullptr))
138*5ffd83dbSDimitry Andric     return Gsym.insertString(LinkageName, /* Copy */ false);
139*5ffd83dbSDimitry Andric 
140*5ffd83dbSDimitry Andric   StringRef ShortName(Die.getName(DINameKind::ShortName));
141*5ffd83dbSDimitry Andric   if (ShortName.empty())
142*5ffd83dbSDimitry Andric     return llvm::None;
143*5ffd83dbSDimitry Andric 
144*5ffd83dbSDimitry Andric   // For C++ and ObjC, prepend names of all parent declaration contexts
145*5ffd83dbSDimitry Andric   if (!(Language == dwarf::DW_LANG_C_plus_plus ||
146*5ffd83dbSDimitry Andric         Language == dwarf::DW_LANG_C_plus_plus_03 ||
147*5ffd83dbSDimitry Andric         Language == dwarf::DW_LANG_C_plus_plus_11 ||
148*5ffd83dbSDimitry Andric         Language == dwarf::DW_LANG_C_plus_plus_14 ||
149*5ffd83dbSDimitry Andric         Language == dwarf::DW_LANG_ObjC_plus_plus ||
150*5ffd83dbSDimitry Andric         // This should not be needed for C, but we see C++ code marked as C
151*5ffd83dbSDimitry Andric         // in some binaries. This should hurt, so let's do it for C as well
152*5ffd83dbSDimitry Andric         Language == dwarf::DW_LANG_C))
153*5ffd83dbSDimitry Andric     return Gsym.insertString(ShortName, /* Copy */ false);
154*5ffd83dbSDimitry Andric 
155*5ffd83dbSDimitry Andric   // Some GCC optimizations create functions with names ending with .isra.<num>
156*5ffd83dbSDimitry Andric   // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name
157*5ffd83dbSDimitry Andric   // If it looks like it could be the case, don't add any prefix
158*5ffd83dbSDimitry Andric   if (ShortName.startswith("_Z") &&
159*5ffd83dbSDimitry Andric       (ShortName.contains(".isra.") || ShortName.contains(".part.")))
160*5ffd83dbSDimitry Andric     return Gsym.insertString(ShortName, /* Copy */ false);
161*5ffd83dbSDimitry Andric 
162*5ffd83dbSDimitry Andric   DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die);
163*5ffd83dbSDimitry Andric   if (ParentDeclCtxDie) {
164*5ffd83dbSDimitry Andric     std::string Name = ShortName.str();
165*5ffd83dbSDimitry Andric     while (ParentDeclCtxDie) {
166*5ffd83dbSDimitry Andric       StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName));
167*5ffd83dbSDimitry Andric       if (!ParentName.empty()) {
168*5ffd83dbSDimitry Andric         // "lambda" names are wrapped in < >. Replace with { }
169*5ffd83dbSDimitry Andric         // to be consistent with demangled names and not to confuse with
170*5ffd83dbSDimitry Andric         // templates
171*5ffd83dbSDimitry Andric         if (ParentName.front() == '<' && ParentName.back() == '>')
172*5ffd83dbSDimitry Andric           Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" +
173*5ffd83dbSDimitry Andric                 "::" + Name;
174*5ffd83dbSDimitry Andric         else
175*5ffd83dbSDimitry Andric           Name = ParentName.str() + "::" + Name;
176*5ffd83dbSDimitry Andric       }
177*5ffd83dbSDimitry Andric       ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie);
178*5ffd83dbSDimitry Andric     }
179*5ffd83dbSDimitry Andric     // Copy the name since we created a new name in a std::string.
180*5ffd83dbSDimitry Andric     return Gsym.insertString(Name, /* Copy */ true);
181*5ffd83dbSDimitry Andric   }
182*5ffd83dbSDimitry Andric   // Don't copy the name since it exists in the DWARF object file.
183*5ffd83dbSDimitry Andric   return Gsym.insertString(ShortName, /* Copy */ false);
184*5ffd83dbSDimitry Andric }
185*5ffd83dbSDimitry Andric 
186*5ffd83dbSDimitry Andric static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) {
187*5ffd83dbSDimitry Andric   bool CheckChildren = true;
188*5ffd83dbSDimitry Andric   switch (Die.getTag()) {
189*5ffd83dbSDimitry Andric   case dwarf::DW_TAG_subprogram:
190*5ffd83dbSDimitry Andric     // Don't look into functions within functions.
191*5ffd83dbSDimitry Andric     CheckChildren = Depth == 0;
192*5ffd83dbSDimitry Andric     break;
193*5ffd83dbSDimitry Andric   case dwarf::DW_TAG_inlined_subroutine:
194*5ffd83dbSDimitry Andric     return true;
195*5ffd83dbSDimitry Andric   default:
196*5ffd83dbSDimitry Andric     break;
197*5ffd83dbSDimitry Andric   }
198*5ffd83dbSDimitry Andric   if (!CheckChildren)
199*5ffd83dbSDimitry Andric     return false;
200*5ffd83dbSDimitry Andric   for (DWARFDie ChildDie : Die.children()) {
201*5ffd83dbSDimitry Andric     if (hasInlineInfo(ChildDie, Depth + 1))
202*5ffd83dbSDimitry Andric       return true;
203*5ffd83dbSDimitry Andric   }
204*5ffd83dbSDimitry Andric   return false;
205*5ffd83dbSDimitry Andric }
206*5ffd83dbSDimitry Andric 
207*5ffd83dbSDimitry Andric static void parseInlineInfo(GsymCreator &Gsym, CUInfo &CUI, DWARFDie Die,
208*5ffd83dbSDimitry Andric                             uint32_t Depth, FunctionInfo &FI,
209*5ffd83dbSDimitry Andric                             InlineInfo &parent) {
210*5ffd83dbSDimitry Andric   if (!hasInlineInfo(Die, Depth))
211*5ffd83dbSDimitry Andric     return;
212*5ffd83dbSDimitry Andric 
213*5ffd83dbSDimitry Andric   dwarf::Tag Tag = Die.getTag();
214*5ffd83dbSDimitry Andric   if (Tag == dwarf::DW_TAG_inlined_subroutine) {
215*5ffd83dbSDimitry Andric     // create new InlineInfo and append to parent.children
216*5ffd83dbSDimitry Andric     InlineInfo II;
217*5ffd83dbSDimitry Andric     DWARFAddressRange FuncRange =
218*5ffd83dbSDimitry Andric         DWARFAddressRange(FI.startAddress(), FI.endAddress());
219*5ffd83dbSDimitry Andric     Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
220*5ffd83dbSDimitry Andric     if (RangesOrError) {
221*5ffd83dbSDimitry Andric       for (const DWARFAddressRange &Range : RangesOrError.get()) {
222*5ffd83dbSDimitry Andric         // Check that the inlined function is within the range of the function
223*5ffd83dbSDimitry Andric         // info, it might not be in case of split functions
224*5ffd83dbSDimitry Andric         if (FuncRange.LowPC <= Range.LowPC && Range.HighPC <= FuncRange.HighPC)
225*5ffd83dbSDimitry Andric           II.Ranges.insert(AddressRange(Range.LowPC, Range.HighPC));
226*5ffd83dbSDimitry Andric       }
227*5ffd83dbSDimitry Andric     }
228*5ffd83dbSDimitry Andric     if (II.Ranges.empty())
229*5ffd83dbSDimitry Andric       return;
230*5ffd83dbSDimitry Andric 
231*5ffd83dbSDimitry Andric     if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym))
232*5ffd83dbSDimitry Andric       II.Name = *NameIndex;
233*5ffd83dbSDimitry Andric     II.CallFile = CUI.DWARFToGSYMFileIndex(
234*5ffd83dbSDimitry Andric         Gsym, dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_file), 0));
235*5ffd83dbSDimitry Andric     II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
236*5ffd83dbSDimitry Andric     // parse all children and append to parent
237*5ffd83dbSDimitry Andric     for (DWARFDie ChildDie : Die.children())
238*5ffd83dbSDimitry Andric       parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, II);
239*5ffd83dbSDimitry Andric     parent.Children.emplace_back(std::move(II));
240*5ffd83dbSDimitry Andric     return;
241*5ffd83dbSDimitry Andric   }
242*5ffd83dbSDimitry Andric   if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) {
243*5ffd83dbSDimitry Andric     // skip this Die and just recurse down
244*5ffd83dbSDimitry Andric     for (DWARFDie ChildDie : Die.children())
245*5ffd83dbSDimitry Andric       parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, parent);
246*5ffd83dbSDimitry Andric   }
247*5ffd83dbSDimitry Andric }
248*5ffd83dbSDimitry Andric 
249*5ffd83dbSDimitry Andric static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI,
250*5ffd83dbSDimitry Andric                                      DWARFDie Die, GsymCreator &Gsym,
251*5ffd83dbSDimitry Andric                                      FunctionInfo &FI) {
252*5ffd83dbSDimitry Andric   std::vector<uint32_t> RowVector;
253*5ffd83dbSDimitry Andric   const uint64_t StartAddress = FI.startAddress();
254*5ffd83dbSDimitry Andric   const uint64_t EndAddress = FI.endAddress();
255*5ffd83dbSDimitry Andric   const uint64_t RangeSize = EndAddress - StartAddress;
256*5ffd83dbSDimitry Andric   const object::SectionedAddress SecAddress{
257*5ffd83dbSDimitry Andric       StartAddress, object::SectionedAddress::UndefSection};
258*5ffd83dbSDimitry Andric 
259*5ffd83dbSDimitry Andric 
260*5ffd83dbSDimitry Andric   if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) {
261*5ffd83dbSDimitry Andric     // If we have a DW_TAG_subprogram but no line entries, fall back to using
262*5ffd83dbSDimitry Andric     // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes.
263*5ffd83dbSDimitry Andric     if (auto FileIdx =
264*5ffd83dbSDimitry Andric             dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_file}))) {
265*5ffd83dbSDimitry Andric       if (auto Line =
266*5ffd83dbSDimitry Andric               dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
267*5ffd83dbSDimitry Andric         LineEntry LE(StartAddress, CUI.DWARFToGSYMFileIndex(Gsym, *FileIdx),
268*5ffd83dbSDimitry Andric                      *Line);
269*5ffd83dbSDimitry Andric         FI.OptLineTable = LineTable();
270*5ffd83dbSDimitry Andric         FI.OptLineTable->push(LE);
271*5ffd83dbSDimitry Andric         // LE.Addr = EndAddress;
272*5ffd83dbSDimitry Andric         // FI.OptLineTable->push(LE);
273*5ffd83dbSDimitry Andric       }
274*5ffd83dbSDimitry Andric     }
275*5ffd83dbSDimitry Andric     return;
276*5ffd83dbSDimitry Andric   }
277*5ffd83dbSDimitry Andric 
278*5ffd83dbSDimitry Andric   FI.OptLineTable = LineTable();
279*5ffd83dbSDimitry Andric   DWARFDebugLine::Row PrevRow;
280*5ffd83dbSDimitry Andric   for (uint32_t RowIndex : RowVector) {
281*5ffd83dbSDimitry Andric     // Take file number and line/column from the row.
282*5ffd83dbSDimitry Andric     const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex];
283*5ffd83dbSDimitry Andric     const uint32_t FileIdx = CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
284*5ffd83dbSDimitry Andric     uint64_t RowAddress = Row.Address.Address;
285*5ffd83dbSDimitry Andric     // Watch out for a RowAddress that is in the middle of a line table entry
286*5ffd83dbSDimitry Andric     // in the DWARF. If we pass an address in between two line table entries
287*5ffd83dbSDimitry Andric     // we will get a RowIndex for the previous valid line table row which won't
288*5ffd83dbSDimitry Andric     // be contained in our function. This is usually a bug in the DWARF due to
289*5ffd83dbSDimitry Andric     // linker problems or LTO or other DWARF re-linking so it is worth emitting
290*5ffd83dbSDimitry Andric     // an error, but not worth stopping the creation of the GSYM.
291*5ffd83dbSDimitry Andric     if (!FI.Range.contains(RowAddress)) {
292*5ffd83dbSDimitry Andric       if (RowAddress < FI.Range.Start) {
293*5ffd83dbSDimitry Andric         Log << "error: DIE has a start address whose LowPC is between the "
294*5ffd83dbSDimitry Andric           "line table Row[" << RowIndex << "] with address "
295*5ffd83dbSDimitry Andric           << HEX64(RowAddress) << " and the next one.\n";
296*5ffd83dbSDimitry Andric         Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
297*5ffd83dbSDimitry Andric         RowAddress = FI.Range.Start;
298*5ffd83dbSDimitry Andric       } else {
299*5ffd83dbSDimitry Andric         continue;
300*5ffd83dbSDimitry Andric       }
301*5ffd83dbSDimitry Andric     }
302*5ffd83dbSDimitry Andric 
303*5ffd83dbSDimitry Andric     LineEntry LE(RowAddress, FileIdx, Row.Line);
304*5ffd83dbSDimitry Andric     if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) {
305*5ffd83dbSDimitry Andric       // We have seen full duplicate line tables for functions in some
306*5ffd83dbSDimitry Andric       // DWARF files. Watch for those here by checking the the last
307*5ffd83dbSDimitry Andric       // row was the function's end address (HighPC) and that the
308*5ffd83dbSDimitry Andric       // current line table entry's address is the same as the first
309*5ffd83dbSDimitry Andric       // line entry we already have in our "function_info.Lines". If
310*5ffd83dbSDimitry Andric       // so break out after printing a warning.
311*5ffd83dbSDimitry Andric       auto FirstLE = FI.OptLineTable->first();
312*5ffd83dbSDimitry Andric       if (FirstLE && *FirstLE == LE) {
313*5ffd83dbSDimitry Andric         Log << "warning: duplicate line table detected for DIE:\n";
314*5ffd83dbSDimitry Andric         Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
315*5ffd83dbSDimitry Andric       } else {
316*5ffd83dbSDimitry Andric         // Print out (ignore if os == nulls as this is expensive)
317*5ffd83dbSDimitry Andric         Log << "error: line table has addresses that do not "
318*5ffd83dbSDimitry Andric              << "monotonically increase:\n";
319*5ffd83dbSDimitry Andric         for (uint32_t RowIndex2 : RowVector) {
320*5ffd83dbSDimitry Andric           CUI.LineTable->Rows[RowIndex2].dump(Log);
321*5ffd83dbSDimitry Andric         }
322*5ffd83dbSDimitry Andric         Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
323*5ffd83dbSDimitry Andric       }
324*5ffd83dbSDimitry Andric       break;
325*5ffd83dbSDimitry Andric     }
326*5ffd83dbSDimitry Andric 
327*5ffd83dbSDimitry Andric     // Skip multiple line entries for the same file and line.
328*5ffd83dbSDimitry Andric     auto LastLE = FI.OptLineTable->last();
329*5ffd83dbSDimitry Andric     if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line)
330*5ffd83dbSDimitry Andric         continue;
331*5ffd83dbSDimitry Andric     // Only push a row if it isn't an end sequence. End sequence markers are
332*5ffd83dbSDimitry Andric     // included for the last address in a function or the last contiguous
333*5ffd83dbSDimitry Andric     // address in a sequence.
334*5ffd83dbSDimitry Andric     if (Row.EndSequence) {
335*5ffd83dbSDimitry Andric       // End sequence means that the next line entry could have a lower address
336*5ffd83dbSDimitry Andric       // that the previous entries. So we clear the previous row so we don't
337*5ffd83dbSDimitry Andric       // trigger the line table error about address that do not monotonically
338*5ffd83dbSDimitry Andric       // increase.
339*5ffd83dbSDimitry Andric       PrevRow = DWARFDebugLine::Row();
340*5ffd83dbSDimitry Andric     } else {
341*5ffd83dbSDimitry Andric       FI.OptLineTable->push(LE);
342*5ffd83dbSDimitry Andric       PrevRow = Row;
343*5ffd83dbSDimitry Andric     }
344*5ffd83dbSDimitry Andric   }
345*5ffd83dbSDimitry Andric   // If not line table rows were added, clear the line table so we don't encode
346*5ffd83dbSDimitry Andric   // on in the GSYM file.
347*5ffd83dbSDimitry Andric   if (FI.OptLineTable->empty())
348*5ffd83dbSDimitry Andric     FI.OptLineTable = llvm::None;
349*5ffd83dbSDimitry Andric }
350*5ffd83dbSDimitry Andric 
351*5ffd83dbSDimitry Andric void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) {
352*5ffd83dbSDimitry Andric   switch (Die.getTag()) {
353*5ffd83dbSDimitry Andric   case dwarf::DW_TAG_subprogram: {
354*5ffd83dbSDimitry Andric     Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
355*5ffd83dbSDimitry Andric     if (!RangesOrError) {
356*5ffd83dbSDimitry Andric       consumeError(RangesOrError.takeError());
357*5ffd83dbSDimitry Andric       break;
358*5ffd83dbSDimitry Andric     }
359*5ffd83dbSDimitry Andric     const DWARFAddressRangesVector &Ranges = RangesOrError.get();
360*5ffd83dbSDimitry Andric     if (Ranges.empty())
361*5ffd83dbSDimitry Andric       break;
362*5ffd83dbSDimitry Andric     auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym);
363*5ffd83dbSDimitry Andric     if (!NameIndex) {
364*5ffd83dbSDimitry Andric       OS << "error: function at " << HEX64(Die.getOffset())
365*5ffd83dbSDimitry Andric          << " has no name\n ";
366*5ffd83dbSDimitry Andric       Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
367*5ffd83dbSDimitry Andric       break;
368*5ffd83dbSDimitry Andric     }
369*5ffd83dbSDimitry Andric 
370*5ffd83dbSDimitry Andric     // Create a function_info for each range
371*5ffd83dbSDimitry Andric     for (const DWARFAddressRange &Range : Ranges) {
372*5ffd83dbSDimitry Andric       // The low PC must be less than the high PC. Many linkers don't remove
373*5ffd83dbSDimitry Andric       // DWARF for functions that don't get linked into the final executable.
374*5ffd83dbSDimitry Andric       // If both the high and low pc have relocations, linkers will often set
375*5ffd83dbSDimitry Andric       // the address values for both to the same value to indicate the function
376*5ffd83dbSDimitry Andric       // has been remove. Other linkers have been known to set the one or both
377*5ffd83dbSDimitry Andric       // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8
378*5ffd83dbSDimitry Andric       // byte addresses to indicate the function isn't valid. The check below
379*5ffd83dbSDimitry Andric       // tries to watch for these cases and abort if it runs into them.
380*5ffd83dbSDimitry Andric       if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC))
381*5ffd83dbSDimitry Andric         break;
382*5ffd83dbSDimitry Andric 
383*5ffd83dbSDimitry Andric       // Many linkers can't remove DWARF and might set the LowPC to zero. Since
384*5ffd83dbSDimitry Andric       // high PC can be an offset from the low PC in more recent DWARF versions
385*5ffd83dbSDimitry Andric       // we need to watch for a zero'ed low pc which we do using
386*5ffd83dbSDimitry Andric       // ValidTextRanges below.
387*5ffd83dbSDimitry Andric       if (!Gsym.IsValidTextAddress(Range.LowPC)) {
388*5ffd83dbSDimitry Andric         // We expect zero and -1 to be invalid addresses in DWARF depending
389*5ffd83dbSDimitry Andric         // on the linker of the DWARF. This indicates a function was stripped
390*5ffd83dbSDimitry Andric         // and the debug info wasn't able to be stripped from the DWARF. If
391*5ffd83dbSDimitry Andric         // the LowPC isn't zero or -1, then we should emit an error.
392*5ffd83dbSDimitry Andric         if (Range.LowPC != 0) {
393*5ffd83dbSDimitry Andric           // Unexpected invalid address, emit an error
394*5ffd83dbSDimitry Andric           Log << "warning: DIE has an address range whose start address is "
395*5ffd83dbSDimitry Andric               "not in any executable sections (" <<
396*5ffd83dbSDimitry Andric               *Gsym.GetValidTextRanges() << ") and will not be processed:\n";
397*5ffd83dbSDimitry Andric           Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
398*5ffd83dbSDimitry Andric         }
399*5ffd83dbSDimitry Andric         break;
400*5ffd83dbSDimitry Andric       }
401*5ffd83dbSDimitry Andric 
402*5ffd83dbSDimitry Andric       FunctionInfo FI;
403*5ffd83dbSDimitry Andric       FI.setStartAddress(Range.LowPC);
404*5ffd83dbSDimitry Andric       FI.setEndAddress(Range.HighPC);
405*5ffd83dbSDimitry Andric       FI.Name = *NameIndex;
406*5ffd83dbSDimitry Andric       if (CUI.LineTable) {
407*5ffd83dbSDimitry Andric         convertFunctionLineTable(OS, CUI, Die, Gsym, FI);
408*5ffd83dbSDimitry Andric       }
409*5ffd83dbSDimitry Andric       if (hasInlineInfo(Die, 0)) {
410*5ffd83dbSDimitry Andric         FI.Inline = InlineInfo();
411*5ffd83dbSDimitry Andric         FI.Inline->Name = *NameIndex;
412*5ffd83dbSDimitry Andric         FI.Inline->Ranges.insert(FI.Range);
413*5ffd83dbSDimitry Andric         parseInlineInfo(Gsym, CUI, Die, 0, FI, *FI.Inline);
414*5ffd83dbSDimitry Andric       }
415*5ffd83dbSDimitry Andric       Gsym.addFunctionInfo(std::move(FI));
416*5ffd83dbSDimitry Andric     }
417*5ffd83dbSDimitry Andric   } break;
418*5ffd83dbSDimitry Andric   default:
419*5ffd83dbSDimitry Andric     break;
420*5ffd83dbSDimitry Andric   }
421*5ffd83dbSDimitry Andric   for (DWARFDie ChildDie : Die.children())
422*5ffd83dbSDimitry Andric     handleDie(OS, CUI, ChildDie);
423*5ffd83dbSDimitry Andric }
424*5ffd83dbSDimitry Andric 
425*5ffd83dbSDimitry Andric Error DwarfTransformer::convert(uint32_t NumThreads) {
426*5ffd83dbSDimitry Andric   size_t NumBefore = Gsym.getNumFunctionInfos();
427*5ffd83dbSDimitry Andric   if (NumThreads == 1) {
428*5ffd83dbSDimitry Andric     // Parse all DWARF data from this thread, use the same string/file table
429*5ffd83dbSDimitry Andric     // for everything
430*5ffd83dbSDimitry Andric     for (const auto &CU : DICtx.compile_units()) {
431*5ffd83dbSDimitry Andric       DWARFDie Die = CU->getUnitDIE(false);
432*5ffd83dbSDimitry Andric       CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
433*5ffd83dbSDimitry Andric       handleDie(Log, CUI, Die);
434*5ffd83dbSDimitry Andric     }
435*5ffd83dbSDimitry Andric   } else {
436*5ffd83dbSDimitry Andric     // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
437*5ffd83dbSDimitry Andric     // front before we start accessing any DIEs since there might be
438*5ffd83dbSDimitry Andric     // cross compile unit references in the DWARF. If we don't do this we can
439*5ffd83dbSDimitry Andric     // end up crashing.
440*5ffd83dbSDimitry Andric 
441*5ffd83dbSDimitry Andric     // We need to call getAbbreviations sequentially first so that getUnitDIE()
442*5ffd83dbSDimitry Andric     // only works with its local data.
443*5ffd83dbSDimitry Andric     for (const auto &CU : DICtx.compile_units())
444*5ffd83dbSDimitry Andric       CU->getAbbreviations();
445*5ffd83dbSDimitry Andric 
446*5ffd83dbSDimitry Andric     // Now parse all DIEs in case we have cross compile unit references in a
447*5ffd83dbSDimitry Andric     // thread pool.
448*5ffd83dbSDimitry Andric     ThreadPool pool(hardware_concurrency(NumThreads));
449*5ffd83dbSDimitry Andric     for (const auto &CU : DICtx.compile_units())
450*5ffd83dbSDimitry Andric       pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); });
451*5ffd83dbSDimitry Andric     pool.wait();
452*5ffd83dbSDimitry Andric 
453*5ffd83dbSDimitry Andric     // Now convert all DWARF to GSYM in a thread pool.
454*5ffd83dbSDimitry Andric     std::mutex LogMutex;
455*5ffd83dbSDimitry Andric     for (const auto &CU : DICtx.compile_units()) {
456*5ffd83dbSDimitry Andric       DWARFDie Die = CU->getUnitDIE(false /*CUDieOnly*/);
457*5ffd83dbSDimitry Andric       if (Die) {
458*5ffd83dbSDimitry Andric         CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
459*5ffd83dbSDimitry Andric         pool.async([this, CUI, &LogMutex, Die]() mutable {
460*5ffd83dbSDimitry Andric           std::string ThreadLogStorage;
461*5ffd83dbSDimitry Andric           raw_string_ostream ThreadOS(ThreadLogStorage);
462*5ffd83dbSDimitry Andric           handleDie(ThreadOS, CUI, Die);
463*5ffd83dbSDimitry Andric           ThreadOS.flush();
464*5ffd83dbSDimitry Andric           if (!ThreadLogStorage.empty()) {
465*5ffd83dbSDimitry Andric             // Print ThreadLogStorage lines into an actual stream under a lock
466*5ffd83dbSDimitry Andric             std::lock_guard<std::mutex> guard(LogMutex);
467*5ffd83dbSDimitry Andric             Log << ThreadLogStorage;
468*5ffd83dbSDimitry Andric           }
469*5ffd83dbSDimitry Andric         });
470*5ffd83dbSDimitry Andric       }
471*5ffd83dbSDimitry Andric     }
472*5ffd83dbSDimitry Andric     pool.wait();
473*5ffd83dbSDimitry Andric   }
474*5ffd83dbSDimitry Andric   size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
475*5ffd83dbSDimitry Andric   Log << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
476*5ffd83dbSDimitry Andric   return Error::success();
477*5ffd83dbSDimitry Andric }
478*5ffd83dbSDimitry Andric 
479*5ffd83dbSDimitry Andric llvm::Error DwarfTransformer::verify(StringRef GsymPath) {
480*5ffd83dbSDimitry Andric   Log << "Verifying GSYM file \"" << GsymPath << "\":\n";
481*5ffd83dbSDimitry Andric 
482*5ffd83dbSDimitry Andric   auto Gsym = GsymReader::openFile(GsymPath);
483*5ffd83dbSDimitry Andric   if (!Gsym)
484*5ffd83dbSDimitry Andric     return Gsym.takeError();
485*5ffd83dbSDimitry Andric 
486*5ffd83dbSDimitry Andric   auto NumAddrs = Gsym->getNumAddresses();
487*5ffd83dbSDimitry Andric   DILineInfoSpecifier DLIS(
488*5ffd83dbSDimitry Andric       DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
489*5ffd83dbSDimitry Andric       DILineInfoSpecifier::FunctionNameKind::LinkageName);
490*5ffd83dbSDimitry Andric   std::string gsymFilename;
491*5ffd83dbSDimitry Andric   for (uint32_t I = 0; I < NumAddrs; ++I) {
492*5ffd83dbSDimitry Andric     auto FuncAddr = Gsym->getAddress(I);
493*5ffd83dbSDimitry Andric     if (!FuncAddr)
494*5ffd83dbSDimitry Andric         return createStringError(std::errc::invalid_argument,
495*5ffd83dbSDimitry Andric                                   "failed to extract address[%i]", I);
496*5ffd83dbSDimitry Andric 
497*5ffd83dbSDimitry Andric     auto FI = Gsym->getFunctionInfo(*FuncAddr);
498*5ffd83dbSDimitry Andric     if (!FI)
499*5ffd83dbSDimitry Andric       return createStringError(std::errc::invalid_argument,
500*5ffd83dbSDimitry Andric                             "failed to extract function info for address 0x%"
501*5ffd83dbSDimitry Andric                             PRIu64, *FuncAddr);
502*5ffd83dbSDimitry Andric 
503*5ffd83dbSDimitry Andric     for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) {
504*5ffd83dbSDimitry Andric       const object::SectionedAddress SectAddr{
505*5ffd83dbSDimitry Andric           Addr, object::SectionedAddress::UndefSection};
506*5ffd83dbSDimitry Andric       auto LR = Gsym->lookup(Addr);
507*5ffd83dbSDimitry Andric       if (!LR)
508*5ffd83dbSDimitry Andric         return LR.takeError();
509*5ffd83dbSDimitry Andric 
510*5ffd83dbSDimitry Andric       auto DwarfInlineInfos =
511*5ffd83dbSDimitry Andric           DICtx.getInliningInfoForAddress(SectAddr, DLIS);
512*5ffd83dbSDimitry Andric       uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames();
513*5ffd83dbSDimitry Andric       if (NumDwarfInlineInfos == 0) {
514*5ffd83dbSDimitry Andric         DwarfInlineInfos.addFrame(
515*5ffd83dbSDimitry Andric             DICtx.getLineInfoForAddress(SectAddr, DLIS));
516*5ffd83dbSDimitry Andric       }
517*5ffd83dbSDimitry Andric 
518*5ffd83dbSDimitry Andric       // Check for 1 entry that has no file and line info
519*5ffd83dbSDimitry Andric       if (NumDwarfInlineInfos == 1 &&
520*5ffd83dbSDimitry Andric           DwarfInlineInfos.getFrame(0).FileName == "<invalid>") {
521*5ffd83dbSDimitry Andric         DwarfInlineInfos = DIInliningInfo();
522*5ffd83dbSDimitry Andric         NumDwarfInlineInfos = 0;
523*5ffd83dbSDimitry Andric       }
524*5ffd83dbSDimitry Andric       if (NumDwarfInlineInfos > 0 &&
525*5ffd83dbSDimitry Andric           NumDwarfInlineInfos != LR->Locations.size()) {
526*5ffd83dbSDimitry Andric         Log << "error: address " << HEX64(Addr) << " has "
527*5ffd83dbSDimitry Andric             << NumDwarfInlineInfos << " DWARF inline frames and GSYM has "
528*5ffd83dbSDimitry Andric             << LR->Locations.size() << "\n";
529*5ffd83dbSDimitry Andric         Log << "    " << NumDwarfInlineInfos << " DWARF frames:\n";
530*5ffd83dbSDimitry Andric         for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) {
531*5ffd83dbSDimitry Andric           const auto dii = DwarfInlineInfos.getFrame(Idx);
532*5ffd83dbSDimitry Andric           Log << "    [" << Idx << "]: " << dii.FunctionName << " @ "
533*5ffd83dbSDimitry Andric               << dii.FileName << ':' << dii.Line << '\n';
534*5ffd83dbSDimitry Andric         }
535*5ffd83dbSDimitry Andric         Log << "    " << LR->Locations.size() << " GSYM frames:\n";
536*5ffd83dbSDimitry Andric         for (size_t Idx = 0, count = LR->Locations.size();
537*5ffd83dbSDimitry Andric               Idx < count; ++Idx) {
538*5ffd83dbSDimitry Andric           const auto &gii = LR->Locations[Idx];
539*5ffd83dbSDimitry Andric           Log << "    [" << Idx << "]: " << gii.Name << " @ " << gii.Dir
540*5ffd83dbSDimitry Andric               << '/' << gii.Base << ':' << gii.Line << '\n';
541*5ffd83dbSDimitry Andric         }
542*5ffd83dbSDimitry Andric         DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS);
543*5ffd83dbSDimitry Andric         Gsym->dump(Log, *FI);
544*5ffd83dbSDimitry Andric         continue;
545*5ffd83dbSDimitry Andric       }
546*5ffd83dbSDimitry Andric 
547*5ffd83dbSDimitry Andric       for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
548*5ffd83dbSDimitry Andric             ++Idx) {
549*5ffd83dbSDimitry Andric         const auto &gii = LR->Locations[Idx];
550*5ffd83dbSDimitry Andric         if (Idx < NumDwarfInlineInfos) {
551*5ffd83dbSDimitry Andric           const auto dii = DwarfInlineInfos.getFrame(Idx);
552*5ffd83dbSDimitry Andric           gsymFilename = LR->getSourceFile(Idx);
553*5ffd83dbSDimitry Andric           // Verify function name
554*5ffd83dbSDimitry Andric           if (dii.FunctionName.find(gii.Name.str()) != 0)
555*5ffd83dbSDimitry Andric             Log << "error: address " << HEX64(Addr) << " DWARF function \""
556*5ffd83dbSDimitry Andric                 << dii.FunctionName.c_str()
557*5ffd83dbSDimitry Andric                 << "\" doesn't match GSYM function \"" << gii.Name << "\"\n";
558*5ffd83dbSDimitry Andric           // Verify source file path
559*5ffd83dbSDimitry Andric           if (dii.FileName != gsymFilename)
560*5ffd83dbSDimitry Andric             Log << "error: address " << HEX64(Addr) << " DWARF path \""
561*5ffd83dbSDimitry Andric                 << dii.FileName.c_str() << "\" doesn't match GSYM path \""
562*5ffd83dbSDimitry Andric                 << gsymFilename.c_str() << "\"\n";
563*5ffd83dbSDimitry Andric           // Verify source file line
564*5ffd83dbSDimitry Andric           if (dii.Line != gii.Line)
565*5ffd83dbSDimitry Andric             Log << "error: address " << HEX64(Addr) << " DWARF line "
566*5ffd83dbSDimitry Andric                 << dii.Line << " != GSYM line " << gii.Line << "\n";
567*5ffd83dbSDimitry Andric         }
568*5ffd83dbSDimitry Andric       }
569*5ffd83dbSDimitry Andric     }
570*5ffd83dbSDimitry Andric   }
571*5ffd83dbSDimitry Andric   return Error::success();
572*5ffd83dbSDimitry Andric }
573