xref: /llvm-project/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp (revision 558de0e1f993f413a9c8b93d969b28b651c6e437)
1 //===- DwarfTransformer.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/DebugInfo/DIContext.h"
10 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
11 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
12 #include "llvm/Support/Error.h"
13 #include "llvm/Support/ThreadPool.h"
14 #include "llvm/Support/raw_ostream.h"
15 
16 #include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
17 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
18 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
19 #include "llvm/DebugInfo/GSYM/GsymReader.h"
20 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
21 #include "llvm/DebugInfo/GSYM/OutputAggregator.h"
22 
23 #include <optional>
24 
25 using namespace llvm;
26 using namespace gsym;
27 
28 struct llvm::gsym::CUInfo {
29   const DWARFDebugLine::LineTable *LineTable;
30   const char *CompDir;
31   std::vector<uint32_t> FileCache;
32   uint64_t Language = 0;
33   uint8_t AddrSize = 0;
34 
35   CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) {
36     LineTable = DICtx.getLineTableForUnit(CU);
37     CompDir = CU->getCompilationDir();
38     FileCache.clear();
39     if (LineTable)
40       FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX);
41     DWARFDie Die = CU->getUnitDIE();
42     Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0);
43     AddrSize = CU->getAddressByteSize();
44   }
45 
46   /// Return true if Addr is the highest address for a given compile unit. The
47   /// highest address is encoded as -1, of all ones in the address. These high
48   /// addresses are used by some linkers to indicate that a function has been
49   /// dead stripped or didn't end up in the linked executable.
50   bool isHighestAddress(uint64_t Addr) const {
51     if (AddrSize == 4)
52       return Addr == UINT32_MAX;
53     else if (AddrSize == 8)
54       return Addr == UINT64_MAX;
55     return false;
56   }
57 
58   /// Convert a DWARF compile unit file index into a GSYM global file index.
59   ///
60   /// Each compile unit in DWARF has its own file table in the line table
61   /// prologue. GSYM has a single large file table that applies to all files
62   /// from all of the info in a GSYM file. This function converts between the
63   /// two and caches and DWARF CU file index that has already been converted so
64   /// the first client that asks for a compile unit file index will end up
65   /// doing the conversion, and subsequent clients will get the cached GSYM
66   /// index.
67   std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym,
68                                                uint32_t DwarfFileIdx) {
69     if (!LineTable || DwarfFileIdx >= FileCache.size())
70       return std::nullopt;
71     uint32_t &GsymFileIdx = FileCache[DwarfFileIdx];
72     if (GsymFileIdx != UINT32_MAX)
73       return GsymFileIdx;
74     std::string File;
75     if (LineTable->getFileNameByIndex(
76             DwarfFileIdx, CompDir,
77             DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File))
78       GsymFileIdx = Gsym.insertFile(File);
79     else
80       GsymFileIdx = 0;
81     return GsymFileIdx;
82   }
83 };
84 
85 
86 static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) {
87   if (DWARFDie SpecDie =
88           Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) {
89     if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie))
90       return SpecParent;
91   }
92   if (DWARFDie AbstDie =
93           Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) {
94     if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie))
95       return AbstParent;
96   }
97 
98   // We never want to follow parent for inlined subroutine - that would
99   // give us information about where the function is inlined, not what
100   // function is inlined
101   if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine)
102     return DWARFDie();
103 
104   DWARFDie ParentDie = Die.getParent();
105   if (!ParentDie)
106     return DWARFDie();
107 
108   switch (ParentDie.getTag()) {
109   case dwarf::DW_TAG_namespace:
110   case dwarf::DW_TAG_structure_type:
111   case dwarf::DW_TAG_union_type:
112   case dwarf::DW_TAG_class_type:
113   case dwarf::DW_TAG_subprogram:
114     return ParentDie; // Found parent decl context DIE
115   case dwarf::DW_TAG_lexical_block:
116     return GetParentDeclContextDIE(ParentDie);
117   default:
118     break;
119   }
120 
121   return DWARFDie();
122 }
123 
124 /// Get the GsymCreator string table offset for the qualified name for the
125 /// DIE passed in. This function will avoid making copies of any strings in
126 /// the GsymCreator when possible. We don't need to copy a string when the
127 /// string comes from our .debug_str section or is an inlined string in the
128 /// .debug_info. If we create a qualified name string in this function by
129 /// combining multiple strings in the DWARF string table or info, we will make
130 /// a copy of the string when we add it to the string table.
131 static std::optional<uint32_t>
132 getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) {
133   // If the dwarf has mangled name, use mangled name
134   if (auto LinkageName = Die.getLinkageName()) {
135     // We have seen cases were linkage name is actually empty.
136     if (strlen(LinkageName) > 0)
137       return Gsym.insertString(LinkageName, /* Copy */ false);
138   }
139 
140   StringRef ShortName(Die.getName(DINameKind::ShortName));
141   if (ShortName.empty())
142     return std::nullopt;
143 
144   // For C++ and ObjC, prepend names of all parent declaration contexts
145   if (!(Language == dwarf::DW_LANG_C_plus_plus ||
146         Language == dwarf::DW_LANG_C_plus_plus_03 ||
147         Language == dwarf::DW_LANG_C_plus_plus_11 ||
148         Language == dwarf::DW_LANG_C_plus_plus_14 ||
149         Language == dwarf::DW_LANG_ObjC_plus_plus ||
150         // This should not be needed for C, but we see C++ code marked as C
151         // in some binaries. This should hurt, so let's do it for C as well
152         Language == dwarf::DW_LANG_C))
153     return Gsym.insertString(ShortName, /* Copy */ false);
154 
155   // Some GCC optimizations create functions with names ending with .isra.<num>
156   // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name
157   // If it looks like it could be the case, don't add any prefix
158   if (ShortName.starts_with("_Z") &&
159       (ShortName.contains(".isra.") || ShortName.contains(".part.")))
160     return Gsym.insertString(ShortName, /* Copy */ false);
161 
162   DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die);
163   if (ParentDeclCtxDie) {
164     std::string Name = ShortName.str();
165     while (ParentDeclCtxDie) {
166       StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName));
167       if (!ParentName.empty()) {
168         // "lambda" names are wrapped in < >. Replace with { }
169         // to be consistent with demangled names and not to confuse with
170         // templates
171         if (ParentName.front() == '<' && ParentName.back() == '>')
172           Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" +
173                 "::" + Name;
174         else
175           Name = ParentName.str() + "::" + Name;
176       }
177       ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie);
178     }
179     // Copy the name since we created a new name in a std::string.
180     return Gsym.insertString(Name, /* Copy */ true);
181   }
182   // Don't copy the name since it exists in the DWARF object file.
183   return Gsym.insertString(ShortName, /* Copy */ false);
184 }
185 
186 static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) {
187   bool CheckChildren = true;
188   switch (Die.getTag()) {
189   case dwarf::DW_TAG_subprogram:
190     // Don't look into functions within functions.
191     CheckChildren = Depth == 0;
192     break;
193   case dwarf::DW_TAG_inlined_subroutine:
194     return true;
195   default:
196     break;
197   }
198   if (!CheckChildren)
199     return false;
200   for (DWARFDie ChildDie : Die.children()) {
201     if (hasInlineInfo(ChildDie, Depth + 1))
202       return true;
203   }
204   return false;
205 }
206 
207 static AddressRanges
208 ConvertDWARFRanges(const DWARFAddressRangesVector &DwarfRanges) {
209   AddressRanges Ranges;
210   for (const DWARFAddressRange &DwarfRange : DwarfRanges) {
211     if (DwarfRange.LowPC < DwarfRange.HighPC)
212       Ranges.insert({DwarfRange.LowPC, DwarfRange.HighPC});
213   }
214   return Ranges;
215 }
216 
217 static void parseInlineInfo(GsymCreator &Gsym, OutputAggregator &Out,
218                             CUInfo &CUI, DWARFDie Die, uint32_t Depth,
219                             FunctionInfo &FI, InlineInfo &Parent,
220                             const AddressRanges &AllParentRanges,
221                             bool &WarnIfEmpty) {
222   if (!hasInlineInfo(Die, Depth))
223     return;
224 
225   dwarf::Tag Tag = Die.getTag();
226   if (Tag == dwarf::DW_TAG_inlined_subroutine) {
227     // create new InlineInfo and append to parent.children
228     InlineInfo II;
229     AddressRanges AllInlineRanges;
230     Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
231     if (RangesOrError) {
232       AllInlineRanges = ConvertDWARFRanges(RangesOrError.get());
233       uint32_t EmptyCount = 0;
234       for (const AddressRange &InlineRange : AllInlineRanges) {
235         // Check for empty inline range in case inline function was outlined
236         // or has not code
237         if (InlineRange.empty()) {
238           ++EmptyCount;
239         } else {
240           if (Parent.Ranges.contains(InlineRange)) {
241             II.Ranges.insert(InlineRange);
242           } else {
243             // Only warn if the current inline range is not within any of all
244             // of the parent ranges. If we have a DW_TAG_subpgram with multiple
245             // ranges we will emit a FunctionInfo for each range of that
246             // function that only emits information within the current range,
247             // so we only want to emit an error if the DWARF has issues, not
248             // when a range currently just isn't in the range we are currently
249             // parsing for.
250             if (AllParentRanges.contains(InlineRange)) {
251               WarnIfEmpty = false;
252             } else
253               Out.Report("Function DIE has uncontained address range",
254                          [&](raw_ostream &OS) {
255                            OS << "error: inlined function DIE at "
256                               << HEX32(Die.getOffset()) << " has a range ["
257                               << HEX64(InlineRange.start()) << " - "
258                               << HEX64(InlineRange.end())
259                               << ") that isn't contained in "
260                               << "any parent address ranges, this inline range "
261                                  "will be "
262                                  "removed.\n";
263                          });
264           }
265         }
266       }
267       // If we have all empty ranges for the inlines, then don't warn if we
268       // have an empty InlineInfo at the top level as all inline functions
269       // were elided.
270       if (EmptyCount == AllInlineRanges.size())
271         WarnIfEmpty = false;
272     }
273     if (II.Ranges.empty())
274       return;
275 
276     if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym))
277       II.Name = *NameIndex;
278     const uint64_t DwarfFileIdx = dwarf::toUnsigned(
279         Die.findRecursively(dwarf::DW_AT_call_file), UINT32_MAX);
280     std::optional<uint32_t> OptGSymFileIdx =
281         CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx);
282     if (OptGSymFileIdx) {
283       II.CallFile = OptGSymFileIdx.value();
284       II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
285       // parse all children and append to parent
286       for (DWARFDie ChildDie : Die.children())
287         parseInlineInfo(Gsym, Out, CUI, ChildDie, Depth + 1, FI, II,
288                         AllInlineRanges, WarnIfEmpty);
289       Parent.Children.emplace_back(std::move(II));
290     } else
291       Out.Report(
292           "Inlined function die has invlaid file index in DW_AT_call_file",
293           [&](raw_ostream &OS) {
294             OS << "error: inlined function DIE at " << HEX32(Die.getOffset())
295                << " has an invalid file index " << DwarfFileIdx
296                << " in its DW_AT_call_file attribute, this inline entry and "
297                   "all "
298                << "children will be removed.\n";
299           });
300     return;
301   }
302   if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) {
303     // skip this Die and just recurse down
304     for (DWARFDie ChildDie : Die.children())
305       parseInlineInfo(Gsym, Out, CUI, ChildDie, Depth + 1, FI, Parent,
306                       AllParentRanges, WarnIfEmpty);
307   }
308 }
309 
310 static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI,
311                                      DWARFDie Die, GsymCreator &Gsym,
312                                      FunctionInfo &FI) {
313   std::vector<uint32_t> RowVector;
314   const uint64_t StartAddress = FI.startAddress();
315   const uint64_t EndAddress = FI.endAddress();
316   const uint64_t RangeSize = EndAddress - StartAddress;
317   const object::SectionedAddress SecAddress{
318       StartAddress, object::SectionedAddress::UndefSection};
319 
320 
321   if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) {
322     // If we have a DW_TAG_subprogram but no line entries, fall back to using
323     // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes.
324     std::string FilePath = Die.getDeclFile(
325         DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
326     if (FilePath.empty()) {
327       // If we had a DW_AT_decl_file, but got no file then we need to emit a
328       // warning.
329       Out.Report("Invalid file index in DW_AT_decl_file", [&](raw_ostream &OS) {
330         const uint64_t DwarfFileIdx = dwarf::toUnsigned(
331             Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
332         OS << "error: function DIE at " << HEX32(Die.getOffset())
333            << " has an invalid file index " << DwarfFileIdx
334            << " in its DW_AT_decl_file attribute, unable to create a single "
335            << "line entry from the DW_AT_decl_file/DW_AT_decl_line "
336            << "attributes.\n";
337       });
338       return;
339     }
340     if (auto Line =
341             dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
342       LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line);
343       FI.OptLineTable = LineTable();
344       FI.OptLineTable->push(LE);
345     }
346     return;
347   }
348 
349   FI.OptLineTable = LineTable();
350   DWARFDebugLine::Row PrevRow;
351   for (uint32_t RowIndex : RowVector) {
352     // Take file number and line/column from the row.
353     const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex];
354     std::optional<uint32_t> OptFileIdx =
355         CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
356     if (!OptFileIdx) {
357       Out.Report(
358           "Invalid file index in DWARF line table", [&](raw_ostream &OS) {
359             OS << "error: function DIE at " << HEX32(Die.getOffset()) << " has "
360                << "a line entry with invalid DWARF file index, this entry will "
361                << "be removed:\n";
362             Row.dumpTableHeader(OS, /*Indent=*/0);
363             Row.dump(OS);
364             OS << "\n";
365           });
366       continue;
367     }
368     const uint32_t FileIdx = OptFileIdx.value();
369     uint64_t RowAddress = Row.Address.Address;
370     // Watch out for a RowAddress that is in the middle of a line table entry
371     // in the DWARF. If we pass an address in between two line table entries
372     // we will get a RowIndex for the previous valid line table row which won't
373     // be contained in our function. This is usually a bug in the DWARF due to
374     // linker problems or LTO or other DWARF re-linking so it is worth emitting
375     // an error, but not worth stopping the creation of the GSYM.
376     if (!FI.Range.contains(RowAddress)) {
377       if (RowAddress < FI.Range.start()) {
378         Out.Report("Start address lies between valid Row table entries",
379                    [&](raw_ostream &OS) {
380                      OS << "error: DIE has a start address whose LowPC is "
381                            "between the "
382                            "line table Row["
383                         << RowIndex << "] with address " << HEX64(RowAddress)
384                         << " and the next one.\n";
385                      Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
386                    });
387         RowAddress = FI.Range.start();
388       } else {
389         continue;
390       }
391     }
392 
393     LineEntry LE(RowAddress, FileIdx, Row.Line);
394     if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) {
395       // We have seen full duplicate line tables for functions in some
396       // DWARF files. Watch for those here by checking the last
397       // row was the function's end address (HighPC) and that the
398       // current line table entry's address is the same as the first
399       // line entry we already have in our "function_info.Lines". If
400       // so break out after printing a warning.
401       auto FirstLE = FI.OptLineTable->first();
402       if (FirstLE && *FirstLE == LE)
403         // if (Log && !Gsym.isQuiet()) { TODO <-- This looks weird
404         Out.Report("Duplicate line table detected", [&](raw_ostream &OS) {
405           OS << "warning: duplicate line table detected for DIE:\n";
406           Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
407         });
408       else
409         Out.Report("Non-monotonically increasing addresses",
410                    [&](raw_ostream &OS) {
411                      OS << "error: line table has addresses that do not "
412                         << "monotonically increase:\n";
413                      for (uint32_t RowIndex2 : RowVector)
414                        CUI.LineTable->Rows[RowIndex2].dump(OS);
415                      Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
416                    });
417       break;
418     }
419 
420     // Skip multiple line entries for the same file and line.
421     auto LastLE = FI.OptLineTable->last();
422     if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line)
423         continue;
424     // Only push a row if it isn't an end sequence. End sequence markers are
425     // included for the last address in a function or the last contiguous
426     // address in a sequence.
427     if (Row.EndSequence) {
428       // End sequence means that the next line entry could have a lower address
429       // that the previous entries. So we clear the previous row so we don't
430       // trigger the line table error about address that do not monotonically
431       // increase.
432       PrevRow = DWARFDebugLine::Row();
433     } else {
434       FI.OptLineTable->push(LE);
435       PrevRow = Row;
436     }
437   }
438   // If not line table rows were added, clear the line table so we don't encode
439   // on in the GSYM file.
440   if (FI.OptLineTable->empty())
441     FI.OptLineTable = std::nullopt;
442 }
443 
444 void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
445                                  DWARFDie Die) {
446   switch (Die.getTag()) {
447   case dwarf::DW_TAG_subprogram: {
448     Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
449     if (!RangesOrError) {
450       consumeError(RangesOrError.takeError());
451       break;
452     }
453     const DWARFAddressRangesVector &Ranges = RangesOrError.get();
454     if (Ranges.empty())
455       break;
456     auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym);
457     if (!NameIndex) {
458       Out.Report("Function has no name", [&](raw_ostream &OS) {
459         OS << "error: function at " << HEX64(Die.getOffset())
460            << " has no name\n ";
461         Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
462       });
463       break;
464     }
465     // All ranges for the subprogram DIE in case it has multiple. We need to
466     // pass this down into parseInlineInfo so we don't warn about inline
467     // ranges that are not in the current subrange of a function when they
468     // actually are in another subgrange. We do this because when a function
469     // has discontiguos ranges, we create multiple function entries with only
470     // the info for that range contained inside of it.
471     AddressRanges AllSubprogramRanges = ConvertDWARFRanges(Ranges);
472 
473     // Create a function_info for each range
474     for (const DWARFAddressRange &Range : Ranges) {
475       // The low PC must be less than the high PC. Many linkers don't remove
476       // DWARF for functions that don't get linked into the final executable.
477       // If both the high and low pc have relocations, linkers will often set
478       // the address values for both to the same value to indicate the function
479       // has been remove. Other linkers have been known to set the one or both
480       // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8
481       // byte addresses to indicate the function isn't valid. The check below
482       // tries to watch for these cases and abort if it runs into them.
483       if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC))
484         break;
485 
486       // Many linkers can't remove DWARF and might set the LowPC to zero. Since
487       // high PC can be an offset from the low PC in more recent DWARF versions
488       // we need to watch for a zero'ed low pc which we do using ValidTextRanges
489       // below.
490       if (!Gsym.IsValidTextAddress(Range.LowPC)) {
491         // We expect zero and -1 to be invalid addresses in DWARF depending
492         // on the linker of the DWARF. This indicates a function was stripped
493         // and the debug info wasn't able to be stripped from the DWARF. If
494         // the LowPC isn't zero or -1, then we should emit an error.
495         if (Range.LowPC != 0) {
496           if (!Gsym.isQuiet()) {
497             // Unexpected invalid address, emit a warning
498             Out.Report("Address range starts outside executable section",
499                        [&](raw_ostream &OS) {
500                          OS << "warning: DIE has an address range whose "
501                                "start address "
502                                "is not in any executable sections ("
503                             << *Gsym.GetValidTextRanges()
504                             << ") and will not be processed:\n";
505                          Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
506                        });
507           }
508         }
509         break;
510       }
511 
512       FunctionInfo FI;
513       FI.Range = {Range.LowPC, Range.HighPC};
514       FI.Name = *NameIndex;
515       if (CUI.LineTable)
516         convertFunctionLineTable(Out, CUI, Die, Gsym, FI);
517 
518       if (hasInlineInfo(Die, 0)) {
519         FI.Inline = InlineInfo();
520         FI.Inline->Name = *NameIndex;
521         FI.Inline->Ranges.insert(FI.Range);
522         bool WarnIfEmpty = true;
523         parseInlineInfo(Gsym, Out, CUI, Die, 0, FI, *FI.Inline,
524                         AllSubprogramRanges, WarnIfEmpty);
525         // Make sure we at least got some valid inline info other than just
526         // the top level function. If we didn't then remove the inline info
527         // from the function info. We have seen cases where LTO tries to modify
528         // the DWARF for functions and it messes up the address ranges for
529         // the inline functions so it is no longer valid.
530         //
531         // By checking if there are any valid children on the top level inline
532         // information object, we will know if we got anything valid from the
533         // debug info.
534         if (FI.Inline->Children.empty()) {
535           if (WarnIfEmpty && !Gsym.isQuiet())
536             Out.Report("DIE contains inline functions with no valid ranges",
537                        [&](raw_ostream &OS) {
538                          OS << "warning: DIE contains inline function "
539                                "information that has no valid ranges, removing "
540                                "inline information:\n";
541                          Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
542                        });
543           FI.Inline = std::nullopt;
544         }
545       }
546 
547       // If dwarf-callsites flag is set, parse DW_TAG_call_site DIEs.
548       if (LoadDwarfCallSites)
549         parseCallSiteInfoFromDwarf(CUI, Die, FI);
550 
551       Gsym.addFunctionInfo(std::move(FI));
552     }
553   } break;
554   default:
555     break;
556   }
557   for (DWARFDie ChildDie : Die.children())
558     handleDie(Out, CUI, ChildDie);
559 }
560 
561 void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die,
562                                                   FunctionInfo &FI) {
563   // Parse all DW_TAG_call_site DIEs that are children of this subprogram DIE.
564   // DWARF specification:
565   // - DW_TAG_call_site can have DW_AT_call_return_pc for return address offset.
566   // - DW_AT_call_origin might point to a DIE of the function being called.
567   // For simplicity, we will just extract return_offset and possibly target name
568   // if available.
569 
570   CallSiteInfoCollection CSIC;
571 
572   for (DWARFDie Child : Die.children()) {
573     if (Child.getTag() != dwarf::DW_TAG_call_site)
574       continue;
575 
576     CallSiteInfo CSI;
577     // DW_AT_call_return_pc: the return PC (address). We'll convert it to
578     // offset relative to FI's start.
579     auto ReturnPC =
580         dwarf::toAddress(Child.findRecursively(dwarf::DW_AT_call_return_pc));
581     if (!ReturnPC || !FI.Range.contains(*ReturnPC))
582       continue;
583 
584     CSI.ReturnOffset = *ReturnPC - FI.startAddress();
585 
586     // Attempt to get function name from DW_AT_call_origin. If present, we can
587     // insert it as a match regex.
588     if (DWARFDie OriginDie =
589             Child.getAttributeValueAsReferencedDie(dwarf::DW_AT_call_origin)) {
590 
591       // Include the full unmangled name if available, otherwise the short name.
592       if (const char *LinkName = OriginDie.getLinkageName()) {
593         uint32_t LinkNameOff = Gsym.insertString(LinkName, /*Copy=*/false);
594         CSI.MatchRegex.push_back(LinkNameOff);
595       } else if (const char *ShortName = OriginDie.getShortName()) {
596         uint32_t ShortNameOff = Gsym.insertString(ShortName, /*Copy=*/false);
597         CSI.MatchRegex.push_back(ShortNameOff);
598       }
599     }
600 
601     // For now, we won't attempt to deduce InternalCall/ExternalCall flags
602     // from DWARF.
603     CSI.Flags = CallSiteInfo::Flags::None;
604 
605     CSIC.CallSites.push_back(CSI);
606   }
607 
608   if (!CSIC.CallSites.empty()) {
609     if (!FI.CallSites)
610       FI.CallSites = CallSiteInfoCollection();
611     // Append parsed DWARF callsites:
612     FI.CallSites->CallSites.insert(FI.CallSites->CallSites.end(),
613                                    CSIC.CallSites.begin(),
614                                    CSIC.CallSites.end());
615   }
616 }
617 
618 Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
619   size_t NumBefore = Gsym.getNumFunctionInfos();
620   auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
621     DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false);
622     if (DwarfUnit.getDWOId()) {
623       DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit();
624       if (!DWOCU->isDWOUnit())
625         Out.Report(
626             "warning: Unable to retrieve DWO .debug_info section for some "
627             "object files. (Remove the --quiet flag for full output)",
628             [&](raw_ostream &OS) {
629               std::string DWOName = dwarf::toString(
630                   DwarfUnit.getUnitDIE().find(
631                       {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
632                   "");
633               OS << "warning: Unable to retrieve DWO .debug_info section for "
634                  << DWOName << "\n";
635             });
636       else {
637         ReturnDie = DWOCU->getUnitDIE(false);
638       }
639     }
640     return ReturnDie;
641   };
642   if (NumThreads == 1) {
643     // Parse all DWARF data from this thread, use the same string/file table
644     // for everything
645     for (const auto &CU : DICtx.compile_units()) {
646       DWARFDie Die = getDie(*CU);
647       CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
648       handleDie(Out, CUI, Die);
649     }
650   } else {
651     // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
652     // front before we start accessing any DIEs since there might be
653     // cross compile unit references in the DWARF. If we don't do this we can
654     // end up crashing.
655 
656     // We need to call getAbbreviations sequentially first so that getUnitDIE()
657     // only works with its local data.
658     for (const auto &CU : DICtx.compile_units())
659       CU->getAbbreviations();
660 
661     // Now parse all DIEs in case we have cross compile unit references in a
662     // thread pool.
663     DefaultThreadPool pool(hardware_concurrency(NumThreads));
664     for (const auto &CU : DICtx.compile_units())
665       pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); });
666     pool.wait();
667 
668     // Now convert all DWARF to GSYM in a thread pool.
669     std::mutex LogMutex;
670     for (const auto &CU : DICtx.compile_units()) {
671       DWARFDie Die = getDie(*CU);
672       if (Die) {
673         CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
674         pool.async([this, CUI, &LogMutex, &Out, Die]() mutable {
675           std::string storage;
676           raw_string_ostream StrStream(storage);
677           OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr);
678           handleDie(ThreadOut, CUI, Die);
679           // Print ThreadLogStorage lines into an actual stream under a lock
680           std::lock_guard<std::mutex> guard(LogMutex);
681           if (Out.GetOS()) {
682             Out << storage;
683           }
684           Out.Merge(ThreadOut);
685         });
686       }
687     }
688     pool.wait();
689   }
690   size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
691   Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
692   return Error::success();
693 }
694 
695 llvm::Error DwarfTransformer::verify(StringRef GsymPath,
696                                      OutputAggregator &Out) {
697   Out << "Verifying GSYM file \"" << GsymPath << "\":\n";
698 
699   auto Gsym = GsymReader::openFile(GsymPath);
700   if (!Gsym)
701     return Gsym.takeError();
702 
703   auto NumAddrs = Gsym->getNumAddresses();
704   DILineInfoSpecifier DLIS(
705       DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
706       DILineInfoSpecifier::FunctionNameKind::LinkageName);
707   std::string gsymFilename;
708   for (uint32_t I = 0; I < NumAddrs; ++I) {
709     auto FuncAddr = Gsym->getAddress(I);
710     if (!FuncAddr)
711         return createStringError(std::errc::invalid_argument,
712                                   "failed to extract address[%i]", I);
713 
714     auto FI = Gsym->getFunctionInfo(*FuncAddr);
715     if (!FI)
716       return createStringError(
717           std::errc::invalid_argument,
718           "failed to extract function info for address 0x%" PRIu64, *FuncAddr);
719 
720     for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) {
721       const object::SectionedAddress SectAddr{
722           Addr, object::SectionedAddress::UndefSection};
723       auto LR = Gsym->lookup(Addr);
724       if (!LR)
725         return LR.takeError();
726 
727       auto DwarfInlineInfos =
728           DICtx.getInliningInfoForAddress(SectAddr, DLIS);
729       uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames();
730       if (NumDwarfInlineInfos == 0) {
731         DwarfInlineInfos.addFrame(
732             DICtx.getLineInfoForAddress(SectAddr, DLIS));
733       }
734 
735       // Check for 1 entry that has no file and line info
736       if (NumDwarfInlineInfos == 1 &&
737           DwarfInlineInfos.getFrame(0).FileName == "<invalid>") {
738         DwarfInlineInfos = DIInliningInfo();
739         NumDwarfInlineInfos = 0;
740       }
741       if (NumDwarfInlineInfos > 0 &&
742           NumDwarfInlineInfos != LR->Locations.size()) {
743         if (Out.GetOS()) {
744           raw_ostream &Log = *Out.GetOS();
745           Log << "error: address " << HEX64(Addr) << " has "
746               << NumDwarfInlineInfos << " DWARF inline frames and GSYM has "
747               << LR->Locations.size() << "\n";
748           Log << "    " << NumDwarfInlineInfos << " DWARF frames:\n";
749           for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) {
750             const auto &dii = DwarfInlineInfos.getFrame(Idx);
751             Log << "    [" << Idx << "]: " << dii.FunctionName << " @ "
752                 << dii.FileName << ':' << dii.Line << '\n';
753           }
754           Log << "    " << LR->Locations.size() << " GSYM frames:\n";
755           for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
756                ++Idx) {
757             const auto &gii = LR->Locations[Idx];
758             Log << "    [" << Idx << "]: " << gii.Name << " @ " << gii.Dir
759                 << '/' << gii.Base << ':' << gii.Line << '\n';
760           }
761           Gsym->dump(Log, *FI);
762         }
763         continue;
764       }
765 
766       for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
767             ++Idx) {
768         const auto &gii = LR->Locations[Idx];
769         if (Idx < NumDwarfInlineInfos) {
770           const auto &dii = DwarfInlineInfos.getFrame(Idx);
771           gsymFilename = LR->getSourceFile(Idx);
772           // Verify function name
773           if (dii.FunctionName.find(gii.Name.str()) != 0)
774             Out << "error: address " << HEX64(Addr) << " DWARF function \""
775                 << dii.FunctionName.c_str()
776                 << "\" doesn't match GSYM function \"" << gii.Name << "\"\n";
777 
778           // Verify source file path
779           if (dii.FileName != gsymFilename)
780             Out << "error: address " << HEX64(Addr) << " DWARF path \""
781                 << dii.FileName.c_str() << "\" doesn't match GSYM path \""
782                 << gsymFilename.c_str() << "\"\n";
783           // Verify source file line
784           if (dii.Line != gii.Line)
785             Out << "error: address " << HEX64(Addr) << " DWARF line "
786                 << dii.Line << " != GSYM line " << gii.Line << "\n";
787         }
788       }
789     }
790   }
791   return Error::success();
792 }
793