xref: /freebsd-src/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
15ffd83dbSDimitry Andric //===- DwarfTransformer.cpp -----------------------------------------------===//
25ffd83dbSDimitry Andric //
35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric 
95ffd83dbSDimitry Andric #include <thread>
105ffd83dbSDimitry Andric #include <unordered_set>
115ffd83dbSDimitry Andric 
125ffd83dbSDimitry Andric #include "llvm/DebugInfo/DIContext.h"
13*81ad6265SDimitry Andric #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
145ffd83dbSDimitry Andric #include "llvm/DebugInfo/DWARF/DWARFContext.h"
155ffd83dbSDimitry Andric #include "llvm/Support/Error.h"
165ffd83dbSDimitry Andric #include "llvm/Support/ThreadPool.h"
175ffd83dbSDimitry Andric #include "llvm/Support/raw_ostream.h"
185ffd83dbSDimitry Andric 
195ffd83dbSDimitry Andric #include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
205ffd83dbSDimitry Andric #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
215ffd83dbSDimitry Andric #include "llvm/DebugInfo/GSYM/GsymCreator.h"
225ffd83dbSDimitry Andric #include "llvm/DebugInfo/GSYM/GsymReader.h"
235ffd83dbSDimitry Andric #include "llvm/DebugInfo/GSYM/InlineInfo.h"
245ffd83dbSDimitry Andric 
255ffd83dbSDimitry Andric using namespace llvm;
265ffd83dbSDimitry Andric using namespace gsym;
275ffd83dbSDimitry Andric 
285ffd83dbSDimitry Andric struct llvm::gsym::CUInfo {
295ffd83dbSDimitry Andric   const DWARFDebugLine::LineTable *LineTable;
305ffd83dbSDimitry Andric   const char *CompDir;
315ffd83dbSDimitry Andric   std::vector<uint32_t> FileCache;
325ffd83dbSDimitry Andric   uint64_t Language = 0;
335ffd83dbSDimitry Andric   uint8_t AddrSize = 0;
345ffd83dbSDimitry Andric 
355ffd83dbSDimitry Andric   CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) {
365ffd83dbSDimitry Andric     LineTable = DICtx.getLineTableForUnit(CU);
375ffd83dbSDimitry Andric     CompDir = CU->getCompilationDir();
385ffd83dbSDimitry Andric     FileCache.clear();
395ffd83dbSDimitry Andric     if (LineTable)
405ffd83dbSDimitry Andric       FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX);
415ffd83dbSDimitry Andric     DWARFDie Die = CU->getUnitDIE();
425ffd83dbSDimitry Andric     Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0);
435ffd83dbSDimitry Andric     AddrSize = CU->getAddressByteSize();
445ffd83dbSDimitry Andric   }
455ffd83dbSDimitry Andric 
465ffd83dbSDimitry Andric   /// Return true if Addr is the highest address for a given compile unit. The
475ffd83dbSDimitry Andric   /// highest address is encoded as -1, of all ones in the address. These high
485ffd83dbSDimitry Andric   /// addresses are used by some linkers to indicate that a function has been
495ffd83dbSDimitry Andric   /// dead stripped or didn't end up in the linked executable.
505ffd83dbSDimitry Andric   bool isHighestAddress(uint64_t Addr) const {
515ffd83dbSDimitry Andric     if (AddrSize == 4)
525ffd83dbSDimitry Andric       return Addr == UINT32_MAX;
535ffd83dbSDimitry Andric     else if (AddrSize == 8)
545ffd83dbSDimitry Andric       return Addr == UINT64_MAX;
555ffd83dbSDimitry Andric     return false;
565ffd83dbSDimitry Andric   }
575ffd83dbSDimitry Andric 
585ffd83dbSDimitry Andric   /// Convert a DWARF compile unit file index into a GSYM global file index.
595ffd83dbSDimitry Andric   ///
605ffd83dbSDimitry Andric   /// Each compile unit in DWARF has its own file table in the line table
615ffd83dbSDimitry Andric   /// prologue. GSYM has a single large file table that applies to all files
625ffd83dbSDimitry Andric   /// from all of the info in a GSYM file. This function converts between the
635ffd83dbSDimitry Andric   /// two and caches and DWARF CU file index that has already been converted so
645ffd83dbSDimitry Andric   /// the first client that asks for a compile unit file index will end up
655ffd83dbSDimitry Andric   /// doing the conversion, and subsequent clients will get the cached GSYM
665ffd83dbSDimitry Andric   /// index.
675ffd83dbSDimitry Andric   uint32_t DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx) {
685ffd83dbSDimitry Andric     if (!LineTable)
695ffd83dbSDimitry Andric       return 0;
705ffd83dbSDimitry Andric     assert(DwarfFileIdx < FileCache.size());
715ffd83dbSDimitry Andric     uint32_t &GsymFileIdx = FileCache[DwarfFileIdx];
725ffd83dbSDimitry Andric     if (GsymFileIdx != UINT32_MAX)
735ffd83dbSDimitry Andric       return GsymFileIdx;
745ffd83dbSDimitry Andric     std::string File;
755ffd83dbSDimitry Andric     if (LineTable->getFileNameByIndex(
765ffd83dbSDimitry Andric             DwarfFileIdx, CompDir,
775ffd83dbSDimitry Andric             DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File))
785ffd83dbSDimitry Andric       GsymFileIdx = Gsym.insertFile(File);
795ffd83dbSDimitry Andric     else
805ffd83dbSDimitry Andric       GsymFileIdx = 0;
815ffd83dbSDimitry Andric     return GsymFileIdx;
825ffd83dbSDimitry Andric   }
835ffd83dbSDimitry Andric };
845ffd83dbSDimitry Andric 
855ffd83dbSDimitry Andric 
865ffd83dbSDimitry Andric static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) {
875ffd83dbSDimitry Andric   if (DWARFDie SpecDie =
885ffd83dbSDimitry Andric           Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) {
895ffd83dbSDimitry Andric     if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie))
905ffd83dbSDimitry Andric       return SpecParent;
915ffd83dbSDimitry Andric   }
925ffd83dbSDimitry Andric   if (DWARFDie AbstDie =
935ffd83dbSDimitry Andric           Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) {
945ffd83dbSDimitry Andric     if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie))
955ffd83dbSDimitry Andric       return AbstParent;
965ffd83dbSDimitry Andric   }
975ffd83dbSDimitry Andric 
985ffd83dbSDimitry Andric   // We never want to follow parent for inlined subroutine - that would
995ffd83dbSDimitry Andric   // give us information about where the function is inlined, not what
1005ffd83dbSDimitry Andric   // function is inlined
1015ffd83dbSDimitry Andric   if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine)
1025ffd83dbSDimitry Andric     return DWARFDie();
1035ffd83dbSDimitry Andric 
1045ffd83dbSDimitry Andric   DWARFDie ParentDie = Die.getParent();
1055ffd83dbSDimitry Andric   if (!ParentDie)
1065ffd83dbSDimitry Andric     return DWARFDie();
1075ffd83dbSDimitry Andric 
1085ffd83dbSDimitry Andric   switch (ParentDie.getTag()) {
1095ffd83dbSDimitry Andric   case dwarf::DW_TAG_namespace:
1105ffd83dbSDimitry Andric   case dwarf::DW_TAG_structure_type:
1115ffd83dbSDimitry Andric   case dwarf::DW_TAG_union_type:
1125ffd83dbSDimitry Andric   case dwarf::DW_TAG_class_type:
1135ffd83dbSDimitry Andric   case dwarf::DW_TAG_subprogram:
1145ffd83dbSDimitry Andric     return ParentDie; // Found parent decl context DIE
1155ffd83dbSDimitry Andric   case dwarf::DW_TAG_lexical_block:
1165ffd83dbSDimitry Andric     return GetParentDeclContextDIE(ParentDie);
1175ffd83dbSDimitry Andric   default:
1185ffd83dbSDimitry Andric     break;
1195ffd83dbSDimitry Andric   }
1205ffd83dbSDimitry Andric 
1215ffd83dbSDimitry Andric   return DWARFDie();
1225ffd83dbSDimitry Andric }
1235ffd83dbSDimitry Andric 
1245ffd83dbSDimitry Andric /// Get the GsymCreator string table offset for the qualified name for the
1255ffd83dbSDimitry Andric /// DIE passed in. This function will avoid making copies of any strings in
1265ffd83dbSDimitry Andric /// the GsymCreator when possible. We don't need to copy a string when the
1275ffd83dbSDimitry Andric /// string comes from our .debug_str section or is an inlined string in the
1285ffd83dbSDimitry Andric /// .debug_info. If we create a qualified name string in this function by
1295ffd83dbSDimitry Andric /// combining multiple strings in the DWARF string table or info, we will make
1305ffd83dbSDimitry Andric /// a copy of the string when we add it to the string table.
1315ffd83dbSDimitry Andric static Optional<uint32_t> getQualifiedNameIndex(DWARFDie &Die,
1325ffd83dbSDimitry Andric                                                 uint64_t Language,
1335ffd83dbSDimitry Andric                                                 GsymCreator &Gsym) {
1345ffd83dbSDimitry Andric   // If the dwarf has mangled name, use mangled name
1355ffd83dbSDimitry Andric   if (auto LinkageName =
1365ffd83dbSDimitry Andric           dwarf::toString(Die.findRecursively({dwarf::DW_AT_MIPS_linkage_name,
1375ffd83dbSDimitry Andric                                                dwarf::DW_AT_linkage_name}),
1385ffd83dbSDimitry Andric                           nullptr))
1395ffd83dbSDimitry Andric     return Gsym.insertString(LinkageName, /* Copy */ false);
1405ffd83dbSDimitry Andric 
1415ffd83dbSDimitry Andric   StringRef ShortName(Die.getName(DINameKind::ShortName));
1425ffd83dbSDimitry Andric   if (ShortName.empty())
1435ffd83dbSDimitry Andric     return llvm::None;
1445ffd83dbSDimitry Andric 
1455ffd83dbSDimitry Andric   // For C++ and ObjC, prepend names of all parent declaration contexts
1465ffd83dbSDimitry Andric   if (!(Language == dwarf::DW_LANG_C_plus_plus ||
1475ffd83dbSDimitry Andric         Language == dwarf::DW_LANG_C_plus_plus_03 ||
1485ffd83dbSDimitry Andric         Language == dwarf::DW_LANG_C_plus_plus_11 ||
1495ffd83dbSDimitry Andric         Language == dwarf::DW_LANG_C_plus_plus_14 ||
1505ffd83dbSDimitry Andric         Language == dwarf::DW_LANG_ObjC_plus_plus ||
1515ffd83dbSDimitry Andric         // This should not be needed for C, but we see C++ code marked as C
1525ffd83dbSDimitry Andric         // in some binaries. This should hurt, so let's do it for C as well
1535ffd83dbSDimitry Andric         Language == dwarf::DW_LANG_C))
1545ffd83dbSDimitry Andric     return Gsym.insertString(ShortName, /* Copy */ false);
1555ffd83dbSDimitry Andric 
1565ffd83dbSDimitry Andric   // Some GCC optimizations create functions with names ending with .isra.<num>
1575ffd83dbSDimitry Andric   // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name
1585ffd83dbSDimitry Andric   // If it looks like it could be the case, don't add any prefix
1595ffd83dbSDimitry Andric   if (ShortName.startswith("_Z") &&
1605ffd83dbSDimitry Andric       (ShortName.contains(".isra.") || ShortName.contains(".part.")))
1615ffd83dbSDimitry Andric     return Gsym.insertString(ShortName, /* Copy */ false);
1625ffd83dbSDimitry Andric 
1635ffd83dbSDimitry Andric   DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die);
1645ffd83dbSDimitry Andric   if (ParentDeclCtxDie) {
1655ffd83dbSDimitry Andric     std::string Name = ShortName.str();
1665ffd83dbSDimitry Andric     while (ParentDeclCtxDie) {
1675ffd83dbSDimitry Andric       StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName));
1685ffd83dbSDimitry Andric       if (!ParentName.empty()) {
1695ffd83dbSDimitry Andric         // "lambda" names are wrapped in < >. Replace with { }
1705ffd83dbSDimitry Andric         // to be consistent with demangled names and not to confuse with
1715ffd83dbSDimitry Andric         // templates
1725ffd83dbSDimitry Andric         if (ParentName.front() == '<' && ParentName.back() == '>')
1735ffd83dbSDimitry Andric           Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" +
1745ffd83dbSDimitry Andric                 "::" + Name;
1755ffd83dbSDimitry Andric         else
1765ffd83dbSDimitry Andric           Name = ParentName.str() + "::" + Name;
1775ffd83dbSDimitry Andric       }
1785ffd83dbSDimitry Andric       ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie);
1795ffd83dbSDimitry Andric     }
1805ffd83dbSDimitry Andric     // Copy the name since we created a new name in a std::string.
1815ffd83dbSDimitry Andric     return Gsym.insertString(Name, /* Copy */ true);
1825ffd83dbSDimitry Andric   }
1835ffd83dbSDimitry Andric   // Don't copy the name since it exists in the DWARF object file.
1845ffd83dbSDimitry Andric   return Gsym.insertString(ShortName, /* Copy */ false);
1855ffd83dbSDimitry Andric }
1865ffd83dbSDimitry Andric 
1875ffd83dbSDimitry Andric static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) {
1885ffd83dbSDimitry Andric   bool CheckChildren = true;
1895ffd83dbSDimitry Andric   switch (Die.getTag()) {
1905ffd83dbSDimitry Andric   case dwarf::DW_TAG_subprogram:
1915ffd83dbSDimitry Andric     // Don't look into functions within functions.
1925ffd83dbSDimitry Andric     CheckChildren = Depth == 0;
1935ffd83dbSDimitry Andric     break;
1945ffd83dbSDimitry Andric   case dwarf::DW_TAG_inlined_subroutine:
1955ffd83dbSDimitry Andric     return true;
1965ffd83dbSDimitry Andric   default:
1975ffd83dbSDimitry Andric     break;
1985ffd83dbSDimitry Andric   }
1995ffd83dbSDimitry Andric   if (!CheckChildren)
2005ffd83dbSDimitry Andric     return false;
2015ffd83dbSDimitry Andric   for (DWARFDie ChildDie : Die.children()) {
2025ffd83dbSDimitry Andric     if (hasInlineInfo(ChildDie, Depth + 1))
2035ffd83dbSDimitry Andric       return true;
2045ffd83dbSDimitry Andric   }
2055ffd83dbSDimitry Andric   return false;
2065ffd83dbSDimitry Andric }
2075ffd83dbSDimitry Andric 
2085ffd83dbSDimitry Andric static void parseInlineInfo(GsymCreator &Gsym, CUInfo &CUI, DWARFDie Die,
2095ffd83dbSDimitry Andric                             uint32_t Depth, FunctionInfo &FI,
2105ffd83dbSDimitry Andric                             InlineInfo &parent) {
2115ffd83dbSDimitry Andric   if (!hasInlineInfo(Die, Depth))
2125ffd83dbSDimitry Andric     return;
2135ffd83dbSDimitry Andric 
2145ffd83dbSDimitry Andric   dwarf::Tag Tag = Die.getTag();
2155ffd83dbSDimitry Andric   if (Tag == dwarf::DW_TAG_inlined_subroutine) {
2165ffd83dbSDimitry Andric     // create new InlineInfo and append to parent.children
2175ffd83dbSDimitry Andric     InlineInfo II;
2185ffd83dbSDimitry Andric     DWARFAddressRange FuncRange =
2195ffd83dbSDimitry Andric         DWARFAddressRange(FI.startAddress(), FI.endAddress());
2205ffd83dbSDimitry Andric     Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
2215ffd83dbSDimitry Andric     if (RangesOrError) {
2225ffd83dbSDimitry Andric       for (const DWARFAddressRange &Range : RangesOrError.get()) {
2235ffd83dbSDimitry Andric         // Check that the inlined function is within the range of the function
2245ffd83dbSDimitry Andric         // info, it might not be in case of split functions
2255ffd83dbSDimitry Andric         if (FuncRange.LowPC <= Range.LowPC && Range.HighPC <= FuncRange.HighPC)
2265ffd83dbSDimitry Andric           II.Ranges.insert(AddressRange(Range.LowPC, Range.HighPC));
2275ffd83dbSDimitry Andric       }
2285ffd83dbSDimitry Andric     }
2295ffd83dbSDimitry Andric     if (II.Ranges.empty())
2305ffd83dbSDimitry Andric       return;
2315ffd83dbSDimitry Andric 
2325ffd83dbSDimitry Andric     if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym))
2335ffd83dbSDimitry Andric       II.Name = *NameIndex;
2345ffd83dbSDimitry Andric     II.CallFile = CUI.DWARFToGSYMFileIndex(
2355ffd83dbSDimitry Andric         Gsym, dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_file), 0));
2365ffd83dbSDimitry Andric     II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
2375ffd83dbSDimitry Andric     // parse all children and append to parent
2385ffd83dbSDimitry Andric     for (DWARFDie ChildDie : Die.children())
2395ffd83dbSDimitry Andric       parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, II);
2405ffd83dbSDimitry Andric     parent.Children.emplace_back(std::move(II));
2415ffd83dbSDimitry Andric     return;
2425ffd83dbSDimitry Andric   }
2435ffd83dbSDimitry Andric   if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) {
2445ffd83dbSDimitry Andric     // skip this Die and just recurse down
2455ffd83dbSDimitry Andric     for (DWARFDie ChildDie : Die.children())
2465ffd83dbSDimitry Andric       parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, parent);
2475ffd83dbSDimitry Andric   }
2485ffd83dbSDimitry Andric }
2495ffd83dbSDimitry Andric 
2505ffd83dbSDimitry Andric static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI,
2515ffd83dbSDimitry Andric                                      DWARFDie Die, GsymCreator &Gsym,
2525ffd83dbSDimitry Andric                                      FunctionInfo &FI) {
2535ffd83dbSDimitry Andric   std::vector<uint32_t> RowVector;
2545ffd83dbSDimitry Andric   const uint64_t StartAddress = FI.startAddress();
2555ffd83dbSDimitry Andric   const uint64_t EndAddress = FI.endAddress();
2565ffd83dbSDimitry Andric   const uint64_t RangeSize = EndAddress - StartAddress;
2575ffd83dbSDimitry Andric   const object::SectionedAddress SecAddress{
2585ffd83dbSDimitry Andric       StartAddress, object::SectionedAddress::UndefSection};
2595ffd83dbSDimitry Andric 
2605ffd83dbSDimitry Andric 
2615ffd83dbSDimitry Andric   if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) {
2625ffd83dbSDimitry Andric     // If we have a DW_TAG_subprogram but no line entries, fall back to using
2635ffd83dbSDimitry Andric     // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes.
264349cc55cSDimitry Andric     std::string FilePath = Die.getDeclFile(
265349cc55cSDimitry Andric         DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
266349cc55cSDimitry Andric     if (FilePath.empty())
267349cc55cSDimitry Andric       return;
2685ffd83dbSDimitry Andric     if (auto Line =
2695ffd83dbSDimitry Andric             dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
270349cc55cSDimitry Andric       LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line);
2715ffd83dbSDimitry Andric       FI.OptLineTable = LineTable();
2725ffd83dbSDimitry Andric       FI.OptLineTable->push(LE);
2735ffd83dbSDimitry Andric     }
2745ffd83dbSDimitry Andric     return;
2755ffd83dbSDimitry Andric   }
2765ffd83dbSDimitry Andric 
2775ffd83dbSDimitry Andric   FI.OptLineTable = LineTable();
2785ffd83dbSDimitry Andric   DWARFDebugLine::Row PrevRow;
2795ffd83dbSDimitry Andric   for (uint32_t RowIndex : RowVector) {
2805ffd83dbSDimitry Andric     // Take file number and line/column from the row.
2815ffd83dbSDimitry Andric     const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex];
2825ffd83dbSDimitry Andric     const uint32_t FileIdx = CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
2835ffd83dbSDimitry Andric     uint64_t RowAddress = Row.Address.Address;
2845ffd83dbSDimitry Andric     // Watch out for a RowAddress that is in the middle of a line table entry
2855ffd83dbSDimitry Andric     // in the DWARF. If we pass an address in between two line table entries
2865ffd83dbSDimitry Andric     // we will get a RowIndex for the previous valid line table row which won't
2875ffd83dbSDimitry Andric     // be contained in our function. This is usually a bug in the DWARF due to
2885ffd83dbSDimitry Andric     // linker problems or LTO or other DWARF re-linking so it is worth emitting
2895ffd83dbSDimitry Andric     // an error, but not worth stopping the creation of the GSYM.
2905ffd83dbSDimitry Andric     if (!FI.Range.contains(RowAddress)) {
291*81ad6265SDimitry Andric       if (RowAddress < FI.Range.start()) {
2925ffd83dbSDimitry Andric         Log << "error: DIE has a start address whose LowPC is between the "
2935ffd83dbSDimitry Andric           "line table Row[" << RowIndex << "] with address "
2945ffd83dbSDimitry Andric           << HEX64(RowAddress) << " and the next one.\n";
2955ffd83dbSDimitry Andric         Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
296*81ad6265SDimitry Andric         RowAddress = FI.Range.start();
2975ffd83dbSDimitry Andric       } else {
2985ffd83dbSDimitry Andric         continue;
2995ffd83dbSDimitry Andric       }
3005ffd83dbSDimitry Andric     }
3015ffd83dbSDimitry Andric 
3025ffd83dbSDimitry Andric     LineEntry LE(RowAddress, FileIdx, Row.Line);
3035ffd83dbSDimitry Andric     if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) {
3045ffd83dbSDimitry Andric       // We have seen full duplicate line tables for functions in some
3055ffd83dbSDimitry Andric       // DWARF files. Watch for those here by checking the the last
3065ffd83dbSDimitry Andric       // row was the function's end address (HighPC) and that the
3075ffd83dbSDimitry Andric       // current line table entry's address is the same as the first
3085ffd83dbSDimitry Andric       // line entry we already have in our "function_info.Lines". If
3095ffd83dbSDimitry Andric       // so break out after printing a warning.
3105ffd83dbSDimitry Andric       auto FirstLE = FI.OptLineTable->first();
3115ffd83dbSDimitry Andric       if (FirstLE && *FirstLE == LE) {
312fe6060f1SDimitry Andric         if (!Gsym.isQuiet()) {
3135ffd83dbSDimitry Andric           Log << "warning: duplicate line table detected for DIE:\n";
3145ffd83dbSDimitry Andric           Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
315fe6060f1SDimitry Andric         }
3165ffd83dbSDimitry Andric       } else {
3175ffd83dbSDimitry Andric         // Print out (ignore if os == nulls as this is expensive)
3185ffd83dbSDimitry Andric         Log << "error: line table has addresses that do not "
3195ffd83dbSDimitry Andric              << "monotonically increase:\n";
3205ffd83dbSDimitry Andric         for (uint32_t RowIndex2 : RowVector) {
3215ffd83dbSDimitry Andric           CUI.LineTable->Rows[RowIndex2].dump(Log);
3225ffd83dbSDimitry Andric         }
3235ffd83dbSDimitry Andric         Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
3245ffd83dbSDimitry Andric       }
3255ffd83dbSDimitry Andric       break;
3265ffd83dbSDimitry Andric     }
3275ffd83dbSDimitry Andric 
3285ffd83dbSDimitry Andric     // Skip multiple line entries for the same file and line.
3295ffd83dbSDimitry Andric     auto LastLE = FI.OptLineTable->last();
3305ffd83dbSDimitry Andric     if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line)
3315ffd83dbSDimitry Andric         continue;
3325ffd83dbSDimitry Andric     // Only push a row if it isn't an end sequence. End sequence markers are
3335ffd83dbSDimitry Andric     // included for the last address in a function or the last contiguous
3345ffd83dbSDimitry Andric     // address in a sequence.
3355ffd83dbSDimitry Andric     if (Row.EndSequence) {
3365ffd83dbSDimitry Andric       // End sequence means that the next line entry could have a lower address
3375ffd83dbSDimitry Andric       // that the previous entries. So we clear the previous row so we don't
3385ffd83dbSDimitry Andric       // trigger the line table error about address that do not monotonically
3395ffd83dbSDimitry Andric       // increase.
3405ffd83dbSDimitry Andric       PrevRow = DWARFDebugLine::Row();
3415ffd83dbSDimitry Andric     } else {
3425ffd83dbSDimitry Andric       FI.OptLineTable->push(LE);
3435ffd83dbSDimitry Andric       PrevRow = Row;
3445ffd83dbSDimitry Andric     }
3455ffd83dbSDimitry Andric   }
3465ffd83dbSDimitry Andric   // If not line table rows were added, clear the line table so we don't encode
3475ffd83dbSDimitry Andric   // on in the GSYM file.
3485ffd83dbSDimitry Andric   if (FI.OptLineTable->empty())
3495ffd83dbSDimitry Andric     FI.OptLineTable = llvm::None;
3505ffd83dbSDimitry Andric }
3515ffd83dbSDimitry Andric 
3525ffd83dbSDimitry Andric void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) {
3535ffd83dbSDimitry Andric   switch (Die.getTag()) {
3545ffd83dbSDimitry Andric   case dwarf::DW_TAG_subprogram: {
3555ffd83dbSDimitry Andric     Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
3565ffd83dbSDimitry Andric     if (!RangesOrError) {
3575ffd83dbSDimitry Andric       consumeError(RangesOrError.takeError());
3585ffd83dbSDimitry Andric       break;
3595ffd83dbSDimitry Andric     }
3605ffd83dbSDimitry Andric     const DWARFAddressRangesVector &Ranges = RangesOrError.get();
3615ffd83dbSDimitry Andric     if (Ranges.empty())
3625ffd83dbSDimitry Andric       break;
3635ffd83dbSDimitry Andric     auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym);
3645ffd83dbSDimitry Andric     if (!NameIndex) {
3655ffd83dbSDimitry Andric       OS << "error: function at " << HEX64(Die.getOffset())
3665ffd83dbSDimitry Andric          << " has no name\n ";
3675ffd83dbSDimitry Andric       Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
3685ffd83dbSDimitry Andric       break;
3695ffd83dbSDimitry Andric     }
3705ffd83dbSDimitry Andric 
3715ffd83dbSDimitry Andric     // Create a function_info for each range
3725ffd83dbSDimitry Andric     for (const DWARFAddressRange &Range : Ranges) {
3735ffd83dbSDimitry Andric       // The low PC must be less than the high PC. Many linkers don't remove
3745ffd83dbSDimitry Andric       // DWARF for functions that don't get linked into the final executable.
3755ffd83dbSDimitry Andric       // If both the high and low pc have relocations, linkers will often set
3765ffd83dbSDimitry Andric       // the address values for both to the same value to indicate the function
3775ffd83dbSDimitry Andric       // has been remove. Other linkers have been known to set the one or both
3785ffd83dbSDimitry Andric       // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8
3795ffd83dbSDimitry Andric       // byte addresses to indicate the function isn't valid. The check below
3805ffd83dbSDimitry Andric       // tries to watch for these cases and abort if it runs into them.
3815ffd83dbSDimitry Andric       if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC))
3825ffd83dbSDimitry Andric         break;
3835ffd83dbSDimitry Andric 
3845ffd83dbSDimitry Andric       // Many linkers can't remove DWARF and might set the LowPC to zero. Since
3855ffd83dbSDimitry Andric       // high PC can be an offset from the low PC in more recent DWARF versions
3865ffd83dbSDimitry Andric       // we need to watch for a zero'ed low pc which we do using
3875ffd83dbSDimitry Andric       // ValidTextRanges below.
3885ffd83dbSDimitry Andric       if (!Gsym.IsValidTextAddress(Range.LowPC)) {
3895ffd83dbSDimitry Andric         // We expect zero and -1 to be invalid addresses in DWARF depending
3905ffd83dbSDimitry Andric         // on the linker of the DWARF. This indicates a function was stripped
3915ffd83dbSDimitry Andric         // and the debug info wasn't able to be stripped from the DWARF. If
3925ffd83dbSDimitry Andric         // the LowPC isn't zero or -1, then we should emit an error.
3935ffd83dbSDimitry Andric         if (Range.LowPC != 0) {
394fe6060f1SDimitry Andric           if (!Gsym.isQuiet()) {
395fe6060f1SDimitry Andric             // Unexpected invalid address, emit a warning
396349cc55cSDimitry Andric             OS << "warning: DIE has an address range whose start address is "
397fe6060f1SDimitry Andric                   "not in any executable sections ("
398fe6060f1SDimitry Andric                << *Gsym.GetValidTextRanges()
399fe6060f1SDimitry Andric                << ") and will not be processed:\n";
400349cc55cSDimitry Andric             Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
4015ffd83dbSDimitry Andric           }
402fe6060f1SDimitry Andric         }
4035ffd83dbSDimitry Andric         break;
4045ffd83dbSDimitry Andric       }
4055ffd83dbSDimitry Andric 
4065ffd83dbSDimitry Andric       FunctionInfo FI;
407*81ad6265SDimitry Andric       FI.Range = {Range.LowPC, Range.HighPC};
4085ffd83dbSDimitry Andric       FI.Name = *NameIndex;
4095ffd83dbSDimitry Andric       if (CUI.LineTable) {
4105ffd83dbSDimitry Andric         convertFunctionLineTable(OS, CUI, Die, Gsym, FI);
4115ffd83dbSDimitry Andric       }
4125ffd83dbSDimitry Andric       if (hasInlineInfo(Die, 0)) {
4135ffd83dbSDimitry Andric         FI.Inline = InlineInfo();
4145ffd83dbSDimitry Andric         FI.Inline->Name = *NameIndex;
4155ffd83dbSDimitry Andric         FI.Inline->Ranges.insert(FI.Range);
4165ffd83dbSDimitry Andric         parseInlineInfo(Gsym, CUI, Die, 0, FI, *FI.Inline);
4175ffd83dbSDimitry Andric       }
4185ffd83dbSDimitry Andric       Gsym.addFunctionInfo(std::move(FI));
4195ffd83dbSDimitry Andric     }
4205ffd83dbSDimitry Andric   } break;
4215ffd83dbSDimitry Andric   default:
4225ffd83dbSDimitry Andric     break;
4235ffd83dbSDimitry Andric   }
4245ffd83dbSDimitry Andric   for (DWARFDie ChildDie : Die.children())
4255ffd83dbSDimitry Andric     handleDie(OS, CUI, ChildDie);
4265ffd83dbSDimitry Andric }
4275ffd83dbSDimitry Andric 
4285ffd83dbSDimitry Andric Error DwarfTransformer::convert(uint32_t NumThreads) {
4295ffd83dbSDimitry Andric   size_t NumBefore = Gsym.getNumFunctionInfos();
430*81ad6265SDimitry Andric   auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
431*81ad6265SDimitry Andric     DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false);
432*81ad6265SDimitry Andric     if (llvm::Optional<uint64_t> DWOId = DwarfUnit.getDWOId()) {
433*81ad6265SDimitry Andric       DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit();
434*81ad6265SDimitry Andric       if (!DWOCU->isDWOUnit()) {
435*81ad6265SDimitry Andric         std::string DWOName = dwarf::toString(
436*81ad6265SDimitry Andric             DwarfUnit.getUnitDIE().find(
437*81ad6265SDimitry Andric                 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
438*81ad6265SDimitry Andric             "");
439*81ad6265SDimitry Andric         Log << "warning: Unable to retrieve DWO .debug_info section for "
440*81ad6265SDimitry Andric             << DWOName << "\n";
441*81ad6265SDimitry Andric       } else {
442*81ad6265SDimitry Andric         ReturnDie = DWOCU->getUnitDIE(false);
443*81ad6265SDimitry Andric       }
444*81ad6265SDimitry Andric     }
445*81ad6265SDimitry Andric     return ReturnDie;
446*81ad6265SDimitry Andric   };
4475ffd83dbSDimitry Andric   if (NumThreads == 1) {
4485ffd83dbSDimitry Andric     // Parse all DWARF data from this thread, use the same string/file table
4495ffd83dbSDimitry Andric     // for everything
4505ffd83dbSDimitry Andric     for (const auto &CU : DICtx.compile_units()) {
451*81ad6265SDimitry Andric       DWARFDie Die = getDie(*CU);
4525ffd83dbSDimitry Andric       CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
4535ffd83dbSDimitry Andric       handleDie(Log, CUI, Die);
4545ffd83dbSDimitry Andric     }
4555ffd83dbSDimitry Andric   } else {
4565ffd83dbSDimitry Andric     // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
4575ffd83dbSDimitry Andric     // front before we start accessing any DIEs since there might be
4585ffd83dbSDimitry Andric     // cross compile unit references in the DWARF. If we don't do this we can
4595ffd83dbSDimitry Andric     // end up crashing.
4605ffd83dbSDimitry Andric 
4615ffd83dbSDimitry Andric     // We need to call getAbbreviations sequentially first so that getUnitDIE()
4625ffd83dbSDimitry Andric     // only works with its local data.
4635ffd83dbSDimitry Andric     for (const auto &CU : DICtx.compile_units())
4645ffd83dbSDimitry Andric       CU->getAbbreviations();
4655ffd83dbSDimitry Andric 
4665ffd83dbSDimitry Andric     // Now parse all DIEs in case we have cross compile unit references in a
4675ffd83dbSDimitry Andric     // thread pool.
4685ffd83dbSDimitry Andric     ThreadPool pool(hardware_concurrency(NumThreads));
4695ffd83dbSDimitry Andric     for (const auto &CU : DICtx.compile_units())
4705ffd83dbSDimitry Andric       pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); });
4715ffd83dbSDimitry Andric     pool.wait();
4725ffd83dbSDimitry Andric 
4735ffd83dbSDimitry Andric     // Now convert all DWARF to GSYM in a thread pool.
4745ffd83dbSDimitry Andric     std::mutex LogMutex;
4755ffd83dbSDimitry Andric     for (const auto &CU : DICtx.compile_units()) {
476*81ad6265SDimitry Andric       DWARFDie Die = getDie(*CU);
4775ffd83dbSDimitry Andric       if (Die) {
4785ffd83dbSDimitry Andric         CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
4795ffd83dbSDimitry Andric         pool.async([this, CUI, &LogMutex, Die]() mutable {
4805ffd83dbSDimitry Andric           std::string ThreadLogStorage;
4815ffd83dbSDimitry Andric           raw_string_ostream ThreadOS(ThreadLogStorage);
4825ffd83dbSDimitry Andric           handleDie(ThreadOS, CUI, Die);
4835ffd83dbSDimitry Andric           ThreadOS.flush();
4845ffd83dbSDimitry Andric           if (!ThreadLogStorage.empty()) {
4855ffd83dbSDimitry Andric             // Print ThreadLogStorage lines into an actual stream under a lock
4865ffd83dbSDimitry Andric             std::lock_guard<std::mutex> guard(LogMutex);
4875ffd83dbSDimitry Andric             Log << ThreadLogStorage;
4885ffd83dbSDimitry Andric           }
4895ffd83dbSDimitry Andric         });
4905ffd83dbSDimitry Andric       }
4915ffd83dbSDimitry Andric     }
4925ffd83dbSDimitry Andric     pool.wait();
4935ffd83dbSDimitry Andric   }
4945ffd83dbSDimitry Andric   size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
4955ffd83dbSDimitry Andric   Log << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
4965ffd83dbSDimitry Andric   return Error::success();
4975ffd83dbSDimitry Andric }
4985ffd83dbSDimitry Andric 
4995ffd83dbSDimitry Andric llvm::Error DwarfTransformer::verify(StringRef GsymPath) {
5005ffd83dbSDimitry Andric   Log << "Verifying GSYM file \"" << GsymPath << "\":\n";
5015ffd83dbSDimitry Andric 
5025ffd83dbSDimitry Andric   auto Gsym = GsymReader::openFile(GsymPath);
5035ffd83dbSDimitry Andric   if (!Gsym)
5045ffd83dbSDimitry Andric     return Gsym.takeError();
5055ffd83dbSDimitry Andric 
5065ffd83dbSDimitry Andric   auto NumAddrs = Gsym->getNumAddresses();
5075ffd83dbSDimitry Andric   DILineInfoSpecifier DLIS(
5085ffd83dbSDimitry Andric       DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
5095ffd83dbSDimitry Andric       DILineInfoSpecifier::FunctionNameKind::LinkageName);
5105ffd83dbSDimitry Andric   std::string gsymFilename;
5115ffd83dbSDimitry Andric   for (uint32_t I = 0; I < NumAddrs; ++I) {
5125ffd83dbSDimitry Andric     auto FuncAddr = Gsym->getAddress(I);
5135ffd83dbSDimitry Andric     if (!FuncAddr)
5145ffd83dbSDimitry Andric         return createStringError(std::errc::invalid_argument,
5155ffd83dbSDimitry Andric                                   "failed to extract address[%i]", I);
5165ffd83dbSDimitry Andric 
5175ffd83dbSDimitry Andric     auto FI = Gsym->getFunctionInfo(*FuncAddr);
5185ffd83dbSDimitry Andric     if (!FI)
5195ffd83dbSDimitry Andric       return createStringError(std::errc::invalid_argument,
5205ffd83dbSDimitry Andric                             "failed to extract function info for address 0x%"
5215ffd83dbSDimitry Andric                             PRIu64, *FuncAddr);
5225ffd83dbSDimitry Andric 
5235ffd83dbSDimitry Andric     for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) {
5245ffd83dbSDimitry Andric       const object::SectionedAddress SectAddr{
5255ffd83dbSDimitry Andric           Addr, object::SectionedAddress::UndefSection};
5265ffd83dbSDimitry Andric       auto LR = Gsym->lookup(Addr);
5275ffd83dbSDimitry Andric       if (!LR)
5285ffd83dbSDimitry Andric         return LR.takeError();
5295ffd83dbSDimitry Andric 
5305ffd83dbSDimitry Andric       auto DwarfInlineInfos =
5315ffd83dbSDimitry Andric           DICtx.getInliningInfoForAddress(SectAddr, DLIS);
5325ffd83dbSDimitry Andric       uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames();
5335ffd83dbSDimitry Andric       if (NumDwarfInlineInfos == 0) {
5345ffd83dbSDimitry Andric         DwarfInlineInfos.addFrame(
5355ffd83dbSDimitry Andric             DICtx.getLineInfoForAddress(SectAddr, DLIS));
5365ffd83dbSDimitry Andric       }
5375ffd83dbSDimitry Andric 
5385ffd83dbSDimitry Andric       // Check for 1 entry that has no file and line info
5395ffd83dbSDimitry Andric       if (NumDwarfInlineInfos == 1 &&
5405ffd83dbSDimitry Andric           DwarfInlineInfos.getFrame(0).FileName == "<invalid>") {
5415ffd83dbSDimitry Andric         DwarfInlineInfos = DIInliningInfo();
5425ffd83dbSDimitry Andric         NumDwarfInlineInfos = 0;
5435ffd83dbSDimitry Andric       }
5445ffd83dbSDimitry Andric       if (NumDwarfInlineInfos > 0 &&
5455ffd83dbSDimitry Andric           NumDwarfInlineInfos != LR->Locations.size()) {
5465ffd83dbSDimitry Andric         Log << "error: address " << HEX64(Addr) << " has "
5475ffd83dbSDimitry Andric             << NumDwarfInlineInfos << " DWARF inline frames and GSYM has "
5485ffd83dbSDimitry Andric             << LR->Locations.size() << "\n";
5495ffd83dbSDimitry Andric         Log << "    " << NumDwarfInlineInfos << " DWARF frames:\n";
5505ffd83dbSDimitry Andric         for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) {
5510eae32dcSDimitry Andric           const auto &dii = DwarfInlineInfos.getFrame(Idx);
5525ffd83dbSDimitry Andric           Log << "    [" << Idx << "]: " << dii.FunctionName << " @ "
5535ffd83dbSDimitry Andric               << dii.FileName << ':' << dii.Line << '\n';
5545ffd83dbSDimitry Andric         }
5555ffd83dbSDimitry Andric         Log << "    " << LR->Locations.size() << " GSYM frames:\n";
5565ffd83dbSDimitry Andric         for (size_t Idx = 0, count = LR->Locations.size();
5575ffd83dbSDimitry Andric               Idx < count; ++Idx) {
5585ffd83dbSDimitry Andric           const auto &gii = LR->Locations[Idx];
5595ffd83dbSDimitry Andric           Log << "    [" << Idx << "]: " << gii.Name << " @ " << gii.Dir
5605ffd83dbSDimitry Andric               << '/' << gii.Base << ':' << gii.Line << '\n';
5615ffd83dbSDimitry Andric         }
5625ffd83dbSDimitry Andric         DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS);
5635ffd83dbSDimitry Andric         Gsym->dump(Log, *FI);
5645ffd83dbSDimitry Andric         continue;
5655ffd83dbSDimitry Andric       }
5665ffd83dbSDimitry Andric 
5675ffd83dbSDimitry Andric       for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
5685ffd83dbSDimitry Andric             ++Idx) {
5695ffd83dbSDimitry Andric         const auto &gii = LR->Locations[Idx];
5705ffd83dbSDimitry Andric         if (Idx < NumDwarfInlineInfos) {
5710eae32dcSDimitry Andric           const auto &dii = DwarfInlineInfos.getFrame(Idx);
5725ffd83dbSDimitry Andric           gsymFilename = LR->getSourceFile(Idx);
5735ffd83dbSDimitry Andric           // Verify function name
5745ffd83dbSDimitry Andric           if (dii.FunctionName.find(gii.Name.str()) != 0)
5755ffd83dbSDimitry Andric             Log << "error: address " << HEX64(Addr) << " DWARF function \""
5765ffd83dbSDimitry Andric                 << dii.FunctionName.c_str()
5775ffd83dbSDimitry Andric                 << "\" doesn't match GSYM function \"" << gii.Name << "\"\n";
5785ffd83dbSDimitry Andric           // Verify source file path
5795ffd83dbSDimitry Andric           if (dii.FileName != gsymFilename)
5805ffd83dbSDimitry Andric             Log << "error: address " << HEX64(Addr) << " DWARF path \""
5815ffd83dbSDimitry Andric                 << dii.FileName.c_str() << "\" doesn't match GSYM path \""
5825ffd83dbSDimitry Andric                 << gsymFilename.c_str() << "\"\n";
5835ffd83dbSDimitry Andric           // Verify source file line
5845ffd83dbSDimitry Andric           if (dii.Line != gii.Line)
5855ffd83dbSDimitry Andric             Log << "error: address " << HEX64(Addr) << " DWARF line "
5865ffd83dbSDimitry Andric                 << dii.Line << " != GSYM line " << gii.Line << "\n";
5875ffd83dbSDimitry Andric         }
5885ffd83dbSDimitry Andric       }
5895ffd83dbSDimitry Andric     }
5905ffd83dbSDimitry Andric   }
5915ffd83dbSDimitry Andric   return Error::success();
5925ffd83dbSDimitry Andric }
593