1*5ffd83dbSDimitry Andric //===- DwarfTransformer.cpp -----------------------------------------------===// 2*5ffd83dbSDimitry Andric // 3*5ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*5ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*5ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*5ffd83dbSDimitry Andric // 7*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 8*5ffd83dbSDimitry Andric 9*5ffd83dbSDimitry Andric #include <thread> 10*5ffd83dbSDimitry Andric #include <unordered_set> 11*5ffd83dbSDimitry Andric 12*5ffd83dbSDimitry Andric #include "llvm/DebugInfo/DIContext.h" 13*5ffd83dbSDimitry Andric #include "llvm/DebugInfo/DWARF/DWARFContext.h" 14*5ffd83dbSDimitry Andric #include "llvm/Support/Error.h" 15*5ffd83dbSDimitry Andric #include "llvm/Support/ThreadPool.h" 16*5ffd83dbSDimitry Andric #include "llvm/Support/raw_ostream.h" 17*5ffd83dbSDimitry Andric 18*5ffd83dbSDimitry Andric #include "llvm/DebugInfo/GSYM/DwarfTransformer.h" 19*5ffd83dbSDimitry Andric #include "llvm/DebugInfo/GSYM/FunctionInfo.h" 20*5ffd83dbSDimitry Andric #include "llvm/DebugInfo/GSYM/GsymCreator.h" 21*5ffd83dbSDimitry Andric #include "llvm/DebugInfo/GSYM/GsymReader.h" 22*5ffd83dbSDimitry Andric #include "llvm/DebugInfo/GSYM/InlineInfo.h" 23*5ffd83dbSDimitry Andric 24*5ffd83dbSDimitry Andric using namespace llvm; 25*5ffd83dbSDimitry Andric using namespace gsym; 26*5ffd83dbSDimitry Andric 27*5ffd83dbSDimitry Andric struct llvm::gsym::CUInfo { 28*5ffd83dbSDimitry Andric const DWARFDebugLine::LineTable *LineTable; 29*5ffd83dbSDimitry Andric const char *CompDir; 30*5ffd83dbSDimitry Andric std::vector<uint32_t> FileCache; 31*5ffd83dbSDimitry Andric uint64_t Language = 0; 32*5ffd83dbSDimitry Andric uint8_t AddrSize = 0; 33*5ffd83dbSDimitry Andric 34*5ffd83dbSDimitry Andric CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) { 35*5ffd83dbSDimitry Andric LineTable = DICtx.getLineTableForUnit(CU); 36*5ffd83dbSDimitry Andric CompDir = CU->getCompilationDir(); 37*5ffd83dbSDimitry Andric FileCache.clear(); 38*5ffd83dbSDimitry Andric if (LineTable) 39*5ffd83dbSDimitry Andric FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX); 40*5ffd83dbSDimitry Andric DWARFDie Die = CU->getUnitDIE(); 41*5ffd83dbSDimitry Andric Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0); 42*5ffd83dbSDimitry Andric AddrSize = CU->getAddressByteSize(); 43*5ffd83dbSDimitry Andric } 44*5ffd83dbSDimitry Andric 45*5ffd83dbSDimitry Andric /// Return true if Addr is the highest address for a given compile unit. The 46*5ffd83dbSDimitry Andric /// highest address is encoded as -1, of all ones in the address. These high 47*5ffd83dbSDimitry Andric /// addresses are used by some linkers to indicate that a function has been 48*5ffd83dbSDimitry Andric /// dead stripped or didn't end up in the linked executable. 49*5ffd83dbSDimitry Andric bool isHighestAddress(uint64_t Addr) const { 50*5ffd83dbSDimitry Andric if (AddrSize == 4) 51*5ffd83dbSDimitry Andric return Addr == UINT32_MAX; 52*5ffd83dbSDimitry Andric else if (AddrSize == 8) 53*5ffd83dbSDimitry Andric return Addr == UINT64_MAX; 54*5ffd83dbSDimitry Andric return false; 55*5ffd83dbSDimitry Andric } 56*5ffd83dbSDimitry Andric 57*5ffd83dbSDimitry Andric /// Convert a DWARF compile unit file index into a GSYM global file index. 58*5ffd83dbSDimitry Andric /// 59*5ffd83dbSDimitry Andric /// Each compile unit in DWARF has its own file table in the line table 60*5ffd83dbSDimitry Andric /// prologue. GSYM has a single large file table that applies to all files 61*5ffd83dbSDimitry Andric /// from all of the info in a GSYM file. This function converts between the 62*5ffd83dbSDimitry Andric /// two and caches and DWARF CU file index that has already been converted so 63*5ffd83dbSDimitry Andric /// the first client that asks for a compile unit file index will end up 64*5ffd83dbSDimitry Andric /// doing the conversion, and subsequent clients will get the cached GSYM 65*5ffd83dbSDimitry Andric /// index. 66*5ffd83dbSDimitry Andric uint32_t DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx) { 67*5ffd83dbSDimitry Andric if (!LineTable) 68*5ffd83dbSDimitry Andric return 0; 69*5ffd83dbSDimitry Andric assert(DwarfFileIdx < FileCache.size()); 70*5ffd83dbSDimitry Andric uint32_t &GsymFileIdx = FileCache[DwarfFileIdx]; 71*5ffd83dbSDimitry Andric if (GsymFileIdx != UINT32_MAX) 72*5ffd83dbSDimitry Andric return GsymFileIdx; 73*5ffd83dbSDimitry Andric std::string File; 74*5ffd83dbSDimitry Andric if (LineTable->getFileNameByIndex( 75*5ffd83dbSDimitry Andric DwarfFileIdx, CompDir, 76*5ffd83dbSDimitry Andric DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) 77*5ffd83dbSDimitry Andric GsymFileIdx = Gsym.insertFile(File); 78*5ffd83dbSDimitry Andric else 79*5ffd83dbSDimitry Andric GsymFileIdx = 0; 80*5ffd83dbSDimitry Andric return GsymFileIdx; 81*5ffd83dbSDimitry Andric } 82*5ffd83dbSDimitry Andric }; 83*5ffd83dbSDimitry Andric 84*5ffd83dbSDimitry Andric 85*5ffd83dbSDimitry Andric static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) { 86*5ffd83dbSDimitry Andric if (DWARFDie SpecDie = 87*5ffd83dbSDimitry Andric Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) { 88*5ffd83dbSDimitry Andric if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie)) 89*5ffd83dbSDimitry Andric return SpecParent; 90*5ffd83dbSDimitry Andric } 91*5ffd83dbSDimitry Andric if (DWARFDie AbstDie = 92*5ffd83dbSDimitry Andric Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) { 93*5ffd83dbSDimitry Andric if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie)) 94*5ffd83dbSDimitry Andric return AbstParent; 95*5ffd83dbSDimitry Andric } 96*5ffd83dbSDimitry Andric 97*5ffd83dbSDimitry Andric // We never want to follow parent for inlined subroutine - that would 98*5ffd83dbSDimitry Andric // give us information about where the function is inlined, not what 99*5ffd83dbSDimitry Andric // function is inlined 100*5ffd83dbSDimitry Andric if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine) 101*5ffd83dbSDimitry Andric return DWARFDie(); 102*5ffd83dbSDimitry Andric 103*5ffd83dbSDimitry Andric DWARFDie ParentDie = Die.getParent(); 104*5ffd83dbSDimitry Andric if (!ParentDie) 105*5ffd83dbSDimitry Andric return DWARFDie(); 106*5ffd83dbSDimitry Andric 107*5ffd83dbSDimitry Andric switch (ParentDie.getTag()) { 108*5ffd83dbSDimitry Andric case dwarf::DW_TAG_namespace: 109*5ffd83dbSDimitry Andric case dwarf::DW_TAG_structure_type: 110*5ffd83dbSDimitry Andric case dwarf::DW_TAG_union_type: 111*5ffd83dbSDimitry Andric case dwarf::DW_TAG_class_type: 112*5ffd83dbSDimitry Andric case dwarf::DW_TAG_subprogram: 113*5ffd83dbSDimitry Andric return ParentDie; // Found parent decl context DIE 114*5ffd83dbSDimitry Andric case dwarf::DW_TAG_lexical_block: 115*5ffd83dbSDimitry Andric return GetParentDeclContextDIE(ParentDie); 116*5ffd83dbSDimitry Andric default: 117*5ffd83dbSDimitry Andric break; 118*5ffd83dbSDimitry Andric } 119*5ffd83dbSDimitry Andric 120*5ffd83dbSDimitry Andric return DWARFDie(); 121*5ffd83dbSDimitry Andric } 122*5ffd83dbSDimitry Andric 123*5ffd83dbSDimitry Andric /// Get the GsymCreator string table offset for the qualified name for the 124*5ffd83dbSDimitry Andric /// DIE passed in. This function will avoid making copies of any strings in 125*5ffd83dbSDimitry Andric /// the GsymCreator when possible. We don't need to copy a string when the 126*5ffd83dbSDimitry Andric /// string comes from our .debug_str section or is an inlined string in the 127*5ffd83dbSDimitry Andric /// .debug_info. If we create a qualified name string in this function by 128*5ffd83dbSDimitry Andric /// combining multiple strings in the DWARF string table or info, we will make 129*5ffd83dbSDimitry Andric /// a copy of the string when we add it to the string table. 130*5ffd83dbSDimitry Andric static Optional<uint32_t> getQualifiedNameIndex(DWARFDie &Die, 131*5ffd83dbSDimitry Andric uint64_t Language, 132*5ffd83dbSDimitry Andric GsymCreator &Gsym) { 133*5ffd83dbSDimitry Andric // If the dwarf has mangled name, use mangled name 134*5ffd83dbSDimitry Andric if (auto LinkageName = 135*5ffd83dbSDimitry Andric dwarf::toString(Die.findRecursively({dwarf::DW_AT_MIPS_linkage_name, 136*5ffd83dbSDimitry Andric dwarf::DW_AT_linkage_name}), 137*5ffd83dbSDimitry Andric nullptr)) 138*5ffd83dbSDimitry Andric return Gsym.insertString(LinkageName, /* Copy */ false); 139*5ffd83dbSDimitry Andric 140*5ffd83dbSDimitry Andric StringRef ShortName(Die.getName(DINameKind::ShortName)); 141*5ffd83dbSDimitry Andric if (ShortName.empty()) 142*5ffd83dbSDimitry Andric return llvm::None; 143*5ffd83dbSDimitry Andric 144*5ffd83dbSDimitry Andric // For C++ and ObjC, prepend names of all parent declaration contexts 145*5ffd83dbSDimitry Andric if (!(Language == dwarf::DW_LANG_C_plus_plus || 146*5ffd83dbSDimitry Andric Language == dwarf::DW_LANG_C_plus_plus_03 || 147*5ffd83dbSDimitry Andric Language == dwarf::DW_LANG_C_plus_plus_11 || 148*5ffd83dbSDimitry Andric Language == dwarf::DW_LANG_C_plus_plus_14 || 149*5ffd83dbSDimitry Andric Language == dwarf::DW_LANG_ObjC_plus_plus || 150*5ffd83dbSDimitry Andric // This should not be needed for C, but we see C++ code marked as C 151*5ffd83dbSDimitry Andric // in some binaries. This should hurt, so let's do it for C as well 152*5ffd83dbSDimitry Andric Language == dwarf::DW_LANG_C)) 153*5ffd83dbSDimitry Andric return Gsym.insertString(ShortName, /* Copy */ false); 154*5ffd83dbSDimitry Andric 155*5ffd83dbSDimitry Andric // Some GCC optimizations create functions with names ending with .isra.<num> 156*5ffd83dbSDimitry Andric // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name 157*5ffd83dbSDimitry Andric // If it looks like it could be the case, don't add any prefix 158*5ffd83dbSDimitry Andric if (ShortName.startswith("_Z") && 159*5ffd83dbSDimitry Andric (ShortName.contains(".isra.") || ShortName.contains(".part."))) 160*5ffd83dbSDimitry Andric return Gsym.insertString(ShortName, /* Copy */ false); 161*5ffd83dbSDimitry Andric 162*5ffd83dbSDimitry Andric DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die); 163*5ffd83dbSDimitry Andric if (ParentDeclCtxDie) { 164*5ffd83dbSDimitry Andric std::string Name = ShortName.str(); 165*5ffd83dbSDimitry Andric while (ParentDeclCtxDie) { 166*5ffd83dbSDimitry Andric StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName)); 167*5ffd83dbSDimitry Andric if (!ParentName.empty()) { 168*5ffd83dbSDimitry Andric // "lambda" names are wrapped in < >. Replace with { } 169*5ffd83dbSDimitry Andric // to be consistent with demangled names and not to confuse with 170*5ffd83dbSDimitry Andric // templates 171*5ffd83dbSDimitry Andric if (ParentName.front() == '<' && ParentName.back() == '>') 172*5ffd83dbSDimitry Andric Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" + 173*5ffd83dbSDimitry Andric "::" + Name; 174*5ffd83dbSDimitry Andric else 175*5ffd83dbSDimitry Andric Name = ParentName.str() + "::" + Name; 176*5ffd83dbSDimitry Andric } 177*5ffd83dbSDimitry Andric ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie); 178*5ffd83dbSDimitry Andric } 179*5ffd83dbSDimitry Andric // Copy the name since we created a new name in a std::string. 180*5ffd83dbSDimitry Andric return Gsym.insertString(Name, /* Copy */ true); 181*5ffd83dbSDimitry Andric } 182*5ffd83dbSDimitry Andric // Don't copy the name since it exists in the DWARF object file. 183*5ffd83dbSDimitry Andric return Gsym.insertString(ShortName, /* Copy */ false); 184*5ffd83dbSDimitry Andric } 185*5ffd83dbSDimitry Andric 186*5ffd83dbSDimitry Andric static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) { 187*5ffd83dbSDimitry Andric bool CheckChildren = true; 188*5ffd83dbSDimitry Andric switch (Die.getTag()) { 189*5ffd83dbSDimitry Andric case dwarf::DW_TAG_subprogram: 190*5ffd83dbSDimitry Andric // Don't look into functions within functions. 191*5ffd83dbSDimitry Andric CheckChildren = Depth == 0; 192*5ffd83dbSDimitry Andric break; 193*5ffd83dbSDimitry Andric case dwarf::DW_TAG_inlined_subroutine: 194*5ffd83dbSDimitry Andric return true; 195*5ffd83dbSDimitry Andric default: 196*5ffd83dbSDimitry Andric break; 197*5ffd83dbSDimitry Andric } 198*5ffd83dbSDimitry Andric if (!CheckChildren) 199*5ffd83dbSDimitry Andric return false; 200*5ffd83dbSDimitry Andric for (DWARFDie ChildDie : Die.children()) { 201*5ffd83dbSDimitry Andric if (hasInlineInfo(ChildDie, Depth + 1)) 202*5ffd83dbSDimitry Andric return true; 203*5ffd83dbSDimitry Andric } 204*5ffd83dbSDimitry Andric return false; 205*5ffd83dbSDimitry Andric } 206*5ffd83dbSDimitry Andric 207*5ffd83dbSDimitry Andric static void parseInlineInfo(GsymCreator &Gsym, CUInfo &CUI, DWARFDie Die, 208*5ffd83dbSDimitry Andric uint32_t Depth, FunctionInfo &FI, 209*5ffd83dbSDimitry Andric InlineInfo &parent) { 210*5ffd83dbSDimitry Andric if (!hasInlineInfo(Die, Depth)) 211*5ffd83dbSDimitry Andric return; 212*5ffd83dbSDimitry Andric 213*5ffd83dbSDimitry Andric dwarf::Tag Tag = Die.getTag(); 214*5ffd83dbSDimitry Andric if (Tag == dwarf::DW_TAG_inlined_subroutine) { 215*5ffd83dbSDimitry Andric // create new InlineInfo and append to parent.children 216*5ffd83dbSDimitry Andric InlineInfo II; 217*5ffd83dbSDimitry Andric DWARFAddressRange FuncRange = 218*5ffd83dbSDimitry Andric DWARFAddressRange(FI.startAddress(), FI.endAddress()); 219*5ffd83dbSDimitry Andric Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); 220*5ffd83dbSDimitry Andric if (RangesOrError) { 221*5ffd83dbSDimitry Andric for (const DWARFAddressRange &Range : RangesOrError.get()) { 222*5ffd83dbSDimitry Andric // Check that the inlined function is within the range of the function 223*5ffd83dbSDimitry Andric // info, it might not be in case of split functions 224*5ffd83dbSDimitry Andric if (FuncRange.LowPC <= Range.LowPC && Range.HighPC <= FuncRange.HighPC) 225*5ffd83dbSDimitry Andric II.Ranges.insert(AddressRange(Range.LowPC, Range.HighPC)); 226*5ffd83dbSDimitry Andric } 227*5ffd83dbSDimitry Andric } 228*5ffd83dbSDimitry Andric if (II.Ranges.empty()) 229*5ffd83dbSDimitry Andric return; 230*5ffd83dbSDimitry Andric 231*5ffd83dbSDimitry Andric if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym)) 232*5ffd83dbSDimitry Andric II.Name = *NameIndex; 233*5ffd83dbSDimitry Andric II.CallFile = CUI.DWARFToGSYMFileIndex( 234*5ffd83dbSDimitry Andric Gsym, dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_file), 0)); 235*5ffd83dbSDimitry Andric II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0); 236*5ffd83dbSDimitry Andric // parse all children and append to parent 237*5ffd83dbSDimitry Andric for (DWARFDie ChildDie : Die.children()) 238*5ffd83dbSDimitry Andric parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, II); 239*5ffd83dbSDimitry Andric parent.Children.emplace_back(std::move(II)); 240*5ffd83dbSDimitry Andric return; 241*5ffd83dbSDimitry Andric } 242*5ffd83dbSDimitry Andric if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) { 243*5ffd83dbSDimitry Andric // skip this Die and just recurse down 244*5ffd83dbSDimitry Andric for (DWARFDie ChildDie : Die.children()) 245*5ffd83dbSDimitry Andric parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, parent); 246*5ffd83dbSDimitry Andric } 247*5ffd83dbSDimitry Andric } 248*5ffd83dbSDimitry Andric 249*5ffd83dbSDimitry Andric static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI, 250*5ffd83dbSDimitry Andric DWARFDie Die, GsymCreator &Gsym, 251*5ffd83dbSDimitry Andric FunctionInfo &FI) { 252*5ffd83dbSDimitry Andric std::vector<uint32_t> RowVector; 253*5ffd83dbSDimitry Andric const uint64_t StartAddress = FI.startAddress(); 254*5ffd83dbSDimitry Andric const uint64_t EndAddress = FI.endAddress(); 255*5ffd83dbSDimitry Andric const uint64_t RangeSize = EndAddress - StartAddress; 256*5ffd83dbSDimitry Andric const object::SectionedAddress SecAddress{ 257*5ffd83dbSDimitry Andric StartAddress, object::SectionedAddress::UndefSection}; 258*5ffd83dbSDimitry Andric 259*5ffd83dbSDimitry Andric 260*5ffd83dbSDimitry Andric if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) { 261*5ffd83dbSDimitry Andric // If we have a DW_TAG_subprogram but no line entries, fall back to using 262*5ffd83dbSDimitry Andric // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes. 263*5ffd83dbSDimitry Andric if (auto FileIdx = 264*5ffd83dbSDimitry Andric dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_file}))) { 265*5ffd83dbSDimitry Andric if (auto Line = 266*5ffd83dbSDimitry Andric dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) { 267*5ffd83dbSDimitry Andric LineEntry LE(StartAddress, CUI.DWARFToGSYMFileIndex(Gsym, *FileIdx), 268*5ffd83dbSDimitry Andric *Line); 269*5ffd83dbSDimitry Andric FI.OptLineTable = LineTable(); 270*5ffd83dbSDimitry Andric FI.OptLineTable->push(LE); 271*5ffd83dbSDimitry Andric // LE.Addr = EndAddress; 272*5ffd83dbSDimitry Andric // FI.OptLineTable->push(LE); 273*5ffd83dbSDimitry Andric } 274*5ffd83dbSDimitry Andric } 275*5ffd83dbSDimitry Andric return; 276*5ffd83dbSDimitry Andric } 277*5ffd83dbSDimitry Andric 278*5ffd83dbSDimitry Andric FI.OptLineTable = LineTable(); 279*5ffd83dbSDimitry Andric DWARFDebugLine::Row PrevRow; 280*5ffd83dbSDimitry Andric for (uint32_t RowIndex : RowVector) { 281*5ffd83dbSDimitry Andric // Take file number and line/column from the row. 282*5ffd83dbSDimitry Andric const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex]; 283*5ffd83dbSDimitry Andric const uint32_t FileIdx = CUI.DWARFToGSYMFileIndex(Gsym, Row.File); 284*5ffd83dbSDimitry Andric uint64_t RowAddress = Row.Address.Address; 285*5ffd83dbSDimitry Andric // Watch out for a RowAddress that is in the middle of a line table entry 286*5ffd83dbSDimitry Andric // in the DWARF. If we pass an address in between two line table entries 287*5ffd83dbSDimitry Andric // we will get a RowIndex for the previous valid line table row which won't 288*5ffd83dbSDimitry Andric // be contained in our function. This is usually a bug in the DWARF due to 289*5ffd83dbSDimitry Andric // linker problems or LTO or other DWARF re-linking so it is worth emitting 290*5ffd83dbSDimitry Andric // an error, but not worth stopping the creation of the GSYM. 291*5ffd83dbSDimitry Andric if (!FI.Range.contains(RowAddress)) { 292*5ffd83dbSDimitry Andric if (RowAddress < FI.Range.Start) { 293*5ffd83dbSDimitry Andric Log << "error: DIE has a start address whose LowPC is between the " 294*5ffd83dbSDimitry Andric "line table Row[" << RowIndex << "] with address " 295*5ffd83dbSDimitry Andric << HEX64(RowAddress) << " and the next one.\n"; 296*5ffd83dbSDimitry Andric Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); 297*5ffd83dbSDimitry Andric RowAddress = FI.Range.Start; 298*5ffd83dbSDimitry Andric } else { 299*5ffd83dbSDimitry Andric continue; 300*5ffd83dbSDimitry Andric } 301*5ffd83dbSDimitry Andric } 302*5ffd83dbSDimitry Andric 303*5ffd83dbSDimitry Andric LineEntry LE(RowAddress, FileIdx, Row.Line); 304*5ffd83dbSDimitry Andric if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) { 305*5ffd83dbSDimitry Andric // We have seen full duplicate line tables for functions in some 306*5ffd83dbSDimitry Andric // DWARF files. Watch for those here by checking the the last 307*5ffd83dbSDimitry Andric // row was the function's end address (HighPC) and that the 308*5ffd83dbSDimitry Andric // current line table entry's address is the same as the first 309*5ffd83dbSDimitry Andric // line entry we already have in our "function_info.Lines". If 310*5ffd83dbSDimitry Andric // so break out after printing a warning. 311*5ffd83dbSDimitry Andric auto FirstLE = FI.OptLineTable->first(); 312*5ffd83dbSDimitry Andric if (FirstLE && *FirstLE == LE) { 313*5ffd83dbSDimitry Andric Log << "warning: duplicate line table detected for DIE:\n"; 314*5ffd83dbSDimitry Andric Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); 315*5ffd83dbSDimitry Andric } else { 316*5ffd83dbSDimitry Andric // Print out (ignore if os == nulls as this is expensive) 317*5ffd83dbSDimitry Andric Log << "error: line table has addresses that do not " 318*5ffd83dbSDimitry Andric << "monotonically increase:\n"; 319*5ffd83dbSDimitry Andric for (uint32_t RowIndex2 : RowVector) { 320*5ffd83dbSDimitry Andric CUI.LineTable->Rows[RowIndex2].dump(Log); 321*5ffd83dbSDimitry Andric } 322*5ffd83dbSDimitry Andric Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); 323*5ffd83dbSDimitry Andric } 324*5ffd83dbSDimitry Andric break; 325*5ffd83dbSDimitry Andric } 326*5ffd83dbSDimitry Andric 327*5ffd83dbSDimitry Andric // Skip multiple line entries for the same file and line. 328*5ffd83dbSDimitry Andric auto LastLE = FI.OptLineTable->last(); 329*5ffd83dbSDimitry Andric if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line) 330*5ffd83dbSDimitry Andric continue; 331*5ffd83dbSDimitry Andric // Only push a row if it isn't an end sequence. End sequence markers are 332*5ffd83dbSDimitry Andric // included for the last address in a function or the last contiguous 333*5ffd83dbSDimitry Andric // address in a sequence. 334*5ffd83dbSDimitry Andric if (Row.EndSequence) { 335*5ffd83dbSDimitry Andric // End sequence means that the next line entry could have a lower address 336*5ffd83dbSDimitry Andric // that the previous entries. So we clear the previous row so we don't 337*5ffd83dbSDimitry Andric // trigger the line table error about address that do not monotonically 338*5ffd83dbSDimitry Andric // increase. 339*5ffd83dbSDimitry Andric PrevRow = DWARFDebugLine::Row(); 340*5ffd83dbSDimitry Andric } else { 341*5ffd83dbSDimitry Andric FI.OptLineTable->push(LE); 342*5ffd83dbSDimitry Andric PrevRow = Row; 343*5ffd83dbSDimitry Andric } 344*5ffd83dbSDimitry Andric } 345*5ffd83dbSDimitry Andric // If not line table rows were added, clear the line table so we don't encode 346*5ffd83dbSDimitry Andric // on in the GSYM file. 347*5ffd83dbSDimitry Andric if (FI.OptLineTable->empty()) 348*5ffd83dbSDimitry Andric FI.OptLineTable = llvm::None; 349*5ffd83dbSDimitry Andric } 350*5ffd83dbSDimitry Andric 351*5ffd83dbSDimitry Andric void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) { 352*5ffd83dbSDimitry Andric switch (Die.getTag()) { 353*5ffd83dbSDimitry Andric case dwarf::DW_TAG_subprogram: { 354*5ffd83dbSDimitry Andric Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); 355*5ffd83dbSDimitry Andric if (!RangesOrError) { 356*5ffd83dbSDimitry Andric consumeError(RangesOrError.takeError()); 357*5ffd83dbSDimitry Andric break; 358*5ffd83dbSDimitry Andric } 359*5ffd83dbSDimitry Andric const DWARFAddressRangesVector &Ranges = RangesOrError.get(); 360*5ffd83dbSDimitry Andric if (Ranges.empty()) 361*5ffd83dbSDimitry Andric break; 362*5ffd83dbSDimitry Andric auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym); 363*5ffd83dbSDimitry Andric if (!NameIndex) { 364*5ffd83dbSDimitry Andric OS << "error: function at " << HEX64(Die.getOffset()) 365*5ffd83dbSDimitry Andric << " has no name\n "; 366*5ffd83dbSDimitry Andric Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); 367*5ffd83dbSDimitry Andric break; 368*5ffd83dbSDimitry Andric } 369*5ffd83dbSDimitry Andric 370*5ffd83dbSDimitry Andric // Create a function_info for each range 371*5ffd83dbSDimitry Andric for (const DWARFAddressRange &Range : Ranges) { 372*5ffd83dbSDimitry Andric // The low PC must be less than the high PC. Many linkers don't remove 373*5ffd83dbSDimitry Andric // DWARF for functions that don't get linked into the final executable. 374*5ffd83dbSDimitry Andric // If both the high and low pc have relocations, linkers will often set 375*5ffd83dbSDimitry Andric // the address values for both to the same value to indicate the function 376*5ffd83dbSDimitry Andric // has been remove. Other linkers have been known to set the one or both 377*5ffd83dbSDimitry Andric // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8 378*5ffd83dbSDimitry Andric // byte addresses to indicate the function isn't valid. The check below 379*5ffd83dbSDimitry Andric // tries to watch for these cases and abort if it runs into them. 380*5ffd83dbSDimitry Andric if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC)) 381*5ffd83dbSDimitry Andric break; 382*5ffd83dbSDimitry Andric 383*5ffd83dbSDimitry Andric // Many linkers can't remove DWARF and might set the LowPC to zero. Since 384*5ffd83dbSDimitry Andric // high PC can be an offset from the low PC in more recent DWARF versions 385*5ffd83dbSDimitry Andric // we need to watch for a zero'ed low pc which we do using 386*5ffd83dbSDimitry Andric // ValidTextRanges below. 387*5ffd83dbSDimitry Andric if (!Gsym.IsValidTextAddress(Range.LowPC)) { 388*5ffd83dbSDimitry Andric // We expect zero and -1 to be invalid addresses in DWARF depending 389*5ffd83dbSDimitry Andric // on the linker of the DWARF. This indicates a function was stripped 390*5ffd83dbSDimitry Andric // and the debug info wasn't able to be stripped from the DWARF. If 391*5ffd83dbSDimitry Andric // the LowPC isn't zero or -1, then we should emit an error. 392*5ffd83dbSDimitry Andric if (Range.LowPC != 0) { 393*5ffd83dbSDimitry Andric // Unexpected invalid address, emit an error 394*5ffd83dbSDimitry Andric Log << "warning: DIE has an address range whose start address is " 395*5ffd83dbSDimitry Andric "not in any executable sections (" << 396*5ffd83dbSDimitry Andric *Gsym.GetValidTextRanges() << ") and will not be processed:\n"; 397*5ffd83dbSDimitry Andric Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); 398*5ffd83dbSDimitry Andric } 399*5ffd83dbSDimitry Andric break; 400*5ffd83dbSDimitry Andric } 401*5ffd83dbSDimitry Andric 402*5ffd83dbSDimitry Andric FunctionInfo FI; 403*5ffd83dbSDimitry Andric FI.setStartAddress(Range.LowPC); 404*5ffd83dbSDimitry Andric FI.setEndAddress(Range.HighPC); 405*5ffd83dbSDimitry Andric FI.Name = *NameIndex; 406*5ffd83dbSDimitry Andric if (CUI.LineTable) { 407*5ffd83dbSDimitry Andric convertFunctionLineTable(OS, CUI, Die, Gsym, FI); 408*5ffd83dbSDimitry Andric } 409*5ffd83dbSDimitry Andric if (hasInlineInfo(Die, 0)) { 410*5ffd83dbSDimitry Andric FI.Inline = InlineInfo(); 411*5ffd83dbSDimitry Andric FI.Inline->Name = *NameIndex; 412*5ffd83dbSDimitry Andric FI.Inline->Ranges.insert(FI.Range); 413*5ffd83dbSDimitry Andric parseInlineInfo(Gsym, CUI, Die, 0, FI, *FI.Inline); 414*5ffd83dbSDimitry Andric } 415*5ffd83dbSDimitry Andric Gsym.addFunctionInfo(std::move(FI)); 416*5ffd83dbSDimitry Andric } 417*5ffd83dbSDimitry Andric } break; 418*5ffd83dbSDimitry Andric default: 419*5ffd83dbSDimitry Andric break; 420*5ffd83dbSDimitry Andric } 421*5ffd83dbSDimitry Andric for (DWARFDie ChildDie : Die.children()) 422*5ffd83dbSDimitry Andric handleDie(OS, CUI, ChildDie); 423*5ffd83dbSDimitry Andric } 424*5ffd83dbSDimitry Andric 425*5ffd83dbSDimitry Andric Error DwarfTransformer::convert(uint32_t NumThreads) { 426*5ffd83dbSDimitry Andric size_t NumBefore = Gsym.getNumFunctionInfos(); 427*5ffd83dbSDimitry Andric if (NumThreads == 1) { 428*5ffd83dbSDimitry Andric // Parse all DWARF data from this thread, use the same string/file table 429*5ffd83dbSDimitry Andric // for everything 430*5ffd83dbSDimitry Andric for (const auto &CU : DICtx.compile_units()) { 431*5ffd83dbSDimitry Andric DWARFDie Die = CU->getUnitDIE(false); 432*5ffd83dbSDimitry Andric CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get())); 433*5ffd83dbSDimitry Andric handleDie(Log, CUI, Die); 434*5ffd83dbSDimitry Andric } 435*5ffd83dbSDimitry Andric } else { 436*5ffd83dbSDimitry Andric // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up 437*5ffd83dbSDimitry Andric // front before we start accessing any DIEs since there might be 438*5ffd83dbSDimitry Andric // cross compile unit references in the DWARF. If we don't do this we can 439*5ffd83dbSDimitry Andric // end up crashing. 440*5ffd83dbSDimitry Andric 441*5ffd83dbSDimitry Andric // We need to call getAbbreviations sequentially first so that getUnitDIE() 442*5ffd83dbSDimitry Andric // only works with its local data. 443*5ffd83dbSDimitry Andric for (const auto &CU : DICtx.compile_units()) 444*5ffd83dbSDimitry Andric CU->getAbbreviations(); 445*5ffd83dbSDimitry Andric 446*5ffd83dbSDimitry Andric // Now parse all DIEs in case we have cross compile unit references in a 447*5ffd83dbSDimitry Andric // thread pool. 448*5ffd83dbSDimitry Andric ThreadPool pool(hardware_concurrency(NumThreads)); 449*5ffd83dbSDimitry Andric for (const auto &CU : DICtx.compile_units()) 450*5ffd83dbSDimitry Andric pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); }); 451*5ffd83dbSDimitry Andric pool.wait(); 452*5ffd83dbSDimitry Andric 453*5ffd83dbSDimitry Andric // Now convert all DWARF to GSYM in a thread pool. 454*5ffd83dbSDimitry Andric std::mutex LogMutex; 455*5ffd83dbSDimitry Andric for (const auto &CU : DICtx.compile_units()) { 456*5ffd83dbSDimitry Andric DWARFDie Die = CU->getUnitDIE(false /*CUDieOnly*/); 457*5ffd83dbSDimitry Andric if (Die) { 458*5ffd83dbSDimitry Andric CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get())); 459*5ffd83dbSDimitry Andric pool.async([this, CUI, &LogMutex, Die]() mutable { 460*5ffd83dbSDimitry Andric std::string ThreadLogStorage; 461*5ffd83dbSDimitry Andric raw_string_ostream ThreadOS(ThreadLogStorage); 462*5ffd83dbSDimitry Andric handleDie(ThreadOS, CUI, Die); 463*5ffd83dbSDimitry Andric ThreadOS.flush(); 464*5ffd83dbSDimitry Andric if (!ThreadLogStorage.empty()) { 465*5ffd83dbSDimitry Andric // Print ThreadLogStorage lines into an actual stream under a lock 466*5ffd83dbSDimitry Andric std::lock_guard<std::mutex> guard(LogMutex); 467*5ffd83dbSDimitry Andric Log << ThreadLogStorage; 468*5ffd83dbSDimitry Andric } 469*5ffd83dbSDimitry Andric }); 470*5ffd83dbSDimitry Andric } 471*5ffd83dbSDimitry Andric } 472*5ffd83dbSDimitry Andric pool.wait(); 473*5ffd83dbSDimitry Andric } 474*5ffd83dbSDimitry Andric size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore; 475*5ffd83dbSDimitry Andric Log << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n"; 476*5ffd83dbSDimitry Andric return Error::success(); 477*5ffd83dbSDimitry Andric } 478*5ffd83dbSDimitry Andric 479*5ffd83dbSDimitry Andric llvm::Error DwarfTransformer::verify(StringRef GsymPath) { 480*5ffd83dbSDimitry Andric Log << "Verifying GSYM file \"" << GsymPath << "\":\n"; 481*5ffd83dbSDimitry Andric 482*5ffd83dbSDimitry Andric auto Gsym = GsymReader::openFile(GsymPath); 483*5ffd83dbSDimitry Andric if (!Gsym) 484*5ffd83dbSDimitry Andric return Gsym.takeError(); 485*5ffd83dbSDimitry Andric 486*5ffd83dbSDimitry Andric auto NumAddrs = Gsym->getNumAddresses(); 487*5ffd83dbSDimitry Andric DILineInfoSpecifier DLIS( 488*5ffd83dbSDimitry Andric DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, 489*5ffd83dbSDimitry Andric DILineInfoSpecifier::FunctionNameKind::LinkageName); 490*5ffd83dbSDimitry Andric std::string gsymFilename; 491*5ffd83dbSDimitry Andric for (uint32_t I = 0; I < NumAddrs; ++I) { 492*5ffd83dbSDimitry Andric auto FuncAddr = Gsym->getAddress(I); 493*5ffd83dbSDimitry Andric if (!FuncAddr) 494*5ffd83dbSDimitry Andric return createStringError(std::errc::invalid_argument, 495*5ffd83dbSDimitry Andric "failed to extract address[%i]", I); 496*5ffd83dbSDimitry Andric 497*5ffd83dbSDimitry Andric auto FI = Gsym->getFunctionInfo(*FuncAddr); 498*5ffd83dbSDimitry Andric if (!FI) 499*5ffd83dbSDimitry Andric return createStringError(std::errc::invalid_argument, 500*5ffd83dbSDimitry Andric "failed to extract function info for address 0x%" 501*5ffd83dbSDimitry Andric PRIu64, *FuncAddr); 502*5ffd83dbSDimitry Andric 503*5ffd83dbSDimitry Andric for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) { 504*5ffd83dbSDimitry Andric const object::SectionedAddress SectAddr{ 505*5ffd83dbSDimitry Andric Addr, object::SectionedAddress::UndefSection}; 506*5ffd83dbSDimitry Andric auto LR = Gsym->lookup(Addr); 507*5ffd83dbSDimitry Andric if (!LR) 508*5ffd83dbSDimitry Andric return LR.takeError(); 509*5ffd83dbSDimitry Andric 510*5ffd83dbSDimitry Andric auto DwarfInlineInfos = 511*5ffd83dbSDimitry Andric DICtx.getInliningInfoForAddress(SectAddr, DLIS); 512*5ffd83dbSDimitry Andric uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames(); 513*5ffd83dbSDimitry Andric if (NumDwarfInlineInfos == 0) { 514*5ffd83dbSDimitry Andric DwarfInlineInfos.addFrame( 515*5ffd83dbSDimitry Andric DICtx.getLineInfoForAddress(SectAddr, DLIS)); 516*5ffd83dbSDimitry Andric } 517*5ffd83dbSDimitry Andric 518*5ffd83dbSDimitry Andric // Check for 1 entry that has no file and line info 519*5ffd83dbSDimitry Andric if (NumDwarfInlineInfos == 1 && 520*5ffd83dbSDimitry Andric DwarfInlineInfos.getFrame(0).FileName == "<invalid>") { 521*5ffd83dbSDimitry Andric DwarfInlineInfos = DIInliningInfo(); 522*5ffd83dbSDimitry Andric NumDwarfInlineInfos = 0; 523*5ffd83dbSDimitry Andric } 524*5ffd83dbSDimitry Andric if (NumDwarfInlineInfos > 0 && 525*5ffd83dbSDimitry Andric NumDwarfInlineInfos != LR->Locations.size()) { 526*5ffd83dbSDimitry Andric Log << "error: address " << HEX64(Addr) << " has " 527*5ffd83dbSDimitry Andric << NumDwarfInlineInfos << " DWARF inline frames and GSYM has " 528*5ffd83dbSDimitry Andric << LR->Locations.size() << "\n"; 529*5ffd83dbSDimitry Andric Log << " " << NumDwarfInlineInfos << " DWARF frames:\n"; 530*5ffd83dbSDimitry Andric for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) { 531*5ffd83dbSDimitry Andric const auto dii = DwarfInlineInfos.getFrame(Idx); 532*5ffd83dbSDimitry Andric Log << " [" << Idx << "]: " << dii.FunctionName << " @ " 533*5ffd83dbSDimitry Andric << dii.FileName << ':' << dii.Line << '\n'; 534*5ffd83dbSDimitry Andric } 535*5ffd83dbSDimitry Andric Log << " " << LR->Locations.size() << " GSYM frames:\n"; 536*5ffd83dbSDimitry Andric for (size_t Idx = 0, count = LR->Locations.size(); 537*5ffd83dbSDimitry Andric Idx < count; ++Idx) { 538*5ffd83dbSDimitry Andric const auto &gii = LR->Locations[Idx]; 539*5ffd83dbSDimitry Andric Log << " [" << Idx << "]: " << gii.Name << " @ " << gii.Dir 540*5ffd83dbSDimitry Andric << '/' << gii.Base << ':' << gii.Line << '\n'; 541*5ffd83dbSDimitry Andric } 542*5ffd83dbSDimitry Andric DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS); 543*5ffd83dbSDimitry Andric Gsym->dump(Log, *FI); 544*5ffd83dbSDimitry Andric continue; 545*5ffd83dbSDimitry Andric } 546*5ffd83dbSDimitry Andric 547*5ffd83dbSDimitry Andric for (size_t Idx = 0, count = LR->Locations.size(); Idx < count; 548*5ffd83dbSDimitry Andric ++Idx) { 549*5ffd83dbSDimitry Andric const auto &gii = LR->Locations[Idx]; 550*5ffd83dbSDimitry Andric if (Idx < NumDwarfInlineInfos) { 551*5ffd83dbSDimitry Andric const auto dii = DwarfInlineInfos.getFrame(Idx); 552*5ffd83dbSDimitry Andric gsymFilename = LR->getSourceFile(Idx); 553*5ffd83dbSDimitry Andric // Verify function name 554*5ffd83dbSDimitry Andric if (dii.FunctionName.find(gii.Name.str()) != 0) 555*5ffd83dbSDimitry Andric Log << "error: address " << HEX64(Addr) << " DWARF function \"" 556*5ffd83dbSDimitry Andric << dii.FunctionName.c_str() 557*5ffd83dbSDimitry Andric << "\" doesn't match GSYM function \"" << gii.Name << "\"\n"; 558*5ffd83dbSDimitry Andric // Verify source file path 559*5ffd83dbSDimitry Andric if (dii.FileName != gsymFilename) 560*5ffd83dbSDimitry Andric Log << "error: address " << HEX64(Addr) << " DWARF path \"" 561*5ffd83dbSDimitry Andric << dii.FileName.c_str() << "\" doesn't match GSYM path \"" 562*5ffd83dbSDimitry Andric << gsymFilename.c_str() << "\"\n"; 563*5ffd83dbSDimitry Andric // Verify source file line 564*5ffd83dbSDimitry Andric if (dii.Line != gii.Line) 565*5ffd83dbSDimitry Andric Log << "error: address " << HEX64(Addr) << " DWARF line " 566*5ffd83dbSDimitry Andric << dii.Line << " != GSYM line " << gii.Line << "\n"; 567*5ffd83dbSDimitry Andric } 568*5ffd83dbSDimitry Andric } 569*5ffd83dbSDimitry Andric } 570*5ffd83dbSDimitry Andric } 571*5ffd83dbSDimitry Andric return Error::success(); 572*5ffd83dbSDimitry Andric } 573