1*8bcb0991SDimitry Andric //===- GsymCreator.cpp ----------------------------------------------------===// 2*8bcb0991SDimitry Andric // 3*8bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*8bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*8bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*8bcb0991SDimitry Andric //===----------------------------------------------------------------------===// 7*8bcb0991SDimitry Andric 8*8bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/GsymCreator.h" 9*8bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/FileWriter.h" 10*8bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/Header.h" 11*8bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/LineTable.h" 12*8bcb0991SDimitry Andric #include "llvm/MC/StringTableBuilder.h" 13*8bcb0991SDimitry Andric #include "llvm/Support/raw_ostream.h" 14*8bcb0991SDimitry Andric 15*8bcb0991SDimitry Andric #include <algorithm> 16*8bcb0991SDimitry Andric #include <cassert> 17*8bcb0991SDimitry Andric #include <functional> 18*8bcb0991SDimitry Andric #include <vector> 19*8bcb0991SDimitry Andric 20*8bcb0991SDimitry Andric using namespace llvm; 21*8bcb0991SDimitry Andric using namespace gsym; 22*8bcb0991SDimitry Andric 23*8bcb0991SDimitry Andric 24*8bcb0991SDimitry Andric GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) { 25*8bcb0991SDimitry Andric insertFile(StringRef()); 26*8bcb0991SDimitry Andric } 27*8bcb0991SDimitry Andric 28*8bcb0991SDimitry Andric uint32_t GsymCreator::insertFile(StringRef Path, 29*8bcb0991SDimitry Andric llvm::sys::path::Style Style) { 30*8bcb0991SDimitry Andric llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); 31*8bcb0991SDimitry Andric llvm::StringRef filename = llvm::sys::path::filename(Path, Style); 32*8bcb0991SDimitry Andric FileEntry FE(insertString(directory), insertString(filename)); 33*8bcb0991SDimitry Andric 34*8bcb0991SDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 35*8bcb0991SDimitry Andric const auto NextIndex = Files.size(); 36*8bcb0991SDimitry Andric // Find FE in hash map and insert if not present. 37*8bcb0991SDimitry Andric auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); 38*8bcb0991SDimitry Andric if (R.second) 39*8bcb0991SDimitry Andric Files.emplace_back(FE); 40*8bcb0991SDimitry Andric return R.first->second; 41*8bcb0991SDimitry Andric } 42*8bcb0991SDimitry Andric 43*8bcb0991SDimitry Andric llvm::Error GsymCreator::save(StringRef Path, 44*8bcb0991SDimitry Andric llvm::support::endianness ByteOrder) const { 45*8bcb0991SDimitry Andric std::error_code EC; 46*8bcb0991SDimitry Andric raw_fd_ostream OutStrm(Path, EC); 47*8bcb0991SDimitry Andric if (EC) 48*8bcb0991SDimitry Andric return llvm::errorCodeToError(EC); 49*8bcb0991SDimitry Andric FileWriter O(OutStrm, ByteOrder); 50*8bcb0991SDimitry Andric return encode(O); 51*8bcb0991SDimitry Andric } 52*8bcb0991SDimitry Andric 53*8bcb0991SDimitry Andric llvm::Error GsymCreator::encode(FileWriter &O) const { 54*8bcb0991SDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 55*8bcb0991SDimitry Andric if (Funcs.empty()) 56*8bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 57*8bcb0991SDimitry Andric "no functions to encode"); 58*8bcb0991SDimitry Andric if (!Finalized) 59*8bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 60*8bcb0991SDimitry Andric "GsymCreator wasn't finalized prior to encoding"); 61*8bcb0991SDimitry Andric 62*8bcb0991SDimitry Andric if (Funcs.size() > UINT32_MAX) 63*8bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 64*8bcb0991SDimitry Andric "too many FunctionInfos"); 65*8bcb0991SDimitry Andric const uint64_t MinAddr = Funcs.front().startAddress(); 66*8bcb0991SDimitry Andric const uint64_t MaxAddr = Funcs.back().startAddress(); 67*8bcb0991SDimitry Andric const uint64_t AddrDelta = MaxAddr - MinAddr; 68*8bcb0991SDimitry Andric Header Hdr; 69*8bcb0991SDimitry Andric Hdr.Magic = GSYM_MAGIC; 70*8bcb0991SDimitry Andric Hdr.Version = GSYM_VERSION; 71*8bcb0991SDimitry Andric Hdr.AddrOffSize = 0; 72*8bcb0991SDimitry Andric Hdr.UUIDSize = static_cast<uint8_t>(UUID.size()); 73*8bcb0991SDimitry Andric Hdr.BaseAddress = MinAddr; 74*8bcb0991SDimitry Andric Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size()); 75*8bcb0991SDimitry Andric Hdr.StrtabOffset = 0; // We will fix this up later. 76*8bcb0991SDimitry Andric Hdr.StrtabOffset = 0; // We will fix this up later. 77*8bcb0991SDimitry Andric memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); 78*8bcb0991SDimitry Andric if (UUID.size() > sizeof(Hdr.UUID)) 79*8bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 80*8bcb0991SDimitry Andric "invalid UUID size %u", (uint32_t)UUID.size()); 81*8bcb0991SDimitry Andric // Set the address offset size correctly in the GSYM header. 82*8bcb0991SDimitry Andric if (AddrDelta <= UINT8_MAX) 83*8bcb0991SDimitry Andric Hdr.AddrOffSize = 1; 84*8bcb0991SDimitry Andric else if (AddrDelta <= UINT16_MAX) 85*8bcb0991SDimitry Andric Hdr.AddrOffSize = 2; 86*8bcb0991SDimitry Andric else if (AddrDelta <= UINT32_MAX) 87*8bcb0991SDimitry Andric Hdr.AddrOffSize = 4; 88*8bcb0991SDimitry Andric else 89*8bcb0991SDimitry Andric Hdr.AddrOffSize = 8; 90*8bcb0991SDimitry Andric // Copy the UUID value if we have one. 91*8bcb0991SDimitry Andric if (UUID.size() > 0) 92*8bcb0991SDimitry Andric memcpy(Hdr.UUID, UUID.data(), UUID.size()); 93*8bcb0991SDimitry Andric // Write out the header. 94*8bcb0991SDimitry Andric llvm::Error Err = Hdr.encode(O); 95*8bcb0991SDimitry Andric if (Err) 96*8bcb0991SDimitry Andric return Err; 97*8bcb0991SDimitry Andric 98*8bcb0991SDimitry Andric // Write out the address offsets. 99*8bcb0991SDimitry Andric O.alignTo(Hdr.AddrOffSize); 100*8bcb0991SDimitry Andric for (const auto &FuncInfo : Funcs) { 101*8bcb0991SDimitry Andric uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; 102*8bcb0991SDimitry Andric switch(Hdr.AddrOffSize) { 103*8bcb0991SDimitry Andric case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break; 104*8bcb0991SDimitry Andric case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break; 105*8bcb0991SDimitry Andric case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break; 106*8bcb0991SDimitry Andric case 8: O.writeU64(AddrOffset); break; 107*8bcb0991SDimitry Andric } 108*8bcb0991SDimitry Andric } 109*8bcb0991SDimitry Andric 110*8bcb0991SDimitry Andric // Write out all zeros for the AddrInfoOffsets. 111*8bcb0991SDimitry Andric O.alignTo(4); 112*8bcb0991SDimitry Andric const off_t AddrInfoOffsetsOffset = O.tell(); 113*8bcb0991SDimitry Andric for (size_t i = 0, n = Funcs.size(); i < n; ++i) 114*8bcb0991SDimitry Andric O.writeU32(0); 115*8bcb0991SDimitry Andric 116*8bcb0991SDimitry Andric // Write out the file table 117*8bcb0991SDimitry Andric O.alignTo(4); 118*8bcb0991SDimitry Andric assert(!Files.empty()); 119*8bcb0991SDimitry Andric assert(Files[0].Dir == 0); 120*8bcb0991SDimitry Andric assert(Files[0].Base == 0); 121*8bcb0991SDimitry Andric size_t NumFiles = Files.size(); 122*8bcb0991SDimitry Andric if (NumFiles > UINT32_MAX) 123*8bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 124*8bcb0991SDimitry Andric "too many files"); 125*8bcb0991SDimitry Andric O.writeU32(static_cast<uint32_t>(NumFiles)); 126*8bcb0991SDimitry Andric for (auto File: Files) { 127*8bcb0991SDimitry Andric O.writeU32(File.Dir); 128*8bcb0991SDimitry Andric O.writeU32(File.Base); 129*8bcb0991SDimitry Andric } 130*8bcb0991SDimitry Andric 131*8bcb0991SDimitry Andric // Write out the sting table. 132*8bcb0991SDimitry Andric const off_t StrtabOffset = O.tell(); 133*8bcb0991SDimitry Andric StrTab.write(O.get_stream()); 134*8bcb0991SDimitry Andric const off_t StrtabSize = O.tell() - StrtabOffset; 135*8bcb0991SDimitry Andric std::vector<uint32_t> AddrInfoOffsets; 136*8bcb0991SDimitry Andric 137*8bcb0991SDimitry Andric // Write out the address infos for each function info. 138*8bcb0991SDimitry Andric for (const auto &FuncInfo : Funcs) { 139*8bcb0991SDimitry Andric if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) 140*8bcb0991SDimitry Andric AddrInfoOffsets.push_back(OffsetOrErr.get()); 141*8bcb0991SDimitry Andric else 142*8bcb0991SDimitry Andric return OffsetOrErr.takeError(); 143*8bcb0991SDimitry Andric } 144*8bcb0991SDimitry Andric // Fixup the string table offset and size in the header 145*8bcb0991SDimitry Andric O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); 146*8bcb0991SDimitry Andric O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); 147*8bcb0991SDimitry Andric 148*8bcb0991SDimitry Andric // Fixup all address info offsets 149*8bcb0991SDimitry Andric uint64_t Offset = 0; 150*8bcb0991SDimitry Andric for (auto AddrInfoOffset: AddrInfoOffsets) { 151*8bcb0991SDimitry Andric O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); 152*8bcb0991SDimitry Andric Offset += 4; 153*8bcb0991SDimitry Andric } 154*8bcb0991SDimitry Andric return ErrorSuccess(); 155*8bcb0991SDimitry Andric } 156*8bcb0991SDimitry Andric 157*8bcb0991SDimitry Andric llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { 158*8bcb0991SDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 159*8bcb0991SDimitry Andric if (Finalized) 160*8bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 161*8bcb0991SDimitry Andric "already finalized"); 162*8bcb0991SDimitry Andric Finalized = true; 163*8bcb0991SDimitry Andric 164*8bcb0991SDimitry Andric // Sort function infos so we can emit sorted functions. 165*8bcb0991SDimitry Andric llvm::sort(Funcs.begin(), Funcs.end()); 166*8bcb0991SDimitry Andric 167*8bcb0991SDimitry Andric // Don't let the string table indexes change by finalizing in order. 168*8bcb0991SDimitry Andric StrTab.finalizeInOrder(); 169*8bcb0991SDimitry Andric 170*8bcb0991SDimitry Andric // Remove duplicates function infos that have both entries from debug info 171*8bcb0991SDimitry Andric // (DWARF or Breakpad) and entries from the SymbolTable. 172*8bcb0991SDimitry Andric // 173*8bcb0991SDimitry Andric // Also handle overlapping function. Usually there shouldn't be any, but they 174*8bcb0991SDimitry Andric // can and do happen in some rare cases. 175*8bcb0991SDimitry Andric // 176*8bcb0991SDimitry Andric // (a) (b) (c) 177*8bcb0991SDimitry Andric // ^ ^ ^ ^ 178*8bcb0991SDimitry Andric // |X |Y |X ^ |X 179*8bcb0991SDimitry Andric // | | | |Y | ^ 180*8bcb0991SDimitry Andric // | | | v v |Y 181*8bcb0991SDimitry Andric // v v v v 182*8bcb0991SDimitry Andric // 183*8bcb0991SDimitry Andric // In (a) and (b), Y is ignored and X will be reported for the full range. 184*8bcb0991SDimitry Andric // In (c), both functions will be included in the result and lookups for an 185*8bcb0991SDimitry Andric // address in the intersection will return Y because of binary search. 186*8bcb0991SDimitry Andric // 187*8bcb0991SDimitry Andric // Note that in case of (b), we cannot include Y in the result because then 188*8bcb0991SDimitry Andric // we wouldn't find any function for range (end of Y, end of X) 189*8bcb0991SDimitry Andric // with binary search 190*8bcb0991SDimitry Andric auto NumBefore = Funcs.size(); 191*8bcb0991SDimitry Andric auto Curr = Funcs.begin(); 192*8bcb0991SDimitry Andric auto Prev = Funcs.end(); 193*8bcb0991SDimitry Andric while (Curr != Funcs.end()) { 194*8bcb0991SDimitry Andric // Can't check for overlaps or same address ranges if we don't have a 195*8bcb0991SDimitry Andric // previous entry 196*8bcb0991SDimitry Andric if (Prev != Funcs.end()) { 197*8bcb0991SDimitry Andric if (Prev->Range.intersects(Curr->Range)) { 198*8bcb0991SDimitry Andric // Overlapping address ranges. 199*8bcb0991SDimitry Andric if (Prev->Range == Curr->Range) { 200*8bcb0991SDimitry Andric // Same address range. Check if one is from debug info and the other 201*8bcb0991SDimitry Andric // is from a symbol table. If so, then keep the one with debug info. 202*8bcb0991SDimitry Andric // Our sorting guarantees that entries with matching address ranges 203*8bcb0991SDimitry Andric // that have debug info are last in the sort. 204*8bcb0991SDimitry Andric if (*Prev == *Curr) { 205*8bcb0991SDimitry Andric // FunctionInfo entries match exactly (range, lines, inlines) 206*8bcb0991SDimitry Andric OS << "warning: duplicate function info entries, removing " 207*8bcb0991SDimitry Andric "duplicate:\n" 208*8bcb0991SDimitry Andric << *Curr << '\n'; 209*8bcb0991SDimitry Andric Curr = Funcs.erase(Prev); 210*8bcb0991SDimitry Andric } else { 211*8bcb0991SDimitry Andric if (!Prev->hasRichInfo() && Curr->hasRichInfo()) { 212*8bcb0991SDimitry Andric // Same address range, one with no debug info (symbol) and the 213*8bcb0991SDimitry Andric // next with debug info. Keep the latter. 214*8bcb0991SDimitry Andric Curr = Funcs.erase(Prev); 215*8bcb0991SDimitry Andric } else { 216*8bcb0991SDimitry Andric OS << "warning: same address range contains different debug " 217*8bcb0991SDimitry Andric << "info. Removing:\n" 218*8bcb0991SDimitry Andric << *Prev << "\nIn favor of this one:\n" 219*8bcb0991SDimitry Andric << *Curr << "\n"; 220*8bcb0991SDimitry Andric Curr = Funcs.erase(Prev); 221*8bcb0991SDimitry Andric } 222*8bcb0991SDimitry Andric } 223*8bcb0991SDimitry Andric } else { 224*8bcb0991SDimitry Andric // print warnings about overlaps 225*8bcb0991SDimitry Andric OS << "warning: function ranges overlap:\n" 226*8bcb0991SDimitry Andric << *Prev << "\n" 227*8bcb0991SDimitry Andric << *Curr << "\n"; 228*8bcb0991SDimitry Andric } 229*8bcb0991SDimitry Andric } else if (Prev->Range.size() == 0 && 230*8bcb0991SDimitry Andric Curr->Range.contains(Prev->Range.Start)) { 231*8bcb0991SDimitry Andric OS << "warning: removing symbol:\n" 232*8bcb0991SDimitry Andric << *Prev << "\nKeeping:\n" 233*8bcb0991SDimitry Andric << *Curr << "\n"; 234*8bcb0991SDimitry Andric Curr = Funcs.erase(Prev); 235*8bcb0991SDimitry Andric } 236*8bcb0991SDimitry Andric } 237*8bcb0991SDimitry Andric if (Curr == Funcs.end()) 238*8bcb0991SDimitry Andric break; 239*8bcb0991SDimitry Andric Prev = Curr++; 240*8bcb0991SDimitry Andric } 241*8bcb0991SDimitry Andric 242*8bcb0991SDimitry Andric OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " 243*8bcb0991SDimitry Andric << Funcs.size() << " total\n"; 244*8bcb0991SDimitry Andric return Error::success(); 245*8bcb0991SDimitry Andric } 246*8bcb0991SDimitry Andric 247*8bcb0991SDimitry Andric uint32_t GsymCreator::insertString(StringRef S) { 248*8bcb0991SDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 249*8bcb0991SDimitry Andric if (S.empty()) 250*8bcb0991SDimitry Andric return 0; 251*8bcb0991SDimitry Andric return StrTab.add(S); 252*8bcb0991SDimitry Andric } 253*8bcb0991SDimitry Andric 254*8bcb0991SDimitry Andric void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { 255*8bcb0991SDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 256*8bcb0991SDimitry Andric Funcs.emplace_back(FI); 257*8bcb0991SDimitry Andric } 258*8bcb0991SDimitry Andric 259*8bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo( 260*8bcb0991SDimitry Andric std::function<bool(FunctionInfo &)> const &Callback) { 261*8bcb0991SDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 262*8bcb0991SDimitry Andric for (auto &FI : Funcs) { 263*8bcb0991SDimitry Andric if (!Callback(FI)) 264*8bcb0991SDimitry Andric break; 265*8bcb0991SDimitry Andric } 266*8bcb0991SDimitry Andric } 267*8bcb0991SDimitry Andric 268*8bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo( 269*8bcb0991SDimitry Andric std::function<bool(const FunctionInfo &)> const &Callback) const { 270*8bcb0991SDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 271*8bcb0991SDimitry Andric for (const auto &FI : Funcs) { 272*8bcb0991SDimitry Andric if (!Callback(FI)) 273*8bcb0991SDimitry Andric break; 274*8bcb0991SDimitry Andric } 275*8bcb0991SDimitry Andric } 276