18bcb0991SDimitry Andric //===- GsymCreator.cpp ----------------------------------------------------===// 28bcb0991SDimitry Andric // 38bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 48bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 58bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 68bcb0991SDimitry Andric //===----------------------------------------------------------------------===// 78bcb0991SDimitry Andric 88bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/GsymCreator.h" 98bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/FileWriter.h" 108bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/Header.h" 118bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/LineTable.h" 128bcb0991SDimitry Andric #include "llvm/MC/StringTableBuilder.h" 138bcb0991SDimitry Andric #include "llvm/Support/raw_ostream.h" 148bcb0991SDimitry Andric 158bcb0991SDimitry Andric #include <algorithm> 168bcb0991SDimitry Andric #include <cassert> 178bcb0991SDimitry Andric #include <functional> 188bcb0991SDimitry Andric #include <vector> 198bcb0991SDimitry Andric 208bcb0991SDimitry Andric using namespace llvm; 218bcb0991SDimitry Andric using namespace gsym; 228bcb0991SDimitry Andric 23fe6060f1SDimitry Andric GsymCreator::GsymCreator(bool Quiet) 24fe6060f1SDimitry Andric : StrTab(StringTableBuilder::ELF), Quiet(Quiet) { 258bcb0991SDimitry Andric insertFile(StringRef()); 268bcb0991SDimitry Andric } 278bcb0991SDimitry Andric 28fe6060f1SDimitry Andric uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) { 298bcb0991SDimitry Andric llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); 308bcb0991SDimitry Andric llvm::StringRef filename = llvm::sys::path::filename(Path, Style); 315ffd83dbSDimitry Andric // We must insert the strings first, then call the FileEntry constructor. 325ffd83dbSDimitry Andric // If we inline the insertString() function call into the constructor, the 335ffd83dbSDimitry Andric // call order is undefined due to parameter lists not having any ordering 345ffd83dbSDimitry Andric // requirements. 355ffd83dbSDimitry Andric const uint32_t Dir = insertString(directory); 365ffd83dbSDimitry Andric const uint32_t Base = insertString(filename); 37*06c3fb27SDimitry Andric return insertFileEntry(FileEntry(Dir, Base)); 38*06c3fb27SDimitry Andric } 398bcb0991SDimitry Andric 40*06c3fb27SDimitry Andric uint32_t GsymCreator::insertFileEntry(FileEntry FE) { 41fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 428bcb0991SDimitry Andric const auto NextIndex = Files.size(); 438bcb0991SDimitry Andric // Find FE in hash map and insert if not present. 448bcb0991SDimitry Andric auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); 458bcb0991SDimitry Andric if (R.second) 468bcb0991SDimitry Andric Files.emplace_back(FE); 478bcb0991SDimitry Andric return R.first->second; 488bcb0991SDimitry Andric } 498bcb0991SDimitry Andric 50*06c3fb27SDimitry Andric uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) { 51*06c3fb27SDimitry Andric // File index zero is reserved for a FileEntry with no directory and no 52*06c3fb27SDimitry Andric // filename. Any other file and we need to copy the strings for the directory 53*06c3fb27SDimitry Andric // and filename. 54*06c3fb27SDimitry Andric if (FileIdx == 0) 55*06c3fb27SDimitry Andric return 0; 56*06c3fb27SDimitry Andric const FileEntry SrcFE = SrcGC.Files[FileIdx]; 57*06c3fb27SDimitry Andric // Copy the strings for the file and then add the newly converted file entry. 58*06c3fb27SDimitry Andric uint32_t Dir = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second); 59*06c3fb27SDimitry Andric uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second); 60*06c3fb27SDimitry Andric FileEntry DstFE(Dir, Base); 61*06c3fb27SDimitry Andric return insertFileEntry(DstFE); 62*06c3fb27SDimitry Andric } 63*06c3fb27SDimitry Andric 64*06c3fb27SDimitry Andric 658bcb0991SDimitry Andric llvm::Error GsymCreator::save(StringRef Path, 66*06c3fb27SDimitry Andric llvm::support::endianness ByteOrder, 67*06c3fb27SDimitry Andric std::optional<uint64_t> SegmentSize) const { 68*06c3fb27SDimitry Andric if (SegmentSize) 69*06c3fb27SDimitry Andric return saveSegments(Path, ByteOrder, *SegmentSize); 708bcb0991SDimitry Andric std::error_code EC; 718bcb0991SDimitry Andric raw_fd_ostream OutStrm(Path, EC); 728bcb0991SDimitry Andric if (EC) 738bcb0991SDimitry Andric return llvm::errorCodeToError(EC); 748bcb0991SDimitry Andric FileWriter O(OutStrm, ByteOrder); 758bcb0991SDimitry Andric return encode(O); 768bcb0991SDimitry Andric } 778bcb0991SDimitry Andric 788bcb0991SDimitry Andric llvm::Error GsymCreator::encode(FileWriter &O) const { 79fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 808bcb0991SDimitry Andric if (Funcs.empty()) 818bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 828bcb0991SDimitry Andric "no functions to encode"); 838bcb0991SDimitry Andric if (!Finalized) 848bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 858bcb0991SDimitry Andric "GsymCreator wasn't finalized prior to encoding"); 868bcb0991SDimitry Andric 878bcb0991SDimitry Andric if (Funcs.size() > UINT32_MAX) 888bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 898bcb0991SDimitry Andric "too many FunctionInfos"); 905ffd83dbSDimitry Andric 91*06c3fb27SDimitry Andric std::optional<uint64_t> BaseAddress = getBaseAddress(); 92*06c3fb27SDimitry Andric // Base address should be valid if we have any functions. 93*06c3fb27SDimitry Andric if (!BaseAddress) 94*06c3fb27SDimitry Andric return createStringError(std::errc::invalid_argument, 95*06c3fb27SDimitry Andric "invalid base address"); 968bcb0991SDimitry Andric Header Hdr; 978bcb0991SDimitry Andric Hdr.Magic = GSYM_MAGIC; 988bcb0991SDimitry Andric Hdr.Version = GSYM_VERSION; 99*06c3fb27SDimitry Andric Hdr.AddrOffSize = getAddressOffsetSize(); 1008bcb0991SDimitry Andric Hdr.UUIDSize = static_cast<uint8_t>(UUID.size()); 101*06c3fb27SDimitry Andric Hdr.BaseAddress = *BaseAddress; 1028bcb0991SDimitry Andric Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size()); 1038bcb0991SDimitry Andric Hdr.StrtabOffset = 0; // We will fix this up later. 1045ffd83dbSDimitry Andric Hdr.StrtabSize = 0; // We will fix this up later. 1058bcb0991SDimitry Andric memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); 1068bcb0991SDimitry Andric if (UUID.size() > sizeof(Hdr.UUID)) 1078bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 1088bcb0991SDimitry Andric "invalid UUID size %u", (uint32_t)UUID.size()); 1098bcb0991SDimitry Andric // Copy the UUID value if we have one. 1108bcb0991SDimitry Andric if (UUID.size() > 0) 1118bcb0991SDimitry Andric memcpy(Hdr.UUID, UUID.data(), UUID.size()); 1128bcb0991SDimitry Andric // Write out the header. 1138bcb0991SDimitry Andric llvm::Error Err = Hdr.encode(O); 1148bcb0991SDimitry Andric if (Err) 1158bcb0991SDimitry Andric return Err; 1168bcb0991SDimitry Andric 117*06c3fb27SDimitry Andric const uint64_t MaxAddressOffset = getMaxAddressOffset(); 1188bcb0991SDimitry Andric // Write out the address offsets. 1198bcb0991SDimitry Andric O.alignTo(Hdr.AddrOffSize); 1208bcb0991SDimitry Andric for (const auto &FuncInfo : Funcs) { 1218bcb0991SDimitry Andric uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; 122*06c3fb27SDimitry Andric // Make sure we calculated the address offsets byte size correctly by 123*06c3fb27SDimitry Andric // verifying the current address offset is within ranges. We have seen bugs 124*06c3fb27SDimitry Andric // introduced when the code changes that can cause problems here so it is 125*06c3fb27SDimitry Andric // good to catch this during testing. 126*06c3fb27SDimitry Andric assert(AddrOffset <= MaxAddressOffset); 127*06c3fb27SDimitry Andric (void)MaxAddressOffset; 1288bcb0991SDimitry Andric switch (Hdr.AddrOffSize) { 129fe6060f1SDimitry Andric case 1: 130fe6060f1SDimitry Andric O.writeU8(static_cast<uint8_t>(AddrOffset)); 131fe6060f1SDimitry Andric break; 132fe6060f1SDimitry Andric case 2: 133fe6060f1SDimitry Andric O.writeU16(static_cast<uint16_t>(AddrOffset)); 134fe6060f1SDimitry Andric break; 135fe6060f1SDimitry Andric case 4: 136fe6060f1SDimitry Andric O.writeU32(static_cast<uint32_t>(AddrOffset)); 137fe6060f1SDimitry Andric break; 138fe6060f1SDimitry Andric case 8: 139fe6060f1SDimitry Andric O.writeU64(AddrOffset); 140fe6060f1SDimitry Andric break; 1418bcb0991SDimitry Andric } 1428bcb0991SDimitry Andric } 1438bcb0991SDimitry Andric 1448bcb0991SDimitry Andric // Write out all zeros for the AddrInfoOffsets. 1458bcb0991SDimitry Andric O.alignTo(4); 1468bcb0991SDimitry Andric const off_t AddrInfoOffsetsOffset = O.tell(); 1478bcb0991SDimitry Andric for (size_t i = 0, n = Funcs.size(); i < n; ++i) 1488bcb0991SDimitry Andric O.writeU32(0); 1498bcb0991SDimitry Andric 1508bcb0991SDimitry Andric // Write out the file table 1518bcb0991SDimitry Andric O.alignTo(4); 1528bcb0991SDimitry Andric assert(!Files.empty()); 1538bcb0991SDimitry Andric assert(Files[0].Dir == 0); 1548bcb0991SDimitry Andric assert(Files[0].Base == 0); 1558bcb0991SDimitry Andric size_t NumFiles = Files.size(); 1568bcb0991SDimitry Andric if (NumFiles > UINT32_MAX) 157fe6060f1SDimitry Andric return createStringError(std::errc::invalid_argument, "too many files"); 1588bcb0991SDimitry Andric O.writeU32(static_cast<uint32_t>(NumFiles)); 1598bcb0991SDimitry Andric for (auto File : Files) { 1608bcb0991SDimitry Andric O.writeU32(File.Dir); 1618bcb0991SDimitry Andric O.writeU32(File.Base); 1628bcb0991SDimitry Andric } 1638bcb0991SDimitry Andric 164*06c3fb27SDimitry Andric // Write out the string table. 1658bcb0991SDimitry Andric const off_t StrtabOffset = O.tell(); 1668bcb0991SDimitry Andric StrTab.write(O.get_stream()); 1678bcb0991SDimitry Andric const off_t StrtabSize = O.tell() - StrtabOffset; 1688bcb0991SDimitry Andric std::vector<uint32_t> AddrInfoOffsets; 1698bcb0991SDimitry Andric 1708bcb0991SDimitry Andric // Write out the address infos for each function info. 1718bcb0991SDimitry Andric for (const auto &FuncInfo : Funcs) { 1728bcb0991SDimitry Andric if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) 1738bcb0991SDimitry Andric AddrInfoOffsets.push_back(OffsetOrErr.get()); 1748bcb0991SDimitry Andric else 1758bcb0991SDimitry Andric return OffsetOrErr.takeError(); 1768bcb0991SDimitry Andric } 1778bcb0991SDimitry Andric // Fixup the string table offset and size in the header 1788bcb0991SDimitry Andric O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); 1798bcb0991SDimitry Andric O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); 1808bcb0991SDimitry Andric 1818bcb0991SDimitry Andric // Fixup all address info offsets 1828bcb0991SDimitry Andric uint64_t Offset = 0; 1838bcb0991SDimitry Andric for (auto AddrInfoOffset : AddrInfoOffsets) { 1848bcb0991SDimitry Andric O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); 1858bcb0991SDimitry Andric Offset += 4; 1868bcb0991SDimitry Andric } 1878bcb0991SDimitry Andric return ErrorSuccess(); 1888bcb0991SDimitry Andric } 1898bcb0991SDimitry Andric 190fe6060f1SDimitry Andric // Similar to std::remove_if, but the predicate is binary and it is passed both 191fe6060f1SDimitry Andric // the previous and the current element. 192fe6060f1SDimitry Andric template <class ForwardIt, class BinaryPredicate> 193fe6060f1SDimitry Andric static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt, 194fe6060f1SDimitry Andric BinaryPredicate Pred) { 195fe6060f1SDimitry Andric if (FirstIt != LastIt) { 196fe6060f1SDimitry Andric auto PrevIt = FirstIt++; 197fe6060f1SDimitry Andric FirstIt = std::find_if(FirstIt, LastIt, [&](const auto &Curr) { 198fe6060f1SDimitry Andric return Pred(*PrevIt++, Curr); 199fe6060f1SDimitry Andric }); 200fe6060f1SDimitry Andric if (FirstIt != LastIt) 201fe6060f1SDimitry Andric for (ForwardIt CurrIt = FirstIt; ++CurrIt != LastIt;) 202fe6060f1SDimitry Andric if (!Pred(*PrevIt, *CurrIt)) { 203fe6060f1SDimitry Andric PrevIt = FirstIt; 204fe6060f1SDimitry Andric *FirstIt++ = std::move(*CurrIt); 205fe6060f1SDimitry Andric } 206fe6060f1SDimitry Andric } 207fe6060f1SDimitry Andric return FirstIt; 208fe6060f1SDimitry Andric } 209fe6060f1SDimitry Andric 2108bcb0991SDimitry Andric llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { 211fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 2128bcb0991SDimitry Andric if (Finalized) 213fe6060f1SDimitry Andric return createStringError(std::errc::invalid_argument, "already finalized"); 2148bcb0991SDimitry Andric Finalized = true; 2158bcb0991SDimitry Andric 2168bcb0991SDimitry Andric // Sort function infos so we can emit sorted functions. 217e8d8bef9SDimitry Andric llvm::sort(Funcs); 2188bcb0991SDimitry Andric 2198bcb0991SDimitry Andric // Don't let the string table indexes change by finalizing in order. 2208bcb0991SDimitry Andric StrTab.finalizeInOrder(); 2218bcb0991SDimitry Andric 2228bcb0991SDimitry Andric // Remove duplicates function infos that have both entries from debug info 2238bcb0991SDimitry Andric // (DWARF or Breakpad) and entries from the SymbolTable. 2248bcb0991SDimitry Andric // 2258bcb0991SDimitry Andric // Also handle overlapping function. Usually there shouldn't be any, but they 2268bcb0991SDimitry Andric // can and do happen in some rare cases. 2278bcb0991SDimitry Andric // 2288bcb0991SDimitry Andric // (a) (b) (c) 2298bcb0991SDimitry Andric // ^ ^ ^ ^ 2308bcb0991SDimitry Andric // |X |Y |X ^ |X 2318bcb0991SDimitry Andric // | | | |Y | ^ 2328bcb0991SDimitry Andric // | | | v v |Y 2338bcb0991SDimitry Andric // v v v v 2348bcb0991SDimitry Andric // 2358bcb0991SDimitry Andric // In (a) and (b), Y is ignored and X will be reported for the full range. 2368bcb0991SDimitry Andric // In (c), both functions will be included in the result and lookups for an 2378bcb0991SDimitry Andric // address in the intersection will return Y because of binary search. 2388bcb0991SDimitry Andric // 2398bcb0991SDimitry Andric // Note that in case of (b), we cannot include Y in the result because then 2408bcb0991SDimitry Andric // we wouldn't find any function for range (end of Y, end of X) 2418bcb0991SDimitry Andric // with binary search 2428bcb0991SDimitry Andric auto NumBefore = Funcs.size(); 243fe6060f1SDimitry Andric Funcs.erase( 244fe6060f1SDimitry Andric removeIfBinary(Funcs.begin(), Funcs.end(), 245fe6060f1SDimitry Andric [&](const auto &Prev, const auto &Curr) { 246fe6060f1SDimitry Andric // Empty ranges won't intersect, but we still need to 247fe6060f1SDimitry Andric // catch the case where we have multiple symbols at the 248fe6060f1SDimitry Andric // same address and coalesce them. 249fe6060f1SDimitry Andric const bool ranges_equal = Prev.Range == Curr.Range; 250fe6060f1SDimitry Andric if (ranges_equal || Prev.Range.intersects(Curr.Range)) { 251fe6060f1SDimitry Andric // Overlapping ranges or empty identical ranges. 252fe6060f1SDimitry Andric if (ranges_equal) { 253fe6060f1SDimitry Andric // Same address range. Check if one is from debug 254fe6060f1SDimitry Andric // info and the other is from a symbol table. If 255fe6060f1SDimitry Andric // so, then keep the one with debug info. Our 256fe6060f1SDimitry Andric // sorting guarantees that entries with matching 257fe6060f1SDimitry Andric // address ranges that have debug info are last in 258fe6060f1SDimitry Andric // the sort. 259fe6060f1SDimitry Andric if (Prev == Curr) { 260fe6060f1SDimitry Andric // FunctionInfo entries match exactly (range, 261fe6060f1SDimitry Andric // lines, inlines) 262fe6060f1SDimitry Andric 263fe6060f1SDimitry Andric // We used to output a warning here, but this was 264fe6060f1SDimitry Andric // so frequent on some binaries, in particular 265fe6060f1SDimitry Andric // when those were built with GCC, that it slowed 266fe6060f1SDimitry Andric // down processing extremely. 267fe6060f1SDimitry Andric return true; 2688bcb0991SDimitry Andric } else { 269fe6060f1SDimitry Andric if (!Prev.hasRichInfo() && Curr.hasRichInfo()) { 270fe6060f1SDimitry Andric // Same address range, one with no debug info 271fe6060f1SDimitry Andric // (symbol) and the next with debug info. Keep 272fe6060f1SDimitry Andric // the latter. 273fe6060f1SDimitry Andric return true; 2748bcb0991SDimitry Andric } else { 275fe6060f1SDimitry Andric if (!Quiet) { 276fe6060f1SDimitry Andric OS << "warning: same address range contains " 277fe6060f1SDimitry Andric "different debug " 2788bcb0991SDimitry Andric << "info. Removing:\n" 279fe6060f1SDimitry Andric << Prev << "\nIn favor of this one:\n" 280fe6060f1SDimitry Andric << Curr << "\n"; 281fe6060f1SDimitry Andric } 282fe6060f1SDimitry Andric return true; 2838bcb0991SDimitry Andric } 2848bcb0991SDimitry Andric } 2858bcb0991SDimitry Andric } else { 286fe6060f1SDimitry Andric if (!Quiet) { // print warnings about overlaps 2878bcb0991SDimitry Andric OS << "warning: function ranges overlap:\n" 288fe6060f1SDimitry Andric << Prev << "\n" 289fe6060f1SDimitry Andric << Curr << "\n"; 2908bcb0991SDimitry Andric } 291fe6060f1SDimitry Andric } 292fe6060f1SDimitry Andric } else if (Prev.Range.size() == 0 && 29381ad6265SDimitry Andric Curr.Range.contains(Prev.Range.start())) { 294fe6060f1SDimitry Andric if (!Quiet) { 2958bcb0991SDimitry Andric OS << "warning: removing symbol:\n" 296fe6060f1SDimitry Andric << Prev << "\nKeeping:\n" 297fe6060f1SDimitry Andric << Curr << "\n"; 2988bcb0991SDimitry Andric } 299fe6060f1SDimitry Andric return true; 3008bcb0991SDimitry Andric } 301fe6060f1SDimitry Andric 302fe6060f1SDimitry Andric return false; 303fe6060f1SDimitry Andric }), 304fe6060f1SDimitry Andric Funcs.end()); 3058bcb0991SDimitry Andric 3065ffd83dbSDimitry Andric // If our last function info entry doesn't have a size and if we have valid 3075ffd83dbSDimitry Andric // text ranges, we should set the size of the last entry since any search for 3085ffd83dbSDimitry Andric // a high address might match our last entry. By fixing up this size, we can 3095ffd83dbSDimitry Andric // help ensure we don't cause lookups to always return the last symbol that 3105ffd83dbSDimitry Andric // has no size when doing lookups. 3115ffd83dbSDimitry Andric if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) { 312fe6060f1SDimitry Andric if (auto Range = 31381ad6265SDimitry Andric ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) { 31481ad6265SDimitry Andric Funcs.back().Range = {Funcs.back().Range.start(), Range->end()}; 3155ffd83dbSDimitry Andric } 3165ffd83dbSDimitry Andric } 3178bcb0991SDimitry Andric OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " 3188bcb0991SDimitry Andric << Funcs.size() << " total\n"; 3198bcb0991SDimitry Andric return Error::success(); 3208bcb0991SDimitry Andric } 3218bcb0991SDimitry Andric 322*06c3fb27SDimitry Andric uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) { 323*06c3fb27SDimitry Andric // String offset at zero is always the empty string, no copying needed. 324*06c3fb27SDimitry Andric if (StrOff == 0) 325*06c3fb27SDimitry Andric return 0; 326*06c3fb27SDimitry Andric return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second); 327*06c3fb27SDimitry Andric } 328*06c3fb27SDimitry Andric 3295ffd83dbSDimitry Andric uint32_t GsymCreator::insertString(StringRef S, bool Copy) { 3308bcb0991SDimitry Andric if (S.empty()) 3318bcb0991SDimitry Andric return 0; 332fe6060f1SDimitry Andric 333fe6060f1SDimitry Andric // The hash can be calculated outside the lock. 334fe6060f1SDimitry Andric CachedHashStringRef CHStr(S); 335fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 3365ffd83dbSDimitry Andric if (Copy) { 3375ffd83dbSDimitry Andric // We need to provide backing storage for the string if requested 3385ffd83dbSDimitry Andric // since StringTableBuilder stores references to strings. Any string 3395ffd83dbSDimitry Andric // that comes from a section in an object file doesn't need to be 3405ffd83dbSDimitry Andric // copied, but any string created by code will need to be copied. 3415ffd83dbSDimitry Andric // This allows GsymCreator to be really fast when parsing DWARF and 3425ffd83dbSDimitry Andric // other object files as most strings don't need to be copied. 3435ffd83dbSDimitry Andric if (!StrTab.contains(CHStr)) 344fe6060f1SDimitry Andric CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(), 345fe6060f1SDimitry Andric CHStr.hash()}; 3465ffd83dbSDimitry Andric } 347*06c3fb27SDimitry Andric const uint32_t StrOff = StrTab.add(CHStr); 348*06c3fb27SDimitry Andric // Save a mapping of string offsets to the cached string reference in case 349*06c3fb27SDimitry Andric // we need to segment the GSYM file and copy string from one string table to 350*06c3fb27SDimitry Andric // another. 351*06c3fb27SDimitry Andric if (StringOffsetMap.count(StrOff) == 0) 352*06c3fb27SDimitry Andric StringOffsetMap.insert(std::make_pair(StrOff, CHStr)); 353*06c3fb27SDimitry Andric return StrOff; 3548bcb0991SDimitry Andric } 3558bcb0991SDimitry Andric 3568bcb0991SDimitry Andric void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { 357fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 3585ffd83dbSDimitry Andric Ranges.insert(FI.Range); 359fe6060f1SDimitry Andric Funcs.emplace_back(std::move(FI)); 3608bcb0991SDimitry Andric } 3618bcb0991SDimitry Andric 3628bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo( 3638bcb0991SDimitry Andric std::function<bool(FunctionInfo &)> const &Callback) { 364fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 3658bcb0991SDimitry Andric for (auto &FI : Funcs) { 3668bcb0991SDimitry Andric if (!Callback(FI)) 3678bcb0991SDimitry Andric break; 3688bcb0991SDimitry Andric } 3698bcb0991SDimitry Andric } 3708bcb0991SDimitry Andric 3718bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo( 3728bcb0991SDimitry Andric std::function<bool(const FunctionInfo &)> const &Callback) const { 373fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 3748bcb0991SDimitry Andric for (const auto &FI : Funcs) { 3758bcb0991SDimitry Andric if (!Callback(FI)) 3768bcb0991SDimitry Andric break; 3778bcb0991SDimitry Andric } 3788bcb0991SDimitry Andric } 3795ffd83dbSDimitry Andric 3805ffd83dbSDimitry Andric size_t GsymCreator::getNumFunctionInfos() const { 381fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 3825ffd83dbSDimitry Andric return Funcs.size(); 3835ffd83dbSDimitry Andric } 3845ffd83dbSDimitry Andric 3855ffd83dbSDimitry Andric bool GsymCreator::IsValidTextAddress(uint64_t Addr) const { 3865ffd83dbSDimitry Andric if (ValidTextRanges) 3875ffd83dbSDimitry Andric return ValidTextRanges->contains(Addr); 3885ffd83dbSDimitry Andric return true; // No valid text ranges has been set, so accept all ranges. 3895ffd83dbSDimitry Andric } 3905ffd83dbSDimitry Andric 3915ffd83dbSDimitry Andric bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const { 392fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 3935ffd83dbSDimitry Andric return Ranges.contains(Addr); 3945ffd83dbSDimitry Andric } 395*06c3fb27SDimitry Andric 396*06c3fb27SDimitry Andric std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const { 397*06c3fb27SDimitry Andric if (Finalized && !Funcs.empty()) 398*06c3fb27SDimitry Andric return std::optional<uint64_t>(Funcs.front().startAddress()); 399*06c3fb27SDimitry Andric // This code gets used by the segmentation of GSYM files to help determine the 400*06c3fb27SDimitry Andric // size of the GSYM header while continually adding new FunctionInfo objects 401*06c3fb27SDimitry Andric // to this object, so we haven't finalized this object yet. 402*06c3fb27SDimitry Andric if (Ranges.empty()) 403*06c3fb27SDimitry Andric return std::nullopt; 404*06c3fb27SDimitry Andric return std::optional<uint64_t>(Ranges.begin()->start()); 405*06c3fb27SDimitry Andric } 406*06c3fb27SDimitry Andric 407*06c3fb27SDimitry Andric std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const { 408*06c3fb27SDimitry Andric if (Finalized && !Funcs.empty()) 409*06c3fb27SDimitry Andric return std::optional<uint64_t>(Funcs.back().startAddress()); 410*06c3fb27SDimitry Andric // This code gets used by the segmentation of GSYM files to help determine the 411*06c3fb27SDimitry Andric // size of the GSYM header while continually adding new FunctionInfo objects 412*06c3fb27SDimitry Andric // to this object, so we haven't finalized this object yet. 413*06c3fb27SDimitry Andric if (Ranges.empty()) 414*06c3fb27SDimitry Andric return std::nullopt; 415*06c3fb27SDimitry Andric return std::optional<uint64_t>((Ranges.end() - 1)->end()); 416*06c3fb27SDimitry Andric } 417*06c3fb27SDimitry Andric 418*06c3fb27SDimitry Andric std::optional<uint64_t> GsymCreator::getBaseAddress() const { 419*06c3fb27SDimitry Andric if (BaseAddress) 420*06c3fb27SDimitry Andric return BaseAddress; 421*06c3fb27SDimitry Andric return getFirstFunctionAddress(); 422*06c3fb27SDimitry Andric } 423*06c3fb27SDimitry Andric 424*06c3fb27SDimitry Andric uint64_t GsymCreator::getMaxAddressOffset() const { 425*06c3fb27SDimitry Andric switch (getAddressOffsetSize()) { 426*06c3fb27SDimitry Andric case 1: return UINT8_MAX; 427*06c3fb27SDimitry Andric case 2: return UINT16_MAX; 428*06c3fb27SDimitry Andric case 4: return UINT32_MAX; 429*06c3fb27SDimitry Andric case 8: return UINT64_MAX; 430*06c3fb27SDimitry Andric } 431*06c3fb27SDimitry Andric llvm_unreachable("invalid address offset"); 432*06c3fb27SDimitry Andric } 433*06c3fb27SDimitry Andric 434*06c3fb27SDimitry Andric uint8_t GsymCreator::getAddressOffsetSize() const { 435*06c3fb27SDimitry Andric const std::optional<uint64_t> BaseAddress = getBaseAddress(); 436*06c3fb27SDimitry Andric const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress(); 437*06c3fb27SDimitry Andric if (BaseAddress && LastFuncAddr) { 438*06c3fb27SDimitry Andric const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress; 439*06c3fb27SDimitry Andric if (AddrDelta <= UINT8_MAX) 440*06c3fb27SDimitry Andric return 1; 441*06c3fb27SDimitry Andric else if (AddrDelta <= UINT16_MAX) 442*06c3fb27SDimitry Andric return 2; 443*06c3fb27SDimitry Andric else if (AddrDelta <= UINT32_MAX) 444*06c3fb27SDimitry Andric return 4; 445*06c3fb27SDimitry Andric return 8; 446*06c3fb27SDimitry Andric } 447*06c3fb27SDimitry Andric return 1; 448*06c3fb27SDimitry Andric } 449*06c3fb27SDimitry Andric 450*06c3fb27SDimitry Andric uint64_t GsymCreator::calculateHeaderAndTableSize() const { 451*06c3fb27SDimitry Andric uint64_t Size = sizeof(Header); 452*06c3fb27SDimitry Andric const size_t NumFuncs = Funcs.size(); 453*06c3fb27SDimitry Andric // Add size of address offset table 454*06c3fb27SDimitry Andric Size += NumFuncs * getAddressOffsetSize(); 455*06c3fb27SDimitry Andric // Add size of address info offsets which are 32 bit integers in version 1. 456*06c3fb27SDimitry Andric Size += NumFuncs * sizeof(uint32_t); 457*06c3fb27SDimitry Andric // Add file table size 458*06c3fb27SDimitry Andric Size += Files.size() * sizeof(FileEntry); 459*06c3fb27SDimitry Andric // Add string table size 460*06c3fb27SDimitry Andric Size += StrTab.getSize(); 461*06c3fb27SDimitry Andric 462*06c3fb27SDimitry Andric return Size; 463*06c3fb27SDimitry Andric } 464*06c3fb27SDimitry Andric 465*06c3fb27SDimitry Andric // This function takes a InlineInfo class that was copy constructed from an 466*06c3fb27SDimitry Andric // InlineInfo from the \a SrcGC and updates all members that point to strings 467*06c3fb27SDimitry Andric // and files to point to strings and files from this GsymCreator. 468*06c3fb27SDimitry Andric void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) { 469*06c3fb27SDimitry Andric II.Name = copyString(SrcGC, II.Name); 470*06c3fb27SDimitry Andric II.CallFile = copyFile(SrcGC, II.CallFile); 471*06c3fb27SDimitry Andric for (auto &ChildII: II.Children) 472*06c3fb27SDimitry Andric fixupInlineInfo(SrcGC, ChildII); 473*06c3fb27SDimitry Andric } 474*06c3fb27SDimitry Andric 475*06c3fb27SDimitry Andric uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) { 476*06c3fb27SDimitry Andric // To copy a function info we need to copy any files and strings over into 477*06c3fb27SDimitry Andric // this GsymCreator and then copy the function info and update the string 478*06c3fb27SDimitry Andric // table offsets to match the new offsets. 479*06c3fb27SDimitry Andric const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx]; 480*06c3fb27SDimitry Andric Ranges.insert(SrcFI.Range); 481*06c3fb27SDimitry Andric 482*06c3fb27SDimitry Andric FunctionInfo DstFI; 483*06c3fb27SDimitry Andric DstFI.Range = SrcFI.Range; 484*06c3fb27SDimitry Andric DstFI.Name = copyString(SrcGC, SrcFI.Name); 485*06c3fb27SDimitry Andric // Copy the line table if there is one. 486*06c3fb27SDimitry Andric if (SrcFI.OptLineTable) { 487*06c3fb27SDimitry Andric // Copy the entire line table. 488*06c3fb27SDimitry Andric DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value()); 489*06c3fb27SDimitry Andric // Fixup all LineEntry::File entries which are indexes in the the file table 490*06c3fb27SDimitry Andric // from SrcGC and must be converted to file indexes from this GsymCreator. 491*06c3fb27SDimitry Andric LineTable &DstLT = DstFI.OptLineTable.value(); 492*06c3fb27SDimitry Andric const size_t NumLines = DstLT.size(); 493*06c3fb27SDimitry Andric for (size_t I=0; I<NumLines; ++I) { 494*06c3fb27SDimitry Andric LineEntry &LE = DstLT.get(I); 495*06c3fb27SDimitry Andric LE.File = copyFile(SrcGC, LE.File); 496*06c3fb27SDimitry Andric } 497*06c3fb27SDimitry Andric } 498*06c3fb27SDimitry Andric // Copy the inline information if needed. 499*06c3fb27SDimitry Andric if (SrcFI.Inline) { 500*06c3fb27SDimitry Andric // Make a copy of the source inline information. 501*06c3fb27SDimitry Andric DstFI.Inline = SrcFI.Inline.value(); 502*06c3fb27SDimitry Andric // Fixup all strings and files in the copied inline information. 503*06c3fb27SDimitry Andric fixupInlineInfo(SrcGC, *DstFI.Inline); 504*06c3fb27SDimitry Andric } 505*06c3fb27SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 506*06c3fb27SDimitry Andric Funcs.push_back(DstFI); 507*06c3fb27SDimitry Andric return Funcs.back().cacheEncoding(); 508*06c3fb27SDimitry Andric } 509*06c3fb27SDimitry Andric 510*06c3fb27SDimitry Andric llvm::Error GsymCreator::saveSegments(StringRef Path, 511*06c3fb27SDimitry Andric llvm::support::endianness ByteOrder, 512*06c3fb27SDimitry Andric uint64_t SegmentSize) const { 513*06c3fb27SDimitry Andric if (SegmentSize == 0) 514*06c3fb27SDimitry Andric return createStringError(std::errc::invalid_argument, 515*06c3fb27SDimitry Andric "invalid segment size zero"); 516*06c3fb27SDimitry Andric 517*06c3fb27SDimitry Andric size_t FuncIdx = 0; 518*06c3fb27SDimitry Andric const size_t NumFuncs = Funcs.size(); 519*06c3fb27SDimitry Andric while (FuncIdx < NumFuncs) { 520*06c3fb27SDimitry Andric llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC = 521*06c3fb27SDimitry Andric createSegment(SegmentSize, FuncIdx); 522*06c3fb27SDimitry Andric if (ExpectedGC) { 523*06c3fb27SDimitry Andric GsymCreator *GC = ExpectedGC->get(); 524*06c3fb27SDimitry Andric if (GC == NULL) 525*06c3fb27SDimitry Andric break; // We had not more functions to encode. 526*06c3fb27SDimitry Andric raw_null_ostream ErrorStrm; 527*06c3fb27SDimitry Andric llvm::Error Err = GC->finalize(ErrorStrm); 528*06c3fb27SDimitry Andric if (Err) 529*06c3fb27SDimitry Andric return Err; 530*06c3fb27SDimitry Andric std::string SegmentedGsymPath; 531*06c3fb27SDimitry Andric raw_string_ostream SGP(SegmentedGsymPath); 532*06c3fb27SDimitry Andric std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress(); 533*06c3fb27SDimitry Andric if (FirstFuncAddr) { 534*06c3fb27SDimitry Andric SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1); 535*06c3fb27SDimitry Andric SGP.flush(); 536*06c3fb27SDimitry Andric Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt); 537*06c3fb27SDimitry Andric if (Err) 538*06c3fb27SDimitry Andric return Err; 539*06c3fb27SDimitry Andric } 540*06c3fb27SDimitry Andric } else { 541*06c3fb27SDimitry Andric return ExpectedGC.takeError(); 542*06c3fb27SDimitry Andric } 543*06c3fb27SDimitry Andric } 544*06c3fb27SDimitry Andric return Error::success(); 545*06c3fb27SDimitry Andric } 546*06c3fb27SDimitry Andric 547*06c3fb27SDimitry Andric llvm::Expected<std::unique_ptr<GsymCreator>> 548*06c3fb27SDimitry Andric GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const { 549*06c3fb27SDimitry Andric // No function entries, return empty unique pointer 550*06c3fb27SDimitry Andric if (FuncIdx >= Funcs.size()) 551*06c3fb27SDimitry Andric return std::unique_ptr<GsymCreator>(); 552*06c3fb27SDimitry Andric 553*06c3fb27SDimitry Andric std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true)); 554*06c3fb27SDimitry Andric // Set the base address if there is one. 555*06c3fb27SDimitry Andric if (BaseAddress) 556*06c3fb27SDimitry Andric GC->setBaseAddress(*BaseAddress); 557*06c3fb27SDimitry Andric // Copy the UUID value from this object into the new creator. 558*06c3fb27SDimitry Andric GC->setUUID(UUID); 559*06c3fb27SDimitry Andric const size_t NumFuncs = Funcs.size(); 560*06c3fb27SDimitry Andric // Track how big the function infos are for the current segment so we can 561*06c3fb27SDimitry Andric // emit segments that are close to the requested size. It is quick math to 562*06c3fb27SDimitry Andric // determine the current header and tables sizes, so we can do that each loop. 563*06c3fb27SDimitry Andric uint64_t SegmentFuncInfosSize = 0; 564*06c3fb27SDimitry Andric for (; FuncIdx < NumFuncs; ++FuncIdx) { 565*06c3fb27SDimitry Andric const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize(); 566*06c3fb27SDimitry Andric if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) { 567*06c3fb27SDimitry Andric if (SegmentFuncInfosSize == 0) 568*06c3fb27SDimitry Andric return createStringError(std::errc::invalid_argument, 569*06c3fb27SDimitry Andric "a segment size of %" PRIu64 " is to small to " 570*06c3fb27SDimitry Andric "fit any function infos, specify a larger value", 571*06c3fb27SDimitry Andric SegmentSize); 572*06c3fb27SDimitry Andric 573*06c3fb27SDimitry Andric break; 574*06c3fb27SDimitry Andric } 575*06c3fb27SDimitry Andric SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4); 576*06c3fb27SDimitry Andric } 577*06c3fb27SDimitry Andric return std::move(GC); 578*06c3fb27SDimitry Andric } 579