18bcb0991SDimitry Andric //===- GsymCreator.cpp ----------------------------------------------------===// 28bcb0991SDimitry Andric // 38bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 48bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 58bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 68bcb0991SDimitry Andric //===----------------------------------------------------------------------===// 78bcb0991SDimitry Andric 88bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/GsymCreator.h" 98bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/FileWriter.h" 108bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/Header.h" 118bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/LineTable.h" 12*0fca6ea1SDimitry Andric #include "llvm/DebugInfo/GSYM/OutputAggregator.h" 138bcb0991SDimitry Andric #include "llvm/MC/StringTableBuilder.h" 148bcb0991SDimitry Andric #include "llvm/Support/raw_ostream.h" 158bcb0991SDimitry Andric 168bcb0991SDimitry Andric #include <algorithm> 178bcb0991SDimitry Andric #include <cassert> 188bcb0991SDimitry Andric #include <functional> 198bcb0991SDimitry Andric #include <vector> 208bcb0991SDimitry Andric 218bcb0991SDimitry Andric using namespace llvm; 228bcb0991SDimitry Andric using namespace gsym; 238bcb0991SDimitry Andric 24fe6060f1SDimitry Andric GsymCreator::GsymCreator(bool Quiet) 25fe6060f1SDimitry Andric : StrTab(StringTableBuilder::ELF), Quiet(Quiet) { 268bcb0991SDimitry Andric insertFile(StringRef()); 278bcb0991SDimitry Andric } 288bcb0991SDimitry Andric 29fe6060f1SDimitry Andric uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) { 308bcb0991SDimitry Andric llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); 318bcb0991SDimitry Andric llvm::StringRef filename = llvm::sys::path::filename(Path, Style); 325ffd83dbSDimitry Andric // We must insert the strings first, then call the FileEntry constructor. 335ffd83dbSDimitry Andric // If we inline the insertString() function call into the constructor, the 345ffd83dbSDimitry Andric // call order is undefined due to parameter lists not having any ordering 355ffd83dbSDimitry Andric // requirements. 365ffd83dbSDimitry Andric const uint32_t Dir = insertString(directory); 375ffd83dbSDimitry Andric const uint32_t Base = insertString(filename); 3806c3fb27SDimitry Andric return insertFileEntry(FileEntry(Dir, Base)); 3906c3fb27SDimitry Andric } 408bcb0991SDimitry Andric 4106c3fb27SDimitry Andric uint32_t GsymCreator::insertFileEntry(FileEntry FE) { 42fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 438bcb0991SDimitry Andric const auto NextIndex = Files.size(); 448bcb0991SDimitry Andric // Find FE in hash map and insert if not present. 458bcb0991SDimitry Andric auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); 468bcb0991SDimitry Andric if (R.second) 478bcb0991SDimitry Andric Files.emplace_back(FE); 488bcb0991SDimitry Andric return R.first->second; 498bcb0991SDimitry Andric } 508bcb0991SDimitry Andric 5106c3fb27SDimitry Andric uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) { 5206c3fb27SDimitry Andric // File index zero is reserved for a FileEntry with no directory and no 5306c3fb27SDimitry Andric // filename. Any other file and we need to copy the strings for the directory 5406c3fb27SDimitry Andric // and filename. 5506c3fb27SDimitry Andric if (FileIdx == 0) 5606c3fb27SDimitry Andric return 0; 5706c3fb27SDimitry Andric const FileEntry SrcFE = SrcGC.Files[FileIdx]; 5806c3fb27SDimitry Andric // Copy the strings for the file and then add the newly converted file entry. 59*0fca6ea1SDimitry Andric uint32_t Dir = 60*0fca6ea1SDimitry Andric SrcFE.Dir == 0 61*0fca6ea1SDimitry Andric ? 0 62*0fca6ea1SDimitry Andric : StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second); 6306c3fb27SDimitry Andric uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second); 6406c3fb27SDimitry Andric FileEntry DstFE(Dir, Base); 6506c3fb27SDimitry Andric return insertFileEntry(DstFE); 6606c3fb27SDimitry Andric } 6706c3fb27SDimitry Andric 685f757f3fSDimitry Andric llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder, 6906c3fb27SDimitry Andric std::optional<uint64_t> SegmentSize) const { 7006c3fb27SDimitry Andric if (SegmentSize) 7106c3fb27SDimitry Andric return saveSegments(Path, ByteOrder, *SegmentSize); 728bcb0991SDimitry Andric std::error_code EC; 738bcb0991SDimitry Andric raw_fd_ostream OutStrm(Path, EC); 748bcb0991SDimitry Andric if (EC) 758bcb0991SDimitry Andric return llvm::errorCodeToError(EC); 768bcb0991SDimitry Andric FileWriter O(OutStrm, ByteOrder); 778bcb0991SDimitry Andric return encode(O); 788bcb0991SDimitry Andric } 798bcb0991SDimitry Andric 808bcb0991SDimitry Andric llvm::Error GsymCreator::encode(FileWriter &O) const { 81fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 828bcb0991SDimitry Andric if (Funcs.empty()) 838bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 848bcb0991SDimitry Andric "no functions to encode"); 858bcb0991SDimitry Andric if (!Finalized) 868bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 878bcb0991SDimitry Andric "GsymCreator wasn't finalized prior to encoding"); 888bcb0991SDimitry Andric 898bcb0991SDimitry Andric if (Funcs.size() > UINT32_MAX) 908bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 918bcb0991SDimitry Andric "too many FunctionInfos"); 925ffd83dbSDimitry Andric 9306c3fb27SDimitry Andric std::optional<uint64_t> BaseAddress = getBaseAddress(); 9406c3fb27SDimitry Andric // Base address should be valid if we have any functions. 9506c3fb27SDimitry Andric if (!BaseAddress) 9606c3fb27SDimitry Andric return createStringError(std::errc::invalid_argument, 9706c3fb27SDimitry Andric "invalid base address"); 988bcb0991SDimitry Andric Header Hdr; 998bcb0991SDimitry Andric Hdr.Magic = GSYM_MAGIC; 1008bcb0991SDimitry Andric Hdr.Version = GSYM_VERSION; 10106c3fb27SDimitry Andric Hdr.AddrOffSize = getAddressOffsetSize(); 1028bcb0991SDimitry Andric Hdr.UUIDSize = static_cast<uint8_t>(UUID.size()); 10306c3fb27SDimitry Andric Hdr.BaseAddress = *BaseAddress; 1048bcb0991SDimitry Andric Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size()); 1058bcb0991SDimitry Andric Hdr.StrtabOffset = 0; // We will fix this up later. 1065ffd83dbSDimitry Andric Hdr.StrtabSize = 0; // We will fix this up later. 1078bcb0991SDimitry Andric memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); 1088bcb0991SDimitry Andric if (UUID.size() > sizeof(Hdr.UUID)) 1098bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 1108bcb0991SDimitry Andric "invalid UUID size %u", (uint32_t)UUID.size()); 1118bcb0991SDimitry Andric // Copy the UUID value if we have one. 1128bcb0991SDimitry Andric if (UUID.size() > 0) 1138bcb0991SDimitry Andric memcpy(Hdr.UUID, UUID.data(), UUID.size()); 1148bcb0991SDimitry Andric // Write out the header. 1158bcb0991SDimitry Andric llvm::Error Err = Hdr.encode(O); 1168bcb0991SDimitry Andric if (Err) 1178bcb0991SDimitry Andric return Err; 1188bcb0991SDimitry Andric 11906c3fb27SDimitry Andric const uint64_t MaxAddressOffset = getMaxAddressOffset(); 1208bcb0991SDimitry Andric // Write out the address offsets. 1218bcb0991SDimitry Andric O.alignTo(Hdr.AddrOffSize); 1228bcb0991SDimitry Andric for (const auto &FuncInfo : Funcs) { 1238bcb0991SDimitry Andric uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; 12406c3fb27SDimitry Andric // Make sure we calculated the address offsets byte size correctly by 12506c3fb27SDimitry Andric // verifying the current address offset is within ranges. We have seen bugs 12606c3fb27SDimitry Andric // introduced when the code changes that can cause problems here so it is 12706c3fb27SDimitry Andric // good to catch this during testing. 12806c3fb27SDimitry Andric assert(AddrOffset <= MaxAddressOffset); 12906c3fb27SDimitry Andric (void)MaxAddressOffset; 1308bcb0991SDimitry Andric switch (Hdr.AddrOffSize) { 131fe6060f1SDimitry Andric case 1: 132fe6060f1SDimitry Andric O.writeU8(static_cast<uint8_t>(AddrOffset)); 133fe6060f1SDimitry Andric break; 134fe6060f1SDimitry Andric case 2: 135fe6060f1SDimitry Andric O.writeU16(static_cast<uint16_t>(AddrOffset)); 136fe6060f1SDimitry Andric break; 137fe6060f1SDimitry Andric case 4: 138fe6060f1SDimitry Andric O.writeU32(static_cast<uint32_t>(AddrOffset)); 139fe6060f1SDimitry Andric break; 140fe6060f1SDimitry Andric case 8: 141fe6060f1SDimitry Andric O.writeU64(AddrOffset); 142fe6060f1SDimitry Andric break; 1438bcb0991SDimitry Andric } 1448bcb0991SDimitry Andric } 1458bcb0991SDimitry Andric 1468bcb0991SDimitry Andric // Write out all zeros for the AddrInfoOffsets. 1478bcb0991SDimitry Andric O.alignTo(4); 1488bcb0991SDimitry Andric const off_t AddrInfoOffsetsOffset = O.tell(); 1498bcb0991SDimitry Andric for (size_t i = 0, n = Funcs.size(); i < n; ++i) 1508bcb0991SDimitry Andric O.writeU32(0); 1518bcb0991SDimitry Andric 1528bcb0991SDimitry Andric // Write out the file table 1538bcb0991SDimitry Andric O.alignTo(4); 1548bcb0991SDimitry Andric assert(!Files.empty()); 1558bcb0991SDimitry Andric assert(Files[0].Dir == 0); 1568bcb0991SDimitry Andric assert(Files[0].Base == 0); 1578bcb0991SDimitry Andric size_t NumFiles = Files.size(); 1588bcb0991SDimitry Andric if (NumFiles > UINT32_MAX) 159fe6060f1SDimitry Andric return createStringError(std::errc::invalid_argument, "too many files"); 1608bcb0991SDimitry Andric O.writeU32(static_cast<uint32_t>(NumFiles)); 1618bcb0991SDimitry Andric for (auto File : Files) { 1628bcb0991SDimitry Andric O.writeU32(File.Dir); 1638bcb0991SDimitry Andric O.writeU32(File.Base); 1648bcb0991SDimitry Andric } 1658bcb0991SDimitry Andric 16606c3fb27SDimitry Andric // Write out the string table. 1678bcb0991SDimitry Andric const off_t StrtabOffset = O.tell(); 1688bcb0991SDimitry Andric StrTab.write(O.get_stream()); 1698bcb0991SDimitry Andric const off_t StrtabSize = O.tell() - StrtabOffset; 1708bcb0991SDimitry Andric std::vector<uint32_t> AddrInfoOffsets; 1718bcb0991SDimitry Andric 1728bcb0991SDimitry Andric // Write out the address infos for each function info. 1738bcb0991SDimitry Andric for (const auto &FuncInfo : Funcs) { 1748bcb0991SDimitry Andric if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) 1758bcb0991SDimitry Andric AddrInfoOffsets.push_back(OffsetOrErr.get()); 1768bcb0991SDimitry Andric else 1778bcb0991SDimitry Andric return OffsetOrErr.takeError(); 1788bcb0991SDimitry Andric } 1798bcb0991SDimitry Andric // Fixup the string table offset and size in the header 1808bcb0991SDimitry Andric O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); 1818bcb0991SDimitry Andric O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); 1828bcb0991SDimitry Andric 1838bcb0991SDimitry Andric // Fixup all address info offsets 1848bcb0991SDimitry Andric uint64_t Offset = 0; 1858bcb0991SDimitry Andric for (auto AddrInfoOffset : AddrInfoOffsets) { 1868bcb0991SDimitry Andric O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); 1878bcb0991SDimitry Andric Offset += 4; 1888bcb0991SDimitry Andric } 1898bcb0991SDimitry Andric return ErrorSuccess(); 1908bcb0991SDimitry Andric } 1918bcb0991SDimitry Andric 192*0fca6ea1SDimitry Andric llvm::Error GsymCreator::finalize(OutputAggregator &Out) { 193fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 1948bcb0991SDimitry Andric if (Finalized) 195fe6060f1SDimitry Andric return createStringError(std::errc::invalid_argument, "already finalized"); 1968bcb0991SDimitry Andric Finalized = true; 1978bcb0991SDimitry Andric 1988bcb0991SDimitry Andric // Don't let the string table indexes change by finalizing in order. 1998bcb0991SDimitry Andric StrTab.finalizeInOrder(); 2008bcb0991SDimitry Andric 2018bcb0991SDimitry Andric // Remove duplicates function infos that have both entries from debug info 2028bcb0991SDimitry Andric // (DWARF or Breakpad) and entries from the SymbolTable. 2038bcb0991SDimitry Andric // 2048bcb0991SDimitry Andric // Also handle overlapping function. Usually there shouldn't be any, but they 2058bcb0991SDimitry Andric // can and do happen in some rare cases. 2068bcb0991SDimitry Andric // 2078bcb0991SDimitry Andric // (a) (b) (c) 2088bcb0991SDimitry Andric // ^ ^ ^ ^ 2098bcb0991SDimitry Andric // |X |Y |X ^ |X 2108bcb0991SDimitry Andric // | | | |Y | ^ 2118bcb0991SDimitry Andric // | | | v v |Y 2128bcb0991SDimitry Andric // v v v v 2138bcb0991SDimitry Andric // 2148bcb0991SDimitry Andric // In (a) and (b), Y is ignored and X will be reported for the full range. 2158bcb0991SDimitry Andric // In (c), both functions will be included in the result and lookups for an 2168bcb0991SDimitry Andric // address in the intersection will return Y because of binary search. 2178bcb0991SDimitry Andric // 2188bcb0991SDimitry Andric // Note that in case of (b), we cannot include Y in the result because then 2198bcb0991SDimitry Andric // we wouldn't find any function for range (end of Y, end of X) 2208bcb0991SDimitry Andric // with binary search 2215f757f3fSDimitry Andric 2225f757f3fSDimitry Andric const auto NumBefore = Funcs.size(); 2235f757f3fSDimitry Andric // Only sort and unique if this isn't a segment. If this is a segment we 2245f757f3fSDimitry Andric // already finalized the main GsymCreator with all of the function infos 2255f757f3fSDimitry Andric // and then the already sorted and uniqued function infos were added to this 2265f757f3fSDimitry Andric // object. 2275f757f3fSDimitry Andric if (!IsSegment) { 2285f757f3fSDimitry Andric if (NumBefore > 1) { 2295f757f3fSDimitry Andric // Sort function infos so we can emit sorted functions. 2305f757f3fSDimitry Andric llvm::sort(Funcs); 2315f757f3fSDimitry Andric std::vector<FunctionInfo> FinalizedFuncs; 2325f757f3fSDimitry Andric FinalizedFuncs.reserve(Funcs.size()); 2335f757f3fSDimitry Andric FinalizedFuncs.emplace_back(std::move(Funcs.front())); 2345f757f3fSDimitry Andric for (size_t Idx=1; Idx < NumBefore; ++Idx) { 2355f757f3fSDimitry Andric FunctionInfo &Prev = FinalizedFuncs.back(); 2365f757f3fSDimitry Andric FunctionInfo &Curr = Funcs[Idx]; 237fe6060f1SDimitry Andric // Empty ranges won't intersect, but we still need to 238fe6060f1SDimitry Andric // catch the case where we have multiple symbols at the 239fe6060f1SDimitry Andric // same address and coalesce them. 240fe6060f1SDimitry Andric const bool ranges_equal = Prev.Range == Curr.Range; 241fe6060f1SDimitry Andric if (ranges_equal || Prev.Range.intersects(Curr.Range)) { 242fe6060f1SDimitry Andric // Overlapping ranges or empty identical ranges. 243fe6060f1SDimitry Andric if (ranges_equal) { 244fe6060f1SDimitry Andric // Same address range. Check if one is from debug 245fe6060f1SDimitry Andric // info and the other is from a symbol table. If 246fe6060f1SDimitry Andric // so, then keep the one with debug info. Our 247fe6060f1SDimitry Andric // sorting guarantees that entries with matching 248fe6060f1SDimitry Andric // address ranges that have debug info are last in 249fe6060f1SDimitry Andric // the sort. 2505f757f3fSDimitry Andric if (!(Prev == Curr)) { 251*0fca6ea1SDimitry Andric if (Prev.hasRichInfo() && Curr.hasRichInfo()) 252*0fca6ea1SDimitry Andric Out.Report( 253*0fca6ea1SDimitry Andric "Duplicate address ranges with different debug info.", 254*0fca6ea1SDimitry Andric [&](raw_ostream &OS) { 255fe6060f1SDimitry Andric OS << "warning: same address range contains " 256fe6060f1SDimitry Andric "different debug " 2578bcb0991SDimitry Andric << "info. Removing:\n" 258fe6060f1SDimitry Andric << Prev << "\nIn favor of this one:\n" 259fe6060f1SDimitry Andric << Curr << "\n"; 260*0fca6ea1SDimitry Andric }); 261*0fca6ea1SDimitry Andric 2625f757f3fSDimitry Andric // We want to swap the current entry with the previous since 2635f757f3fSDimitry Andric // later entries with the same range always have more debug info 2645f757f3fSDimitry Andric // or different debug info. 2655f757f3fSDimitry Andric std::swap(Prev, Curr); 2668bcb0991SDimitry Andric } 2678bcb0991SDimitry Andric } else { 268*0fca6ea1SDimitry Andric Out.Report("Overlapping function ranges", [&](raw_ostream &OS) { 269*0fca6ea1SDimitry Andric // print warnings about overlaps 2708bcb0991SDimitry Andric OS << "warning: function ranges overlap:\n" 271fe6060f1SDimitry Andric << Prev << "\n" 272fe6060f1SDimitry Andric << Curr << "\n"; 273*0fca6ea1SDimitry Andric }); 2745f757f3fSDimitry Andric FinalizedFuncs.emplace_back(std::move(Curr)); 275fe6060f1SDimitry Andric } 2765f757f3fSDimitry Andric } else { 2775f757f3fSDimitry Andric if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) { 2785f757f3fSDimitry Andric // Symbols on macOS don't have address ranges, so if the range 2795f757f3fSDimitry Andric // doesn't match and the size is zero, then we replace the empty 2805f757f3fSDimitry Andric // symbol function info with the current one. 2815f757f3fSDimitry Andric std::swap(Prev, Curr); 2825f757f3fSDimitry Andric } else { 2835f757f3fSDimitry Andric FinalizedFuncs.emplace_back(std::move(Curr)); 2848bcb0991SDimitry Andric } 2858bcb0991SDimitry Andric } 2865f757f3fSDimitry Andric } 2875f757f3fSDimitry Andric std::swap(Funcs, FinalizedFuncs); 2885f757f3fSDimitry Andric } 2895ffd83dbSDimitry Andric // If our last function info entry doesn't have a size and if we have valid 2905ffd83dbSDimitry Andric // text ranges, we should set the size of the last entry since any search for 2915ffd83dbSDimitry Andric // a high address might match our last entry. By fixing up this size, we can 2925ffd83dbSDimitry Andric // help ensure we don't cause lookups to always return the last symbol that 2935ffd83dbSDimitry Andric // has no size when doing lookups. 2945ffd83dbSDimitry Andric if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) { 295fe6060f1SDimitry Andric if (auto Range = 29681ad6265SDimitry Andric ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) { 29781ad6265SDimitry Andric Funcs.back().Range = {Funcs.back().Range.start(), Range->end()}; 2985ffd83dbSDimitry Andric } 2995ffd83dbSDimitry Andric } 300*0fca6ea1SDimitry Andric Out << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " 3018bcb0991SDimitry Andric << Funcs.size() << " total\n"; 3025f757f3fSDimitry Andric } 3038bcb0991SDimitry Andric return Error::success(); 3048bcb0991SDimitry Andric } 3058bcb0991SDimitry Andric 30606c3fb27SDimitry Andric uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) { 30706c3fb27SDimitry Andric // String offset at zero is always the empty string, no copying needed. 30806c3fb27SDimitry Andric if (StrOff == 0) 30906c3fb27SDimitry Andric return 0; 31006c3fb27SDimitry Andric return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second); 31106c3fb27SDimitry Andric } 31206c3fb27SDimitry Andric 3135ffd83dbSDimitry Andric uint32_t GsymCreator::insertString(StringRef S, bool Copy) { 3148bcb0991SDimitry Andric if (S.empty()) 3158bcb0991SDimitry Andric return 0; 316fe6060f1SDimitry Andric 317fe6060f1SDimitry Andric // The hash can be calculated outside the lock. 318fe6060f1SDimitry Andric CachedHashStringRef CHStr(S); 319fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 3205ffd83dbSDimitry Andric if (Copy) { 3215ffd83dbSDimitry Andric // We need to provide backing storage for the string if requested 3225ffd83dbSDimitry Andric // since StringTableBuilder stores references to strings. Any string 3235ffd83dbSDimitry Andric // that comes from a section in an object file doesn't need to be 3245ffd83dbSDimitry Andric // copied, but any string created by code will need to be copied. 3255ffd83dbSDimitry Andric // This allows GsymCreator to be really fast when parsing DWARF and 3265ffd83dbSDimitry Andric // other object files as most strings don't need to be copied. 3275ffd83dbSDimitry Andric if (!StrTab.contains(CHStr)) 328fe6060f1SDimitry Andric CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(), 329fe6060f1SDimitry Andric CHStr.hash()}; 3305ffd83dbSDimitry Andric } 33106c3fb27SDimitry Andric const uint32_t StrOff = StrTab.add(CHStr); 33206c3fb27SDimitry Andric // Save a mapping of string offsets to the cached string reference in case 33306c3fb27SDimitry Andric // we need to segment the GSYM file and copy string from one string table to 33406c3fb27SDimitry Andric // another. 33506c3fb27SDimitry Andric if (StringOffsetMap.count(StrOff) == 0) 33606c3fb27SDimitry Andric StringOffsetMap.insert(std::make_pair(StrOff, CHStr)); 33706c3fb27SDimitry Andric return StrOff; 3388bcb0991SDimitry Andric } 3398bcb0991SDimitry Andric 3408bcb0991SDimitry Andric void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { 341fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 342fe6060f1SDimitry Andric Funcs.emplace_back(std::move(FI)); 3438bcb0991SDimitry Andric } 3448bcb0991SDimitry Andric 3458bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo( 3468bcb0991SDimitry Andric std::function<bool(FunctionInfo &)> const &Callback) { 347fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 3488bcb0991SDimitry Andric for (auto &FI : Funcs) { 3498bcb0991SDimitry Andric if (!Callback(FI)) 3508bcb0991SDimitry Andric break; 3518bcb0991SDimitry Andric } 3528bcb0991SDimitry Andric } 3538bcb0991SDimitry Andric 3548bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo( 3558bcb0991SDimitry Andric std::function<bool(const FunctionInfo &)> const &Callback) const { 356fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 3578bcb0991SDimitry Andric for (const auto &FI : Funcs) { 3588bcb0991SDimitry Andric if (!Callback(FI)) 3598bcb0991SDimitry Andric break; 3608bcb0991SDimitry Andric } 3618bcb0991SDimitry Andric } 3625ffd83dbSDimitry Andric 3635ffd83dbSDimitry Andric size_t GsymCreator::getNumFunctionInfos() const { 364fe6060f1SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 3655ffd83dbSDimitry Andric return Funcs.size(); 3665ffd83dbSDimitry Andric } 3675ffd83dbSDimitry Andric 3685ffd83dbSDimitry Andric bool GsymCreator::IsValidTextAddress(uint64_t Addr) const { 3695ffd83dbSDimitry Andric if (ValidTextRanges) 3705ffd83dbSDimitry Andric return ValidTextRanges->contains(Addr); 3715ffd83dbSDimitry Andric return true; // No valid text ranges has been set, so accept all ranges. 3725ffd83dbSDimitry Andric } 3735ffd83dbSDimitry Andric 37406c3fb27SDimitry Andric std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const { 3755f757f3fSDimitry Andric // If we have finalized then Funcs are sorted. If we are a segment then 3765f757f3fSDimitry Andric // Funcs will be sorted as well since function infos get added from an 3775f757f3fSDimitry Andric // already finalized GsymCreator object where its functions were sorted and 3785f757f3fSDimitry Andric // uniqued. 3795f757f3fSDimitry Andric if ((Finalized || IsSegment) && !Funcs.empty()) 38006c3fb27SDimitry Andric return std::optional<uint64_t>(Funcs.front().startAddress()); 38106c3fb27SDimitry Andric return std::nullopt; 38206c3fb27SDimitry Andric } 38306c3fb27SDimitry Andric 38406c3fb27SDimitry Andric std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const { 3855f757f3fSDimitry Andric // If we have finalized then Funcs are sorted. If we are a segment then 3865f757f3fSDimitry Andric // Funcs will be sorted as well since function infos get added from an 3875f757f3fSDimitry Andric // already finalized GsymCreator object where its functions were sorted and 3885f757f3fSDimitry Andric // uniqued. 3895f757f3fSDimitry Andric if ((Finalized || IsSegment) && !Funcs.empty()) 39006c3fb27SDimitry Andric return std::optional<uint64_t>(Funcs.back().startAddress()); 39106c3fb27SDimitry Andric return std::nullopt; 39206c3fb27SDimitry Andric } 39306c3fb27SDimitry Andric 39406c3fb27SDimitry Andric std::optional<uint64_t> GsymCreator::getBaseAddress() const { 39506c3fb27SDimitry Andric if (BaseAddress) 39606c3fb27SDimitry Andric return BaseAddress; 39706c3fb27SDimitry Andric return getFirstFunctionAddress(); 39806c3fb27SDimitry Andric } 39906c3fb27SDimitry Andric 40006c3fb27SDimitry Andric uint64_t GsymCreator::getMaxAddressOffset() const { 40106c3fb27SDimitry Andric switch (getAddressOffsetSize()) { 40206c3fb27SDimitry Andric case 1: return UINT8_MAX; 40306c3fb27SDimitry Andric case 2: return UINT16_MAX; 40406c3fb27SDimitry Andric case 4: return UINT32_MAX; 40506c3fb27SDimitry Andric case 8: return UINT64_MAX; 40606c3fb27SDimitry Andric } 40706c3fb27SDimitry Andric llvm_unreachable("invalid address offset"); 40806c3fb27SDimitry Andric } 40906c3fb27SDimitry Andric 41006c3fb27SDimitry Andric uint8_t GsymCreator::getAddressOffsetSize() const { 41106c3fb27SDimitry Andric const std::optional<uint64_t> BaseAddress = getBaseAddress(); 41206c3fb27SDimitry Andric const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress(); 41306c3fb27SDimitry Andric if (BaseAddress && LastFuncAddr) { 41406c3fb27SDimitry Andric const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress; 41506c3fb27SDimitry Andric if (AddrDelta <= UINT8_MAX) 41606c3fb27SDimitry Andric return 1; 41706c3fb27SDimitry Andric else if (AddrDelta <= UINT16_MAX) 41806c3fb27SDimitry Andric return 2; 41906c3fb27SDimitry Andric else if (AddrDelta <= UINT32_MAX) 42006c3fb27SDimitry Andric return 4; 42106c3fb27SDimitry Andric return 8; 42206c3fb27SDimitry Andric } 42306c3fb27SDimitry Andric return 1; 42406c3fb27SDimitry Andric } 42506c3fb27SDimitry Andric 42606c3fb27SDimitry Andric uint64_t GsymCreator::calculateHeaderAndTableSize() const { 42706c3fb27SDimitry Andric uint64_t Size = sizeof(Header); 42806c3fb27SDimitry Andric const size_t NumFuncs = Funcs.size(); 42906c3fb27SDimitry Andric // Add size of address offset table 43006c3fb27SDimitry Andric Size += NumFuncs * getAddressOffsetSize(); 43106c3fb27SDimitry Andric // Add size of address info offsets which are 32 bit integers in version 1. 43206c3fb27SDimitry Andric Size += NumFuncs * sizeof(uint32_t); 43306c3fb27SDimitry Andric // Add file table size 43406c3fb27SDimitry Andric Size += Files.size() * sizeof(FileEntry); 43506c3fb27SDimitry Andric // Add string table size 43606c3fb27SDimitry Andric Size += StrTab.getSize(); 43706c3fb27SDimitry Andric 43806c3fb27SDimitry Andric return Size; 43906c3fb27SDimitry Andric } 44006c3fb27SDimitry Andric 44106c3fb27SDimitry Andric // This function takes a InlineInfo class that was copy constructed from an 44206c3fb27SDimitry Andric // InlineInfo from the \a SrcGC and updates all members that point to strings 44306c3fb27SDimitry Andric // and files to point to strings and files from this GsymCreator. 44406c3fb27SDimitry Andric void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) { 44506c3fb27SDimitry Andric II.Name = copyString(SrcGC, II.Name); 44606c3fb27SDimitry Andric II.CallFile = copyFile(SrcGC, II.CallFile); 44706c3fb27SDimitry Andric for (auto &ChildII: II.Children) 44806c3fb27SDimitry Andric fixupInlineInfo(SrcGC, ChildII); 44906c3fb27SDimitry Andric } 45006c3fb27SDimitry Andric 45106c3fb27SDimitry Andric uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) { 45206c3fb27SDimitry Andric // To copy a function info we need to copy any files and strings over into 45306c3fb27SDimitry Andric // this GsymCreator and then copy the function info and update the string 45406c3fb27SDimitry Andric // table offsets to match the new offsets. 45506c3fb27SDimitry Andric const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx]; 45606c3fb27SDimitry Andric 45706c3fb27SDimitry Andric FunctionInfo DstFI; 45806c3fb27SDimitry Andric DstFI.Range = SrcFI.Range; 45906c3fb27SDimitry Andric DstFI.Name = copyString(SrcGC, SrcFI.Name); 46006c3fb27SDimitry Andric // Copy the line table if there is one. 46106c3fb27SDimitry Andric if (SrcFI.OptLineTable) { 46206c3fb27SDimitry Andric // Copy the entire line table. 46306c3fb27SDimitry Andric DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value()); 46406c3fb27SDimitry Andric // Fixup all LineEntry::File entries which are indexes in the the file table 46506c3fb27SDimitry Andric // from SrcGC and must be converted to file indexes from this GsymCreator. 46606c3fb27SDimitry Andric LineTable &DstLT = DstFI.OptLineTable.value(); 46706c3fb27SDimitry Andric const size_t NumLines = DstLT.size(); 46806c3fb27SDimitry Andric for (size_t I=0; I<NumLines; ++I) { 46906c3fb27SDimitry Andric LineEntry &LE = DstLT.get(I); 47006c3fb27SDimitry Andric LE.File = copyFile(SrcGC, LE.File); 47106c3fb27SDimitry Andric } 47206c3fb27SDimitry Andric } 47306c3fb27SDimitry Andric // Copy the inline information if needed. 47406c3fb27SDimitry Andric if (SrcFI.Inline) { 47506c3fb27SDimitry Andric // Make a copy of the source inline information. 47606c3fb27SDimitry Andric DstFI.Inline = SrcFI.Inline.value(); 47706c3fb27SDimitry Andric // Fixup all strings and files in the copied inline information. 47806c3fb27SDimitry Andric fixupInlineInfo(SrcGC, *DstFI.Inline); 47906c3fb27SDimitry Andric } 48006c3fb27SDimitry Andric std::lock_guard<std::mutex> Guard(Mutex); 4815f757f3fSDimitry Andric Funcs.emplace_back(DstFI); 48206c3fb27SDimitry Andric return Funcs.back().cacheEncoding(); 48306c3fb27SDimitry Andric } 48406c3fb27SDimitry Andric 48506c3fb27SDimitry Andric llvm::Error GsymCreator::saveSegments(StringRef Path, 4865f757f3fSDimitry Andric llvm::endianness ByteOrder, 48706c3fb27SDimitry Andric uint64_t SegmentSize) const { 48806c3fb27SDimitry Andric if (SegmentSize == 0) 48906c3fb27SDimitry Andric return createStringError(std::errc::invalid_argument, 49006c3fb27SDimitry Andric "invalid segment size zero"); 49106c3fb27SDimitry Andric 49206c3fb27SDimitry Andric size_t FuncIdx = 0; 49306c3fb27SDimitry Andric const size_t NumFuncs = Funcs.size(); 49406c3fb27SDimitry Andric while (FuncIdx < NumFuncs) { 49506c3fb27SDimitry Andric llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC = 49606c3fb27SDimitry Andric createSegment(SegmentSize, FuncIdx); 49706c3fb27SDimitry Andric if (ExpectedGC) { 49806c3fb27SDimitry Andric GsymCreator *GC = ExpectedGC->get(); 49906c3fb27SDimitry Andric if (GC == NULL) 50006c3fb27SDimitry Andric break; // We had not more functions to encode. 501*0fca6ea1SDimitry Andric // Don't collect any messages at all 502*0fca6ea1SDimitry Andric OutputAggregator Out(nullptr); 503*0fca6ea1SDimitry Andric llvm::Error Err = GC->finalize(Out); 50406c3fb27SDimitry Andric if (Err) 50506c3fb27SDimitry Andric return Err; 50606c3fb27SDimitry Andric std::string SegmentedGsymPath; 50706c3fb27SDimitry Andric raw_string_ostream SGP(SegmentedGsymPath); 50806c3fb27SDimitry Andric std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress(); 50906c3fb27SDimitry Andric if (FirstFuncAddr) { 51006c3fb27SDimitry Andric SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1); 51106c3fb27SDimitry Andric SGP.flush(); 51206c3fb27SDimitry Andric Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt); 51306c3fb27SDimitry Andric if (Err) 51406c3fb27SDimitry Andric return Err; 51506c3fb27SDimitry Andric } 51606c3fb27SDimitry Andric } else { 51706c3fb27SDimitry Andric return ExpectedGC.takeError(); 51806c3fb27SDimitry Andric } 51906c3fb27SDimitry Andric } 52006c3fb27SDimitry Andric return Error::success(); 52106c3fb27SDimitry Andric } 52206c3fb27SDimitry Andric 52306c3fb27SDimitry Andric llvm::Expected<std::unique_ptr<GsymCreator>> 52406c3fb27SDimitry Andric GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const { 52506c3fb27SDimitry Andric // No function entries, return empty unique pointer 52606c3fb27SDimitry Andric if (FuncIdx >= Funcs.size()) 52706c3fb27SDimitry Andric return std::unique_ptr<GsymCreator>(); 52806c3fb27SDimitry Andric 52906c3fb27SDimitry Andric std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true)); 5305f757f3fSDimitry Andric 5315f757f3fSDimitry Andric // Tell the creator that this is a segment. 5325f757f3fSDimitry Andric GC->setIsSegment(); 5335f757f3fSDimitry Andric 53406c3fb27SDimitry Andric // Set the base address if there is one. 53506c3fb27SDimitry Andric if (BaseAddress) 53606c3fb27SDimitry Andric GC->setBaseAddress(*BaseAddress); 53706c3fb27SDimitry Andric // Copy the UUID value from this object into the new creator. 53806c3fb27SDimitry Andric GC->setUUID(UUID); 53906c3fb27SDimitry Andric const size_t NumFuncs = Funcs.size(); 54006c3fb27SDimitry Andric // Track how big the function infos are for the current segment so we can 54106c3fb27SDimitry Andric // emit segments that are close to the requested size. It is quick math to 54206c3fb27SDimitry Andric // determine the current header and tables sizes, so we can do that each loop. 54306c3fb27SDimitry Andric uint64_t SegmentFuncInfosSize = 0; 54406c3fb27SDimitry Andric for (; FuncIdx < NumFuncs; ++FuncIdx) { 54506c3fb27SDimitry Andric const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize(); 54606c3fb27SDimitry Andric if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) { 54706c3fb27SDimitry Andric if (SegmentFuncInfosSize == 0) 54806c3fb27SDimitry Andric return createStringError(std::errc::invalid_argument, 54906c3fb27SDimitry Andric "a segment size of %" PRIu64 " is to small to " 55006c3fb27SDimitry Andric "fit any function infos, specify a larger value", 55106c3fb27SDimitry Andric SegmentSize); 55206c3fb27SDimitry Andric 55306c3fb27SDimitry Andric break; 55406c3fb27SDimitry Andric } 55506c3fb27SDimitry Andric SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4); 55606c3fb27SDimitry Andric } 55706c3fb27SDimitry Andric return std::move(GC); 55806c3fb27SDimitry Andric } 559