18bcb0991SDimitry Andric //===- GsymCreator.cpp ----------------------------------------------------===// 28bcb0991SDimitry Andric // 38bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 48bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 58bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 68bcb0991SDimitry Andric //===----------------------------------------------------------------------===// 78bcb0991SDimitry Andric 88bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/GsymCreator.h" 98bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/FileWriter.h" 108bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/Header.h" 118bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/LineTable.h" 128bcb0991SDimitry Andric #include "llvm/MC/StringTableBuilder.h" 138bcb0991SDimitry Andric #include "llvm/Support/raw_ostream.h" 148bcb0991SDimitry Andric 158bcb0991SDimitry Andric #include <algorithm> 168bcb0991SDimitry Andric #include <cassert> 178bcb0991SDimitry Andric #include <functional> 188bcb0991SDimitry Andric #include <vector> 198bcb0991SDimitry Andric 208bcb0991SDimitry Andric using namespace llvm; 218bcb0991SDimitry Andric using namespace gsym; 228bcb0991SDimitry Andric 238bcb0991SDimitry Andric 248bcb0991SDimitry Andric GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) { 258bcb0991SDimitry Andric insertFile(StringRef()); 268bcb0991SDimitry Andric } 278bcb0991SDimitry Andric 288bcb0991SDimitry Andric uint32_t GsymCreator::insertFile(StringRef Path, 298bcb0991SDimitry Andric llvm::sys::path::Style Style) { 308bcb0991SDimitry Andric llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); 318bcb0991SDimitry Andric llvm::StringRef filename = llvm::sys::path::filename(Path, Style); 325ffd83dbSDimitry Andric // We must insert the strings first, then call the FileEntry constructor. 335ffd83dbSDimitry Andric // If we inline the insertString() function call into the constructor, the 345ffd83dbSDimitry Andric // call order is undefined due to parameter lists not having any ordering 355ffd83dbSDimitry Andric // requirements. 365ffd83dbSDimitry Andric const uint32_t Dir = insertString(directory); 375ffd83dbSDimitry Andric const uint32_t Base = insertString(filename); 385ffd83dbSDimitry Andric FileEntry FE(Dir, Base); 398bcb0991SDimitry Andric 408bcb0991SDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 418bcb0991SDimitry Andric const auto NextIndex = Files.size(); 428bcb0991SDimitry Andric // Find FE in hash map and insert if not present. 438bcb0991SDimitry Andric auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); 448bcb0991SDimitry Andric if (R.second) 458bcb0991SDimitry Andric Files.emplace_back(FE); 468bcb0991SDimitry Andric return R.first->second; 478bcb0991SDimitry Andric } 488bcb0991SDimitry Andric 498bcb0991SDimitry Andric llvm::Error GsymCreator::save(StringRef Path, 508bcb0991SDimitry Andric llvm::support::endianness ByteOrder) const { 518bcb0991SDimitry Andric std::error_code EC; 528bcb0991SDimitry Andric raw_fd_ostream OutStrm(Path, EC); 538bcb0991SDimitry Andric if (EC) 548bcb0991SDimitry Andric return llvm::errorCodeToError(EC); 558bcb0991SDimitry Andric FileWriter O(OutStrm, ByteOrder); 568bcb0991SDimitry Andric return encode(O); 578bcb0991SDimitry Andric } 588bcb0991SDimitry Andric 598bcb0991SDimitry Andric llvm::Error GsymCreator::encode(FileWriter &O) const { 608bcb0991SDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 618bcb0991SDimitry Andric if (Funcs.empty()) 628bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 638bcb0991SDimitry Andric "no functions to encode"); 648bcb0991SDimitry Andric if (!Finalized) 658bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 668bcb0991SDimitry Andric "GsymCreator wasn't finalized prior to encoding"); 678bcb0991SDimitry Andric 688bcb0991SDimitry Andric if (Funcs.size() > UINT32_MAX) 698bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 708bcb0991SDimitry Andric "too many FunctionInfos"); 715ffd83dbSDimitry Andric 725ffd83dbSDimitry Andric const uint64_t MinAddr = BaseAddress ? *BaseAddress : Funcs.front().startAddress(); 738bcb0991SDimitry Andric const uint64_t MaxAddr = Funcs.back().startAddress(); 748bcb0991SDimitry Andric const uint64_t AddrDelta = MaxAddr - MinAddr; 758bcb0991SDimitry Andric Header Hdr; 768bcb0991SDimitry Andric Hdr.Magic = GSYM_MAGIC; 778bcb0991SDimitry Andric Hdr.Version = GSYM_VERSION; 788bcb0991SDimitry Andric Hdr.AddrOffSize = 0; 798bcb0991SDimitry Andric Hdr.UUIDSize = static_cast<uint8_t>(UUID.size()); 808bcb0991SDimitry Andric Hdr.BaseAddress = MinAddr; 818bcb0991SDimitry Andric Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size()); 828bcb0991SDimitry Andric Hdr.StrtabOffset = 0; // We will fix this up later. 835ffd83dbSDimitry Andric Hdr.StrtabSize = 0; // We will fix this up later. 848bcb0991SDimitry Andric memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); 858bcb0991SDimitry Andric if (UUID.size() > sizeof(Hdr.UUID)) 868bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 878bcb0991SDimitry Andric "invalid UUID size %u", (uint32_t)UUID.size()); 888bcb0991SDimitry Andric // Set the address offset size correctly in the GSYM header. 898bcb0991SDimitry Andric if (AddrDelta <= UINT8_MAX) 908bcb0991SDimitry Andric Hdr.AddrOffSize = 1; 918bcb0991SDimitry Andric else if (AddrDelta <= UINT16_MAX) 928bcb0991SDimitry Andric Hdr.AddrOffSize = 2; 938bcb0991SDimitry Andric else if (AddrDelta <= UINT32_MAX) 948bcb0991SDimitry Andric Hdr.AddrOffSize = 4; 958bcb0991SDimitry Andric else 968bcb0991SDimitry Andric Hdr.AddrOffSize = 8; 978bcb0991SDimitry Andric // Copy the UUID value if we have one. 988bcb0991SDimitry Andric if (UUID.size() > 0) 998bcb0991SDimitry Andric memcpy(Hdr.UUID, UUID.data(), UUID.size()); 1008bcb0991SDimitry Andric // Write out the header. 1018bcb0991SDimitry Andric llvm::Error Err = Hdr.encode(O); 1028bcb0991SDimitry Andric if (Err) 1038bcb0991SDimitry Andric return Err; 1048bcb0991SDimitry Andric 1058bcb0991SDimitry Andric // Write out the address offsets. 1068bcb0991SDimitry Andric O.alignTo(Hdr.AddrOffSize); 1078bcb0991SDimitry Andric for (const auto &FuncInfo : Funcs) { 1088bcb0991SDimitry Andric uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; 1098bcb0991SDimitry Andric switch(Hdr.AddrOffSize) { 1108bcb0991SDimitry Andric case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break; 1118bcb0991SDimitry Andric case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break; 1128bcb0991SDimitry Andric case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break; 1138bcb0991SDimitry Andric case 8: O.writeU64(AddrOffset); break; 1148bcb0991SDimitry Andric } 1158bcb0991SDimitry Andric } 1168bcb0991SDimitry Andric 1178bcb0991SDimitry Andric // Write out all zeros for the AddrInfoOffsets. 1188bcb0991SDimitry Andric O.alignTo(4); 1198bcb0991SDimitry Andric const off_t AddrInfoOffsetsOffset = O.tell(); 1208bcb0991SDimitry Andric for (size_t i = 0, n = Funcs.size(); i < n; ++i) 1218bcb0991SDimitry Andric O.writeU32(0); 1228bcb0991SDimitry Andric 1238bcb0991SDimitry Andric // Write out the file table 1248bcb0991SDimitry Andric O.alignTo(4); 1258bcb0991SDimitry Andric assert(!Files.empty()); 1268bcb0991SDimitry Andric assert(Files[0].Dir == 0); 1278bcb0991SDimitry Andric assert(Files[0].Base == 0); 1288bcb0991SDimitry Andric size_t NumFiles = Files.size(); 1298bcb0991SDimitry Andric if (NumFiles > UINT32_MAX) 1308bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 1318bcb0991SDimitry Andric "too many files"); 1328bcb0991SDimitry Andric O.writeU32(static_cast<uint32_t>(NumFiles)); 1338bcb0991SDimitry Andric for (auto File: Files) { 1348bcb0991SDimitry Andric O.writeU32(File.Dir); 1358bcb0991SDimitry Andric O.writeU32(File.Base); 1368bcb0991SDimitry Andric } 1378bcb0991SDimitry Andric 1388bcb0991SDimitry Andric // Write out the sting table. 1398bcb0991SDimitry Andric const off_t StrtabOffset = O.tell(); 1408bcb0991SDimitry Andric StrTab.write(O.get_stream()); 1418bcb0991SDimitry Andric const off_t StrtabSize = O.tell() - StrtabOffset; 1428bcb0991SDimitry Andric std::vector<uint32_t> AddrInfoOffsets; 1438bcb0991SDimitry Andric 1448bcb0991SDimitry Andric // Write out the address infos for each function info. 1458bcb0991SDimitry Andric for (const auto &FuncInfo : Funcs) { 1468bcb0991SDimitry Andric if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) 1478bcb0991SDimitry Andric AddrInfoOffsets.push_back(OffsetOrErr.get()); 1488bcb0991SDimitry Andric else 1498bcb0991SDimitry Andric return OffsetOrErr.takeError(); 1508bcb0991SDimitry Andric } 1518bcb0991SDimitry Andric // Fixup the string table offset and size in the header 1528bcb0991SDimitry Andric O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); 1538bcb0991SDimitry Andric O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); 1548bcb0991SDimitry Andric 1558bcb0991SDimitry Andric // Fixup all address info offsets 1568bcb0991SDimitry Andric uint64_t Offset = 0; 1578bcb0991SDimitry Andric for (auto AddrInfoOffset: AddrInfoOffsets) { 1588bcb0991SDimitry Andric O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); 1598bcb0991SDimitry Andric Offset += 4; 1608bcb0991SDimitry Andric } 1618bcb0991SDimitry Andric return ErrorSuccess(); 1628bcb0991SDimitry Andric } 1638bcb0991SDimitry Andric 1648bcb0991SDimitry Andric llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { 1658bcb0991SDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 1668bcb0991SDimitry Andric if (Finalized) 1678bcb0991SDimitry Andric return createStringError(std::errc::invalid_argument, 1688bcb0991SDimitry Andric "already finalized"); 1698bcb0991SDimitry Andric Finalized = true; 1708bcb0991SDimitry Andric 1718bcb0991SDimitry Andric // Sort function infos so we can emit sorted functions. 172*e8d8bef9SDimitry Andric llvm::sort(Funcs); 1738bcb0991SDimitry Andric 1748bcb0991SDimitry Andric // Don't let the string table indexes change by finalizing in order. 1758bcb0991SDimitry Andric StrTab.finalizeInOrder(); 1768bcb0991SDimitry Andric 1778bcb0991SDimitry Andric // Remove duplicates function infos that have both entries from debug info 1788bcb0991SDimitry Andric // (DWARF or Breakpad) and entries from the SymbolTable. 1798bcb0991SDimitry Andric // 1808bcb0991SDimitry Andric // Also handle overlapping function. Usually there shouldn't be any, but they 1818bcb0991SDimitry Andric // can and do happen in some rare cases. 1828bcb0991SDimitry Andric // 1838bcb0991SDimitry Andric // (a) (b) (c) 1848bcb0991SDimitry Andric // ^ ^ ^ ^ 1858bcb0991SDimitry Andric // |X |Y |X ^ |X 1868bcb0991SDimitry Andric // | | | |Y | ^ 1878bcb0991SDimitry Andric // | | | v v |Y 1888bcb0991SDimitry Andric // v v v v 1898bcb0991SDimitry Andric // 1908bcb0991SDimitry Andric // In (a) and (b), Y is ignored and X will be reported for the full range. 1918bcb0991SDimitry Andric // In (c), both functions will be included in the result and lookups for an 1928bcb0991SDimitry Andric // address in the intersection will return Y because of binary search. 1938bcb0991SDimitry Andric // 1948bcb0991SDimitry Andric // Note that in case of (b), we cannot include Y in the result because then 1958bcb0991SDimitry Andric // we wouldn't find any function for range (end of Y, end of X) 1968bcb0991SDimitry Andric // with binary search 1978bcb0991SDimitry Andric auto NumBefore = Funcs.size(); 1988bcb0991SDimitry Andric auto Curr = Funcs.begin(); 1998bcb0991SDimitry Andric auto Prev = Funcs.end(); 2008bcb0991SDimitry Andric while (Curr != Funcs.end()) { 2018bcb0991SDimitry Andric // Can't check for overlaps or same address ranges if we don't have a 2028bcb0991SDimitry Andric // previous entry 2038bcb0991SDimitry Andric if (Prev != Funcs.end()) { 2048bcb0991SDimitry Andric if (Prev->Range.intersects(Curr->Range)) { 2058bcb0991SDimitry Andric // Overlapping address ranges. 2068bcb0991SDimitry Andric if (Prev->Range == Curr->Range) { 2078bcb0991SDimitry Andric // Same address range. Check if one is from debug info and the other 2088bcb0991SDimitry Andric // is from a symbol table. If so, then keep the one with debug info. 2098bcb0991SDimitry Andric // Our sorting guarantees that entries with matching address ranges 2108bcb0991SDimitry Andric // that have debug info are last in the sort. 2118bcb0991SDimitry Andric if (*Prev == *Curr) { 2128bcb0991SDimitry Andric // FunctionInfo entries match exactly (range, lines, inlines) 2135ffd83dbSDimitry Andric OS << "warning: duplicate function info entries for range: " 2145ffd83dbSDimitry Andric << Curr->Range << '\n'; 2158bcb0991SDimitry Andric Curr = Funcs.erase(Prev); 2168bcb0991SDimitry Andric } else { 2178bcb0991SDimitry Andric if (!Prev->hasRichInfo() && Curr->hasRichInfo()) { 2188bcb0991SDimitry Andric // Same address range, one with no debug info (symbol) and the 2198bcb0991SDimitry Andric // next with debug info. Keep the latter. 2208bcb0991SDimitry Andric Curr = Funcs.erase(Prev); 2218bcb0991SDimitry Andric } else { 2228bcb0991SDimitry Andric OS << "warning: same address range contains different debug " 2238bcb0991SDimitry Andric << "info. Removing:\n" 2248bcb0991SDimitry Andric << *Prev << "\nIn favor of this one:\n" 2258bcb0991SDimitry Andric << *Curr << "\n"; 2268bcb0991SDimitry Andric Curr = Funcs.erase(Prev); 2278bcb0991SDimitry Andric } 2288bcb0991SDimitry Andric } 2298bcb0991SDimitry Andric } else { 2308bcb0991SDimitry Andric // print warnings about overlaps 2318bcb0991SDimitry Andric OS << "warning: function ranges overlap:\n" 2328bcb0991SDimitry Andric << *Prev << "\n" 2338bcb0991SDimitry Andric << *Curr << "\n"; 2348bcb0991SDimitry Andric } 2358bcb0991SDimitry Andric } else if (Prev->Range.size() == 0 && 2368bcb0991SDimitry Andric Curr->Range.contains(Prev->Range.Start)) { 2378bcb0991SDimitry Andric OS << "warning: removing symbol:\n" 2388bcb0991SDimitry Andric << *Prev << "\nKeeping:\n" 2398bcb0991SDimitry Andric << *Curr << "\n"; 2408bcb0991SDimitry Andric Curr = Funcs.erase(Prev); 2418bcb0991SDimitry Andric } 2428bcb0991SDimitry Andric } 2438bcb0991SDimitry Andric if (Curr == Funcs.end()) 2448bcb0991SDimitry Andric break; 2458bcb0991SDimitry Andric Prev = Curr++; 2468bcb0991SDimitry Andric } 2478bcb0991SDimitry Andric 2485ffd83dbSDimitry Andric // If our last function info entry doesn't have a size and if we have valid 2495ffd83dbSDimitry Andric // text ranges, we should set the size of the last entry since any search for 2505ffd83dbSDimitry Andric // a high address might match our last entry. By fixing up this size, we can 2515ffd83dbSDimitry Andric // help ensure we don't cause lookups to always return the last symbol that 2525ffd83dbSDimitry Andric // has no size when doing lookups. 2535ffd83dbSDimitry Andric if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) { 2545ffd83dbSDimitry Andric if (auto Range = ValidTextRanges->getRangeThatContains( 2555ffd83dbSDimitry Andric Funcs.back().Range.Start)) { 2565ffd83dbSDimitry Andric Funcs.back().Range.End = Range->End; 2575ffd83dbSDimitry Andric } 2585ffd83dbSDimitry Andric } 2598bcb0991SDimitry Andric OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " 2608bcb0991SDimitry Andric << Funcs.size() << " total\n"; 2618bcb0991SDimitry Andric return Error::success(); 2628bcb0991SDimitry Andric } 2638bcb0991SDimitry Andric 2645ffd83dbSDimitry Andric uint32_t GsymCreator::insertString(StringRef S, bool Copy) { 2658bcb0991SDimitry Andric if (S.empty()) 2668bcb0991SDimitry Andric return 0; 2675ffd83dbSDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 2685ffd83dbSDimitry Andric if (Copy) { 2695ffd83dbSDimitry Andric // We need to provide backing storage for the string if requested 2705ffd83dbSDimitry Andric // since StringTableBuilder stores references to strings. Any string 2715ffd83dbSDimitry Andric // that comes from a section in an object file doesn't need to be 2725ffd83dbSDimitry Andric // copied, but any string created by code will need to be copied. 2735ffd83dbSDimitry Andric // This allows GsymCreator to be really fast when parsing DWARF and 2745ffd83dbSDimitry Andric // other object files as most strings don't need to be copied. 2755ffd83dbSDimitry Andric CachedHashStringRef CHStr(S); 2765ffd83dbSDimitry Andric if (!StrTab.contains(CHStr)) 2775ffd83dbSDimitry Andric S = StringStorage.insert(S).first->getKey(); 2785ffd83dbSDimitry Andric } 2798bcb0991SDimitry Andric return StrTab.add(S); 2808bcb0991SDimitry Andric } 2818bcb0991SDimitry Andric 2828bcb0991SDimitry Andric void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { 2838bcb0991SDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 2845ffd83dbSDimitry Andric Ranges.insert(FI.Range); 2858bcb0991SDimitry Andric Funcs.emplace_back(FI); 2868bcb0991SDimitry Andric } 2878bcb0991SDimitry Andric 2888bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo( 2898bcb0991SDimitry Andric std::function<bool(FunctionInfo &)> const &Callback) { 2908bcb0991SDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 2918bcb0991SDimitry Andric for (auto &FI : Funcs) { 2928bcb0991SDimitry Andric if (!Callback(FI)) 2938bcb0991SDimitry Andric break; 2948bcb0991SDimitry Andric } 2958bcb0991SDimitry Andric } 2968bcb0991SDimitry Andric 2978bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo( 2988bcb0991SDimitry Andric std::function<bool(const FunctionInfo &)> const &Callback) const { 2998bcb0991SDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 3008bcb0991SDimitry Andric for (const auto &FI : Funcs) { 3018bcb0991SDimitry Andric if (!Callback(FI)) 3028bcb0991SDimitry Andric break; 3038bcb0991SDimitry Andric } 3048bcb0991SDimitry Andric } 3055ffd83dbSDimitry Andric 3065ffd83dbSDimitry Andric size_t GsymCreator::getNumFunctionInfos() const{ 3075ffd83dbSDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 3085ffd83dbSDimitry Andric return Funcs.size(); 3095ffd83dbSDimitry Andric } 3105ffd83dbSDimitry Andric 3115ffd83dbSDimitry Andric bool GsymCreator::IsValidTextAddress(uint64_t Addr) const { 3125ffd83dbSDimitry Andric if (ValidTextRanges) 3135ffd83dbSDimitry Andric return ValidTextRanges->contains(Addr); 3145ffd83dbSDimitry Andric return true; // No valid text ranges has been set, so accept all ranges. 3155ffd83dbSDimitry Andric } 3165ffd83dbSDimitry Andric 3175ffd83dbSDimitry Andric bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const { 3185ffd83dbSDimitry Andric std::lock_guard<std::recursive_mutex> Guard(Mutex); 3195ffd83dbSDimitry Andric return Ranges.contains(Addr); 3205ffd83dbSDimitry Andric } 321