xref: /freebsd-src/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
18bcb0991SDimitry Andric //===- GsymCreator.cpp ----------------------------------------------------===//
28bcb0991SDimitry Andric //
38bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
48bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
58bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
68bcb0991SDimitry Andric //===----------------------------------------------------------------------===//
78bcb0991SDimitry Andric 
88bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/GsymCreator.h"
98bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/FileWriter.h"
108bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/Header.h"
118bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/LineTable.h"
128bcb0991SDimitry Andric #include "llvm/MC/StringTableBuilder.h"
138bcb0991SDimitry Andric #include "llvm/Support/raw_ostream.h"
148bcb0991SDimitry Andric 
158bcb0991SDimitry Andric #include <algorithm>
168bcb0991SDimitry Andric #include <cassert>
178bcb0991SDimitry Andric #include <functional>
188bcb0991SDimitry Andric #include <vector>
198bcb0991SDimitry Andric 
208bcb0991SDimitry Andric using namespace llvm;
218bcb0991SDimitry Andric using namespace gsym;
228bcb0991SDimitry Andric 
238bcb0991SDimitry Andric 
248bcb0991SDimitry Andric GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) {
258bcb0991SDimitry Andric   insertFile(StringRef());
268bcb0991SDimitry Andric }
278bcb0991SDimitry Andric 
288bcb0991SDimitry Andric uint32_t GsymCreator::insertFile(StringRef Path,
298bcb0991SDimitry Andric                                  llvm::sys::path::Style Style) {
308bcb0991SDimitry Andric   llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
318bcb0991SDimitry Andric   llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
325ffd83dbSDimitry Andric   // We must insert the strings first, then call the FileEntry constructor.
335ffd83dbSDimitry Andric   // If we inline the insertString() function call into the constructor, the
345ffd83dbSDimitry Andric   // call order is undefined due to parameter lists not having any ordering
355ffd83dbSDimitry Andric   // requirements.
365ffd83dbSDimitry Andric   const uint32_t Dir = insertString(directory);
375ffd83dbSDimitry Andric   const uint32_t Base = insertString(filename);
385ffd83dbSDimitry Andric   FileEntry FE(Dir, Base);
398bcb0991SDimitry Andric 
408bcb0991SDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
418bcb0991SDimitry Andric   const auto NextIndex = Files.size();
428bcb0991SDimitry Andric   // Find FE in hash map and insert if not present.
438bcb0991SDimitry Andric   auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
448bcb0991SDimitry Andric   if (R.second)
458bcb0991SDimitry Andric     Files.emplace_back(FE);
468bcb0991SDimitry Andric   return R.first->second;
478bcb0991SDimitry Andric }
488bcb0991SDimitry Andric 
498bcb0991SDimitry Andric llvm::Error GsymCreator::save(StringRef Path,
508bcb0991SDimitry Andric                               llvm::support::endianness ByteOrder) const {
518bcb0991SDimitry Andric   std::error_code EC;
528bcb0991SDimitry Andric   raw_fd_ostream OutStrm(Path, EC);
538bcb0991SDimitry Andric   if (EC)
548bcb0991SDimitry Andric     return llvm::errorCodeToError(EC);
558bcb0991SDimitry Andric   FileWriter O(OutStrm, ByteOrder);
568bcb0991SDimitry Andric   return encode(O);
578bcb0991SDimitry Andric }
588bcb0991SDimitry Andric 
598bcb0991SDimitry Andric llvm::Error GsymCreator::encode(FileWriter &O) const {
608bcb0991SDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
618bcb0991SDimitry Andric   if (Funcs.empty())
628bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
638bcb0991SDimitry Andric                              "no functions to encode");
648bcb0991SDimitry Andric   if (!Finalized)
658bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
668bcb0991SDimitry Andric                              "GsymCreator wasn't finalized prior to encoding");
678bcb0991SDimitry Andric 
688bcb0991SDimitry Andric   if (Funcs.size() > UINT32_MAX)
698bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
708bcb0991SDimitry Andric                              "too many FunctionInfos");
715ffd83dbSDimitry Andric 
725ffd83dbSDimitry Andric   const uint64_t MinAddr = BaseAddress ? *BaseAddress : Funcs.front().startAddress();
738bcb0991SDimitry Andric   const uint64_t MaxAddr = Funcs.back().startAddress();
748bcb0991SDimitry Andric   const uint64_t AddrDelta = MaxAddr - MinAddr;
758bcb0991SDimitry Andric   Header Hdr;
768bcb0991SDimitry Andric   Hdr.Magic = GSYM_MAGIC;
778bcb0991SDimitry Andric   Hdr.Version = GSYM_VERSION;
788bcb0991SDimitry Andric   Hdr.AddrOffSize = 0;
798bcb0991SDimitry Andric   Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
808bcb0991SDimitry Andric   Hdr.BaseAddress = MinAddr;
818bcb0991SDimitry Andric   Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
828bcb0991SDimitry Andric   Hdr.StrtabOffset = 0; // We will fix this up later.
835ffd83dbSDimitry Andric   Hdr.StrtabSize = 0; // We will fix this up later.
848bcb0991SDimitry Andric   memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
858bcb0991SDimitry Andric   if (UUID.size() > sizeof(Hdr.UUID))
868bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
878bcb0991SDimitry Andric                              "invalid UUID size %u", (uint32_t)UUID.size());
888bcb0991SDimitry Andric   // Set the address offset size correctly in the GSYM header.
898bcb0991SDimitry Andric   if (AddrDelta <= UINT8_MAX)
908bcb0991SDimitry Andric     Hdr.AddrOffSize = 1;
918bcb0991SDimitry Andric   else if (AddrDelta <= UINT16_MAX)
928bcb0991SDimitry Andric     Hdr.AddrOffSize = 2;
938bcb0991SDimitry Andric   else if (AddrDelta <= UINT32_MAX)
948bcb0991SDimitry Andric     Hdr.AddrOffSize = 4;
958bcb0991SDimitry Andric   else
968bcb0991SDimitry Andric     Hdr.AddrOffSize = 8;
978bcb0991SDimitry Andric   // Copy the UUID value if we have one.
988bcb0991SDimitry Andric   if (UUID.size() > 0)
998bcb0991SDimitry Andric     memcpy(Hdr.UUID, UUID.data(), UUID.size());
1008bcb0991SDimitry Andric   // Write out the header.
1018bcb0991SDimitry Andric   llvm::Error Err = Hdr.encode(O);
1028bcb0991SDimitry Andric   if (Err)
1038bcb0991SDimitry Andric     return Err;
1048bcb0991SDimitry Andric 
1058bcb0991SDimitry Andric   // Write out the address offsets.
1068bcb0991SDimitry Andric   O.alignTo(Hdr.AddrOffSize);
1078bcb0991SDimitry Andric   for (const auto &FuncInfo : Funcs) {
1088bcb0991SDimitry Andric     uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
1098bcb0991SDimitry Andric     switch(Hdr.AddrOffSize) {
1108bcb0991SDimitry Andric       case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break;
1118bcb0991SDimitry Andric       case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break;
1128bcb0991SDimitry Andric       case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break;
1138bcb0991SDimitry Andric       case 8: O.writeU64(AddrOffset); break;
1148bcb0991SDimitry Andric     }
1158bcb0991SDimitry Andric   }
1168bcb0991SDimitry Andric 
1178bcb0991SDimitry Andric   // Write out all zeros for the AddrInfoOffsets.
1188bcb0991SDimitry Andric   O.alignTo(4);
1198bcb0991SDimitry Andric   const off_t AddrInfoOffsetsOffset = O.tell();
1208bcb0991SDimitry Andric   for (size_t i = 0, n = Funcs.size(); i < n; ++i)
1218bcb0991SDimitry Andric     O.writeU32(0);
1228bcb0991SDimitry Andric 
1238bcb0991SDimitry Andric   // Write out the file table
1248bcb0991SDimitry Andric   O.alignTo(4);
1258bcb0991SDimitry Andric   assert(!Files.empty());
1268bcb0991SDimitry Andric   assert(Files[0].Dir == 0);
1278bcb0991SDimitry Andric   assert(Files[0].Base == 0);
1288bcb0991SDimitry Andric   size_t NumFiles = Files.size();
1298bcb0991SDimitry Andric   if (NumFiles > UINT32_MAX)
1308bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
1318bcb0991SDimitry Andric                              "too many files");
1328bcb0991SDimitry Andric   O.writeU32(static_cast<uint32_t>(NumFiles));
1338bcb0991SDimitry Andric   for (auto File: Files) {
1348bcb0991SDimitry Andric       O.writeU32(File.Dir);
1358bcb0991SDimitry Andric       O.writeU32(File.Base);
1368bcb0991SDimitry Andric   }
1378bcb0991SDimitry Andric 
1388bcb0991SDimitry Andric   // Write out the sting table.
1398bcb0991SDimitry Andric   const off_t StrtabOffset = O.tell();
1408bcb0991SDimitry Andric   StrTab.write(O.get_stream());
1418bcb0991SDimitry Andric   const off_t StrtabSize = O.tell() - StrtabOffset;
1428bcb0991SDimitry Andric   std::vector<uint32_t> AddrInfoOffsets;
1438bcb0991SDimitry Andric 
1448bcb0991SDimitry Andric   // Write out the address infos for each function info.
1458bcb0991SDimitry Andric   for (const auto &FuncInfo : Funcs) {
1468bcb0991SDimitry Andric     if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
1478bcb0991SDimitry Andric         AddrInfoOffsets.push_back(OffsetOrErr.get());
1488bcb0991SDimitry Andric     else
1498bcb0991SDimitry Andric         return OffsetOrErr.takeError();
1508bcb0991SDimitry Andric   }
1518bcb0991SDimitry Andric   // Fixup the string table offset and size in the header
1528bcb0991SDimitry Andric   O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
1538bcb0991SDimitry Andric   O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
1548bcb0991SDimitry Andric 
1558bcb0991SDimitry Andric   // Fixup all address info offsets
1568bcb0991SDimitry Andric   uint64_t Offset = 0;
1578bcb0991SDimitry Andric   for (auto AddrInfoOffset: AddrInfoOffsets) {
1588bcb0991SDimitry Andric     O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
1598bcb0991SDimitry Andric     Offset += 4;
1608bcb0991SDimitry Andric   }
1618bcb0991SDimitry Andric   return ErrorSuccess();
1628bcb0991SDimitry Andric }
1638bcb0991SDimitry Andric 
1648bcb0991SDimitry Andric llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
1658bcb0991SDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
1668bcb0991SDimitry Andric   if (Finalized)
1678bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
1688bcb0991SDimitry Andric                              "already finalized");
1698bcb0991SDimitry Andric   Finalized = true;
1708bcb0991SDimitry Andric 
1718bcb0991SDimitry Andric   // Sort function infos so we can emit sorted functions.
172*e8d8bef9SDimitry Andric   llvm::sort(Funcs);
1738bcb0991SDimitry Andric 
1748bcb0991SDimitry Andric   // Don't let the string table indexes change by finalizing in order.
1758bcb0991SDimitry Andric   StrTab.finalizeInOrder();
1768bcb0991SDimitry Andric 
1778bcb0991SDimitry Andric   // Remove duplicates function infos that have both entries from debug info
1788bcb0991SDimitry Andric   // (DWARF or Breakpad) and entries from the SymbolTable.
1798bcb0991SDimitry Andric   //
1808bcb0991SDimitry Andric   // Also handle overlapping function. Usually there shouldn't be any, but they
1818bcb0991SDimitry Andric   // can and do happen in some rare cases.
1828bcb0991SDimitry Andric   //
1838bcb0991SDimitry Andric   // (a)          (b)         (c)
1848bcb0991SDimitry Andric   //     ^  ^       ^            ^
1858bcb0991SDimitry Andric   //     |X |Y      |X ^         |X
1868bcb0991SDimitry Andric   //     |  |       |  |Y        |  ^
1878bcb0991SDimitry Andric   //     |  |       |  v         v  |Y
1888bcb0991SDimitry Andric   //     v  v       v               v
1898bcb0991SDimitry Andric   //
1908bcb0991SDimitry Andric   // In (a) and (b), Y is ignored and X will be reported for the full range.
1918bcb0991SDimitry Andric   // In (c), both functions will be included in the result and lookups for an
1928bcb0991SDimitry Andric   // address in the intersection will return Y because of binary search.
1938bcb0991SDimitry Andric   //
1948bcb0991SDimitry Andric   // Note that in case of (b), we cannot include Y in the result because then
1958bcb0991SDimitry Andric   // we wouldn't find any function for range (end of Y, end of X)
1968bcb0991SDimitry Andric   // with binary search
1978bcb0991SDimitry Andric   auto NumBefore = Funcs.size();
1988bcb0991SDimitry Andric   auto Curr = Funcs.begin();
1998bcb0991SDimitry Andric   auto Prev = Funcs.end();
2008bcb0991SDimitry Andric   while (Curr != Funcs.end()) {
2018bcb0991SDimitry Andric     // Can't check for overlaps or same address ranges if we don't have a
2028bcb0991SDimitry Andric     // previous entry
2038bcb0991SDimitry Andric     if (Prev != Funcs.end()) {
2048bcb0991SDimitry Andric       if (Prev->Range.intersects(Curr->Range)) {
2058bcb0991SDimitry Andric         // Overlapping address ranges.
2068bcb0991SDimitry Andric         if (Prev->Range == Curr->Range) {
2078bcb0991SDimitry Andric           // Same address range. Check if one is from debug info and the other
2088bcb0991SDimitry Andric           // is from a symbol table. If so, then keep the one with debug info.
2098bcb0991SDimitry Andric           // Our sorting guarantees that entries with matching address ranges
2108bcb0991SDimitry Andric           // that have debug info are last in the sort.
2118bcb0991SDimitry Andric           if (*Prev == *Curr) {
2128bcb0991SDimitry Andric             // FunctionInfo entries match exactly (range, lines, inlines)
2135ffd83dbSDimitry Andric             OS << "warning: duplicate function info entries for range: "
2145ffd83dbSDimitry Andric                << Curr->Range << '\n';
2158bcb0991SDimitry Andric             Curr = Funcs.erase(Prev);
2168bcb0991SDimitry Andric           } else {
2178bcb0991SDimitry Andric             if (!Prev->hasRichInfo() && Curr->hasRichInfo()) {
2188bcb0991SDimitry Andric               // Same address range, one with no debug info (symbol) and the
2198bcb0991SDimitry Andric               // next with debug info. Keep the latter.
2208bcb0991SDimitry Andric               Curr = Funcs.erase(Prev);
2218bcb0991SDimitry Andric             } else {
2228bcb0991SDimitry Andric               OS << "warning: same address range contains different debug "
2238bcb0991SDimitry Andric                  << "info. Removing:\n"
2248bcb0991SDimitry Andric                  << *Prev << "\nIn favor of this one:\n"
2258bcb0991SDimitry Andric                  << *Curr << "\n";
2268bcb0991SDimitry Andric               Curr = Funcs.erase(Prev);
2278bcb0991SDimitry Andric             }
2288bcb0991SDimitry Andric           }
2298bcb0991SDimitry Andric         } else {
2308bcb0991SDimitry Andric           // print warnings about overlaps
2318bcb0991SDimitry Andric           OS << "warning: function ranges overlap:\n"
2328bcb0991SDimitry Andric              << *Prev << "\n"
2338bcb0991SDimitry Andric              << *Curr << "\n";
2348bcb0991SDimitry Andric         }
2358bcb0991SDimitry Andric       } else if (Prev->Range.size() == 0 &&
2368bcb0991SDimitry Andric                  Curr->Range.contains(Prev->Range.Start)) {
2378bcb0991SDimitry Andric         OS << "warning: removing symbol:\n"
2388bcb0991SDimitry Andric            << *Prev << "\nKeeping:\n"
2398bcb0991SDimitry Andric            << *Curr << "\n";
2408bcb0991SDimitry Andric         Curr = Funcs.erase(Prev);
2418bcb0991SDimitry Andric       }
2428bcb0991SDimitry Andric     }
2438bcb0991SDimitry Andric     if (Curr == Funcs.end())
2448bcb0991SDimitry Andric       break;
2458bcb0991SDimitry Andric     Prev = Curr++;
2468bcb0991SDimitry Andric   }
2478bcb0991SDimitry Andric 
2485ffd83dbSDimitry Andric   // If our last function info entry doesn't have a size and if we have valid
2495ffd83dbSDimitry Andric   // text ranges, we should set the size of the last entry since any search for
2505ffd83dbSDimitry Andric   // a high address might match our last entry. By fixing up this size, we can
2515ffd83dbSDimitry Andric   // help ensure we don't cause lookups to always return the last symbol that
2525ffd83dbSDimitry Andric   // has no size when doing lookups.
2535ffd83dbSDimitry Andric   if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
2545ffd83dbSDimitry Andric     if (auto Range = ValidTextRanges->getRangeThatContains(
2555ffd83dbSDimitry Andric           Funcs.back().Range.Start)) {
2565ffd83dbSDimitry Andric       Funcs.back().Range.End = Range->End;
2575ffd83dbSDimitry Andric     }
2585ffd83dbSDimitry Andric   }
2598bcb0991SDimitry Andric   OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
2608bcb0991SDimitry Andric      << Funcs.size() << " total\n";
2618bcb0991SDimitry Andric   return Error::success();
2628bcb0991SDimitry Andric }
2638bcb0991SDimitry Andric 
2645ffd83dbSDimitry Andric uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
2658bcb0991SDimitry Andric   if (S.empty())
2668bcb0991SDimitry Andric     return 0;
2675ffd83dbSDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
2685ffd83dbSDimitry Andric   if (Copy) {
2695ffd83dbSDimitry Andric     // We need to provide backing storage for the string if requested
2705ffd83dbSDimitry Andric     // since StringTableBuilder stores references to strings. Any string
2715ffd83dbSDimitry Andric     // that comes from a section in an object file doesn't need to be
2725ffd83dbSDimitry Andric     // copied, but any string created by code will need to be copied.
2735ffd83dbSDimitry Andric     // This allows GsymCreator to be really fast when parsing DWARF and
2745ffd83dbSDimitry Andric     // other object files as most strings don't need to be copied.
2755ffd83dbSDimitry Andric     CachedHashStringRef CHStr(S);
2765ffd83dbSDimitry Andric     if (!StrTab.contains(CHStr))
2775ffd83dbSDimitry Andric       S = StringStorage.insert(S).first->getKey();
2785ffd83dbSDimitry Andric   }
2798bcb0991SDimitry Andric   return StrTab.add(S);
2808bcb0991SDimitry Andric }
2818bcb0991SDimitry Andric 
2828bcb0991SDimitry Andric void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
2838bcb0991SDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
2845ffd83dbSDimitry Andric   Ranges.insert(FI.Range);
2858bcb0991SDimitry Andric   Funcs.emplace_back(FI);
2868bcb0991SDimitry Andric }
2878bcb0991SDimitry Andric 
2888bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo(
2898bcb0991SDimitry Andric     std::function<bool(FunctionInfo &)> const &Callback) {
2908bcb0991SDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
2918bcb0991SDimitry Andric   for (auto &FI : Funcs) {
2928bcb0991SDimitry Andric     if (!Callback(FI))
2938bcb0991SDimitry Andric       break;
2948bcb0991SDimitry Andric   }
2958bcb0991SDimitry Andric }
2968bcb0991SDimitry Andric 
2978bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo(
2988bcb0991SDimitry Andric     std::function<bool(const FunctionInfo &)> const &Callback) const {
2998bcb0991SDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
3008bcb0991SDimitry Andric   for (const auto &FI : Funcs) {
3018bcb0991SDimitry Andric     if (!Callback(FI))
3028bcb0991SDimitry Andric       break;
3038bcb0991SDimitry Andric   }
3048bcb0991SDimitry Andric }
3055ffd83dbSDimitry Andric 
3065ffd83dbSDimitry Andric size_t GsymCreator::getNumFunctionInfos() const{
3075ffd83dbSDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
3085ffd83dbSDimitry Andric   return Funcs.size();
3095ffd83dbSDimitry Andric }
3105ffd83dbSDimitry Andric 
3115ffd83dbSDimitry Andric bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
3125ffd83dbSDimitry Andric   if (ValidTextRanges)
3135ffd83dbSDimitry Andric     return ValidTextRanges->contains(Addr);
3145ffd83dbSDimitry Andric   return true; // No valid text ranges has been set, so accept all ranges.
3155ffd83dbSDimitry Andric }
3165ffd83dbSDimitry Andric 
3175ffd83dbSDimitry Andric bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const {
3185ffd83dbSDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
3195ffd83dbSDimitry Andric   return Ranges.contains(Addr);
3205ffd83dbSDimitry Andric }
321