xref: /freebsd-src/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
18bcb0991SDimitry Andric //===- GsymCreator.cpp ----------------------------------------------------===//
28bcb0991SDimitry Andric //
38bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
48bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
58bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
68bcb0991SDimitry Andric //===----------------------------------------------------------------------===//
78bcb0991SDimitry Andric 
88bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/GsymCreator.h"
98bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/FileWriter.h"
108bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/Header.h"
118bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/LineTable.h"
128bcb0991SDimitry Andric #include "llvm/MC/StringTableBuilder.h"
138bcb0991SDimitry Andric #include "llvm/Support/raw_ostream.h"
148bcb0991SDimitry Andric 
158bcb0991SDimitry Andric #include <algorithm>
168bcb0991SDimitry Andric #include <cassert>
178bcb0991SDimitry Andric #include <functional>
188bcb0991SDimitry Andric #include <vector>
198bcb0991SDimitry Andric 
208bcb0991SDimitry Andric using namespace llvm;
218bcb0991SDimitry Andric using namespace gsym;
228bcb0991SDimitry Andric 
23fe6060f1SDimitry Andric GsymCreator::GsymCreator(bool Quiet)
24fe6060f1SDimitry Andric     : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
258bcb0991SDimitry Andric   insertFile(StringRef());
268bcb0991SDimitry Andric }
278bcb0991SDimitry Andric 
28fe6060f1SDimitry Andric uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
298bcb0991SDimitry Andric   llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
308bcb0991SDimitry Andric   llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
315ffd83dbSDimitry Andric   // We must insert the strings first, then call the FileEntry constructor.
325ffd83dbSDimitry Andric   // If we inline the insertString() function call into the constructor, the
335ffd83dbSDimitry Andric   // call order is undefined due to parameter lists not having any ordering
345ffd83dbSDimitry Andric   // requirements.
355ffd83dbSDimitry Andric   const uint32_t Dir = insertString(directory);
365ffd83dbSDimitry Andric   const uint32_t Base = insertString(filename);
37*06c3fb27SDimitry Andric   return insertFileEntry(FileEntry(Dir, Base));
38*06c3fb27SDimitry Andric }
398bcb0991SDimitry Andric 
40*06c3fb27SDimitry Andric uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
41fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
428bcb0991SDimitry Andric   const auto NextIndex = Files.size();
438bcb0991SDimitry Andric   // Find FE in hash map and insert if not present.
448bcb0991SDimitry Andric   auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
458bcb0991SDimitry Andric   if (R.second)
468bcb0991SDimitry Andric     Files.emplace_back(FE);
478bcb0991SDimitry Andric   return R.first->second;
488bcb0991SDimitry Andric }
498bcb0991SDimitry Andric 
50*06c3fb27SDimitry Andric uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {
51*06c3fb27SDimitry Andric   // File index zero is reserved for a FileEntry with no directory and no
52*06c3fb27SDimitry Andric   // filename. Any other file and we need to copy the strings for the directory
53*06c3fb27SDimitry Andric   // and filename.
54*06c3fb27SDimitry Andric   if (FileIdx == 0)
55*06c3fb27SDimitry Andric     return 0;
56*06c3fb27SDimitry Andric   const FileEntry SrcFE = SrcGC.Files[FileIdx];
57*06c3fb27SDimitry Andric   // Copy the strings for the file and then add the newly converted file entry.
58*06c3fb27SDimitry Andric   uint32_t Dir = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second);
59*06c3fb27SDimitry Andric   uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second);
60*06c3fb27SDimitry Andric   FileEntry DstFE(Dir, Base);
61*06c3fb27SDimitry Andric   return insertFileEntry(DstFE);
62*06c3fb27SDimitry Andric }
63*06c3fb27SDimitry Andric 
64*06c3fb27SDimitry Andric 
658bcb0991SDimitry Andric llvm::Error GsymCreator::save(StringRef Path,
66*06c3fb27SDimitry Andric                               llvm::support::endianness ByteOrder,
67*06c3fb27SDimitry Andric                               std::optional<uint64_t> SegmentSize) const {
68*06c3fb27SDimitry Andric   if (SegmentSize)
69*06c3fb27SDimitry Andric     return saveSegments(Path, ByteOrder, *SegmentSize);
708bcb0991SDimitry Andric   std::error_code EC;
718bcb0991SDimitry Andric   raw_fd_ostream OutStrm(Path, EC);
728bcb0991SDimitry Andric   if (EC)
738bcb0991SDimitry Andric     return llvm::errorCodeToError(EC);
748bcb0991SDimitry Andric   FileWriter O(OutStrm, ByteOrder);
758bcb0991SDimitry Andric   return encode(O);
768bcb0991SDimitry Andric }
778bcb0991SDimitry Andric 
788bcb0991SDimitry Andric llvm::Error GsymCreator::encode(FileWriter &O) const {
79fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
808bcb0991SDimitry Andric   if (Funcs.empty())
818bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
828bcb0991SDimitry Andric                              "no functions to encode");
838bcb0991SDimitry Andric   if (!Finalized)
848bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
858bcb0991SDimitry Andric                              "GsymCreator wasn't finalized prior to encoding");
868bcb0991SDimitry Andric 
878bcb0991SDimitry Andric   if (Funcs.size() > UINT32_MAX)
888bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
898bcb0991SDimitry Andric                              "too many FunctionInfos");
905ffd83dbSDimitry Andric 
91*06c3fb27SDimitry Andric   std::optional<uint64_t> BaseAddress = getBaseAddress();
92*06c3fb27SDimitry Andric   // Base address should be valid if we have any functions.
93*06c3fb27SDimitry Andric   if (!BaseAddress)
94*06c3fb27SDimitry Andric     return createStringError(std::errc::invalid_argument,
95*06c3fb27SDimitry Andric                              "invalid base address");
968bcb0991SDimitry Andric   Header Hdr;
978bcb0991SDimitry Andric   Hdr.Magic = GSYM_MAGIC;
988bcb0991SDimitry Andric   Hdr.Version = GSYM_VERSION;
99*06c3fb27SDimitry Andric   Hdr.AddrOffSize = getAddressOffsetSize();
1008bcb0991SDimitry Andric   Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
101*06c3fb27SDimitry Andric   Hdr.BaseAddress = *BaseAddress;
1028bcb0991SDimitry Andric   Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
1038bcb0991SDimitry Andric   Hdr.StrtabOffset = 0; // We will fix this up later.
1045ffd83dbSDimitry Andric   Hdr.StrtabSize = 0;   // We will fix this up later.
1058bcb0991SDimitry Andric   memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
1068bcb0991SDimitry Andric   if (UUID.size() > sizeof(Hdr.UUID))
1078bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
1088bcb0991SDimitry Andric                              "invalid UUID size %u", (uint32_t)UUID.size());
1098bcb0991SDimitry Andric   // Copy the UUID value if we have one.
1108bcb0991SDimitry Andric   if (UUID.size() > 0)
1118bcb0991SDimitry Andric     memcpy(Hdr.UUID, UUID.data(), UUID.size());
1128bcb0991SDimitry Andric   // Write out the header.
1138bcb0991SDimitry Andric   llvm::Error Err = Hdr.encode(O);
1148bcb0991SDimitry Andric   if (Err)
1158bcb0991SDimitry Andric     return Err;
1168bcb0991SDimitry Andric 
117*06c3fb27SDimitry Andric   const uint64_t MaxAddressOffset = getMaxAddressOffset();
1188bcb0991SDimitry Andric   // Write out the address offsets.
1198bcb0991SDimitry Andric   O.alignTo(Hdr.AddrOffSize);
1208bcb0991SDimitry Andric   for (const auto &FuncInfo : Funcs) {
1218bcb0991SDimitry Andric     uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
122*06c3fb27SDimitry Andric     // Make sure we calculated the address offsets byte size correctly by
123*06c3fb27SDimitry Andric     // verifying the current address offset is within ranges. We have seen bugs
124*06c3fb27SDimitry Andric     // introduced when the code changes that can cause problems here so it is
125*06c3fb27SDimitry Andric     // good to catch this during testing.
126*06c3fb27SDimitry Andric     assert(AddrOffset <= MaxAddressOffset);
127*06c3fb27SDimitry Andric     (void)MaxAddressOffset;
1288bcb0991SDimitry Andric     switch (Hdr.AddrOffSize) {
129fe6060f1SDimitry Andric     case 1:
130fe6060f1SDimitry Andric       O.writeU8(static_cast<uint8_t>(AddrOffset));
131fe6060f1SDimitry Andric       break;
132fe6060f1SDimitry Andric     case 2:
133fe6060f1SDimitry Andric       O.writeU16(static_cast<uint16_t>(AddrOffset));
134fe6060f1SDimitry Andric       break;
135fe6060f1SDimitry Andric     case 4:
136fe6060f1SDimitry Andric       O.writeU32(static_cast<uint32_t>(AddrOffset));
137fe6060f1SDimitry Andric       break;
138fe6060f1SDimitry Andric     case 8:
139fe6060f1SDimitry Andric       O.writeU64(AddrOffset);
140fe6060f1SDimitry Andric       break;
1418bcb0991SDimitry Andric     }
1428bcb0991SDimitry Andric   }
1438bcb0991SDimitry Andric 
1448bcb0991SDimitry Andric   // Write out all zeros for the AddrInfoOffsets.
1458bcb0991SDimitry Andric   O.alignTo(4);
1468bcb0991SDimitry Andric   const off_t AddrInfoOffsetsOffset = O.tell();
1478bcb0991SDimitry Andric   for (size_t i = 0, n = Funcs.size(); i < n; ++i)
1488bcb0991SDimitry Andric     O.writeU32(0);
1498bcb0991SDimitry Andric 
1508bcb0991SDimitry Andric   // Write out the file table
1518bcb0991SDimitry Andric   O.alignTo(4);
1528bcb0991SDimitry Andric   assert(!Files.empty());
1538bcb0991SDimitry Andric   assert(Files[0].Dir == 0);
1548bcb0991SDimitry Andric   assert(Files[0].Base == 0);
1558bcb0991SDimitry Andric   size_t NumFiles = Files.size();
1568bcb0991SDimitry Andric   if (NumFiles > UINT32_MAX)
157fe6060f1SDimitry Andric     return createStringError(std::errc::invalid_argument, "too many files");
1588bcb0991SDimitry Andric   O.writeU32(static_cast<uint32_t>(NumFiles));
1598bcb0991SDimitry Andric   for (auto File : Files) {
1608bcb0991SDimitry Andric     O.writeU32(File.Dir);
1618bcb0991SDimitry Andric     O.writeU32(File.Base);
1628bcb0991SDimitry Andric   }
1638bcb0991SDimitry Andric 
164*06c3fb27SDimitry Andric   // Write out the string table.
1658bcb0991SDimitry Andric   const off_t StrtabOffset = O.tell();
1668bcb0991SDimitry Andric   StrTab.write(O.get_stream());
1678bcb0991SDimitry Andric   const off_t StrtabSize = O.tell() - StrtabOffset;
1688bcb0991SDimitry Andric   std::vector<uint32_t> AddrInfoOffsets;
1698bcb0991SDimitry Andric 
1708bcb0991SDimitry Andric   // Write out the address infos for each function info.
1718bcb0991SDimitry Andric   for (const auto &FuncInfo : Funcs) {
1728bcb0991SDimitry Andric     if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
1738bcb0991SDimitry Andric       AddrInfoOffsets.push_back(OffsetOrErr.get());
1748bcb0991SDimitry Andric     else
1758bcb0991SDimitry Andric       return OffsetOrErr.takeError();
1768bcb0991SDimitry Andric   }
1778bcb0991SDimitry Andric   // Fixup the string table offset and size in the header
1788bcb0991SDimitry Andric   O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
1798bcb0991SDimitry Andric   O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
1808bcb0991SDimitry Andric 
1818bcb0991SDimitry Andric   // Fixup all address info offsets
1828bcb0991SDimitry Andric   uint64_t Offset = 0;
1838bcb0991SDimitry Andric   for (auto AddrInfoOffset : AddrInfoOffsets) {
1848bcb0991SDimitry Andric     O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
1858bcb0991SDimitry Andric     Offset += 4;
1868bcb0991SDimitry Andric   }
1878bcb0991SDimitry Andric   return ErrorSuccess();
1888bcb0991SDimitry Andric }
1898bcb0991SDimitry Andric 
// Similar to std::remove_if, but the predicate is binary: it is called as
// Pred(Prev, Curr), where Prev is the most recently kept element and Curr is
// the next element of the input.
//
// When Pred(Prev, Curr) returns true, Prev is dropped and Curr takes its place
// (and becomes the Prev of the next call). When it returns false, both are
// kept. This matches the contract GsymCreator::finalize() relies on, where a
// true result means "discard Prev in favor of Curr".
//
// Surviving elements are compacted to the front of the range in their original
// relative order. The returned iterator is one past the last survivor, ready
// to be passed to the container's erase(). An empty range is returned as is.
template <class ForwardIt, class BinaryPredicate>
static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt,
                                BinaryPredicate Pred) {
  if (FirstIt == LastIt)
    return LastIt;

  // KeptIt always designates the slot holding the current "Prev".
  ForwardIt KeptIt = FirstIt;
  ForwardIt CurrIt = FirstIt;
  for (++CurrIt; CurrIt != LastIt; ++CurrIt) {
    // Prev survives: leave it where it is and open the next slot for Curr.
    // Otherwise Curr overwrites Prev in place.
    if (!Pred(*KeptIt, *CurrIt))
      ++KeptIt;
    // Skip the self-move while nothing has been removed yet.
    if (KeptIt != CurrIt)
      *KeptIt = std::move(*CurrIt);
  }
  return ++KeptIt;
}
209fe6060f1SDimitry Andric 
2108bcb0991SDimitry Andric llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
211fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
2128bcb0991SDimitry Andric   if (Finalized)
213fe6060f1SDimitry Andric     return createStringError(std::errc::invalid_argument, "already finalized");
2148bcb0991SDimitry Andric   Finalized = true;
2158bcb0991SDimitry Andric 
2168bcb0991SDimitry Andric   // Sort function infos so we can emit sorted functions.
217e8d8bef9SDimitry Andric   llvm::sort(Funcs);
2188bcb0991SDimitry Andric 
2198bcb0991SDimitry Andric   // Don't let the string table indexes change by finalizing in order.
2208bcb0991SDimitry Andric   StrTab.finalizeInOrder();
2218bcb0991SDimitry Andric 
2228bcb0991SDimitry Andric   // Remove duplicates function infos that have both entries from debug info
2238bcb0991SDimitry Andric   // (DWARF or Breakpad) and entries from the SymbolTable.
2248bcb0991SDimitry Andric   //
2258bcb0991SDimitry Andric   // Also handle overlapping function. Usually there shouldn't be any, but they
2268bcb0991SDimitry Andric   // can and do happen in some rare cases.
2278bcb0991SDimitry Andric   //
2288bcb0991SDimitry Andric   // (a)          (b)         (c)
2298bcb0991SDimitry Andric   //     ^  ^       ^            ^
2308bcb0991SDimitry Andric   //     |X |Y      |X ^         |X
2318bcb0991SDimitry Andric   //     |  |       |  |Y        |  ^
2328bcb0991SDimitry Andric   //     |  |       |  v         v  |Y
2338bcb0991SDimitry Andric   //     v  v       v               v
2348bcb0991SDimitry Andric   //
2358bcb0991SDimitry Andric   // In (a) and (b), Y is ignored and X will be reported for the full range.
2368bcb0991SDimitry Andric   // In (c), both functions will be included in the result and lookups for an
2378bcb0991SDimitry Andric   // address in the intersection will return Y because of binary search.
2388bcb0991SDimitry Andric   //
2398bcb0991SDimitry Andric   // Note that in case of (b), we cannot include Y in the result because then
2408bcb0991SDimitry Andric   // we wouldn't find any function for range (end of Y, end of X)
2418bcb0991SDimitry Andric   // with binary search
2428bcb0991SDimitry Andric   auto NumBefore = Funcs.size();
243fe6060f1SDimitry Andric   Funcs.erase(
244fe6060f1SDimitry Andric       removeIfBinary(Funcs.begin(), Funcs.end(),
245fe6060f1SDimitry Andric                      [&](const auto &Prev, const auto &Curr) {
246fe6060f1SDimitry Andric                        // Empty ranges won't intersect, but we still need to
247fe6060f1SDimitry Andric                        // catch the case where we have multiple symbols at the
248fe6060f1SDimitry Andric                        // same address and coalesce them.
249fe6060f1SDimitry Andric                        const bool ranges_equal = Prev.Range == Curr.Range;
250fe6060f1SDimitry Andric                        if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
251fe6060f1SDimitry Andric                          // Overlapping ranges or empty identical ranges.
252fe6060f1SDimitry Andric                          if (ranges_equal) {
253fe6060f1SDimitry Andric                            // Same address range. Check if one is from debug
254fe6060f1SDimitry Andric                            // info and the other is from a symbol table. If
255fe6060f1SDimitry Andric                            // so, then keep the one with debug info. Our
256fe6060f1SDimitry Andric                            // sorting guarantees that entries with matching
257fe6060f1SDimitry Andric                            // address ranges that have debug info are last in
258fe6060f1SDimitry Andric                            // the sort.
259fe6060f1SDimitry Andric                            if (Prev == Curr) {
260fe6060f1SDimitry Andric                              // FunctionInfo entries match exactly (range,
261fe6060f1SDimitry Andric                              // lines, inlines)
262fe6060f1SDimitry Andric 
263fe6060f1SDimitry Andric                              // We used to output a warning here, but this was
264fe6060f1SDimitry Andric                              // so frequent on some binaries, in particular
265fe6060f1SDimitry Andric                              // when those were built with GCC, that it slowed
266fe6060f1SDimitry Andric                              // down processing extremely.
267fe6060f1SDimitry Andric                              return true;
2688bcb0991SDimitry Andric                            } else {
269fe6060f1SDimitry Andric                              if (!Prev.hasRichInfo() && Curr.hasRichInfo()) {
270fe6060f1SDimitry Andric                                // Same address range, one with no debug info
271fe6060f1SDimitry Andric                                // (symbol) and the next with debug info. Keep
272fe6060f1SDimitry Andric                                // the latter.
273fe6060f1SDimitry Andric                                return true;
2748bcb0991SDimitry Andric                              } else {
275fe6060f1SDimitry Andric                                if (!Quiet) {
276fe6060f1SDimitry Andric                                  OS << "warning: same address range contains "
277fe6060f1SDimitry Andric                                        "different debug "
2788bcb0991SDimitry Andric                                     << "info. Removing:\n"
279fe6060f1SDimitry Andric                                     << Prev << "\nIn favor of this one:\n"
280fe6060f1SDimitry Andric                                     << Curr << "\n";
281fe6060f1SDimitry Andric                                }
282fe6060f1SDimitry Andric                                return true;
2838bcb0991SDimitry Andric                              }
2848bcb0991SDimitry Andric                            }
2858bcb0991SDimitry Andric                          } else {
286fe6060f1SDimitry Andric                            if (!Quiet) { // print warnings about overlaps
2878bcb0991SDimitry Andric                              OS << "warning: function ranges overlap:\n"
288fe6060f1SDimitry Andric                                 << Prev << "\n"
289fe6060f1SDimitry Andric                                 << Curr << "\n";
2908bcb0991SDimitry Andric                            }
291fe6060f1SDimitry Andric                          }
292fe6060f1SDimitry Andric                        } else if (Prev.Range.size() == 0 &&
29381ad6265SDimitry Andric                                   Curr.Range.contains(Prev.Range.start())) {
294fe6060f1SDimitry Andric                          if (!Quiet) {
2958bcb0991SDimitry Andric                            OS << "warning: removing symbol:\n"
296fe6060f1SDimitry Andric                               << Prev << "\nKeeping:\n"
297fe6060f1SDimitry Andric                               << Curr << "\n";
2988bcb0991SDimitry Andric                          }
299fe6060f1SDimitry Andric                          return true;
3008bcb0991SDimitry Andric                        }
301fe6060f1SDimitry Andric 
302fe6060f1SDimitry Andric                        return false;
303fe6060f1SDimitry Andric                      }),
304fe6060f1SDimitry Andric       Funcs.end());
3058bcb0991SDimitry Andric 
3065ffd83dbSDimitry Andric   // If our last function info entry doesn't have a size and if we have valid
3075ffd83dbSDimitry Andric   // text ranges, we should set the size of the last entry since any search for
3085ffd83dbSDimitry Andric   // a high address might match our last entry. By fixing up this size, we can
3095ffd83dbSDimitry Andric   // help ensure we don't cause lookups to always return the last symbol that
3105ffd83dbSDimitry Andric   // has no size when doing lookups.
3115ffd83dbSDimitry Andric   if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
312fe6060f1SDimitry Andric     if (auto Range =
31381ad6265SDimitry Andric             ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
31481ad6265SDimitry Andric       Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
3155ffd83dbSDimitry Andric     }
3165ffd83dbSDimitry Andric   }
3178bcb0991SDimitry Andric   OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
3188bcb0991SDimitry Andric      << Funcs.size() << " total\n";
3198bcb0991SDimitry Andric   return Error::success();
3208bcb0991SDimitry Andric }
3218bcb0991SDimitry Andric 
322*06c3fb27SDimitry Andric uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) {
323*06c3fb27SDimitry Andric   // String offset at zero is always the empty string, no copying needed.
324*06c3fb27SDimitry Andric   if (StrOff == 0)
325*06c3fb27SDimitry Andric     return 0;
326*06c3fb27SDimitry Andric   return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
327*06c3fb27SDimitry Andric }
328*06c3fb27SDimitry Andric 
3295ffd83dbSDimitry Andric uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
3308bcb0991SDimitry Andric   if (S.empty())
3318bcb0991SDimitry Andric     return 0;
332fe6060f1SDimitry Andric 
333fe6060f1SDimitry Andric   // The hash can be calculated outside the lock.
334fe6060f1SDimitry Andric   CachedHashStringRef CHStr(S);
335fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
3365ffd83dbSDimitry Andric   if (Copy) {
3375ffd83dbSDimitry Andric     // We need to provide backing storage for the string if requested
3385ffd83dbSDimitry Andric     // since StringTableBuilder stores references to strings. Any string
3395ffd83dbSDimitry Andric     // that comes from a section in an object file doesn't need to be
3405ffd83dbSDimitry Andric     // copied, but any string created by code will need to be copied.
3415ffd83dbSDimitry Andric     // This allows GsymCreator to be really fast when parsing DWARF and
3425ffd83dbSDimitry Andric     // other object files as most strings don't need to be copied.
3435ffd83dbSDimitry Andric     if (!StrTab.contains(CHStr))
344fe6060f1SDimitry Andric       CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
345fe6060f1SDimitry Andric                                   CHStr.hash()};
3465ffd83dbSDimitry Andric   }
347*06c3fb27SDimitry Andric   const uint32_t StrOff = StrTab.add(CHStr);
348*06c3fb27SDimitry Andric   // Save a mapping of string offsets to the cached string reference in case
349*06c3fb27SDimitry Andric   // we need to segment the GSYM file and copy string from one string table to
350*06c3fb27SDimitry Andric   // another.
351*06c3fb27SDimitry Andric   if (StringOffsetMap.count(StrOff) == 0)
352*06c3fb27SDimitry Andric     StringOffsetMap.insert(std::make_pair(StrOff, CHStr));
353*06c3fb27SDimitry Andric   return StrOff;
3548bcb0991SDimitry Andric }
3558bcb0991SDimitry Andric 
3568bcb0991SDimitry Andric void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
357fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
3585ffd83dbSDimitry Andric   Ranges.insert(FI.Range);
359fe6060f1SDimitry Andric   Funcs.emplace_back(std::move(FI));
3608bcb0991SDimitry Andric }
3618bcb0991SDimitry Andric 
3628bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo(
3638bcb0991SDimitry Andric     std::function<bool(FunctionInfo &)> const &Callback) {
364fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
3658bcb0991SDimitry Andric   for (auto &FI : Funcs) {
3668bcb0991SDimitry Andric     if (!Callback(FI))
3678bcb0991SDimitry Andric       break;
3688bcb0991SDimitry Andric   }
3698bcb0991SDimitry Andric }
3708bcb0991SDimitry Andric 
3718bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo(
3728bcb0991SDimitry Andric     std::function<bool(const FunctionInfo &)> const &Callback) const {
373fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
3748bcb0991SDimitry Andric   for (const auto &FI : Funcs) {
3758bcb0991SDimitry Andric     if (!Callback(FI))
3768bcb0991SDimitry Andric       break;
3778bcb0991SDimitry Andric   }
3788bcb0991SDimitry Andric }
3795ffd83dbSDimitry Andric 
3805ffd83dbSDimitry Andric size_t GsymCreator::getNumFunctionInfos() const {
381fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
3825ffd83dbSDimitry Andric   return Funcs.size();
3835ffd83dbSDimitry Andric }
3845ffd83dbSDimitry Andric 
3855ffd83dbSDimitry Andric bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
3865ffd83dbSDimitry Andric   if (ValidTextRanges)
3875ffd83dbSDimitry Andric     return ValidTextRanges->contains(Addr);
3885ffd83dbSDimitry Andric   return true; // No valid text ranges has been set, so accept all ranges.
3895ffd83dbSDimitry Andric }
3905ffd83dbSDimitry Andric 
3915ffd83dbSDimitry Andric bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const {
392fe6060f1SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
3935ffd83dbSDimitry Andric   return Ranges.contains(Addr);
3945ffd83dbSDimitry Andric }
395*06c3fb27SDimitry Andric 
396*06c3fb27SDimitry Andric std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {
397*06c3fb27SDimitry Andric   if (Finalized && !Funcs.empty())
398*06c3fb27SDimitry Andric     return std::optional<uint64_t>(Funcs.front().startAddress());
399*06c3fb27SDimitry Andric   // This code gets used by the segmentation of GSYM files to help determine the
400*06c3fb27SDimitry Andric   // size of the GSYM header while continually adding new FunctionInfo objects
401*06c3fb27SDimitry Andric   // to this object, so we haven't finalized this object yet.
402*06c3fb27SDimitry Andric   if (Ranges.empty())
403*06c3fb27SDimitry Andric     return std::nullopt;
404*06c3fb27SDimitry Andric   return std::optional<uint64_t>(Ranges.begin()->start());
405*06c3fb27SDimitry Andric }
406*06c3fb27SDimitry Andric 
407*06c3fb27SDimitry Andric std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {
408*06c3fb27SDimitry Andric   if (Finalized && !Funcs.empty())
409*06c3fb27SDimitry Andric     return std::optional<uint64_t>(Funcs.back().startAddress());
410*06c3fb27SDimitry Andric   // This code gets used by the segmentation of GSYM files to help determine the
411*06c3fb27SDimitry Andric   // size of the GSYM header while continually adding new FunctionInfo objects
412*06c3fb27SDimitry Andric   // to this object, so we haven't finalized this object yet.
413*06c3fb27SDimitry Andric   if (Ranges.empty())
414*06c3fb27SDimitry Andric     return std::nullopt;
415*06c3fb27SDimitry Andric   return std::optional<uint64_t>((Ranges.end() - 1)->end());
416*06c3fb27SDimitry Andric }
417*06c3fb27SDimitry Andric 
418*06c3fb27SDimitry Andric std::optional<uint64_t> GsymCreator::getBaseAddress() const {
419*06c3fb27SDimitry Andric   if (BaseAddress)
420*06c3fb27SDimitry Andric     return BaseAddress;
421*06c3fb27SDimitry Andric   return getFirstFunctionAddress();
422*06c3fb27SDimitry Andric }
423*06c3fb27SDimitry Andric 
424*06c3fb27SDimitry Andric uint64_t GsymCreator::getMaxAddressOffset() const {
425*06c3fb27SDimitry Andric   switch (getAddressOffsetSize()) {
426*06c3fb27SDimitry Andric     case 1: return UINT8_MAX;
427*06c3fb27SDimitry Andric     case 2: return UINT16_MAX;
428*06c3fb27SDimitry Andric     case 4: return UINT32_MAX;
429*06c3fb27SDimitry Andric     case 8: return UINT64_MAX;
430*06c3fb27SDimitry Andric   }
431*06c3fb27SDimitry Andric   llvm_unreachable("invalid address offset");
432*06c3fb27SDimitry Andric }
433*06c3fb27SDimitry Andric 
434*06c3fb27SDimitry Andric uint8_t GsymCreator::getAddressOffsetSize() const {
435*06c3fb27SDimitry Andric   const std::optional<uint64_t> BaseAddress = getBaseAddress();
436*06c3fb27SDimitry Andric   const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();
437*06c3fb27SDimitry Andric   if (BaseAddress && LastFuncAddr) {
438*06c3fb27SDimitry Andric     const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress;
439*06c3fb27SDimitry Andric     if (AddrDelta <= UINT8_MAX)
440*06c3fb27SDimitry Andric       return 1;
441*06c3fb27SDimitry Andric     else if (AddrDelta <= UINT16_MAX)
442*06c3fb27SDimitry Andric       return 2;
443*06c3fb27SDimitry Andric     else if (AddrDelta <= UINT32_MAX)
444*06c3fb27SDimitry Andric       return 4;
445*06c3fb27SDimitry Andric     return 8;
446*06c3fb27SDimitry Andric   }
447*06c3fb27SDimitry Andric   return 1;
448*06c3fb27SDimitry Andric }
449*06c3fb27SDimitry Andric 
450*06c3fb27SDimitry Andric uint64_t GsymCreator::calculateHeaderAndTableSize() const {
451*06c3fb27SDimitry Andric   uint64_t Size = sizeof(Header);
452*06c3fb27SDimitry Andric   const size_t NumFuncs = Funcs.size();
453*06c3fb27SDimitry Andric   // Add size of address offset table
454*06c3fb27SDimitry Andric   Size += NumFuncs * getAddressOffsetSize();
455*06c3fb27SDimitry Andric   // Add size of address info offsets which are 32 bit integers in version 1.
456*06c3fb27SDimitry Andric   Size += NumFuncs * sizeof(uint32_t);
457*06c3fb27SDimitry Andric   // Add file table size
458*06c3fb27SDimitry Andric   Size += Files.size() * sizeof(FileEntry);
459*06c3fb27SDimitry Andric   // Add string table size
460*06c3fb27SDimitry Andric   Size += StrTab.getSize();
461*06c3fb27SDimitry Andric 
462*06c3fb27SDimitry Andric   return Size;
463*06c3fb27SDimitry Andric }
464*06c3fb27SDimitry Andric 
465*06c3fb27SDimitry Andric // This function takes a InlineInfo class that was copy constructed from an
466*06c3fb27SDimitry Andric // InlineInfo from the \a SrcGC and updates all members that point to strings
467*06c3fb27SDimitry Andric // and files to point to strings and files from this GsymCreator.
468*06c3fb27SDimitry Andric void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
469*06c3fb27SDimitry Andric   II.Name = copyString(SrcGC, II.Name);
470*06c3fb27SDimitry Andric   II.CallFile = copyFile(SrcGC, II.CallFile);
471*06c3fb27SDimitry Andric   for (auto &ChildII: II.Children)
472*06c3fb27SDimitry Andric     fixupInlineInfo(SrcGC, ChildII);
473*06c3fb27SDimitry Andric }
474*06c3fb27SDimitry Andric 
475*06c3fb27SDimitry Andric uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) {
476*06c3fb27SDimitry Andric   // To copy a function info we need to copy any files and strings over into
477*06c3fb27SDimitry Andric   // this GsymCreator and then copy the function info and update the string
478*06c3fb27SDimitry Andric   // table offsets to match the new offsets.
479*06c3fb27SDimitry Andric   const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
480*06c3fb27SDimitry Andric   Ranges.insert(SrcFI.Range);
481*06c3fb27SDimitry Andric 
482*06c3fb27SDimitry Andric   FunctionInfo DstFI;
483*06c3fb27SDimitry Andric   DstFI.Range = SrcFI.Range;
484*06c3fb27SDimitry Andric   DstFI.Name = copyString(SrcGC, SrcFI.Name);
485*06c3fb27SDimitry Andric   // Copy the line table if there is one.
486*06c3fb27SDimitry Andric   if (SrcFI.OptLineTable) {
487*06c3fb27SDimitry Andric     // Copy the entire line table.
488*06c3fb27SDimitry Andric     DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());
489*06c3fb27SDimitry Andric     // Fixup all LineEntry::File entries which are indexes in the the file table
490*06c3fb27SDimitry Andric     // from SrcGC and must be converted to file indexes from this GsymCreator.
491*06c3fb27SDimitry Andric     LineTable &DstLT = DstFI.OptLineTable.value();
492*06c3fb27SDimitry Andric     const size_t NumLines = DstLT.size();
493*06c3fb27SDimitry Andric     for (size_t I=0; I<NumLines; ++I) {
494*06c3fb27SDimitry Andric       LineEntry &LE = DstLT.get(I);
495*06c3fb27SDimitry Andric       LE.File = copyFile(SrcGC, LE.File);
496*06c3fb27SDimitry Andric     }
497*06c3fb27SDimitry Andric   }
498*06c3fb27SDimitry Andric   // Copy the inline information if needed.
499*06c3fb27SDimitry Andric   if (SrcFI.Inline) {
500*06c3fb27SDimitry Andric     // Make a copy of the source inline information.
501*06c3fb27SDimitry Andric     DstFI.Inline = SrcFI.Inline.value();
502*06c3fb27SDimitry Andric     // Fixup all strings and files in the copied inline information.
503*06c3fb27SDimitry Andric     fixupInlineInfo(SrcGC, *DstFI.Inline);
504*06c3fb27SDimitry Andric   }
505*06c3fb27SDimitry Andric   std::lock_guard<std::mutex> Guard(Mutex);
506*06c3fb27SDimitry Andric   Funcs.push_back(DstFI);
507*06c3fb27SDimitry Andric   return Funcs.back().cacheEncoding();
508*06c3fb27SDimitry Andric }
509*06c3fb27SDimitry Andric 
510*06c3fb27SDimitry Andric llvm::Error GsymCreator::saveSegments(StringRef Path,
511*06c3fb27SDimitry Andric                                       llvm::support::endianness ByteOrder,
512*06c3fb27SDimitry Andric                                       uint64_t SegmentSize) const {
513*06c3fb27SDimitry Andric   if (SegmentSize == 0)
514*06c3fb27SDimitry Andric     return createStringError(std::errc::invalid_argument,
515*06c3fb27SDimitry Andric                              "invalid segment size zero");
516*06c3fb27SDimitry Andric 
517*06c3fb27SDimitry Andric   size_t FuncIdx = 0;
518*06c3fb27SDimitry Andric   const size_t NumFuncs = Funcs.size();
519*06c3fb27SDimitry Andric   while (FuncIdx < NumFuncs) {
520*06c3fb27SDimitry Andric     llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC =
521*06c3fb27SDimitry Andric         createSegment(SegmentSize, FuncIdx);
522*06c3fb27SDimitry Andric     if (ExpectedGC) {
523*06c3fb27SDimitry Andric       GsymCreator *GC = ExpectedGC->get();
524*06c3fb27SDimitry Andric       if (GC == NULL)
525*06c3fb27SDimitry Andric         break; // We had not more functions to encode.
526*06c3fb27SDimitry Andric       raw_null_ostream ErrorStrm;
527*06c3fb27SDimitry Andric       llvm::Error Err = GC->finalize(ErrorStrm);
528*06c3fb27SDimitry Andric       if (Err)
529*06c3fb27SDimitry Andric         return Err;
530*06c3fb27SDimitry Andric       std::string SegmentedGsymPath;
531*06c3fb27SDimitry Andric       raw_string_ostream SGP(SegmentedGsymPath);
532*06c3fb27SDimitry Andric       std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress();
533*06c3fb27SDimitry Andric       if (FirstFuncAddr) {
534*06c3fb27SDimitry Andric         SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1);
535*06c3fb27SDimitry Andric         SGP.flush();
536*06c3fb27SDimitry Andric         Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt);
537*06c3fb27SDimitry Andric         if (Err)
538*06c3fb27SDimitry Andric           return Err;
539*06c3fb27SDimitry Andric       }
540*06c3fb27SDimitry Andric     } else {
541*06c3fb27SDimitry Andric       return ExpectedGC.takeError();
542*06c3fb27SDimitry Andric     }
543*06c3fb27SDimitry Andric   }
544*06c3fb27SDimitry Andric   return Error::success();
545*06c3fb27SDimitry Andric }
546*06c3fb27SDimitry Andric 
547*06c3fb27SDimitry Andric llvm::Expected<std::unique_ptr<GsymCreator>>
548*06c3fb27SDimitry Andric GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
549*06c3fb27SDimitry Andric   // No function entries, return empty unique pointer
550*06c3fb27SDimitry Andric   if (FuncIdx >= Funcs.size())
551*06c3fb27SDimitry Andric     return std::unique_ptr<GsymCreator>();
552*06c3fb27SDimitry Andric 
553*06c3fb27SDimitry Andric   std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true));
554*06c3fb27SDimitry Andric   // Set the base address if there is one.
555*06c3fb27SDimitry Andric   if (BaseAddress)
556*06c3fb27SDimitry Andric     GC->setBaseAddress(*BaseAddress);
557*06c3fb27SDimitry Andric   // Copy the UUID value from this object into the new creator.
558*06c3fb27SDimitry Andric   GC->setUUID(UUID);
559*06c3fb27SDimitry Andric   const size_t NumFuncs = Funcs.size();
560*06c3fb27SDimitry Andric   // Track how big the function infos are for the current segment so we can
561*06c3fb27SDimitry Andric   // emit segments that are close to the requested size. It is quick math to
562*06c3fb27SDimitry Andric   // determine the current header and tables sizes, so we can do that each loop.
563*06c3fb27SDimitry Andric   uint64_t SegmentFuncInfosSize = 0;
564*06c3fb27SDimitry Andric   for (; FuncIdx < NumFuncs; ++FuncIdx) {
565*06c3fb27SDimitry Andric     const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize();
566*06c3fb27SDimitry Andric     if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) {
567*06c3fb27SDimitry Andric       if (SegmentFuncInfosSize == 0)
568*06c3fb27SDimitry Andric         return createStringError(std::errc::invalid_argument,
569*06c3fb27SDimitry Andric                                  "a segment size of %" PRIu64 " is to small to "
570*06c3fb27SDimitry Andric                                  "fit any function infos, specify a larger value",
571*06c3fb27SDimitry Andric                                  SegmentSize);
572*06c3fb27SDimitry Andric 
573*06c3fb27SDimitry Andric       break;
574*06c3fb27SDimitry Andric     }
575*06c3fb27SDimitry Andric     SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4);
576*06c3fb27SDimitry Andric   }
577*06c3fb27SDimitry Andric   return std::move(GC);
578*06c3fb27SDimitry Andric }
579