xref: /freebsd-src/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp (revision 8bcb0991864975618c09697b1aca10683346d9f0)
1*8bcb0991SDimitry Andric //===- GsymCreator.cpp ----------------------------------------------------===//
2*8bcb0991SDimitry Andric //
3*8bcb0991SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*8bcb0991SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*8bcb0991SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*8bcb0991SDimitry Andric //===----------------------------------------------------------------------===//
7*8bcb0991SDimitry Andric 
8*8bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/GsymCreator.h"
9*8bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/FileWriter.h"
10*8bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/Header.h"
11*8bcb0991SDimitry Andric #include "llvm/DebugInfo/GSYM/LineTable.h"
12*8bcb0991SDimitry Andric #include "llvm/MC/StringTableBuilder.h"
13*8bcb0991SDimitry Andric #include "llvm/Support/raw_ostream.h"
14*8bcb0991SDimitry Andric 
#include <algorithm>
#include <cassert>
#include <functional>
#include <utility>
#include <vector>
19*8bcb0991SDimitry Andric 
20*8bcb0991SDimitry Andric using namespace llvm;
21*8bcb0991SDimitry Andric using namespace gsym;
22*8bcb0991SDimitry Andric 
23*8bcb0991SDimitry Andric 
24*8bcb0991SDimitry Andric GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) {
25*8bcb0991SDimitry Andric   insertFile(StringRef());
26*8bcb0991SDimitry Andric }
27*8bcb0991SDimitry Andric 
28*8bcb0991SDimitry Andric uint32_t GsymCreator::insertFile(StringRef Path,
29*8bcb0991SDimitry Andric                                  llvm::sys::path::Style Style) {
30*8bcb0991SDimitry Andric   llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
31*8bcb0991SDimitry Andric   llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
32*8bcb0991SDimitry Andric   FileEntry FE(insertString(directory), insertString(filename));
33*8bcb0991SDimitry Andric 
34*8bcb0991SDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
35*8bcb0991SDimitry Andric   const auto NextIndex = Files.size();
36*8bcb0991SDimitry Andric   // Find FE in hash map and insert if not present.
37*8bcb0991SDimitry Andric   auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
38*8bcb0991SDimitry Andric   if (R.second)
39*8bcb0991SDimitry Andric     Files.emplace_back(FE);
40*8bcb0991SDimitry Andric   return R.first->second;
41*8bcb0991SDimitry Andric }
42*8bcb0991SDimitry Andric 
43*8bcb0991SDimitry Andric llvm::Error GsymCreator::save(StringRef Path,
44*8bcb0991SDimitry Andric                               llvm::support::endianness ByteOrder) const {
45*8bcb0991SDimitry Andric   std::error_code EC;
46*8bcb0991SDimitry Andric   raw_fd_ostream OutStrm(Path, EC);
47*8bcb0991SDimitry Andric   if (EC)
48*8bcb0991SDimitry Andric     return llvm::errorCodeToError(EC);
49*8bcb0991SDimitry Andric   FileWriter O(OutStrm, ByteOrder);
50*8bcb0991SDimitry Andric   return encode(O);
51*8bcb0991SDimitry Andric }
52*8bcb0991SDimitry Andric 
53*8bcb0991SDimitry Andric llvm::Error GsymCreator::encode(FileWriter &O) const {
54*8bcb0991SDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
55*8bcb0991SDimitry Andric   if (Funcs.empty())
56*8bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
57*8bcb0991SDimitry Andric                              "no functions to encode");
58*8bcb0991SDimitry Andric   if (!Finalized)
59*8bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
60*8bcb0991SDimitry Andric                              "GsymCreator wasn't finalized prior to encoding");
61*8bcb0991SDimitry Andric 
62*8bcb0991SDimitry Andric   if (Funcs.size() > UINT32_MAX)
63*8bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
64*8bcb0991SDimitry Andric                              "too many FunctionInfos");
65*8bcb0991SDimitry Andric   const uint64_t MinAddr = Funcs.front().startAddress();
66*8bcb0991SDimitry Andric   const uint64_t MaxAddr = Funcs.back().startAddress();
67*8bcb0991SDimitry Andric   const uint64_t AddrDelta = MaxAddr - MinAddr;
68*8bcb0991SDimitry Andric   Header Hdr;
69*8bcb0991SDimitry Andric   Hdr.Magic = GSYM_MAGIC;
70*8bcb0991SDimitry Andric   Hdr.Version = GSYM_VERSION;
71*8bcb0991SDimitry Andric   Hdr.AddrOffSize = 0;
72*8bcb0991SDimitry Andric   Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
73*8bcb0991SDimitry Andric   Hdr.BaseAddress = MinAddr;
74*8bcb0991SDimitry Andric   Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
75*8bcb0991SDimitry Andric   Hdr.StrtabOffset = 0; // We will fix this up later.
76*8bcb0991SDimitry Andric   Hdr.StrtabOffset = 0; // We will fix this up later.
77*8bcb0991SDimitry Andric   memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
78*8bcb0991SDimitry Andric   if (UUID.size() > sizeof(Hdr.UUID))
79*8bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
80*8bcb0991SDimitry Andric                              "invalid UUID size %u", (uint32_t)UUID.size());
81*8bcb0991SDimitry Andric   // Set the address offset size correctly in the GSYM header.
82*8bcb0991SDimitry Andric   if (AddrDelta <= UINT8_MAX)
83*8bcb0991SDimitry Andric     Hdr.AddrOffSize = 1;
84*8bcb0991SDimitry Andric   else if (AddrDelta <= UINT16_MAX)
85*8bcb0991SDimitry Andric     Hdr.AddrOffSize = 2;
86*8bcb0991SDimitry Andric   else if (AddrDelta <= UINT32_MAX)
87*8bcb0991SDimitry Andric     Hdr.AddrOffSize = 4;
88*8bcb0991SDimitry Andric   else
89*8bcb0991SDimitry Andric     Hdr.AddrOffSize = 8;
90*8bcb0991SDimitry Andric   // Copy the UUID value if we have one.
91*8bcb0991SDimitry Andric   if (UUID.size() > 0)
92*8bcb0991SDimitry Andric     memcpy(Hdr.UUID, UUID.data(), UUID.size());
93*8bcb0991SDimitry Andric   // Write out the header.
94*8bcb0991SDimitry Andric   llvm::Error Err = Hdr.encode(O);
95*8bcb0991SDimitry Andric   if (Err)
96*8bcb0991SDimitry Andric     return Err;
97*8bcb0991SDimitry Andric 
98*8bcb0991SDimitry Andric   // Write out the address offsets.
99*8bcb0991SDimitry Andric   O.alignTo(Hdr.AddrOffSize);
100*8bcb0991SDimitry Andric   for (const auto &FuncInfo : Funcs) {
101*8bcb0991SDimitry Andric     uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
102*8bcb0991SDimitry Andric     switch(Hdr.AddrOffSize) {
103*8bcb0991SDimitry Andric       case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break;
104*8bcb0991SDimitry Andric       case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break;
105*8bcb0991SDimitry Andric       case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break;
106*8bcb0991SDimitry Andric       case 8: O.writeU64(AddrOffset); break;
107*8bcb0991SDimitry Andric     }
108*8bcb0991SDimitry Andric   }
109*8bcb0991SDimitry Andric 
110*8bcb0991SDimitry Andric   // Write out all zeros for the AddrInfoOffsets.
111*8bcb0991SDimitry Andric   O.alignTo(4);
112*8bcb0991SDimitry Andric   const off_t AddrInfoOffsetsOffset = O.tell();
113*8bcb0991SDimitry Andric   for (size_t i = 0, n = Funcs.size(); i < n; ++i)
114*8bcb0991SDimitry Andric     O.writeU32(0);
115*8bcb0991SDimitry Andric 
116*8bcb0991SDimitry Andric   // Write out the file table
117*8bcb0991SDimitry Andric   O.alignTo(4);
118*8bcb0991SDimitry Andric   assert(!Files.empty());
119*8bcb0991SDimitry Andric   assert(Files[0].Dir == 0);
120*8bcb0991SDimitry Andric   assert(Files[0].Base == 0);
121*8bcb0991SDimitry Andric   size_t NumFiles = Files.size();
122*8bcb0991SDimitry Andric   if (NumFiles > UINT32_MAX)
123*8bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
124*8bcb0991SDimitry Andric                              "too many files");
125*8bcb0991SDimitry Andric   O.writeU32(static_cast<uint32_t>(NumFiles));
126*8bcb0991SDimitry Andric   for (auto File: Files) {
127*8bcb0991SDimitry Andric       O.writeU32(File.Dir);
128*8bcb0991SDimitry Andric       O.writeU32(File.Base);
129*8bcb0991SDimitry Andric   }
130*8bcb0991SDimitry Andric 
131*8bcb0991SDimitry Andric   // Write out the sting table.
132*8bcb0991SDimitry Andric   const off_t StrtabOffset = O.tell();
133*8bcb0991SDimitry Andric   StrTab.write(O.get_stream());
134*8bcb0991SDimitry Andric   const off_t StrtabSize = O.tell() - StrtabOffset;
135*8bcb0991SDimitry Andric   std::vector<uint32_t> AddrInfoOffsets;
136*8bcb0991SDimitry Andric 
137*8bcb0991SDimitry Andric   // Write out the address infos for each function info.
138*8bcb0991SDimitry Andric   for (const auto &FuncInfo : Funcs) {
139*8bcb0991SDimitry Andric     if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
140*8bcb0991SDimitry Andric         AddrInfoOffsets.push_back(OffsetOrErr.get());
141*8bcb0991SDimitry Andric     else
142*8bcb0991SDimitry Andric         return OffsetOrErr.takeError();
143*8bcb0991SDimitry Andric   }
144*8bcb0991SDimitry Andric   // Fixup the string table offset and size in the header
145*8bcb0991SDimitry Andric   O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
146*8bcb0991SDimitry Andric   O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
147*8bcb0991SDimitry Andric 
148*8bcb0991SDimitry Andric   // Fixup all address info offsets
149*8bcb0991SDimitry Andric   uint64_t Offset = 0;
150*8bcb0991SDimitry Andric   for (auto AddrInfoOffset: AddrInfoOffsets) {
151*8bcb0991SDimitry Andric     O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
152*8bcb0991SDimitry Andric     Offset += 4;
153*8bcb0991SDimitry Andric   }
154*8bcb0991SDimitry Andric   return ErrorSuccess();
155*8bcb0991SDimitry Andric }
156*8bcb0991SDimitry Andric 
157*8bcb0991SDimitry Andric llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
158*8bcb0991SDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
159*8bcb0991SDimitry Andric   if (Finalized)
160*8bcb0991SDimitry Andric     return createStringError(std::errc::invalid_argument,
161*8bcb0991SDimitry Andric                              "already finalized");
162*8bcb0991SDimitry Andric   Finalized = true;
163*8bcb0991SDimitry Andric 
164*8bcb0991SDimitry Andric   // Sort function infos so we can emit sorted functions.
165*8bcb0991SDimitry Andric   llvm::sort(Funcs.begin(), Funcs.end());
166*8bcb0991SDimitry Andric 
167*8bcb0991SDimitry Andric   // Don't let the string table indexes change by finalizing in order.
168*8bcb0991SDimitry Andric   StrTab.finalizeInOrder();
169*8bcb0991SDimitry Andric 
170*8bcb0991SDimitry Andric   // Remove duplicates function infos that have both entries from debug info
171*8bcb0991SDimitry Andric   // (DWARF or Breakpad) and entries from the SymbolTable.
172*8bcb0991SDimitry Andric   //
173*8bcb0991SDimitry Andric   // Also handle overlapping function. Usually there shouldn't be any, but they
174*8bcb0991SDimitry Andric   // can and do happen in some rare cases.
175*8bcb0991SDimitry Andric   //
176*8bcb0991SDimitry Andric   // (a)          (b)         (c)
177*8bcb0991SDimitry Andric   //     ^  ^       ^            ^
178*8bcb0991SDimitry Andric   //     |X |Y      |X ^         |X
179*8bcb0991SDimitry Andric   //     |  |       |  |Y        |  ^
180*8bcb0991SDimitry Andric   //     |  |       |  v         v  |Y
181*8bcb0991SDimitry Andric   //     v  v       v               v
182*8bcb0991SDimitry Andric   //
183*8bcb0991SDimitry Andric   // In (a) and (b), Y is ignored and X will be reported for the full range.
184*8bcb0991SDimitry Andric   // In (c), both functions will be included in the result and lookups for an
185*8bcb0991SDimitry Andric   // address in the intersection will return Y because of binary search.
186*8bcb0991SDimitry Andric   //
187*8bcb0991SDimitry Andric   // Note that in case of (b), we cannot include Y in the result because then
188*8bcb0991SDimitry Andric   // we wouldn't find any function for range (end of Y, end of X)
189*8bcb0991SDimitry Andric   // with binary search
190*8bcb0991SDimitry Andric   auto NumBefore = Funcs.size();
191*8bcb0991SDimitry Andric   auto Curr = Funcs.begin();
192*8bcb0991SDimitry Andric   auto Prev = Funcs.end();
193*8bcb0991SDimitry Andric   while (Curr != Funcs.end()) {
194*8bcb0991SDimitry Andric     // Can't check for overlaps or same address ranges if we don't have a
195*8bcb0991SDimitry Andric     // previous entry
196*8bcb0991SDimitry Andric     if (Prev != Funcs.end()) {
197*8bcb0991SDimitry Andric       if (Prev->Range.intersects(Curr->Range)) {
198*8bcb0991SDimitry Andric         // Overlapping address ranges.
199*8bcb0991SDimitry Andric         if (Prev->Range == Curr->Range) {
200*8bcb0991SDimitry Andric           // Same address range. Check if one is from debug info and the other
201*8bcb0991SDimitry Andric           // is from a symbol table. If so, then keep the one with debug info.
202*8bcb0991SDimitry Andric           // Our sorting guarantees that entries with matching address ranges
203*8bcb0991SDimitry Andric           // that have debug info are last in the sort.
204*8bcb0991SDimitry Andric           if (*Prev == *Curr) {
205*8bcb0991SDimitry Andric             // FunctionInfo entries match exactly (range, lines, inlines)
206*8bcb0991SDimitry Andric             OS << "warning: duplicate function info entries, removing "
207*8bcb0991SDimitry Andric                   "duplicate:\n"
208*8bcb0991SDimitry Andric                << *Curr << '\n';
209*8bcb0991SDimitry Andric             Curr = Funcs.erase(Prev);
210*8bcb0991SDimitry Andric           } else {
211*8bcb0991SDimitry Andric             if (!Prev->hasRichInfo() && Curr->hasRichInfo()) {
212*8bcb0991SDimitry Andric               // Same address range, one with no debug info (symbol) and the
213*8bcb0991SDimitry Andric               // next with debug info. Keep the latter.
214*8bcb0991SDimitry Andric               Curr = Funcs.erase(Prev);
215*8bcb0991SDimitry Andric             } else {
216*8bcb0991SDimitry Andric               OS << "warning: same address range contains different debug "
217*8bcb0991SDimitry Andric                  << "info. Removing:\n"
218*8bcb0991SDimitry Andric                  << *Prev << "\nIn favor of this one:\n"
219*8bcb0991SDimitry Andric                  << *Curr << "\n";
220*8bcb0991SDimitry Andric               Curr = Funcs.erase(Prev);
221*8bcb0991SDimitry Andric             }
222*8bcb0991SDimitry Andric           }
223*8bcb0991SDimitry Andric         } else {
224*8bcb0991SDimitry Andric           // print warnings about overlaps
225*8bcb0991SDimitry Andric           OS << "warning: function ranges overlap:\n"
226*8bcb0991SDimitry Andric              << *Prev << "\n"
227*8bcb0991SDimitry Andric              << *Curr << "\n";
228*8bcb0991SDimitry Andric         }
229*8bcb0991SDimitry Andric       } else if (Prev->Range.size() == 0 &&
230*8bcb0991SDimitry Andric                  Curr->Range.contains(Prev->Range.Start)) {
231*8bcb0991SDimitry Andric         OS << "warning: removing symbol:\n"
232*8bcb0991SDimitry Andric            << *Prev << "\nKeeping:\n"
233*8bcb0991SDimitry Andric            << *Curr << "\n";
234*8bcb0991SDimitry Andric         Curr = Funcs.erase(Prev);
235*8bcb0991SDimitry Andric       }
236*8bcb0991SDimitry Andric     }
237*8bcb0991SDimitry Andric     if (Curr == Funcs.end())
238*8bcb0991SDimitry Andric       break;
239*8bcb0991SDimitry Andric     Prev = Curr++;
240*8bcb0991SDimitry Andric   }
241*8bcb0991SDimitry Andric 
242*8bcb0991SDimitry Andric   OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
243*8bcb0991SDimitry Andric      << Funcs.size() << " total\n";
244*8bcb0991SDimitry Andric   return Error::success();
245*8bcb0991SDimitry Andric }
246*8bcb0991SDimitry Andric 
247*8bcb0991SDimitry Andric uint32_t GsymCreator::insertString(StringRef S) {
248*8bcb0991SDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
249*8bcb0991SDimitry Andric   if (S.empty())
250*8bcb0991SDimitry Andric     return 0;
251*8bcb0991SDimitry Andric   return StrTab.add(S);
252*8bcb0991SDimitry Andric }
253*8bcb0991SDimitry Andric 
254*8bcb0991SDimitry Andric void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
255*8bcb0991SDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
256*8bcb0991SDimitry Andric   Funcs.emplace_back(FI);
257*8bcb0991SDimitry Andric }
258*8bcb0991SDimitry Andric 
259*8bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo(
260*8bcb0991SDimitry Andric     std::function<bool(FunctionInfo &)> const &Callback) {
261*8bcb0991SDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
262*8bcb0991SDimitry Andric   for (auto &FI : Funcs) {
263*8bcb0991SDimitry Andric     if (!Callback(FI))
264*8bcb0991SDimitry Andric       break;
265*8bcb0991SDimitry Andric   }
266*8bcb0991SDimitry Andric }
267*8bcb0991SDimitry Andric 
268*8bcb0991SDimitry Andric void GsymCreator::forEachFunctionInfo(
269*8bcb0991SDimitry Andric     std::function<bool(const FunctionInfo &)> const &Callback) const {
270*8bcb0991SDimitry Andric   std::lock_guard<std::recursive_mutex> Guard(Mutex);
271*8bcb0991SDimitry Andric   for (const auto &FI : Funcs) {
272*8bcb0991SDimitry Andric     if (!Callback(FI))
273*8bcb0991SDimitry Andric       break;
274*8bcb0991SDimitry Andric   }
275*8bcb0991SDimitry Andric }
276