xref: /netbsd-src/external/apache2/llvm/dist/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 //===- GsymCreator.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
7 
8 #include "llvm/DebugInfo/GSYM/GsymCreator.h"
9 #include "llvm/DebugInfo/GSYM/FileWriter.h"
10 #include "llvm/DebugInfo/GSYM/Header.h"
11 #include "llvm/DebugInfo/GSYM/LineTable.h"
12 #include "llvm/MC/StringTableBuilder.h"
13 #include "llvm/Support/raw_ostream.h"
14 
15 #include <algorithm>
16 #include <cassert>
17 #include <functional>
18 #include <vector>
19 
20 using namespace llvm;
21 using namespace gsym;
22 
GsymCreator()23 GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) {
24   insertFile(StringRef());
25 }
26 
insertFile(StringRef Path,llvm::sys::path::Style Style)27 uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
28   llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
29   llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
30   // We must insert the strings first, then call the FileEntry constructor.
31   // If we inline the insertString() function call into the constructor, the
32   // call order is undefined due to parameter lists not having any ordering
33   // requirements.
34   const uint32_t Dir = insertString(directory);
35   const uint32_t Base = insertString(filename);
36   FileEntry FE(Dir, Base);
37 
38   std::lock_guard<std::mutex> Guard(Mutex);
39   const auto NextIndex = Files.size();
40   // Find FE in hash map and insert if not present.
41   auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
42   if (R.second)
43     Files.emplace_back(FE);
44   return R.first->second;
45 }
46 
save(StringRef Path,llvm::support::endianness ByteOrder) const47 llvm::Error GsymCreator::save(StringRef Path,
48                               llvm::support::endianness ByteOrder) const {
49   std::error_code EC;
50   raw_fd_ostream OutStrm(Path, EC);
51   if (EC)
52     return llvm::errorCodeToError(EC);
53   FileWriter O(OutStrm, ByteOrder);
54   return encode(O);
55 }
56 
encode(FileWriter & O) const57 llvm::Error GsymCreator::encode(FileWriter &O) const {
58   std::lock_guard<std::mutex> Guard(Mutex);
59   if (Funcs.empty())
60     return createStringError(std::errc::invalid_argument,
61                              "no functions to encode");
62   if (!Finalized)
63     return createStringError(std::errc::invalid_argument,
64                              "GsymCreator wasn't finalized prior to encoding");
65 
66   if (Funcs.size() > UINT32_MAX)
67     return createStringError(std::errc::invalid_argument,
68                              "too many FunctionInfos");
69 
70   const uint64_t MinAddr =
71       BaseAddress ? *BaseAddress : Funcs.front().startAddress();
72   const uint64_t MaxAddr = Funcs.back().startAddress();
73   const uint64_t AddrDelta = MaxAddr - MinAddr;
74   Header Hdr;
75   Hdr.Magic = GSYM_MAGIC;
76   Hdr.Version = GSYM_VERSION;
77   Hdr.AddrOffSize = 0;
78   Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
79   Hdr.BaseAddress = MinAddr;
80   Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
81   Hdr.StrtabOffset = 0; // We will fix this up later.
82   Hdr.StrtabSize = 0;   // We will fix this up later.
83   memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
84   if (UUID.size() > sizeof(Hdr.UUID))
85     return createStringError(std::errc::invalid_argument,
86                              "invalid UUID size %u", (uint32_t)UUID.size());
87   // Set the address offset size correctly in the GSYM header.
88   if (AddrDelta <= UINT8_MAX)
89     Hdr.AddrOffSize = 1;
90   else if (AddrDelta <= UINT16_MAX)
91     Hdr.AddrOffSize = 2;
92   else if (AddrDelta <= UINT32_MAX)
93     Hdr.AddrOffSize = 4;
94   else
95     Hdr.AddrOffSize = 8;
96   // Copy the UUID value if we have one.
97   if (UUID.size() > 0)
98     memcpy(Hdr.UUID, UUID.data(), UUID.size());
99   // Write out the header.
100   llvm::Error Err = Hdr.encode(O);
101   if (Err)
102     return Err;
103 
104   // Write out the address offsets.
105   O.alignTo(Hdr.AddrOffSize);
106   for (const auto &FuncInfo : Funcs) {
107     uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
108     switch (Hdr.AddrOffSize) {
109     case 1:
110       O.writeU8(static_cast<uint8_t>(AddrOffset));
111       break;
112     case 2:
113       O.writeU16(static_cast<uint16_t>(AddrOffset));
114       break;
115     case 4:
116       O.writeU32(static_cast<uint32_t>(AddrOffset));
117       break;
118     case 8:
119       O.writeU64(AddrOffset);
120       break;
121     }
122   }
123 
124   // Write out all zeros for the AddrInfoOffsets.
125   O.alignTo(4);
126   const off_t AddrInfoOffsetsOffset = O.tell();
127   for (size_t i = 0, n = Funcs.size(); i < n; ++i)
128     O.writeU32(0);
129 
130   // Write out the file table
131   O.alignTo(4);
132   assert(!Files.empty());
133   assert(Files[0].Dir == 0);
134   assert(Files[0].Base == 0);
135   size_t NumFiles = Files.size();
136   if (NumFiles > UINT32_MAX)
137     return createStringError(std::errc::invalid_argument, "too many files");
138   O.writeU32(static_cast<uint32_t>(NumFiles));
139   for (auto File : Files) {
140     O.writeU32(File.Dir);
141     O.writeU32(File.Base);
142   }
143 
144   // Write out the sting table.
145   const off_t StrtabOffset = O.tell();
146   StrTab.write(O.get_stream());
147   const off_t StrtabSize = O.tell() - StrtabOffset;
148   std::vector<uint32_t> AddrInfoOffsets;
149 
150   // Write out the address infos for each function info.
151   for (const auto &FuncInfo : Funcs) {
152     if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
153       AddrInfoOffsets.push_back(OffsetOrErr.get());
154     else
155       return OffsetOrErr.takeError();
156   }
157   // Fixup the string table offset and size in the header
158   O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
159   O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
160 
161   // Fixup all address info offsets
162   uint64_t Offset = 0;
163   for (auto AddrInfoOffset : AddrInfoOffsets) {
164     O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
165     Offset += 4;
166   }
167   return ErrorSuccess();
168 }
169 
// Compacts [FirstIt, LastIt) in a single pass, in the spirit of
// std::remove_if, but with a binary predicate. Pred(Prev, Curr) is called with
// the most recently kept element (Prev) and the element being visited (Curr).
//
// When Pred returns true, Prev is discarded and Curr takes over its slot, so
// Curr becomes the "previous" element for the next comparison. When Pred
// returns false, both elements are kept. Pred is always called before Curr is
// moved, so it may inspect both arguments freely.
//
// Returns the new logical end of the range; elements from there up to LastIt
// are left in a valid but unspecified state and are expected to be erased by
// the caller.
template <class ForwardIt, class BinaryPredicate>
static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt,
                                BinaryPredicate Pred) {
  if (FirstIt == LastIt)
    return LastIt;

  // KeptIt always points at the last element of the compacted prefix.
  ForwardIt KeptIt = FirstIt;
  ForwardIt CurrIt = FirstIt;
  for (++CurrIt; CurrIt != LastIt; ++CurrIt) {
    // Keeping Prev means Curr goes into the next free slot; dropping Prev
    // means Curr overwrites it in place.
    if (!Pred(*KeptIt, *CurrIt))
      ++KeptIt;
    // Skip the self-move when nothing has been removed so far.
    if (KeptIt != CurrIt)
      *KeptIt = std::move(*CurrIt);
  }
  return ++KeptIt;
}
189 
finalize(llvm::raw_ostream & OS)190 llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
191   std::lock_guard<std::mutex> Guard(Mutex);
192   if (Finalized)
193     return createStringError(std::errc::invalid_argument, "already finalized");
194   Finalized = true;
195 
196   // Sort function infos so we can emit sorted functions.
197   llvm::sort(Funcs);
198 
199   // Don't let the string table indexes change by finalizing in order.
200   StrTab.finalizeInOrder();
201 
202   // Remove duplicates function infos that have both entries from debug info
203   // (DWARF or Breakpad) and entries from the SymbolTable.
204   //
205   // Also handle overlapping function. Usually there shouldn't be any, but they
206   // can and do happen in some rare cases.
207   //
208   // (a)          (b)         (c)
209   //     ^  ^       ^            ^
210   //     |X |Y      |X ^         |X
211   //     |  |       |  |Y        |  ^
212   //     |  |       |  v         v  |Y
213   //     v  v       v               v
214   //
215   // In (a) and (b), Y is ignored and X will be reported for the full range.
216   // In (c), both functions will be included in the result and lookups for an
217   // address in the intersection will return Y because of binary search.
218   //
219   // Note that in case of (b), we cannot include Y in the result because then
220   // we wouldn't find any function for range (end of Y, end of X)
221   // with binary search
222   auto NumBefore = Funcs.size();
223   Funcs.erase(
224       removeIfBinary(Funcs.begin(), Funcs.end(),
225                      [&](const auto &Prev, const auto &Curr) {
226                        if (Prev.Range.intersects(Curr.Range)) {
227                          // Overlapping address ranges.
228                          if (Prev.Range == Curr.Range) {
229                            // Same address range. Check if one is from debug
230                            // info and the other is from a symbol table. If
231                            // so, then keep the one with debug info. Our
232                            // sorting guarantees that entries with matching
233                            // address ranges that have debug info are last in
234                            // the sort.
235                            if (Prev == Curr) {
236                              // FunctionInfo entries match exactly (range,
237                              // lines, inlines)
238 
239                              // We used to output a warning here, but this was
240                              // so frequent on some binaries, in particular
241                              // when those were built with GCC, that it slowed
242                              // down processing extremely.
243                              return true;
244                            } else {
245                              if (!Prev.hasRichInfo() && Curr.hasRichInfo()) {
246                                // Same address range, one with no debug info
247                                // (symbol) and the next with debug info. Keep
248                                // the latter.
249                                return true;
250                              } else {
251                                OS << "warning: same address range contains "
252                                      "different debug "
253                                   << "info. Removing:\n"
254                                   << Prev << "\nIn favor of this one:\n"
255                                   << Curr << "\n";
256                                return true;
257                              }
258                            }
259                          } else {
260                            // print warnings about overlaps
261                            OS << "warning: function ranges overlap:\n"
262                               << Prev << "\n"
263                               << Curr << "\n";
264                          }
265                        } else if (Prev.Range.size() == 0 &&
266                                   Curr.Range.contains(Prev.Range.Start)) {
267                          OS << "warning: removing symbol:\n"
268                             << Prev << "\nKeeping:\n"
269                             << Curr << "\n";
270                          return true;
271                        }
272 
273                        return false;
274                      }),
275       Funcs.end());
276 
277   // If our last function info entry doesn't have a size and if we have valid
278   // text ranges, we should set the size of the last entry since any search for
279   // a high address might match our last entry. By fixing up this size, we can
280   // help ensure we don't cause lookups to always return the last symbol that
281   // has no size when doing lookups.
282   if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
283     if (auto Range =
284             ValidTextRanges->getRangeThatContains(Funcs.back().Range.Start)) {
285       Funcs.back().Range.End = Range->End;
286     }
287   }
288   OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
289      << Funcs.size() << " total\n";
290   return Error::success();
291 }
292 
insertString(StringRef S,bool Copy)293 uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
294   if (S.empty())
295     return 0;
296 
297   // The hash can be calculated outside the lock.
298   CachedHashStringRef CHStr(S);
299   std::lock_guard<std::mutex> Guard(Mutex);
300   if (Copy) {
301     // We need to provide backing storage for the string if requested
302     // since StringTableBuilder stores references to strings. Any string
303     // that comes from a section in an object file doesn't need to be
304     // copied, but any string created by code will need to be copied.
305     // This allows GsymCreator to be really fast when parsing DWARF and
306     // other object files as most strings don't need to be copied.
307     if (!StrTab.contains(CHStr))
308       CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
309                                   CHStr.hash()};
310   }
311   return StrTab.add(CHStr);
312 }
313 
addFunctionInfo(FunctionInfo && FI)314 void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
315   std::lock_guard<std::mutex> Guard(Mutex);
316   Ranges.insert(FI.Range);
317   Funcs.emplace_back(std::move(FI));
318 }
319 
forEachFunctionInfo(std::function<bool (FunctionInfo &)> const & Callback)320 void GsymCreator::forEachFunctionInfo(
321     std::function<bool(FunctionInfo &)> const &Callback) {
322   std::lock_guard<std::mutex> Guard(Mutex);
323   for (auto &FI : Funcs) {
324     if (!Callback(FI))
325       break;
326   }
327 }
328 
forEachFunctionInfo(std::function<bool (const FunctionInfo &)> const & Callback) const329 void GsymCreator::forEachFunctionInfo(
330     std::function<bool(const FunctionInfo &)> const &Callback) const {
331   std::lock_guard<std::mutex> Guard(Mutex);
332   for (const auto &FI : Funcs) {
333     if (!Callback(FI))
334       break;
335   }
336 }
337 
getNumFunctionInfos() const338 size_t GsymCreator::getNumFunctionInfos() const {
339   std::lock_guard<std::mutex> Guard(Mutex);
340   return Funcs.size();
341 }
342 
IsValidTextAddress(uint64_t Addr) const343 bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
344   if (ValidTextRanges)
345     return ValidTextRanges->contains(Addr);
346   return true; // No valid text ranges has been set, so accept all ranges.
347 }
348 
hasFunctionInfoForAddress(uint64_t Addr) const349 bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const {
350   std::lock_guard<std::mutex> Guard(Mutex);
351   return Ranges.contains(Addr);
352 }
353