xref: /llvm-project/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h (revision 6f28b4b5e960e1c4eeebad18b48e667df1e806a8)
1 //===- FunctionInfo.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
10 #define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
11 
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/DebugInfo/GSYM/CallSiteInfo.h"
14 #include "llvm/DebugInfo/GSYM/ExtractRanges.h"
15 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
16 #include "llvm/DebugInfo/GSYM/LineTable.h"
17 #include "llvm/DebugInfo/GSYM/LookupResult.h"
18 #include "llvm/DebugInfo/GSYM/MergedFunctionsInfo.h"
19 #include "llvm/DebugInfo/GSYM/StringTable.h"
20 #include <cstdint>
21 
22 namespace llvm {
23 class raw_ostream;
24 
25 namespace gsym {
26 
27 class GsymReader;
28 /// Function information in GSYM files encodes information for one contiguous
29 /// address range. If a function has discontiguous address ranges, they will
30 /// need to be encoded using multiple FunctionInfo objects.
31 ///
32 /// ENCODING
33 ///
34 /// The function information gets the function start address as an argument
35 /// to the FunctionInfo::decode(...) function. This information is calculated
36 /// from the GSYM header and an address offset from the GSYM address offsets
37 /// table. The encoded FunctionInfo information must be aligned to a 4 byte
38 /// boundary.
39 ///
40 /// The encoded data for a FunctionInfo starts with fixed data that all
41 /// function info objects have:
42 ///
43 /// ENCODING  NAME        DESCRIPTION
44 /// ========= =========== ====================================================
45 /// uint32_t  Size        The size in bytes of this function.
46 /// uint32_t  Name        The string table offset of the function name.
47 ///
48 /// The optional data in a FunctionInfo object follows this fixed information
49 /// and consists of a stream of tuples that consist of:
50 ///
51 /// ENCODING  NAME        DESCRIPTION
52 /// ========= =========== ====================================================
53 /// uint32_t  InfoType    An "InfoType" enumeration that describes the type
54 ///                       of optional data that is encoded.
55 /// uint32_t  InfoLength  The size in bytes of the encoded data that
56 ///                       immediately follows this length if this value is
57 ///                       greater than zero.
58 /// uint8_t[] InfoData    Encoded bytes that represent the data for the
59 ///                       "InfoType". These bytes are only present if
60 ///                       "InfoLength" is greater than zero.
61 ///
62 /// The "InfoType" is an enumeration:
63 ///
64 ///   enum InfoType {
65 ///     EndOfList = 0u,
66 ///     LineTableInfo = 1u,
67 ///     InlineInfo = 2u,
68 ///     MergedFunctionsInfo = 3u,
69 ///     CallSiteInfo = 4u
70 ///   };
71 ///
/// This stream of tuples is terminated by an "InfoType" whose value is
73 /// InfoType::EndOfList and a zero for "InfoLength". This signifies the end of
74 /// the optional information list. This format allows us to add new optional
75 /// information data to a FunctionInfo object over time and allows older
76 /// clients to still parse the format and skip over any data that they don't
77 /// understand or want to parse.
78 ///
79 /// So the function information encoding essentially looks like:
80 ///
81 /// struct {
82 ///   uint32_t Size;
83 ///   uint32_t Name;
84 ///   struct {
85 ///     uint32_t InfoType;
86 ///     uint32_t InfoLength;
87 ///     uint8_t InfoData[InfoLength];
88 ///   }[N];
89 /// }
90 ///
91 /// Where "N" is the number of tuples.
92 struct FunctionInfo {
93   AddressRange Range;
94   uint32_t Name; ///< String table offset in the string table.
95   std::optional<LineTable> OptLineTable;
96   std::optional<InlineInfo> Inline;
97   std::optional<MergedFunctionsInfo> MergedFunctions;
98   std::optional<CallSiteInfoCollection> CallSites;
99   /// If we encode a FunctionInfo during segmenting so we know its size, we can
100   /// cache that encoding here so we don't need to re-encode it when saving the
101   /// GSYM file.
102   SmallString<32> EncodingCache;
103 
104   FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0)
105       : Range(Addr, Addr + Size), Name(N) {}
106 
107   /// Query if a FunctionInfo has rich debug info.
108   ///
109   /// \returns A bool that indicates if this object has something else than
110   /// range and name. When converting information from a symbol table and from
111   /// debug info, we might end up with multiple FunctionInfo objects for the
112   /// same range and we need to be able to tell which one is the better object
113   /// to use.
114   bool hasRichInfo() const { return OptLineTable || Inline || CallSites; }
115 
116   /// Query if a FunctionInfo object is valid.
117   ///
118   /// Address and size can be zero and there can be no line entries for a
119   /// symbol so the only indication this entry is valid is if the name is
120   /// not zero. This can happen when extracting information from symbol
121   /// tables that do not encode symbol sizes. In that case only the
122   /// address and name will be filled in.
123   ///
124   /// \returns A boolean indicating if this FunctionInfo is valid.
125   bool isValid() const {
126     return Name != 0;
127   }
128 
129   /// Decode an object from a binary data stream.
130   ///
131   /// \param Data The binary stream to read the data from. This object must
132   /// have the data for the object starting at offset zero. The data
133   /// can contain more data than needed.
134   ///
135   /// \param BaseAddr The FunctionInfo's start address and will be used as the
136   /// base address when decoding any contained information like the line table
137   /// and the inline info.
138   ///
139   /// \returns An FunctionInfo or an error describing the issue that was
140   /// encountered during decoding.
141   static llvm::Expected<FunctionInfo> decode(DataExtractor &Data,
142                                              uint64_t BaseAddr);
143 
144   /// Encode this object into FileWriter stream.
145   ///
146   /// \param O The binary stream to write the data to at the current file
147   /// position.
148   ///
149   /// \param NoPadding Directly write the FunctionInfo data, without any padding
150   /// By default, FunctionInfo will be 4-byte aligned by padding with
151   /// 0's at the start. This is OK since the function will return the offset of
152   /// actual data in the stream. However when writing FunctionInfo's as a
153   /// stream, the padding will break the decoding of the data - since the offset
154   /// where the FunctionInfo starts is not kept in this scenario.
155   ///
156   /// \returns An error object that indicates failure or the offset of the
157   /// function info that was successfully written into the stream.
158   llvm::Expected<uint64_t> encode(FileWriter &O, bool NoPadding = false) const;
159 
160   /// Encode this function info into the internal byte cache and return the size
161   /// in bytes.
162   ///
163   /// When segmenting GSYM files we need to know how big each FunctionInfo will
164   /// encode into so we can generate segments of the right size. We don't want
165   /// to have to encode a FunctionInfo twice, so we can cache the encoded bytes
166   /// and re-use then when calling FunctionInfo::encode(...).
167   ///
168   /// \returns The size in bytes of the FunctionInfo if it were to be encoded
169   /// into a byte stream.
170   uint64_t cacheEncoding();
171 
172   /// Lookup an address within a FunctionInfo object's data stream.
173   ///
174   /// Instead of decoding an entire FunctionInfo object when doing lookups,
175   /// we can decode only the information we need from the FunctionInfo's data
176   /// for the specific address. The lookup result information is returned as
177   /// a LookupResult.
178   ///
179   /// \param Data The binary stream to read the data from. This object must
180   /// have the data for the object starting at offset zero. The data
181   /// can contain more data than needed.
182   ///
183   /// \param GR The GSYM reader that contains the string and file table that
184   /// will be used to fill in information in the returned result.
185   ///
186   /// \param FuncAddr The function start address decoded from the GsymReader.
187   ///
188   /// \param Addr The address to lookup.
189   ///
190   /// \param MergedFuncsData A pointer to an optional DataExtractor that, if
191   /// non-null, will be set to the raw data of the MergedFunctionInfo, if
192   /// present.
193   ///
194   /// \returns An LookupResult or an error describing the issue that was
195   /// encountered during decoding. An error should only be returned if the
196   /// address is not contained in the FunctionInfo or if the data is corrupted.
197   static llvm::Expected<LookupResult>
198   lookup(DataExtractor &Data, const GsymReader &GR, uint64_t FuncAddr,
199          uint64_t Addr,
200          std::optional<DataExtractor> *MergedFuncsData = nullptr);
201 
202   uint64_t startAddress() const { return Range.start(); }
203   uint64_t endAddress() const { return Range.end(); }
204   uint64_t size() const { return Range.size(); }
205 
206   void clear() {
207     Range = {0, 0};
208     Name = 0;
209     OptLineTable = std::nullopt;
210     Inline = std::nullopt;
211   }
212 };
213 
214 inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) {
215   return LHS.Range == RHS.Range && LHS.Name == RHS.Name &&
216          LHS.OptLineTable == RHS.OptLineTable && LHS.Inline == RHS.Inline;
217 }
218 inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) {
219   return !(LHS == RHS);
220 }
221 /// This sorting will order things consistently by address range first, but
222 /// then followed by increasing levels of debug info like inline information
223 /// and line tables. We might end up with a FunctionInfo from debug info that
224 /// will have the same range as one from the symbol table, but we want to
225 /// quickly be able to sort and use the best version when creating the final
226 /// GSYM file. This function compares the inline information as we have seen
227 /// cases where LTO can generate a wide array of differing inline information,
228 /// mostly due to messing up the address ranges for inlined functions, so the
229 /// inline information with the most entries will appeear last. If the inline
230 /// information match, either by both function infos not having any or both
231 /// being exactly the same, we will then compare line tables. Comparing line
232 /// tables allows the entry with the most line entries to appear last. This
233 /// ensures we are able to save the FunctionInfo with the most debug info into
234 /// the GSYM file.
235 inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) {
236   // First sort by address range
237   if (LHS.Range != RHS.Range)
238     return LHS.Range < RHS.Range;
239   if (LHS.Inline == RHS.Inline)
240     return LHS.OptLineTable < RHS.OptLineTable;
241   return LHS.Inline < RHS.Inline;
242 }
243 
/// Stream insertion operator for a FunctionInfo. Only the declaration lives
/// in this header; the definition is provided elsewhere.
raw_ostream &operator<<(raw_ostream &OS, const FunctionInfo &R);
245 
246 } // namespace gsym
247 } // namespace llvm
248 
249 #endif // LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
250