// xref: /netbsd-src/external/apache2/llvm/dist/llvm/tools/llvm-profgen/ProfiledBinary.h (revision 82d56013d7b633d116a93943de88e08335357a7c)
//===-- ProfiledBinary.h - Binary decoder -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
10 #define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
11 
12 #include "CallContext.h"
13 #include "PseudoProbe.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
17 #include "llvm/MC/MCAsmInfo.h"
18 #include "llvm/MC/MCContext.h"
19 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
20 #include "llvm/MC/MCInst.h"
21 #include "llvm/MC/MCInstPrinter.h"
22 #include "llvm/MC/MCInstrAnalysis.h"
23 #include "llvm/MC/MCInstrInfo.h"
24 #include "llvm/MC/MCObjectFileInfo.h"
25 #include "llvm/MC/MCRegisterInfo.h"
26 #include "llvm/MC/MCSubtargetInfo.h"
27 #include "llvm/MC/MCTargetOptions.h"
28 #include "llvm/Object/ELFObjectFile.h"
29 #include "llvm/ProfileData/SampleProf.h"
30 #include "llvm/Support/Path.h"
31 #include <list>
32 #include <set>
33 #include <sstream>
34 #include <string>
35 #include <unordered_map>
36 #include <unordered_set>
37 #include <vector>
38 
39 using namespace llvm;
40 using namespace sampleprof;
41 using namespace llvm::object;
42 
43 namespace llvm {
44 namespace sampleprof {
45 
46 class ProfiledBinary;
47 
48 struct InstructionPointer {
49   ProfiledBinary *Binary;
50   union {
51     // Offset of the executable segment of the binary.
52     uint64_t Offset = 0;
53     // Also used as address in unwinder
54     uint64_t Address;
55   };
56   // Index to the sorted code address array of the binary.
57   uint64_t Index = 0;
58   InstructionPointer(ProfiledBinary *Binary, uint64_t Address,
59                      bool RoundToNext = false);
60   void advance();
61   void backward();
62   void update(uint64_t Addr);
63 };
64 
65 // PrologEpilog offset tracker, used to filter out broken stack samples
66 // Currently we use a heuristic size (two) to infer prolog and epilog
67 // based on the start address and return address. In the future,
68 // we will switch to Dwarf CFI based tracker
69 struct PrologEpilogTracker {
70   // A set of prolog and epilog offsets. Used by virtual unwinding.
71   std::unordered_set<uint64_t> PrologEpilogSet;
72   ProfiledBinary *Binary;
PrologEpilogTrackerPrologEpilogTracker73   PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){};
74 
75   // Take the two addresses from the start of function as prolog
inferPrologOffsetsPrologEpilogTracker76   void inferPrologOffsets(
77       std::unordered_map<uint64_t, std::string> &FuncStartAddrMap) {
78     for (auto I : FuncStartAddrMap) {
79       PrologEpilogSet.insert(I.first);
80       InstructionPointer IP(Binary, I.first);
81       IP.advance();
82       PrologEpilogSet.insert(IP.Offset);
83     }
84   }
85 
86   // Take the last two addresses before the return address as epilog
inferEpilogOffsetsPrologEpilogTracker87   void inferEpilogOffsets(std::unordered_set<uint64_t> &RetAddrs) {
88     for (auto Addr : RetAddrs) {
89       PrologEpilogSet.insert(Addr);
90       InstructionPointer IP(Binary, Addr);
91       IP.backward();
92       PrologEpilogSet.insert(IP.Offset);
93     }
94   }
95 };
96 
97 class ProfiledBinary {
98   // Absolute path of the binary.
99   std::string Path;
100   // The target triple.
101   Triple TheTriple;
102   // The runtime base address that the executable sections are loaded at.
103   mutable uint64_t BaseAddress = 0;
104   // The preferred base address that the executable sections are loaded at.
105   uint64_t PreferredBaseAddress = 0;
106   // Mutiple MC component info
107   std::unique_ptr<const MCRegisterInfo> MRI;
108   std::unique_ptr<const MCAsmInfo> AsmInfo;
109   std::unique_ptr<const MCSubtargetInfo> STI;
110   std::unique_ptr<const MCInstrInfo> MII;
111   std::unique_ptr<MCDisassembler> DisAsm;
112   std::unique_ptr<const MCInstrAnalysis> MIA;
113   std::unique_ptr<MCInstPrinter> IPrinter;
114   // A list of text sections sorted by start RVA and size. Used to check
115   // if a given RVA is a valid code address.
116   std::set<std::pair<uint64_t, uint64_t>> TextSections;
117   // Function offset to name mapping.
118   std::unordered_map<uint64_t, std::string> FuncStartAddrMap;
119   // Offset to context location map. Used to expand the context.
120   std::unordered_map<uint64_t, FrameLocationStack> Offset2LocStackMap;
121   // An array of offsets of all instructions sorted in increasing order. The
122   // sorting is needed to fast advance to the next forward/backward instruction.
123   std::vector<uint64_t> CodeAddrs;
124   // A set of call instruction offsets. Used by virtual unwinding.
125   std::unordered_set<uint64_t> CallAddrs;
126   // A set of return instruction offsets. Used by virtual unwinding.
127   std::unordered_set<uint64_t> RetAddrs;
128 
129   PrologEpilogTracker ProEpilogTracker;
130 
131   // The symbolizer used to get inline context for an instruction.
132   std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
133 
134   // Pseudo probe decoder
135   PseudoProbeDecoder ProbeDecoder;
136 
137   bool UsePseudoProbes = false;
138 
139   void setPreferredBaseAddress(const ELFObjectFileBase *O);
140 
141   void decodePseudoProbe(const ELFObjectFileBase *Obj);
142 
143   // Set up disassembler and related components.
144   void setUpDisassembler(const ELFObjectFileBase *Obj);
145   void setupSymbolizer();
146 
147   /// Dissassemble the text section and build various address maps.
148   void disassemble(const ELFObjectFileBase *O);
149 
150   /// Helper function to dissassemble the symbol and extract info for unwinding
151   bool dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
152                           SectionSymbolsTy &Symbols, const SectionRef &Section);
153   /// Symbolize a given instruction pointer and return a full call context.
154   FrameLocationStack symbolize(const InstructionPointer &IP,
155                                bool UseCanonicalFnName = false);
156 
157   /// Decode the interesting parts of the binary and build internal data
158   /// structures. On high level, the parts of interest are:
159   ///   1. Text sections, including the main code section and the PLT
160   ///   entries that will be used to handle cross-module call transitions.
161   ///   2. The .debug_line section, used by Dwarf-based profile generation.
162   ///   3. Pseudo probe related sections, used by probe-based profile
163   ///   generation.
164   void load();
getFrameLocationStack(uint64_t Offset)165   const FrameLocationStack &getFrameLocationStack(uint64_t Offset) const {
166     auto I = Offset2LocStackMap.find(Offset);
167     assert(I != Offset2LocStackMap.end() &&
168            "Can't find location for offset in the binary");
169     return I->second;
170   }
171 
172 public:
ProfiledBinary(StringRef Path)173   ProfiledBinary(StringRef Path) : Path(Path), ProEpilogTracker(this) {
174     setupSymbolizer();
175     load();
176   }
virtualAddrToOffset(uint64_t VitualAddress)177   uint64_t virtualAddrToOffset(uint64_t VitualAddress) const {
178     return VitualAddress - BaseAddress;
179   }
offsetToVirtualAddr(uint64_t Offset)180   uint64_t offsetToVirtualAddr(uint64_t Offset) const {
181     return Offset + BaseAddress;
182   }
getPath()183   StringRef getPath() const { return Path; }
getName()184   StringRef getName() const { return llvm::sys::path::filename(Path); }
getBaseAddress()185   uint64_t getBaseAddress() const { return BaseAddress; }
setBaseAddress(uint64_t Address)186   void setBaseAddress(uint64_t Address) { BaseAddress = Address; }
getPreferredBaseAddress()187   uint64_t getPreferredBaseAddress() const { return PreferredBaseAddress; }
188 
addressIsCode(uint64_t Address)189   bool addressIsCode(uint64_t Address) const {
190     uint64_t Offset = virtualAddrToOffset(Address);
191     return Offset2LocStackMap.find(Offset) != Offset2LocStackMap.end();
192   }
addressIsCall(uint64_t Address)193   bool addressIsCall(uint64_t Address) const {
194     uint64_t Offset = virtualAddrToOffset(Address);
195     return CallAddrs.count(Offset);
196   }
addressIsReturn(uint64_t Address)197   bool addressIsReturn(uint64_t Address) const {
198     uint64_t Offset = virtualAddrToOffset(Address);
199     return RetAddrs.count(Offset);
200   }
addressInPrologEpilog(uint64_t Address)201   bool addressInPrologEpilog(uint64_t Address) const {
202     uint64_t Offset = virtualAddrToOffset(Address);
203     return ProEpilogTracker.PrologEpilogSet.count(Offset);
204   }
205 
getAddressforIndex(uint64_t Index)206   uint64_t getAddressforIndex(uint64_t Index) const {
207     return offsetToVirtualAddr(CodeAddrs[Index]);
208   }
209 
usePseudoProbes()210   bool usePseudoProbes() const { return UsePseudoProbes; }
211   // Get the index in CodeAddrs for the address
212   // As we might get an address which is not the code
213   // here it would round to the next valid code address by
214   // using lower bound operation
getIndexForAddr(uint64_t Address)215   uint32_t getIndexForAddr(uint64_t Address) const {
216     uint64_t Offset = virtualAddrToOffset(Address);
217     auto Low = llvm::lower_bound(CodeAddrs, Offset);
218     return Low - CodeAddrs.begin();
219   }
220 
getCallAddrFromFrameAddr(uint64_t FrameAddr)221   uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const {
222     return getAddressforIndex(getIndexForAddr(FrameAddr) - 1);
223   }
224 
getFuncFromStartOffset(uint64_t Offset)225   StringRef getFuncFromStartOffset(uint64_t Offset) {
226     return FuncStartAddrMap[Offset];
227   }
228 
getInlineLeafFrameLoc(uint64_t Offset)229   Optional<FrameLocation> getInlineLeafFrameLoc(uint64_t Offset) {
230     const auto &Stack = getFrameLocationStack(Offset);
231     if (Stack.empty())
232       return {};
233     return Stack.back();
234   }
235 
236   // Compare two addresses' inline context
237   bool inlineContextEqual(uint64_t Add1, uint64_t Add2) const;
238 
239   // Get the context string of the current stack with inline context filled in.
240   // It will search the disassembling info stored in Offset2LocStackMap. This is
241   // used as the key of function sample map
242   std::string getExpandedContextStr(const SmallVectorImpl<uint64_t> &Stack,
243                                     bool &WasLeafInlined) const;
244 
getCallProbeForAddr(uint64_t Address)245   const PseudoProbe *getCallProbeForAddr(uint64_t Address) const {
246     return ProbeDecoder.getCallProbeForAddr(Address);
247   }
248   void
249   getInlineContextForProbe(const PseudoProbe *Probe,
250                            SmallVectorImpl<std::string> &InlineContextStack,
251                            bool IncludeLeaf = false) const {
252     return ProbeDecoder.getInlineContextForProbe(Probe, InlineContextStack,
253                                                  IncludeLeaf);
254   }
getAddress2ProbesMap()255   const AddressProbesMap &getAddress2ProbesMap() const {
256     return ProbeDecoder.getAddress2ProbesMap();
257   }
getFuncDescForGUID(uint64_t GUID)258   const PseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) {
259     return ProbeDecoder.getFuncDescForGUID(GUID);
260   }
getInlinerDescForProbe(const PseudoProbe * Probe)261   const PseudoProbeFuncDesc *getInlinerDescForProbe(const PseudoProbe *Probe) {
262     return ProbeDecoder.getInlinerDescForProbe(Probe);
263   }
264 };
265 
266 } // end namespace sampleprof
267 } // end namespace llvm
268 
269 #endif
270