xref: /llvm-project/bolt/include/bolt/Passes/InstrumentationSummary.h (revision 1a2f83366b86433bb86f3b60fa19b3f096313a21)
1 //===- bolt/Passes/InstrumentationSummary.h ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// InstrumentationSummary holds all the data generated during
// the Instrumentation pass, which will be needed later for runtime library
// binary emit and linking.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef BOLT_PASSES_INSTRUMENTATION_SUMMARY_H
16 #define BOLT_PASSES_INSTRUMENTATION_SUMMARY_H
17 
#include "llvm/ADT/DenseSet.h"
#include <cassert>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>
21 
22 namespace llvm {
23 
24 class MCSymbol;
25 
26 namespace bolt {
27 
28 class BinaryFunction;
29 
30 // All structs here are part of the program metadata serialization format and
31 // consist of POD types or array of POD types that are trivially mapped from
32 // disk to memory. This provides the runtime library with a basic
33 // understanding of the program structure, so it can build a CFG for each
34 // function and deduce execution counts for edges that don't require explicit
35 // counters. It also provides function names and offsets used when writing the
36 // fdata file.
37 
// Location information -- analogous to the concept of the same name in fdata
// writing/reading. The difference is that the function name is not stored
// inline: it is stored as an index into a string table written separately.
struct LocDescription {
  // Index of the function name in the separately written string table.
  uint32_t FuncString;
  // Offset of the location (presumably relative to the start of the function
  // named by FuncString -- confirm against the code that fills it in).
  uint32_t Offset;
};

// This struct is part of the serialized metadata format, so make accidental
// layout changes (added fields, padding) a compile-time error.
static_assert(sizeof(LocDescription) == 2 * sizeof(uint32_t),
              "LocDescription must be exactly two 32-bit fields");
45 
46 // Inter-function control flow transfer instrumentation
47 struct CallDescription {
48   LocDescription FromLoc;
49   uint32_t FromNode; // Node refers to the CFG node index of the call site
50   LocDescription ToLoc;
51   uint32_t Counter;
52   const BinaryFunction *Target;
53 };
54 
55 // Spans multiple counters during runtime - this is an indirect call site
56 struct IndCallDescription {
57   LocDescription FromLoc;
58 };
59 
60 // This is an indirect call target (any entry point from any function). This
61 // is stored sorted in the binary for fast lookups during data writing.
62 struct IndCallTargetDescription {
63   LocDescription ToLoc;
64   const BinaryFunction *Target;
65 };
66 
67 // Intra-function control flow transfer instrumentation
68 struct EdgeDescription {
69   LocDescription FromLoc;
70   uint32_t FromNode;
71   LocDescription ToLoc;
72   uint32_t ToNode;
73   uint32_t Counter;
74 };
75 
// Basic block frequency (CFG node) instrumentation - only used for spanning
// tree leaf nodes.
struct InstrumentedNode {
  // CFG node index of the instrumented basic block.
  uint32_t Node;
  // Counter associated with this node (presumably an index identifying the
  // runtime counter -- confirm against the instrumentation pass).
  uint32_t Counter;
};

// InstrumentationSummary::getFDSize() uses sizeof(InstrumentedNode) as the
// serialized size of one node, so lock the layout in at compile time.
static_assert(sizeof(InstrumentedNode) == 2 * sizeof(uint32_t),
              "InstrumentedNode must be exactly two 32-bit fields");
82 
// Entry basic blocks for a function. We record their output addresses to
// check frequency of this address (via node number) against all tracked calls
// to this address and discover traffic coming from uninstrumented code.
struct EntryNode {
  // Node number of the entry basic block.
  uint64_t Node;
  // Output address of the entry basic block.
  uint64_t Address;
};

// InstrumentationSummary::getFDSize() uses sizeof(EntryNode) as the
// serialized size of one entry, so lock the layout in at compile time.
static_assert(sizeof(EntryNode) == 2 * sizeof(uint64_t),
              "EntryNode must be exactly two 64-bit fields");
90 
91 // Base struct organizing all metadata pertaining to a single function
92 struct FunctionDescription {
93   const BinaryFunction *Function;
94   std::vector<InstrumentedNode> LeafNodes;
95   std::vector<EdgeDescription> Edges;
96   DenseSet<std::pair<uint32_t, uint32_t>> EdgesSet;
97   std::vector<CallDescription> Calls;
98   std::vector<EntryNode> EntryNodes;
99 };
100 
101 /// Holds the summary of the data generated by the Instrumentation Pass.
102 /// These information will be needed for binary emit.
103 struct InstrumentationSummary {
104   /// Identify all counters used in runtime while instrumentation is running
105   std::vector<MCSymbol *> Counters;
106 
107   /// Stores function names, to be emitted to the runtime
108   std::string StringTable;
109 
110   /// Pointer to runtime instrumentation handlers
111   MCSymbol *IndCallCounterFuncPtr;
112   MCSymbol *IndTailCallCounterFuncPtr;
113 
114   /// Intra-function control flow and direct calls
115   std::vector<FunctionDescription> FunctionDescriptions;
116 
117   /// Inter-function control flow via indirect calls
118   std::vector<IndCallDescription> IndCallDescriptions;
119   std::vector<IndCallTargetDescription> IndCallTargetDescriptions;
120 
121   static constexpr uint64_t NUM_SERIALIZED_CONTAINERS = 4;
122   static constexpr uint64_t SERIALIZED_CONTAINER_SIZE =
123       sizeof(uint32_t) * NUM_SERIALIZED_CONTAINERS;
124 
getFDSizeInstrumentationSummary125   uint32_t getFDSize() const {
126     uint32_t FuncDescSize = 0;
127     for (const FunctionDescription &Func : FunctionDescriptions) {
128       // A function description consists of containers of different
129       // descriptions. We use vectors to store them and when serializing them,
130       // we first output a uint32_t-sized field for the number of elements of
131       // the vector and then we write each element, so a simple parser know
132       // where to stop.
133       FuncDescSize += SERIALIZED_CONTAINER_SIZE +
134                       Func.Edges.size() * sizeof(EdgeDescription) +
135                       Func.LeafNodes.size() * sizeof(InstrumentedNode) +
136                       Func.Calls.size() * sizeof(CallDescription) +
137                       Func.EntryNodes.size() * sizeof(EntryNode);
138     }
139     return FuncDescSize;
140   }
141 };
142 
143 } // namespace bolt
144 } // namespace llvm
145 
146 #endif
147