//===- bolt/Passes/InstrumentationSummary.h ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// InstrumentationSummary holds all the data generated during
// the Instrumentation pass, which will be needed later for runtime library
// binary emit and linking.
//
//===----------------------------------------------------------------------===//

#ifndef BOLT_PASSES_INSTRUMENTATION_SUMMARY_H
#define BOLT_PASSES_INSTRUMENTATION_SUMMARY_H

#include "llvm/ADT/DenseSet.h"
#include <string>
#include <vector>

namespace llvm {

class MCSymbol;

namespace bolt {

class BinaryFunction;

// All structs here are part of the program metadata serialization format and
// consist of POD types or arrays of POD types that are trivially mapped from
// disk to memory. This provides the runtime library with a basic
// understanding of the program structure, so it can build a CFG for each
// function and deduce execution counts for edges that don't require explicit
// counters. It also provides function names and offsets used when writing the
// fdata file.

// Location information -- analogous to the concept of the same name in fdata
// writing/reading. The difference is that the name is stored as an index to a
// string table written separately.
41 struct LocDescription { 42 uint32_t FuncString; 43 uint32_t Offset; 44 }; 45 46 // Inter-function control flow transfer instrumentation 47 struct CallDescription { 48 LocDescription FromLoc; 49 uint32_t FromNode; // Node refers to the CFG node index of the call site 50 LocDescription ToLoc; 51 uint32_t Counter; 52 const BinaryFunction *Target; 53 }; 54 55 // Spans multiple counters during runtime - this is an indirect call site 56 struct IndCallDescription { 57 LocDescription FromLoc; 58 }; 59 60 // This is an indirect call target (any entry point from any function). This 61 // is stored sorted in the binary for fast lookups during data writing. 62 struct IndCallTargetDescription { 63 LocDescription ToLoc; 64 const BinaryFunction *Target; 65 }; 66 67 // Intra-function control flow transfer instrumentation 68 struct EdgeDescription { 69 LocDescription FromLoc; 70 uint32_t FromNode; 71 LocDescription ToLoc; 72 uint32_t ToNode; 73 uint32_t Counter; 74 }; 75 76 // Basic block frequency (CFG node) instrumentation - only used for spanning 77 // tree leaf nodes. 78 struct InstrumentedNode { 79 uint32_t Node; 80 uint32_t Counter; 81 }; 82 83 // Entry basic blocks for a function. We record their output addresses to 84 // check frequency of this address (via node number) against all tracked calls 85 // to this address and discover traffic coming from uninstrumented code. 86 struct EntryNode { 87 uint64_t Node; 88 uint64_t Address; 89 }; 90 91 // Base struct organizing all metadata pertaining to a single function 92 struct FunctionDescription { 93 const BinaryFunction *Function; 94 std::vector<InstrumentedNode> LeafNodes; 95 std::vector<EdgeDescription> Edges; 96 DenseSet<std::pair<uint32_t, uint32_t>> EdgesSet; 97 std::vector<CallDescription> Calls; 98 std::vector<EntryNode> EntryNodes; 99 }; 100 101 /// Holds the summary of the data generated by the Instrumentation Pass. 102 /// These information will be needed for binary emit. 
103 struct InstrumentationSummary { 104 /// Identify all counters used in runtime while instrumentation is running 105 std::vector<MCSymbol *> Counters; 106 107 /// Stores function names, to be emitted to the runtime 108 std::string StringTable; 109 110 /// Pointer to runtime instrumentation handlers 111 MCSymbol *IndCallCounterFuncPtr; 112 MCSymbol *IndTailCallCounterFuncPtr; 113 114 /// Intra-function control flow and direct calls 115 std::vector<FunctionDescription> FunctionDescriptions; 116 117 /// Inter-function control flow via indirect calls 118 std::vector<IndCallDescription> IndCallDescriptions; 119 std::vector<IndCallTargetDescription> IndCallTargetDescriptions; 120 121 static constexpr uint64_t NUM_SERIALIZED_CONTAINERS = 4; 122 static constexpr uint64_t SERIALIZED_CONTAINER_SIZE = 123 sizeof(uint32_t) * NUM_SERIALIZED_CONTAINERS; 124 getFDSizeInstrumentationSummary125 uint32_t getFDSize() const { 126 uint32_t FuncDescSize = 0; 127 for (const FunctionDescription &Func : FunctionDescriptions) { 128 // A function description consists of containers of different 129 // descriptions. We use vectors to store them and when serializing them, 130 // we first output a uint32_t-sized field for the number of elements of 131 // the vector and then we write each element, so a simple parser know 132 // where to stop. 133 FuncDescSize += SERIALIZED_CONTAINER_SIZE + 134 Func.Edges.size() * sizeof(EdgeDescription) + 135 Func.LeafNodes.size() * sizeof(InstrumentedNode) + 136 Func.Calls.size() * sizeof(CallDescription) + 137 Func.EntryNodes.size() * sizeof(EntryNode); 138 } 139 return FuncDescSize; 140 } 141 }; 142 143 } // namespace bolt 144 } // namespace llvm 145 146 #endif 147