//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <limits>
#include <memory> // For placement new.
#include <utility>

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function
/// and the path from the root to the node represents its stack trace. Each
/// node in the trie will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented using
/// an Array<NodeIdPair> -- each NodeIdPair instance will contain the function
/// ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ j' ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach because computing the hash for every update
/// of a function's call information, as we handle the entry and exit events,
/// takes too much time.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact and
/// quick to update) to enable us to keep the overheads of this implementation
/// to the minimum.
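///
/// As a minimal usage sketch (the function IDs, TSC values, and CPU id below
/// are made-up illustrations, not values the runtime would produce; note that
/// the Allocators object must outlive the trie, since the trie's arrays keep
/// references to the allocators):
///
///   auto A = FunctionCallTrie::InitAllocators();
///   FunctionCallTrie T(A);
///   T.enterFunction(1, 100, 0); // enter f at TSC 100 on CPU 0
///   T.enterFunction(2, 110, 0); // f calls g
///   T.exitFunction(2, 150, 0);  // g exits; g's local time is 40 ticks
///   T.exitFunction(1, 200, 0);  // f exits; f records 100 ticks (g included)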
class FunctionCallTrie {
public:
  struct Node;

  // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
  // standard library types in this header.
  struct NodeIdPair {
    Node *NodePtr;
    int32_t FId;
  };

  using NodeIdPairArray = Array<NodeIdPair>;
  using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;

  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
  // number of times this node actually appeared, the cumulative amount of time
  // for this particular node including its children's call times, and just the
  // local time spent on this node. Each Node will have the ID of the XRay
  // instrumented function that it is associated with.
  struct Node {
    Node *Parent;
    NodeIdPairArray Callees;
    uint64_t CallCount;
    uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
    int32_t FId;

    // TODO: Include the compact histogram.
  };

private:
  struct ShadowStackEntry {
    uint64_t EntryTSC;
    Node *NodePtr;
    uint16_t EntryCPU;
  };

  using NodeArray = Array<Node>;
  using RootArray = Array<Node *>;
  using ShadowStackArray = Array<ShadowStackEntry>;

public:
  // We collate the allocators we need into a single struct, as a convenience to
  // allow us to initialize these as a group.
  struct Allocators {
    using NodeAllocatorType = NodeArray::AllocatorType;
    using RootAllocatorType = RootArray::AllocatorType;
    using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;

    // Use hosted aligned storage members to allow for trivial move and init.
    // This also allows us to sidestep the potential-failing allocation issue.
    typename std::aligned_storage<sizeof(NodeAllocatorType),
                                  alignof(NodeAllocatorType)>::type
        NodeAllocatorStorage;
    typename std::aligned_storage<sizeof(RootAllocatorType),
                                  alignof(RootAllocatorType)>::type
        RootAllocatorStorage;
    typename std::aligned_storage<sizeof(ShadowStackAllocatorType),
                                  alignof(ShadowStackAllocatorType)>::type
        ShadowStackAllocatorStorage;
    typename std::aligned_storage<sizeof(NodeIdPairAllocatorType),
                                  alignof(NodeIdPairAllocatorType)>::type
        NodeIdPairAllocatorStorage;

    NodeAllocatorType *NodeAllocator = nullptr;
    RootAllocatorType *RootAllocator = nullptr;
    ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
    NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

    Allocators() = default;
    Allocators(const Allocators &) = delete;
    Allocators &operator=(const Allocators &) = delete;

    struct Buffers {
      BufferQueue::Buffer NodeBuffer;
      BufferQueue::Buffer RootsBuffer;
      BufferQueue::Buffer ShadowStackBuffer;
      BufferQueue::Buffer NodeIdPairBuffer;
    };

    explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage)
          NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage)
          RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(
          B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(
          B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage) NodeAllocatorType(Max);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage) RootAllocatorType(Max);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // Here we rely on the safety of memcpy'ing contents of the storage
      // members, and then pointing the source pointers to nullptr.
      internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage,
                      sizeof(NodeAllocatorType));
      internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage,
                      sizeof(RootAllocatorType));
      internal_memcpy(&ShadowStackAllocatorStorage,
                      &O.ShadowStackAllocatorStorage,
                      sizeof(ShadowStackAllocatorType));
      internal_memcpy(&NodeIdPairAllocatorStorage,
                      &O.NodeIdPairAllocatorStorage,
                      sizeof(NodeIdPairAllocatorType));

      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);

      O.NodeAllocator = nullptr;
      O.RootAllocator = nullptr;
      O.ShadowStackAllocator = nullptr;
      O.NodeIdPairAllocator = nullptr;
    }

    Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // When moving into an existing instance, we ensure that we clean up the
      // current allocators.
      if (NodeAllocator)
        NodeAllocator->~NodeAllocatorType();
      if (O.NodeAllocator) {
        new (&NodeAllocatorStorage)
            NodeAllocatorType(std::move(*O.NodeAllocator));
        NodeAllocator =
            reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
        O.NodeAllocator = nullptr;
      } else {
        NodeAllocator = nullptr;
      }

      if (RootAllocator)
        RootAllocator->~RootAllocatorType();
      if (O.RootAllocator) {
        new (&RootAllocatorStorage)
            RootAllocatorType(std::move(*O.RootAllocator));
        RootAllocator =
            reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
        O.RootAllocator = nullptr;
      } else {
        RootAllocator = nullptr;
      }

      if (ShadowStackAllocator)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (O.ShadowStackAllocator) {
        new (&ShadowStackAllocatorStorage)
            ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator));
        ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
            &ShadowStackAllocatorStorage);
        O.ShadowStackAllocator = nullptr;
      } else {
        ShadowStackAllocator = nullptr;
      }

      if (NodeIdPairAllocator)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
      if (O.NodeIdPairAllocator) {
        new (&NodeIdPairAllocatorStorage)
            NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator));
        NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
            &NodeIdPairAllocatorStorage);
        O.NodeIdPairAllocator = nullptr;
      } else {
        NodeIdPairAllocator = nullptr;
      }

      return *this;
    }

    ~Allocators() XRAY_NEVER_INSTRUMENT {
      if (NodeAllocator != nullptr)
        NodeAllocator->~NodeAllocatorType();
      if (RootAllocator != nullptr)
        RootAllocator->~RootAllocatorType();
      if (ShadowStackAllocator != nullptr)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (NodeIdPairAllocator != nullptr)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
    }
  };

  static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
    return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
  }

  static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
    Allocators A(Max);
    return A;
  }

  static Allocators
  InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT {
    Allocators A(Bufs);
    return A;
  }

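  // A sketch of typical setup through the buffer-based path, assuming the
  // buffers are handed out by a BufferQueue (the names below are
  // illustrative). The aligned-storage design above is what lets the
  // resulting Allocators object be moved by memcpy'ing the in-place
  // allocator state:
  //
  //   FunctionCallTrie::Allocators::Buffers Bufs;
  //   // ... initialise Bufs.NodeBuffer, Bufs.RootsBuffer,
  //   // Bufs.ShadowStackBuffer, and Bufs.NodeIdPairBuffer ...
  //   auto A = FunctionCallTrie::InitAllocatorsFromBuffers(Bufs);
  //   FunctionCallTrie Trie(A);
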
private:
  NodeArray Nodes;
  RootArray Roots;
  ShadowStackArray ShadowStack;
  NodeIdPairAllocatorType *NodeIdPairAllocator;
  uint32_t OverflowedFunctions;

public:
  explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
      : Nodes(*A.NodeAllocator),
        Roots(*A.RootAllocator),
        ShadowStack(*A.ShadowStackAllocator),
        NodeIdPairAllocator(A.NodeIdPairAllocator),
        OverflowedFunctions(0) {}

  FunctionCallTrie() = delete;
  FunctionCallTrie(const FunctionCallTrie &) = delete;
  FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;

  FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
      : Nodes(std::move(O.Nodes)),
        Roots(std::move(O.Roots)),
        ShadowStack(std::move(O.ShadowStack)),
        NodeIdPairAllocator(O.NodeIdPairAllocator),
        OverflowedFunctions(O.OverflowedFunctions) {}

  FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
    Nodes = std::move(O.Nodes);
    Roots = std::move(O.Roots);
    ShadowStack = std::move(O.ShadowStack);
    NodeIdPairAllocator = O.NodeIdPairAllocator;
    OverflowedFunctions = O.OverflowedFunctions;
    return *this;
  }

  ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}

  void enterFunction(const int32_t FId, uint64_t TSC,
                     uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    DCHECK_NE(FId, 0);

    // If we've already overflowed the function call stack, do not bother
    // attempting to record any more function entries.
    if (UNLIKELY(OverflowedFunctions)) {
      ++OverflowedFunctions;
      return;
    }

    // If this is the first function we've encountered, we want to set up the
    // node(s) and treat it as a root.
    if (UNLIKELY(ShadowStack.empty())) {
      auto *NewRoot = Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
      if (UNLIKELY(NewRoot == nullptr))
        return;
      if (Roots.AppendEmplace(NewRoot) == nullptr) {
        Nodes.trim(1);
        return;
      }
      if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
        Nodes.trim(1);
        Roots.trim(1);
        ++OverflowedFunctions;
        return;
      }
      return;
    }

    // From this point on, we require that the stack is not empty.
    DCHECK(!ShadowStack.empty());
    auto TopNode = ShadowStack.back().NodePtr;
    DCHECK_NE(TopNode, nullptr);

    // If we've seen this callee before, we access that node and place it on
    // top of the stack.
    auto *Callee = TopNode->Callees.find_element(
        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
    if (Callee != nullptr) {
      CHECK_NE(Callee->NodePtr, nullptr);
      if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
        ++OverflowedFunctions;
      return;
    }

    // This means we've never seen this stack before; create a new node here.
    auto *NewNode = Nodes.AppendEmplace(
        TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
    if (UNLIKELY(NewNode == nullptr))
      return;
    DCHECK_NE(NewNode, nullptr);
    TopNode->Callees.AppendEmplace(NewNode, FId);
    if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
      ++OverflowedFunctions;
    return;
  }

  void exitFunction(int32_t FId, uint64_t TSC,
                    uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    // If we're exiting functions that have "overflowed", i.e. did not fit
    // into the stack due to allocator constraints, we decrement that count
    // first.
    if (OverflowedFunctions) {
      --OverflowedFunctions;
      return;
    }

    // When we exit a function, we look up the ShadowStack to see whether we've
    // entered this function before. We do as little processing here as we can,
    // since most of the hard work would have already been done at function
    // entry.
    uint64_t CumulativeTreeTime = 0;

    while (!ShadowStack.empty()) {
      const auto &Top = ShadowStack.back();
      auto TopNode = Top.NodePtr;
      DCHECK_NE(TopNode, nullptr);

      // We may encounter overflow on the TSC we're provided, which may end up
      // being less than the TSC when we first entered the function.
      //
      // To get an accurate measurement of cycles, we need to check whether
      // we've overflowed (TSC < Top.EntryTSC), then account for the difference
      // between the entry TSC and the max for the TSC counter (max of
      // uint64_t), and add the value of TSC. We can prove that the maximum
      // delta we will get is at most the 64-bit unsigned value, since the
      // difference between a TSC of 0 and a Top.EntryTSC of 1 is
      // (numeric_limits<uint64_t>::max() - 1) + 1.
      //
      // NOTE: This assumes that TSCs are synchronised across CPUs.
      // TODO: Count the number of times we've seen CPU migrations.
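      //
      // As a made-up wrap-around example: with Top.EntryTSC == max - 10 and
      // TSC == 5, LocalTime below evaluates to (max - (max - 10)) + 5, i.e.
      // 15 TSC ticks.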
      uint64_t LocalTime =
          Top.EntryTSC > TSC
              ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC
              : TSC - Top.EntryTSC;
      TopNode->CallCount++;
      TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
      CumulativeTreeTime += LocalTime;
      ShadowStack.trim(1);

      // TODO: Update the histogram for the node.
      if (TopNode->FId == FId)
        break;
    }
  }

  const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }

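  // A sketch of consuming the trie from outside, mirroring the depth-first
  // pattern the member functions below use (the visit logic is a
  // placeholder):
  //
  //   for (const auto Root : Trie.getRoots()) {
  //     // ... visit Root->FId, Root->CallCount, Root->CumulativeLocalTime,
  //     // then walk Root->Callees (each element is a NodeIdPair) ...
  //   }
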
  // The deepCopyInto operation will update the provided FunctionCallTrie by
  // re-creating the contents of this particular FunctionCallTrie in the other
  // FunctionCallTrie. It will do this using a depth-first traversal from the
  // roots, recreating each node it visits in the provided FunctionCallTrie.
  //
  // This operation will *not* destroy the state in `O`, and thus may cause
  // some duplicate entries in `O` if it is not empty.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  //
  // This function must *not* be called with a non-empty FunctionCallTrie |O|.
  void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    DCHECK(O.getRoots().empty());

    // We push each root into a stack, to use as the parent marker for new
    // nodes we push in as we're traversing depth-first down the call tree.
    struct NodeAndParent {
      FunctionCallTrie::Node *Node;
      FunctionCallTrie::Node *NewNode;
    };
    using Stack = Array<NodeAndParent>;

    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      // Add a node in O for this root.
      auto NewRoot = O.Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount,
          Root->CumulativeLocalTime, Root->FId);

      // If we cannot allocate more memory, we bail out right away.
      if (UNLIKELY(NewRoot == nullptr))
        return;

      if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr))
        return;

      // TODO: Figure out what to do if we fail to allocate any more stack
      // space. Maybe warn or report once?
      if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr)
        return;
      while (!DFSStack.empty()) {
        NodeAndParent NP = DFSStack.back();
        DCHECK_NE(NP.Node, nullptr);
        DCHECK_NE(NP.NewNode, nullptr);
        DFSStack.trim(1);
        for (const auto Callee : NP.Node->Callees) {
          auto NewNode = O.Nodes.AppendEmplace(
              NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator),
              Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
              Callee.FId);
          if (UNLIKELY(NewNode == nullptr))
            return;
          if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) ==
                       nullptr))
            return;
          if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) ==
                       nullptr))
            return;
        }
      }
    }
  }

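  // A sketch of one plausible use: snapshotting a thread-local trie into a
  // fresh, empty trie (allocator setup elided; the names here are
  // illustrative):
  //
  //   FunctionCallTrie Snapshot(SnapshotAllocators);
  //   ThreadTrie.deepCopyInto(Snapshot); // Snapshot must start out empty.
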
  // The mergeInto operation will update the provided FunctionCallTrie by
  // traversing the current trie's roots and updating (i.e. merging) the data in
  // the nodes with the data in the target's nodes. If the node doesn't exist in
  // the provided trie, we add a new one in the right position, and inherit the
  // data from the original (current) trie, along with all its callees.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    struct NodeAndTarget {
      FunctionCallTrie::Node *OrigNode;
      FunctionCallTrie::Node *TargetNode;
    };
    using Stack = Array<NodeAndTarget>;
    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      Node *TargetRoot = nullptr;
      auto R = O.Roots.find_element(
          [&](const Node *Node) { return Node->FId == Root->FId; });
      if (R == nullptr) {
        TargetRoot = O.Nodes.AppendEmplace(
            nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
            Root->FId);
        if (UNLIKELY(TargetRoot == nullptr))
          return;

        O.Roots.Append(TargetRoot);
      } else {
        TargetRoot = *R;
      }

      DFSStack.AppendEmplace(Root, TargetRoot);
      while (!DFSStack.empty()) {
        NodeAndTarget NT = DFSStack.back();
        DCHECK_NE(NT.OrigNode, nullptr);
        DCHECK_NE(NT.TargetNode, nullptr);
        DFSStack.trim(1);
        // TODO: Update the histogram as well when we have it ready.
        NT.TargetNode->CallCount += NT.OrigNode->CallCount;
        NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
        for (const auto Callee : NT.OrigNode->Callees) {
          auto TargetCallee = NT.TargetNode->Callees.find_element(
              [&](const FunctionCallTrie::NodeIdPair &C) {
                return C.FId == Callee.FId;
              });
          if (TargetCallee == nullptr) {
            auto NewTargetNode = O.Nodes.AppendEmplace(
                NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
                Callee.FId);

            if (UNLIKELY(NewTargetNode == nullptr))
              return;

            TargetCallee =
                NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
          }
          DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
        }
      }
    }
  }
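
  // A sketch of aggregation, merging a hypothetical collection of per-thread
  // tries into one combined trie:
  //
  //   FunctionCallTrie Combined(CombinedAllocators);
  //   for (const auto &ThreadTrie : ThreadTries)
  //     ThreadTrie.mergeInto(Combined);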
};

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H