xref: /llvm-project/compiler-rt/lib/xray/xray_function_call_trie.h (revision cac7821438f625d6c8a36dd9363f9acd3a3d93de)
1980d93d0SDean Michael Berris //===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
2980d93d0SDean Michael Berris //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6980d93d0SDean Michael Berris //
7980d93d0SDean Michael Berris //===----------------------------------------------------------------------===//
8980d93d0SDean Michael Berris //
9980d93d0SDean Michael Berris // This file is a part of XRay, a dynamic runtime instrumentation system.
10980d93d0SDean Michael Berris //
11980d93d0SDean Michael Berris // This file defines the interface for a function call trie.
12980d93d0SDean Michael Berris //
13980d93d0SDean Michael Berris //===----------------------------------------------------------------------===//
14980d93d0SDean Michael Berris #ifndef XRAY_FUNCTION_CALL_TRIE_H
15980d93d0SDean Michael Berris #define XRAY_FUNCTION_CALL_TRIE_H
16980d93d0SDean Michael Berris 
1725d50595SDean Michael Berris #include "xray_buffer_queue.h"
18edf0f6a7SDean Michael Berris #include "xray_defs.h"
19cfd7eec3SDean Michael Berris #include "xray_profiling_flags.h"
20980d93d0SDean Michael Berris #include "xray_segmented_array.h"
211e255e7aSDean Michael Berris #include <limits>
221eb8c206SDean Michael Berris #include <memory> // For placement new.
230dd4f9f2SDean Michael Berris #include <utility>
24980d93d0SDean Michael Berris 
25980d93d0SDean Michael Berris namespace __xray {
26980d93d0SDean Michael Berris 
27980d93d0SDean Michael Berris /// A FunctionCallTrie represents the stack traces of XRay instrumented
28980d93d0SDean Michael Berris /// functions that we've encountered, where a node corresponds to a function and
29980d93d0SDean Michael Berris /// the path from the root to the node its stack trace. Each node in the trie
30980d93d0SDean Michael Berris /// will contain some useful values, including:
31980d93d0SDean Michael Berris ///
32980d93d0SDean Michael Berris ///   * The cumulative amount of time spent in this particular node/stack.
33980d93d0SDean Michael Berris ///   * The number of times this stack has appeared.
34980d93d0SDean Michael Berris ///   * A histogram of latencies for that particular node.
35980d93d0SDean Michael Berris ///
36980d93d0SDean Michael Berris /// Each node in the trie will also contain a list of callees, represented using
37980d93d0SDean Michael Berris /// a Array<NodeIdPair> -- each NodeIdPair instance will contain the function
38980d93d0SDean Michael Berris /// ID of the callee, and a pointer to the node.
39980d93d0SDean Michael Berris ///
40980d93d0SDean Michael Berris /// If we visualise this data structure, we'll find the following potential
41980d93d0SDean Michael Berris /// representation:
42980d93d0SDean Michael Berris ///
43980d93d0SDean Michael Berris ///   [function id node] -> [callees] [cumulative time]
44980d93d0SDean Michael Berris ///                         [call counter] [latency histogram]
45980d93d0SDean Michael Berris ///
46980d93d0SDean Michael Berris /// As an example, when we have a function in this pseudocode:
47980d93d0SDean Michael Berris ///
48980d93d0SDean Michael Berris ///   func f(N) {
49980d93d0SDean Michael Berris ///     g()
50980d93d0SDean Michael Berris ///     h()
51980d93d0SDean Michael Berris ///     for i := 1..N { j() }
52980d93d0SDean Michael Berris ///   }
53980d93d0SDean Michael Berris ///
54980d93d0SDean Michael Berris /// We may end up with a trie of the following form:
55980d93d0SDean Michael Berris ///
56980d93d0SDean Michael Berris ///   f -> [ g, h, j ] [...] [1] [...]
57980d93d0SDean Michael Berris ///   g -> [ ... ] [...] [1] [...]
58980d93d0SDean Michael Berris ///   h -> [ ... ] [...] [1] [...]
59980d93d0SDean Michael Berris ///   j -> [ ... ] [...] [N] [...]
60980d93d0SDean Michael Berris ///
61980d93d0SDean Michael Berris /// If for instance the function g() called j() like so:
62980d93d0SDean Michael Berris ///
63980d93d0SDean Michael Berris ///   func g() {
64980d93d0SDean Michael Berris ///     for i := 1..10 { j() }
65980d93d0SDean Michael Berris ///   }
66980d93d0SDean Michael Berris ///
67980d93d0SDean Michael Berris /// We'll find the following updated trie:
68980d93d0SDean Michael Berris ///
69980d93d0SDean Michael Berris ///   f -> [ g, h, j ] [...] [1] [...]
70980d93d0SDean Michael Berris ///   g -> [ j' ] [...] [1] [...]
71980d93d0SDean Michael Berris ///   h -> [ ... ] [...] [1] [...]
72980d93d0SDean Michael Berris ///   j -> [ ... ] [...] [N] [...]
73980d93d0SDean Michael Berris ///   j' -> [ ... ] [...] [10] [...]
74980d93d0SDean Michael Berris ///
75980d93d0SDean Michael Berris /// Note that we'll have a new node representing the path `f -> g -> j'` with
76980d93d0SDean Michael Berris /// isolated data. This isolation gives us a means of representing the stack
77980d93d0SDean Michael Berris /// traces as a path, as opposed to a key in a table. The alternative
78980d93d0SDean Michael Berris /// implementation here would be to use a separate table for the path, and use
79980d93d0SDean Michael Berris /// hashes of the path as an identifier to accumulate the information. We've
80980d93d0SDean Michael Berris /// moved away from this approach as it takes a lot of time to compute the hash
81980d93d0SDean Michael Berris /// every time we need to update a function's call information as we're handling
82980d93d0SDean Michael Berris /// the entry and exit events.
83980d93d0SDean Michael Berris ///
84980d93d0SDean Michael Berris /// This approach allows us to maintain a shadow stack, which represents the
85980d93d0SDean Michael Berris /// currently executing path, and on function exits quickly compute the amount
86980d93d0SDean Michael Berris /// of time elapsed from the entry, then update the counters for the node
87980d93d0SDean Michael Berris /// already represented in the trie. This necessitates an efficient
88980d93d0SDean Michael Berris /// representation of the various data structures (the list of callees must be
89980d93d0SDean Michael Berris /// cache-aware and efficient to look up, and the histogram must be compact and
90980d93d0SDean Michael Berris /// quick to update) to enable us to keep the overheads of this implementation
91980d93d0SDean Michael Berris /// to the minimum.
92980d93d0SDean Michael Berris class FunctionCallTrie {
93980d93d0SDean Michael Berris public:
94980d93d0SDean Michael Berris   struct Node;
95980d93d0SDean Michael Berris 
96980d93d0SDean Michael Berris   // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
97980d93d0SDean Michael Berris   // standard library types in this header.
98980d93d0SDean Michael Berris   struct NodeIdPair {
99980d93d0SDean Michael Berris     Node *NodePtr;
100980d93d0SDean Michael Berris     int32_t FId;
101980d93d0SDean Michael Berris   };
102980d93d0SDean Michael Berris 
103980d93d0SDean Michael Berris   using NodeIdPairArray = Array<NodeIdPair>;
104980d93d0SDean Michael Berris   using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;
105980d93d0SDean Michael Berris 
106980d93d0SDean Michael Berris   // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
107980d93d0SDean Michael Berris   // number of times this node actually appeared, the cumulative amount of time
108980d93d0SDean Michael Berris   // for this particular node including its children call times, and just the
109980d93d0SDean Michael Berris   // local time spent on this node. Each Node will have the ID of the XRay
110980d93d0SDean Michael Berris   // instrumented function that it is associated to.
111980d93d0SDean Michael Berris   struct Node {
112980d93d0SDean Michael Berris     Node *Parent;
113980d93d0SDean Michael Berris     NodeIdPairArray Callees;
1141e255e7aSDean Michael Berris     uint64_t CallCount;
1151e255e7aSDean Michael Berris     uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
116980d93d0SDean Michael Berris     int32_t FId;
117980d93d0SDean Michael Berris 
118980d93d0SDean Michael Berris     // TODO: Include the compact histogram.
119980d93d0SDean Michael Berris   };
120980d93d0SDean Michael Berris 
121980d93d0SDean Michael Berris private:
122980d93d0SDean Michael Berris   struct ShadowStackEntry {
123980d93d0SDean Michael Berris     uint64_t EntryTSC;
124980d93d0SDean Michael Berris     Node *NodePtr;
1251e255e7aSDean Michael Berris     uint16_t EntryCPU;
126980d93d0SDean Michael Berris   };
127980d93d0SDean Michael Berris 
128980d93d0SDean Michael Berris   using NodeArray = Array<Node>;
129980d93d0SDean Michael Berris   using RootArray = Array<Node *>;
130980d93d0SDean Michael Berris   using ShadowStackArray = Array<ShadowStackEntry>;
131980d93d0SDean Michael Berris 
132980d93d0SDean Michael Berris public:
133980d93d0SDean Michael Berris   // We collate the allocators we need into a single struct, as a convenience to
134980d93d0SDean Michael Berris   // allow us to initialize these as a group.
135980d93d0SDean Michael Berris   struct Allocators {
136980d93d0SDean Michael Berris     using NodeAllocatorType = NodeArray::AllocatorType;
137980d93d0SDean Michael Berris     using RootAllocatorType = RootArray::AllocatorType;
138980d93d0SDean Michael Berris     using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;
139980d93d0SDean Michael Berris 
140190c49bcSDean Michael Berris     // Use hosted aligned storage members to allow for trivial move and init.
141190c49bcSDean Michael Berris     // This also allows us to sidestep the potential-failing allocation issue.
142*cac78214SMarc Auberer     alignas(NodeAllocatorType) std::byte
143*cac78214SMarc Auberer         NodeAllocatorStorage[sizeof(NodeAllocatorType)];
144*cac78214SMarc Auberer     alignas(RootAllocatorType) std::byte
145*cac78214SMarc Auberer         RootAllocatorStorage[sizeof(RootAllocatorType)];
146*cac78214SMarc Auberer     alignas(ShadowStackAllocatorType) std::byte
147*cac78214SMarc Auberer         ShadowStackAllocatorStorage[sizeof(ShadowStackAllocatorType)];
148*cac78214SMarc Auberer     alignas(NodeIdPairAllocatorType) std::byte
149*cac78214SMarc Auberer         NodeIdPairAllocatorStorage[sizeof(NodeIdPairAllocatorType)];
150190c49bcSDean Michael Berris 
151980d93d0SDean Michael Berris     NodeAllocatorType *NodeAllocator = nullptr;
152980d93d0SDean Michael Berris     RootAllocatorType *RootAllocator = nullptr;
153980d93d0SDean Michael Berris     ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
154980d93d0SDean Michael Berris     NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;
155980d93d0SDean Michael Berris 
156190c49bcSDean Michael Berris     Allocators() = default;
157980d93d0SDean Michael Berris     Allocators(const Allocators &) = delete;
158980d93d0SDean Michael Berris     Allocators &operator=(const Allocators &) = delete;
159980d93d0SDean Michael Berris 
16025d50595SDean Michael Berris     struct Buffers {
16125d50595SDean Michael Berris       BufferQueue::Buffer NodeBuffer;
16225d50595SDean Michael Berris       BufferQueue::Buffer RootsBuffer;
16325d50595SDean Michael Berris       BufferQueue::Buffer ShadowStackBuffer;
16425d50595SDean Michael Berris       BufferQueue::Buffer NodeIdPairBuffer;
16525d50595SDean Michael Berris     };
16625d50595SDean Michael Berris 
AllocatorsAllocators16725d50595SDean Michael Berris     explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT {
16825d50595SDean Michael Berris       new (&NodeAllocatorStorage)
16925d50595SDean Michael Berris           NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size);
17025d50595SDean Michael Berris       NodeAllocator =
17125d50595SDean Michael Berris           reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
17225d50595SDean Michael Berris 
17325d50595SDean Michael Berris       new (&RootAllocatorStorage)
17425d50595SDean Michael Berris           RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size);
17525d50595SDean Michael Berris       RootAllocator =
17625d50595SDean Michael Berris           reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
17725d50595SDean Michael Berris 
17825d50595SDean Michael Berris       new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(
17925d50595SDean Michael Berris           B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size);
18025d50595SDean Michael Berris       ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
18125d50595SDean Michael Berris           &ShadowStackAllocatorStorage);
18225d50595SDean Michael Berris 
18325d50595SDean Michael Berris       new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(
18425d50595SDean Michael Berris           B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size);
18525d50595SDean Michael Berris       NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
18625d50595SDean Michael Berris           &NodeIdPairAllocatorStorage);
18725d50595SDean Michael Berris     }
18825d50595SDean Michael Berris 
AllocatorsAllocators189190c49bcSDean Michael Berris     explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
190190c49bcSDean Michael Berris       new (&NodeAllocatorStorage) NodeAllocatorType(Max);
191190c49bcSDean Michael Berris       NodeAllocator =
192190c49bcSDean Michael Berris           reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
193190c49bcSDean Michael Berris 
194190c49bcSDean Michael Berris       new (&RootAllocatorStorage) RootAllocatorType(Max);
195190c49bcSDean Michael Berris       RootAllocator =
196190c49bcSDean Michael Berris           reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
197190c49bcSDean Michael Berris 
198190c49bcSDean Michael Berris       new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
199190c49bcSDean Michael Berris       ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
200190c49bcSDean Michael Berris           &ShadowStackAllocatorStorage);
201190c49bcSDean Michael Berris 
202190c49bcSDean Michael Berris       new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
203190c49bcSDean Michael Berris       NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
204190c49bcSDean Michael Berris           &NodeIdPairAllocatorStorage);
205190c49bcSDean Michael Berris     }
206190c49bcSDean Michael Berris 
AllocatorsAllocators207190c49bcSDean Michael Berris     Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
208190c49bcSDean Michael Berris       // Here we rely on the safety of memcpy'ing contents of the storage
209190c49bcSDean Michael Berris       // members, and then pointing the source pointers to nullptr.
210190c49bcSDean Michael Berris       internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage,
211190c49bcSDean Michael Berris                       sizeof(NodeAllocatorType));
212190c49bcSDean Michael Berris       internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage,
213190c49bcSDean Michael Berris                       sizeof(RootAllocatorType));
214190c49bcSDean Michael Berris       internal_memcpy(&ShadowStackAllocatorStorage,
215190c49bcSDean Michael Berris                       &O.ShadowStackAllocatorStorage,
216190c49bcSDean Michael Berris                       sizeof(ShadowStackAllocatorType));
217190c49bcSDean Michael Berris       internal_memcpy(&NodeIdPairAllocatorStorage,
218190c49bcSDean Michael Berris                       &O.NodeIdPairAllocatorStorage,
219190c49bcSDean Michael Berris                       sizeof(NodeIdPairAllocatorType));
220190c49bcSDean Michael Berris 
221190c49bcSDean Michael Berris       NodeAllocator =
222190c49bcSDean Michael Berris           reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
223190c49bcSDean Michael Berris       RootAllocator =
224190c49bcSDean Michael Berris           reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
225190c49bcSDean Michael Berris       ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
226190c49bcSDean Michael Berris           &ShadowStackAllocatorStorage);
227190c49bcSDean Michael Berris       NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
228190c49bcSDean Michael Berris           &NodeIdPairAllocatorStorage);
229190c49bcSDean Michael Berris 
230980d93d0SDean Michael Berris       O.NodeAllocator = nullptr;
231980d93d0SDean Michael Berris       O.RootAllocator = nullptr;
232980d93d0SDean Michael Berris       O.ShadowStackAllocator = nullptr;
233980d93d0SDean Michael Berris       O.NodeIdPairAllocator = nullptr;
234980d93d0SDean Michael Berris     }
235980d93d0SDean Michael Berris 
236edf0f6a7SDean Michael Berris     Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
237190c49bcSDean Michael Berris       // When moving into an existing instance, we ensure that we clean up the
238190c49bcSDean Michael Berris       // current allocators.
239190c49bcSDean Michael Berris       if (NodeAllocator)
240190c49bcSDean Michael Berris         NodeAllocator->~NodeAllocatorType();
241190c49bcSDean Michael Berris       if (O.NodeAllocator) {
242190c49bcSDean Michael Berris         new (&NodeAllocatorStorage)
243190c49bcSDean Michael Berris             NodeAllocatorType(std::move(*O.NodeAllocator));
244190c49bcSDean Michael Berris         NodeAllocator =
245190c49bcSDean Michael Berris             reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
246190c49bcSDean Michael Berris         O.NodeAllocator = nullptr;
247190c49bcSDean Michael Berris       } else {
248190c49bcSDean Michael Berris         NodeAllocator = nullptr;
249980d93d0SDean Michael Berris       }
250190c49bcSDean Michael Berris 
251190c49bcSDean Michael Berris       if (RootAllocator)
252190c49bcSDean Michael Berris         RootAllocator->~RootAllocatorType();
253190c49bcSDean Michael Berris       if (O.RootAllocator) {
254190c49bcSDean Michael Berris         new (&RootAllocatorStorage)
255190c49bcSDean Michael Berris             RootAllocatorType(std::move(*O.RootAllocator));
256190c49bcSDean Michael Berris         RootAllocator =
257190c49bcSDean Michael Berris             reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
258190c49bcSDean Michael Berris         O.RootAllocator = nullptr;
259190c49bcSDean Michael Berris       } else {
260190c49bcSDean Michael Berris         RootAllocator = nullptr;
261980d93d0SDean Michael Berris       }
262190c49bcSDean Michael Berris 
263190c49bcSDean Michael Berris       if (ShadowStackAllocator)
264190c49bcSDean Michael Berris         ShadowStackAllocator->~ShadowStackAllocatorType();
265190c49bcSDean Michael Berris       if (O.ShadowStackAllocator) {
266190c49bcSDean Michael Berris         new (&ShadowStackAllocatorStorage)
267190c49bcSDean Michael Berris             ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator));
268190c49bcSDean Michael Berris         ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
269190c49bcSDean Michael Berris             &ShadowStackAllocatorStorage);
270190c49bcSDean Michael Berris         O.ShadowStackAllocator = nullptr;
271190c49bcSDean Michael Berris       } else {
272190c49bcSDean Michael Berris         ShadowStackAllocator = nullptr;
273980d93d0SDean Michael Berris       }
274190c49bcSDean Michael Berris 
275190c49bcSDean Michael Berris       if (NodeIdPairAllocator)
276190c49bcSDean Michael Berris         NodeIdPairAllocator->~NodeIdPairAllocatorType();
277190c49bcSDean Michael Berris       if (O.NodeIdPairAllocator) {
278190c49bcSDean Michael Berris         new (&NodeIdPairAllocatorStorage)
279190c49bcSDean Michael Berris             NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator));
280190c49bcSDean Michael Berris         NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
281190c49bcSDean Michael Berris             &NodeIdPairAllocatorStorage);
282190c49bcSDean Michael Berris         O.NodeIdPairAllocator = nullptr;
283190c49bcSDean Michael Berris       } else {
284190c49bcSDean Michael Berris         NodeIdPairAllocator = nullptr;
285980d93d0SDean Michael Berris       }
286190c49bcSDean Michael Berris 
287980d93d0SDean Michael Berris       return *this;
288980d93d0SDean Michael Berris     }
289980d93d0SDean Michael Berris 
~AllocatorsAllocators290edf0f6a7SDean Michael Berris     ~Allocators() XRAY_NEVER_INSTRUMENT {
291190c49bcSDean Michael Berris       if (NodeAllocator != nullptr)
292980d93d0SDean Michael Berris         NodeAllocator->~NodeAllocatorType();
293190c49bcSDean Michael Berris       if (RootAllocator != nullptr)
294980d93d0SDean Michael Berris         RootAllocator->~RootAllocatorType();
295190c49bcSDean Michael Berris       if (ShadowStackAllocator != nullptr)
296980d93d0SDean Michael Berris         ShadowStackAllocator->~ShadowStackAllocatorType();
297190c49bcSDean Michael Berris       if (NodeIdPairAllocator != nullptr)
298980d93d0SDean Michael Berris         NodeIdPairAllocator->~NodeIdPairAllocatorType();
299980d93d0SDean Michael Berris     }
300980d93d0SDean Michael Berris   };
301980d93d0SDean Michael Berris 
InitAllocators()302edf0f6a7SDean Michael Berris   static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
3030dd4f9f2SDean Michael Berris     return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
3040dd4f9f2SDean Michael Berris   }
3050dd4f9f2SDean Michael Berris 
InitAllocatorsCustom(uptr Max)306edf0f6a7SDean Michael Berris   static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
307190c49bcSDean Michael Berris     Allocators A(Max);
308980d93d0SDean Michael Berris     return A;
309980d93d0SDean Michael Berris   }
310980d93d0SDean Michael Berris 
31125d50595SDean Michael Berris   static Allocators
InitAllocatorsFromBuffers(Allocators::Buffers & Bufs)31225d50595SDean Michael Berris   InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT {
31325d50595SDean Michael Berris     Allocators A(Bufs);
31425d50595SDean Michael Berris     return A;
31525d50595SDean Michael Berris   }
31625d50595SDean Michael Berris 
317980d93d0SDean Michael Berris private:
318980d93d0SDean Michael Berris   NodeArray Nodes;
319980d93d0SDean Michael Berris   RootArray Roots;
320980d93d0SDean Michael Berris   ShadowStackArray ShadowStack;
321190c49bcSDean Michael Berris   NodeIdPairAllocatorType *NodeIdPairAllocator;
322190c49bcSDean Michael Berris   uint32_t OverflowedFunctions;
323980d93d0SDean Michael Berris 
324980d93d0SDean Michael Berris public:
FunctionCallTrie(const Allocators & A)325edf0f6a7SDean Michael Berris   explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
326edf0f6a7SDean Michael Berris       : Nodes(*A.NodeAllocator),
327edf0f6a7SDean Michael Berris         Roots(*A.RootAllocator),
3284719c524SDean Michael Berris         ShadowStack(*A.ShadowStackAllocator),
329190c49bcSDean Michael Berris         NodeIdPairAllocator(A.NodeIdPairAllocator),
330190c49bcSDean Michael Berris         OverflowedFunctions(0) {}
331190c49bcSDean Michael Berris 
332190c49bcSDean Michael Berris   FunctionCallTrie() = delete;
333190c49bcSDean Michael Berris   FunctionCallTrie(const FunctionCallTrie &) = delete;
334190c49bcSDean Michael Berris   FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;
335190c49bcSDean Michael Berris 
FunctionCallTrie(FunctionCallTrie && O)336190c49bcSDean Michael Berris   FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
337190c49bcSDean Michael Berris       : Nodes(std::move(O.Nodes)),
338190c49bcSDean Michael Berris         Roots(std::move(O.Roots)),
339190c49bcSDean Michael Berris         ShadowStack(std::move(O.ShadowStack)),
340190c49bcSDean Michael Berris         NodeIdPairAllocator(O.NodeIdPairAllocator),
341190c49bcSDean Michael Berris         OverflowedFunctions(O.OverflowedFunctions) {}
342190c49bcSDean Michael Berris 
343190c49bcSDean Michael Berris   FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
344190c49bcSDean Michael Berris     Nodes = std::move(O.Nodes);
345190c49bcSDean Michael Berris     Roots = std::move(O.Roots);
346190c49bcSDean Michael Berris     ShadowStack = std::move(O.ShadowStack);
347190c49bcSDean Michael Berris     NodeIdPairAllocator = O.NodeIdPairAllocator;
348190c49bcSDean Michael Berris     OverflowedFunctions = O.OverflowedFunctions;
349190c49bcSDean Michael Berris     return *this;
350190c49bcSDean Michael Berris   }
351190c49bcSDean Michael Berris 
~FunctionCallTrie()352190c49bcSDean Michael Berris   ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}
353980d93d0SDean Michael Berris 
enterFunction(const int32_t FId,uint64_t TSC,uint16_t CPU)3541e255e7aSDean Michael Berris   void enterFunction(const int32_t FId, uint64_t TSC,
3551e255e7aSDean Michael Berris                      uint16_t CPU) XRAY_NEVER_INSTRUMENT {
3560dd4f9f2SDean Michael Berris     DCHECK_NE(FId, 0);
35725d50595SDean Michael Berris 
35825d50595SDean Michael Berris     // If we're already overflowed the function call stack, do not bother
35925d50595SDean Michael Berris     // attempting to record any more function entries.
36025d50595SDean Michael Berris     if (UNLIKELY(OverflowedFunctions)) {
36125d50595SDean Michael Berris       ++OverflowedFunctions;
36225d50595SDean Michael Berris       return;
36325d50595SDean Michael Berris     }
36425d50595SDean Michael Berris 
36525d50595SDean Michael Berris     // If this is the first function we've encountered, we want to set up the
36625d50595SDean Michael Berris     // node(s) and treat it as a root.
367980d93d0SDean Michael Berris     if (UNLIKELY(ShadowStack.empty())) {
36825d50595SDean Michael Berris       auto *NewRoot = Nodes.AppendEmplace(
36925d50595SDean Michael Berris           nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
370980d93d0SDean Michael Berris       if (UNLIKELY(NewRoot == nullptr))
371980d93d0SDean Michael Berris         return;
37225d50595SDean Michael Berris       if (Roots.AppendEmplace(NewRoot) == nullptr) {
37325d50595SDean Michael Berris         Nodes.trim(1);
374190c49bcSDean Michael Berris         return;
37525d50595SDean Michael Berris       }
376190c49bcSDean Michael Berris       if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
37725d50595SDean Michael Berris         Nodes.trim(1);
378190c49bcSDean Michael Berris         Roots.trim(1);
379190c49bcSDean Michael Berris         ++OverflowedFunctions;
380190c49bcSDean Michael Berris         return;
381190c49bcSDean Michael Berris       }
382980d93d0SDean Michael Berris       return;
383980d93d0SDean Michael Berris     }
384980d93d0SDean Michael Berris 
38525d50595SDean Michael Berris     // From this point on, we require that the stack is not empty.
38625d50595SDean Michael Berris     DCHECK(!ShadowStack.empty());
38725d50595SDean Michael Berris     auto TopNode = ShadowStack.back().NodePtr;
3880dd4f9f2SDean Michael Berris     DCHECK_NE(TopNode, nullptr);
389980d93d0SDean Michael Berris 
39025d50595SDean Michael Berris     // If we've seen this callee before, then we access that node and place that
39125d50595SDean Michael Berris     // on the top of the stack.
39225d50595SDean Michael Berris     auto* Callee = TopNode->Callees.find_element(
393980d93d0SDean Michael Berris         [FId](const NodeIdPair &NR) { return NR.FId == FId; });
394980d93d0SDean Michael Berris     if (Callee != nullptr) {
395980d93d0SDean Michael Berris       CHECK_NE(Callee->NodePtr, nullptr);
396190c49bcSDean Michael Berris       if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
397190c49bcSDean Michael Berris         ++OverflowedFunctions;
398980d93d0SDean Michael Berris       return;
399980d93d0SDean Michael Berris     }
400980d93d0SDean Michael Berris 
401980d93d0SDean Michael Berris     // This means we've never seen this stack before, create a new node here.
40225d50595SDean Michael Berris     auto* NewNode = Nodes.AppendEmplace(
403190c49bcSDean Michael Berris         TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
404980d93d0SDean Michael Berris     if (UNLIKELY(NewNode == nullptr))
405980d93d0SDean Michael Berris       return;
4060dd4f9f2SDean Michael Berris     DCHECK_NE(NewNode, nullptr);
407980d93d0SDean Michael Berris     TopNode->Callees.AppendEmplace(NewNode, FId);
408190c49bcSDean Michael Berris     if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
409190c49bcSDean Michael Berris       ++OverflowedFunctions;
410980d93d0SDean Michael Berris     return;
411980d93d0SDean Michael Berris   }
412980d93d0SDean Michael Berris 
exitFunction(int32_t FId,uint64_t TSC,uint16_t CPU)4131e255e7aSDean Michael Berris   void exitFunction(int32_t FId, uint64_t TSC,
4141e255e7aSDean Michael Berris                     uint16_t CPU) XRAY_NEVER_INSTRUMENT {
415190c49bcSDean Michael Berris     // If we're exiting functions that have "overflowed" or don't fit into the
416190c49bcSDean Michael Berris     // stack due to allocator constraints, we then decrement that count first.
417190c49bcSDean Michael Berris     if (OverflowedFunctions) {
418190c49bcSDean Michael Berris       --OverflowedFunctions;
419190c49bcSDean Michael Berris       return;
420190c49bcSDean Michael Berris     }
421190c49bcSDean Michael Berris 
422980d93d0SDean Michael Berris     // When we exit a function, we look up the ShadowStack to see whether we've
423980d93d0SDean Michael Berris     // entered this function before. We do as little processing here as we can,
424980d93d0SDean Michael Berris     // since most of the hard work would have already been done at function
425980d93d0SDean Michael Berris     // entry.
426980d93d0SDean Michael Berris     uint64_t CumulativeTreeTime = 0;
427190c49bcSDean Michael Berris 
428980d93d0SDean Michael Berris     while (!ShadowStack.empty()) {
4290dd4f9f2SDean Michael Berris       const auto &Top = ShadowStack.back();
430980d93d0SDean Michael Berris       auto TopNode = Top.NodePtr;
4310dd4f9f2SDean Michael Berris       DCHECK_NE(TopNode, nullptr);
4321e255e7aSDean Michael Berris 
4331e255e7aSDean Michael Berris       // We may encounter overflow on the TSC we're provided, which may end up
4341e255e7aSDean Michael Berris       // being less than the TSC when we first entered the function.
4351e255e7aSDean Michael Berris       //
4361e255e7aSDean Michael Berris       // To get the accurate measurement of cycles, we need to check whether
4371e255e7aSDean Michael Berris       // we've overflowed (TSC < Top.EntryTSC) and then account the difference
4381e255e7aSDean Michael Berris       // between the entry TSC and the max for the TSC counter (max of uint64_t)
4391e255e7aSDean Michael Berris       // then add the value of TSC. We can prove that the maximum delta we will
4401e255e7aSDean Michael Berris       // get is at most the 64-bit unsigned value, since the difference between
4411e255e7aSDean Michael Berris       // a TSC of 0 and a Top.EntryTSC of 1 is (numeric_limits<uint64_t>::max()
4421e255e7aSDean Michael Berris       // - 1) + 1.
4431e255e7aSDean Michael Berris       //
4441e255e7aSDean Michael Berris       // NOTE: This assumes that TSCs are synchronised across CPUs.
4451e255e7aSDean Michael Berris       // TODO: Count the number of times we've seen CPU migrations.
4461e255e7aSDean Michael Berris       uint64_t LocalTime =
4471e255e7aSDean Michael Berris           Top.EntryTSC > TSC
4481e255e7aSDean Michael Berris               ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC
4491e255e7aSDean Michael Berris               : TSC - Top.EntryTSC;
450980d93d0SDean Michael Berris       TopNode->CallCount++;
451980d93d0SDean Michael Berris       TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
452980d93d0SDean Michael Berris       CumulativeTreeTime += LocalTime;
453980d93d0SDean Michael Berris       ShadowStack.trim(1);
454980d93d0SDean Michael Berris 
455980d93d0SDean Michael Berris       // TODO: Update the histogram for the node.
4560dd4f9f2SDean Michael Berris       if (TopNode->FId == FId)
457980d93d0SDean Michael Berris         break;
458980d93d0SDean Michael Berris     }
459980d93d0SDean Michael Berris   }
460980d93d0SDean Michael Berris 
getRoots()461edf0f6a7SDean Michael Berris   const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }
462980d93d0SDean Michael Berris 
463980d93d0SDean Michael Berris   // The deepCopyInto operation will update the provided FunctionCallTrie by
464980d93d0SDean Michael Berris   // re-creating the contents of this particular FunctionCallTrie in the other
465980d93d0SDean Michael Berris   // FunctionCallTrie. It will do this using a Depth First Traversal from the
466980d93d0SDean Michael Berris   // roots, and while doing so recreating the traversal in the provided
467980d93d0SDean Michael Berris   // FunctionCallTrie.
468980d93d0SDean Michael Berris   //
469980d93d0SDean Michael Berris   // This operation will *not* destroy the state in `O`, and thus may cause some
470980d93d0SDean Michael Berris   // duplicate entries in `O` if it is not empty.
471980d93d0SDean Michael Berris   //
472980d93d0SDean Michael Berris   // This function is *not* thread-safe, and may require external
473980d93d0SDean Michael Berris   // synchronisation of both "this" and |O|.
474980d93d0SDean Michael Berris   //
475980d93d0SDean Michael Berris   // This function must *not* be called with a non-empty FunctionCallTrie |O|.
deepCopyInto(FunctionCallTrie & O)476edf0f6a7SDean Michael Berris   void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
477980d93d0SDean Michael Berris     DCHECK(O.getRoots().empty());
478980d93d0SDean Michael Berris 
479980d93d0SDean Michael Berris     // We then push the root into a stack, to use as the parent marker for new
480980d93d0SDean Michael Berris     // nodes we push in as we're traversing depth-first down the call tree.
481980d93d0SDean Michael Berris     struct NodeAndParent {
482980d93d0SDean Michael Berris       FunctionCallTrie::Node *Node;
483980d93d0SDean Michael Berris       FunctionCallTrie::Node *NewNode;
484980d93d0SDean Michael Berris     };
485980d93d0SDean Michael Berris     using Stack = Array<NodeAndParent>;
486980d93d0SDean Michael Berris 
487980d93d0SDean Michael Berris     typename Stack::AllocatorType StackAllocator(
4889d6b7a5fSDean Michael Berris         profilingFlags()->stack_allocator_max);
4894719c524SDean Michael Berris     Stack DFSStack(StackAllocator);
490980d93d0SDean Michael Berris 
4910dd4f9f2SDean Michael Berris     for (const auto Root : getRoots()) {
4920dd4f9f2SDean Michael Berris       // Add a node in O for this root.
4930dd4f9f2SDean Michael Berris       auto NewRoot = O.Nodes.AppendEmplace(
494190c49bcSDean Michael Berris           nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount,
4950dd4f9f2SDean Michael Berris           Root->CumulativeLocalTime, Root->FId);
4960dd4f9f2SDean Michael Berris 
4970dd4f9f2SDean Michael Berris       // Because we cannot allocate more memory we should bail out right away.
4980dd4f9f2SDean Michael Berris       if (UNLIKELY(NewRoot == nullptr))
4990dd4f9f2SDean Michael Berris         return;
5000dd4f9f2SDean Michael Berris 
50125d50595SDean Michael Berris       if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr))
50225d50595SDean Michael Berris         return;
5030dd4f9f2SDean Michael Berris 
504980d93d0SDean Michael Berris       // TODO: Figure out what to do if we fail to allocate any more stack
505980d93d0SDean Michael Berris       // space. Maybe warn or report once?
50625d50595SDean Michael Berris       if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr)
50725d50595SDean Michael Berris         return;
508980d93d0SDean Michael Berris       while (!DFSStack.empty()) {
509980d93d0SDean Michael Berris         NodeAndParent NP = DFSStack.back();
510980d93d0SDean Michael Berris         DCHECK_NE(NP.Node, nullptr);
511980d93d0SDean Michael Berris         DCHECK_NE(NP.NewNode, nullptr);
512980d93d0SDean Michael Berris         DFSStack.trim(1);
513980d93d0SDean Michael Berris         for (const auto Callee : NP.Node->Callees) {
514980d93d0SDean Michael Berris           auto NewNode = O.Nodes.AppendEmplace(
515190c49bcSDean Michael Berris               NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator),
516190c49bcSDean Michael Berris               Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
517190c49bcSDean Michael Berris               Callee.FId);
5180dd4f9f2SDean Michael Berris           if (UNLIKELY(NewNode == nullptr))
5190dd4f9f2SDean Michael Berris             return;
52025d50595SDean Michael Berris           if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) ==
52125d50595SDean Michael Berris                        nullptr))
52225d50595SDean Michael Berris             return;
52325d50595SDean Michael Berris           if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) ==
52425d50595SDean Michael Berris                        nullptr))
52525d50595SDean Michael Berris             return;
526980d93d0SDean Michael Berris         }
527980d93d0SDean Michael Berris       }
528980d93d0SDean Michael Berris     }
529980d93d0SDean Michael Berris   }
530980d93d0SDean Michael Berris 
531980d93d0SDean Michael Berris   // The mergeInto operation will update the provided FunctionCallTrie by
532980d93d0SDean Michael Berris   // traversing the current trie's roots and updating (i.e. merging) the data in
533980d93d0SDean Michael Berris   // the nodes with the data in the target's nodes. If the node doesn't exist in
534980d93d0SDean Michael Berris   // the provided trie, we add a new one in the right position, and inherit the
535980d93d0SDean Michael Berris   // data from the original (current) trie, along with all its callees.
536980d93d0SDean Michael Berris   //
537980d93d0SDean Michael Berris   // This function is *not* thread-safe, and may require external
538980d93d0SDean Michael Berris   // synchronisation of both "this" and |O|.
mergeInto(FunctionCallTrie & O)539edf0f6a7SDean Michael Berris   void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
540980d93d0SDean Michael Berris     struct NodeAndTarget {
541980d93d0SDean Michael Berris       FunctionCallTrie::Node *OrigNode;
542980d93d0SDean Michael Berris       FunctionCallTrie::Node *TargetNode;
543980d93d0SDean Michael Berris     };
544980d93d0SDean Michael Berris     using Stack = Array<NodeAndTarget>;
545980d93d0SDean Michael Berris     typename Stack::AllocatorType StackAllocator(
5469d6b7a5fSDean Michael Berris         profilingFlags()->stack_allocator_max);
5474719c524SDean Michael Berris     Stack DFSStack(StackAllocator);
548980d93d0SDean Michael Berris 
549980d93d0SDean Michael Berris     for (const auto Root : getRoots()) {
550980d93d0SDean Michael Berris       Node *TargetRoot = nullptr;
551980d93d0SDean Michael Berris       auto R = O.Roots.find_element(
552980d93d0SDean Michael Berris           [&](const Node *Node) { return Node->FId == Root->FId; });
553980d93d0SDean Michael Berris       if (R == nullptr) {
554190c49bcSDean Michael Berris         TargetRoot = O.Nodes.AppendEmplace(
555190c49bcSDean Michael Berris             nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
556190c49bcSDean Michael Berris             Root->FId);
5570dd4f9f2SDean Michael Berris         if (UNLIKELY(TargetRoot == nullptr))
5580dd4f9f2SDean Michael Berris           return;
5590dd4f9f2SDean Michael Berris 
560980d93d0SDean Michael Berris         O.Roots.Append(TargetRoot);
561980d93d0SDean Michael Berris       } else {
562980d93d0SDean Michael Berris         TargetRoot = *R;
563980d93d0SDean Michael Berris       }
564980d93d0SDean Michael Berris 
565190c49bcSDean Michael Berris       DFSStack.AppendEmplace(Root, TargetRoot);
566980d93d0SDean Michael Berris       while (!DFSStack.empty()) {
567980d93d0SDean Michael Berris         NodeAndTarget NT = DFSStack.back();
568980d93d0SDean Michael Berris         DCHECK_NE(NT.OrigNode, nullptr);
569980d93d0SDean Michael Berris         DCHECK_NE(NT.TargetNode, nullptr);
570980d93d0SDean Michael Berris         DFSStack.trim(1);
571980d93d0SDean Michael Berris         // TODO: Update the histogram as well when we have it ready.
572980d93d0SDean Michael Berris         NT.TargetNode->CallCount += NT.OrigNode->CallCount;
573980d93d0SDean Michael Berris         NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
574980d93d0SDean Michael Berris         for (const auto Callee : NT.OrigNode->Callees) {
575980d93d0SDean Michael Berris           auto TargetCallee = NT.TargetNode->Callees.find_element(
576980d93d0SDean Michael Berris               [&](const FunctionCallTrie::NodeIdPair &C) {
577980d93d0SDean Michael Berris                 return C.FId == Callee.FId;
578980d93d0SDean Michael Berris               });
579980d93d0SDean Michael Berris           if (TargetCallee == nullptr) {
5804719c524SDean Michael Berris             auto NewTargetNode = O.Nodes.AppendEmplace(
581190c49bcSDean Michael Berris                 NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
582190c49bcSDean Michael Berris                 Callee.FId);
5830dd4f9f2SDean Michael Berris 
5840dd4f9f2SDean Michael Berris             if (UNLIKELY(NewTargetNode == nullptr))
5850dd4f9f2SDean Michael Berris               return;
5860dd4f9f2SDean Michael Berris 
587980d93d0SDean Michael Berris             TargetCallee =
588980d93d0SDean Michael Berris                 NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
589980d93d0SDean Michael Berris           }
5900dd4f9f2SDean Michael Berris           DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
591980d93d0SDean Michael Berris         }
592980d93d0SDean Michael Berris       }
593980d93d0SDean Michael Berris     }
594980d93d0SDean Michael Berris   }
595980d93d0SDean Michael Berris };
596980d93d0SDean Michael Berris 
597980d93d0SDean Michael Berris } // namespace __xray
598980d93d0SDean Michael Berris 
599980d93d0SDean Michael Berris #endif // XRAY_FUNCTION_CALL_TRIE_H
600