1980d93d0SDean Michael Berris //===-- xray_function_call_trie.h ------------------------------*- C++ -*-===// 2980d93d0SDean Michael Berris // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6980d93d0SDean Michael Berris // 7980d93d0SDean Michael Berris //===----------------------------------------------------------------------===// 8980d93d0SDean Michael Berris // 9980d93d0SDean Michael Berris // This file is a part of XRay, a dynamic runtime instrumentation system. 10980d93d0SDean Michael Berris // 11980d93d0SDean Michael Berris // This file defines the interface for a function call trie. 12980d93d0SDean Michael Berris // 13980d93d0SDean Michael Berris //===----------------------------------------------------------------------===// 14980d93d0SDean Michael Berris #ifndef XRAY_FUNCTION_CALL_TRIE_H 15980d93d0SDean Michael Berris #define XRAY_FUNCTION_CALL_TRIE_H 16980d93d0SDean Michael Berris 1725d50595SDean Michael Berris #include "xray_buffer_queue.h" 18edf0f6a7SDean Michael Berris #include "xray_defs.h" 19cfd7eec3SDean Michael Berris #include "xray_profiling_flags.h" 20980d93d0SDean Michael Berris #include "xray_segmented_array.h" 211e255e7aSDean Michael Berris #include <limits> 221eb8c206SDean Michael Berris #include <memory> // For placement new. 230dd4f9f2SDean Michael Berris #include <utility> 24980d93d0SDean Michael Berris 25980d93d0SDean Michael Berris namespace __xray { 26980d93d0SDean Michael Berris 27980d93d0SDean Michael Berris /// A FunctionCallTrie represents the stack traces of XRay instrumented 28980d93d0SDean Michael Berris /// functions that we've encountered, where a node corresponds to a function and 29980d93d0SDean Michael Berris /// the path from the root to the node its stack trace. Each node in the trie 30980d93d0SDean Michael Berris /// will contain some useful values, including: 31980d93d0SDean Michael Berris /// 32980d93d0SDean Michael Berris /// * The cumulative amount of time spent in this particular node/stack. 33980d93d0SDean Michael Berris /// * The number of times this stack has appeared. 34980d93d0SDean Michael Berris /// * A histogram of latencies for that particular node. 35980d93d0SDean Michael Berris /// 36980d93d0SDean Michael Berris /// Each node in the trie will also contain a list of callees, represented using 37980d93d0SDean Michael Berris /// a Array<NodeIdPair> -- each NodeIdPair instance will contain the function 38980d93d0SDean Michael Berris /// ID of the callee, and a pointer to the node. 39980d93d0SDean Michael Berris /// 40980d93d0SDean Michael Berris /// If we visualise this data structure, we'll find the following potential 41980d93d0SDean Michael Berris /// representation: 42980d93d0SDean Michael Berris /// 43980d93d0SDean Michael Berris /// [function id node] -> [callees] [cumulative time] 44980d93d0SDean Michael Berris /// [call counter] [latency histogram] 45980d93d0SDean Michael Berris /// 46980d93d0SDean Michael Berris /// As an example, when we have a function in this pseudocode: 47980d93d0SDean Michael Berris /// 48980d93d0SDean Michael Berris /// func f(N) { 49980d93d0SDean Michael Berris /// g() 50980d93d0SDean Michael Berris /// h() 51980d93d0SDean Michael Berris /// for i := 1..N { j() } 52980d93d0SDean Michael Berris /// } 53980d93d0SDean Michael Berris /// 54980d93d0SDean Michael Berris /// We may end up with a trie of the following form: 55980d93d0SDean Michael Berris /// 56980d93d0SDean Michael Berris /// f -> [ g, h, j ] [...] [1] [...] 57980d93d0SDean Michael Berris /// g -> [ ... ] [...] [1] [...] 58980d93d0SDean Michael Berris /// h -> [ ... ] [...] [1] [...] 59980d93d0SDean Michael Berris /// j -> [ ... ] [...] [N] [...] 60980d93d0SDean Michael Berris /// 61980d93d0SDean Michael Berris /// If for instance the function g() called j() like so: 62980d93d0SDean Michael Berris /// 63980d93d0SDean Michael Berris /// func g() { 64980d93d0SDean Michael Berris /// for i := 1..10 { j() } 65980d93d0SDean Michael Berris /// } 66980d93d0SDean Michael Berris /// 67980d93d0SDean Michael Berris /// We'll find the following updated trie: 68980d93d0SDean Michael Berris /// 69980d93d0SDean Michael Berris /// f -> [ g, h, j ] [...] [1] [...] 70980d93d0SDean Michael Berris /// g -> [ j' ] [...] [1] [...] 71980d93d0SDean Michael Berris /// h -> [ ... ] [...] [1] [...] 72980d93d0SDean Michael Berris /// j -> [ ... ] [...] [N] [...] 73980d93d0SDean Michael Berris /// j' -> [ ... ] [...] [10] [...] 74980d93d0SDean Michael Berris /// 75980d93d0SDean Michael Berris /// Note that we'll have a new node representing the path `f -> g -> j'` with 76980d93d0SDean Michael Berris /// isolated data. This isolation gives us a means of representing the stack 77980d93d0SDean Michael Berris /// traces as a path, as opposed to a key in a table. The alternative 78980d93d0SDean Michael Berris /// implementation here would be to use a separate table for the path, and use 79980d93d0SDean Michael Berris /// hashes of the path as an identifier to accumulate the information. We've 80980d93d0SDean Michael Berris /// moved away from this approach as it takes a lot of time to compute the hash 81980d93d0SDean Michael Berris /// every time we need to update a function's call information as we're handling 82980d93d0SDean Michael Berris /// the entry and exit events. 83980d93d0SDean Michael Berris /// 84980d93d0SDean Michael Berris /// This approach allows us to maintain a shadow stack, which represents the 85980d93d0SDean Michael Berris /// currently executing path, and on function exits quickly compute the amount 86980d93d0SDean Michael Berris /// of time elapsed from the entry, then update the counters for the node 87980d93d0SDean Michael Berris /// already represented in the trie. This necessitates an efficient 88980d93d0SDean Michael Berris /// representation of the various data structures (the list of callees must be 89980d93d0SDean Michael Berris /// cache-aware and efficient to look up, and the histogram must be compact and 90980d93d0SDean Michael Berris /// quick to update) to enable us to keep the overheads of this implementation 91980d93d0SDean Michael Berris /// to the minimum. 92980d93d0SDean Michael Berris class FunctionCallTrie { 93980d93d0SDean Michael Berris public: 94980d93d0SDean Michael Berris struct Node; 95980d93d0SDean Michael Berris 96980d93d0SDean Michael Berris // We use a NodeIdPair type instead of a std::pair<...> to not rely on the 97980d93d0SDean Michael Berris // standard library types in this header. 98980d93d0SDean Michael Berris struct NodeIdPair { 99980d93d0SDean Michael Berris Node *NodePtr; 100980d93d0SDean Michael Berris int32_t FId; 101980d93d0SDean Michael Berris }; 102980d93d0SDean Michael Berris 103980d93d0SDean Michael Berris using NodeIdPairArray = Array<NodeIdPair>; 104980d93d0SDean Michael Berris using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType; 105980d93d0SDean Michael Berris 106980d93d0SDean Michael Berris // A Node in the FunctionCallTrie gives us a list of callees, the cumulative 107980d93d0SDean Michael Berris // number of times this node actually appeared, the cumulative amount of time 108980d93d0SDean Michael Berris // for this particular node including its children call times, and just the 109980d93d0SDean Michael Berris // local time spent on this node. Each Node will have the ID of the XRay 110980d93d0SDean Michael Berris // instrumented function that it is associated to. 111980d93d0SDean Michael Berris struct Node { 112980d93d0SDean Michael Berris Node *Parent; 113980d93d0SDean Michael Berris NodeIdPairArray Callees; 1141e255e7aSDean Michael Berris uint64_t CallCount; 1151e255e7aSDean Michael Berris uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time. 116980d93d0SDean Michael Berris int32_t FId; 117980d93d0SDean Michael Berris 118980d93d0SDean Michael Berris // TODO: Include the compact histogram. 119980d93d0SDean Michael Berris }; 120980d93d0SDean Michael Berris 121980d93d0SDean Michael Berris private: 122980d93d0SDean Michael Berris struct ShadowStackEntry { 123980d93d0SDean Michael Berris uint64_t EntryTSC; 124980d93d0SDean Michael Berris Node *NodePtr; 1251e255e7aSDean Michael Berris uint16_t EntryCPU; 126980d93d0SDean Michael Berris }; 127980d93d0SDean Michael Berris 128980d93d0SDean Michael Berris using NodeArray = Array<Node>; 129980d93d0SDean Michael Berris using RootArray = Array<Node *>; 130980d93d0SDean Michael Berris using ShadowStackArray = Array<ShadowStackEntry>; 131980d93d0SDean Michael Berris 132980d93d0SDean Michael Berris public: 133980d93d0SDean Michael Berris // We collate the allocators we need into a single struct, as a convenience to 134980d93d0SDean Michael Berris // allow us to initialize these as a group. 135980d93d0SDean Michael Berris struct Allocators { 136980d93d0SDean Michael Berris using NodeAllocatorType = NodeArray::AllocatorType; 137980d93d0SDean Michael Berris using RootAllocatorType = RootArray::AllocatorType; 138980d93d0SDean Michael Berris using ShadowStackAllocatorType = ShadowStackArray::AllocatorType; 139980d93d0SDean Michael Berris 140190c49bcSDean Michael Berris // Use hosted aligned storage members to allow for trivial move and init. 141190c49bcSDean Michael Berris // This also allows us to sidestep the potential-failing allocation issue. 142*cac78214SMarc Auberer alignas(NodeAllocatorType) std::byte 143*cac78214SMarc Auberer NodeAllocatorStorage[sizeof(NodeAllocatorType)]; 144*cac78214SMarc Auberer alignas(RootAllocatorType) std::byte 145*cac78214SMarc Auberer RootAllocatorStorage[sizeof(RootAllocatorType)]; 146*cac78214SMarc Auberer alignas(ShadowStackAllocatorType) std::byte 147*cac78214SMarc Auberer ShadowStackAllocatorStorage[sizeof(ShadowStackAllocatorType)]; 148*cac78214SMarc Auberer alignas(NodeIdPairAllocatorType) std::byte 149*cac78214SMarc Auberer NodeIdPairAllocatorStorage[sizeof(NodeIdPairAllocatorType)]; 150190c49bcSDean Michael Berris 151980d93d0SDean Michael Berris NodeAllocatorType *NodeAllocator = nullptr; 152980d93d0SDean Michael Berris RootAllocatorType *RootAllocator = nullptr; 153980d93d0SDean Michael Berris ShadowStackAllocatorType *ShadowStackAllocator = nullptr; 154980d93d0SDean Michael Berris NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr; 155980d93d0SDean Michael Berris 156190c49bcSDean Michael Berris Allocators() = default; 157980d93d0SDean Michael Berris Allocators(const Allocators &) = delete; 158980d93d0SDean Michael Berris Allocators &operator=(const Allocators &) = delete; 159980d93d0SDean Michael Berris 16025d50595SDean Michael Berris struct Buffers { 16125d50595SDean Michael Berris BufferQueue::Buffer NodeBuffer; 16225d50595SDean Michael Berris BufferQueue::Buffer RootsBuffer; 16325d50595SDean Michael Berris BufferQueue::Buffer ShadowStackBuffer; 16425d50595SDean Michael Berris BufferQueue::Buffer NodeIdPairBuffer; 16525d50595SDean Michael Berris }; 16625d50595SDean Michael Berris AllocatorsAllocators16725d50595SDean Michael Berris explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT { 16825d50595SDean Michael Berris new (&NodeAllocatorStorage) 16925d50595SDean Michael Berris NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size); 17025d50595SDean Michael Berris NodeAllocator = 17125d50595SDean Michael Berris reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); 17225d50595SDean Michael Berris 17325d50595SDean Michael Berris new (&RootAllocatorStorage) 17425d50595SDean Michael Berris RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size); 17525d50595SDean Michael Berris RootAllocator = 17625d50595SDean Michael Berris reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); 17725d50595SDean Michael Berris 17825d50595SDean Michael Berris new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType( 17925d50595SDean Michael Berris B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size); 18025d50595SDean Michael Berris ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( 18125d50595SDean Michael Berris &ShadowStackAllocatorStorage); 18225d50595SDean Michael Berris 18325d50595SDean Michael Berris new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType( 18425d50595SDean Michael Berris B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size); 18525d50595SDean Michael Berris NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( 18625d50595SDean Michael Berris &NodeIdPairAllocatorStorage); 18725d50595SDean Michael Berris } 18825d50595SDean Michael Berris AllocatorsAllocators189190c49bcSDean Michael Berris explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT { 190190c49bcSDean Michael Berris new (&NodeAllocatorStorage) NodeAllocatorType(Max); 191190c49bcSDean Michael Berris NodeAllocator = 192190c49bcSDean Michael Berris reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); 193190c49bcSDean Michael Berris 194190c49bcSDean Michael Berris new (&RootAllocatorStorage) RootAllocatorType(Max); 195190c49bcSDean Michael Berris RootAllocator = 196190c49bcSDean Michael Berris reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); 197190c49bcSDean Michael Berris 198190c49bcSDean Michael Berris new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max); 199190c49bcSDean Michael Berris ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( 200190c49bcSDean Michael Berris &ShadowStackAllocatorStorage); 201190c49bcSDean Michael Berris 202190c49bcSDean Michael Berris new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max); 203190c49bcSDean Michael Berris NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( 204190c49bcSDean Michael Berris &NodeIdPairAllocatorStorage); 205190c49bcSDean Michael Berris } 206190c49bcSDean Michael Berris AllocatorsAllocators207190c49bcSDean Michael Berris Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT { 208190c49bcSDean Michael Berris // Here we rely on the safety of memcpy'ing contents of the storage 209190c49bcSDean Michael Berris // members, and then pointing the source pointers to nullptr. 210190c49bcSDean Michael Berris internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage, 211190c49bcSDean Michael Berris sizeof(NodeAllocatorType)); 212190c49bcSDean Michael Berris internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage, 213190c49bcSDean Michael Berris sizeof(RootAllocatorType)); 214190c49bcSDean Michael Berris internal_memcpy(&ShadowStackAllocatorStorage, 215190c49bcSDean Michael Berris &O.ShadowStackAllocatorStorage, 216190c49bcSDean Michael Berris sizeof(ShadowStackAllocatorType)); 217190c49bcSDean Michael Berris internal_memcpy(&NodeIdPairAllocatorStorage, 218190c49bcSDean Michael Berris &O.NodeIdPairAllocatorStorage, 219190c49bcSDean Michael Berris sizeof(NodeIdPairAllocatorType)); 220190c49bcSDean Michael Berris 221190c49bcSDean Michael Berris NodeAllocator = 222190c49bcSDean Michael Berris reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); 223190c49bcSDean Michael Berris RootAllocator = 224190c49bcSDean Michael Berris reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); 225190c49bcSDean Michael Berris ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( 226190c49bcSDean Michael Berris &ShadowStackAllocatorStorage); 227190c49bcSDean Michael Berris NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( 228190c49bcSDean Michael Berris &NodeIdPairAllocatorStorage); 229190c49bcSDean Michael Berris 230980d93d0SDean Michael Berris O.NodeAllocator = nullptr; 231980d93d0SDean Michael Berris O.RootAllocator = nullptr; 232980d93d0SDean Michael Berris O.ShadowStackAllocator = nullptr; 233980d93d0SDean Michael Berris O.NodeIdPairAllocator = nullptr; 234980d93d0SDean Michael Berris } 235980d93d0SDean Michael Berris 236edf0f6a7SDean Michael Berris Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT { 237190c49bcSDean Michael Berris // When moving into an existing instance, we ensure that we clean up the 238190c49bcSDean Michael Berris // current allocators. 239190c49bcSDean Michael Berris if (NodeAllocator) 240190c49bcSDean Michael Berris NodeAllocator->~NodeAllocatorType(); 241190c49bcSDean Michael Berris if (O.NodeAllocator) { 242190c49bcSDean Michael Berris new (&NodeAllocatorStorage) 243190c49bcSDean Michael Berris NodeAllocatorType(std::move(*O.NodeAllocator)); 244190c49bcSDean Michael Berris NodeAllocator = 245190c49bcSDean Michael Berris reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); 246190c49bcSDean Michael Berris O.NodeAllocator = nullptr; 247190c49bcSDean Michael Berris } else { 248190c49bcSDean Michael Berris NodeAllocator = nullptr; 249980d93d0SDean Michael Berris } 250190c49bcSDean Michael Berris 251190c49bcSDean Michael Berris if (RootAllocator) 252190c49bcSDean Michael Berris RootAllocator->~RootAllocatorType(); 253190c49bcSDean Michael Berris if (O.RootAllocator) { 254190c49bcSDean Michael Berris new (&RootAllocatorStorage) 255190c49bcSDean Michael Berris RootAllocatorType(std::move(*O.RootAllocator)); 256190c49bcSDean Michael Berris RootAllocator = 257190c49bcSDean Michael Berris reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); 258190c49bcSDean Michael Berris O.RootAllocator = nullptr; 259190c49bcSDean Michael Berris } else { 260190c49bcSDean Michael Berris RootAllocator = nullptr; 261980d93d0SDean Michael Berris } 262190c49bcSDean Michael Berris 263190c49bcSDean Michael Berris if (ShadowStackAllocator) 264190c49bcSDean Michael Berris ShadowStackAllocator->~ShadowStackAllocatorType(); 265190c49bcSDean Michael Berris if (O.ShadowStackAllocator) { 266190c49bcSDean Michael Berris new (&ShadowStackAllocatorStorage) 267190c49bcSDean Michael Berris ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator)); 268190c49bcSDean Michael Berris ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( 269190c49bcSDean Michael Berris &ShadowStackAllocatorStorage); 270190c49bcSDean Michael Berris O.ShadowStackAllocator = nullptr; 271190c49bcSDean Michael Berris } else { 272190c49bcSDean Michael Berris ShadowStackAllocator = nullptr; 273980d93d0SDean Michael Berris } 274190c49bcSDean Michael Berris 275190c49bcSDean Michael Berris if (NodeIdPairAllocator) 276190c49bcSDean Michael Berris NodeIdPairAllocator->~NodeIdPairAllocatorType(); 277190c49bcSDean Michael Berris if (O.NodeIdPairAllocator) { 278190c49bcSDean Michael Berris new (&NodeIdPairAllocatorStorage) 279190c49bcSDean Michael Berris NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator)); 280190c49bcSDean Michael Berris NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( 281190c49bcSDean Michael Berris &NodeIdPairAllocatorStorage); 282190c49bcSDean Michael Berris O.NodeIdPairAllocator = nullptr; 283190c49bcSDean Michael Berris } else { 284190c49bcSDean Michael Berris NodeIdPairAllocator = nullptr; 285980d93d0SDean Michael Berris } 286190c49bcSDean Michael Berris 287980d93d0SDean Michael Berris return *this; 288980d93d0SDean Michael Berris } 289980d93d0SDean Michael Berris ~AllocatorsAllocators290edf0f6a7SDean Michael Berris ~Allocators() XRAY_NEVER_INSTRUMENT { 291190c49bcSDean Michael Berris if (NodeAllocator != nullptr) 292980d93d0SDean Michael Berris NodeAllocator->~NodeAllocatorType(); 293190c49bcSDean Michael Berris if (RootAllocator != nullptr) 294980d93d0SDean Michael Berris RootAllocator->~RootAllocatorType(); 295190c49bcSDean Michael Berris if (ShadowStackAllocator != nullptr) 296980d93d0SDean Michael Berris ShadowStackAllocator->~ShadowStackAllocatorType(); 297190c49bcSDean Michael Berris if (NodeIdPairAllocator != nullptr) 298980d93d0SDean Michael Berris NodeIdPairAllocator->~NodeIdPairAllocatorType(); 299980d93d0SDean Michael Berris } 300980d93d0SDean Michael Berris }; 301980d93d0SDean Michael Berris InitAllocators()302edf0f6a7SDean Michael Berris static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT { 3030dd4f9f2SDean Michael Berris return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max); 3040dd4f9f2SDean Michael Berris } 3050dd4f9f2SDean Michael Berris InitAllocatorsCustom(uptr Max)306edf0f6a7SDean Michael Berris static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT { 307190c49bcSDean Michael Berris Allocators A(Max); 308980d93d0SDean Michael Berris return A; 309980d93d0SDean Michael Berris } 310980d93d0SDean Michael Berris 31125d50595SDean Michael Berris static Allocators InitAllocatorsFromBuffers(Allocators::Buffers & Bufs)31225d50595SDean Michael Berris InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT { 31325d50595SDean Michael Berris Allocators A(Bufs); 31425d50595SDean Michael Berris return A; 31525d50595SDean Michael Berris } 31625d50595SDean Michael Berris 317980d93d0SDean Michael Berris private: 318980d93d0SDean Michael Berris NodeArray Nodes; 319980d93d0SDean Michael Berris RootArray Roots; 320980d93d0SDean Michael Berris ShadowStackArray ShadowStack; 321190c49bcSDean Michael Berris NodeIdPairAllocatorType *NodeIdPairAllocator; 322190c49bcSDean Michael Berris uint32_t OverflowedFunctions; 323980d93d0SDean Michael Berris 324980d93d0SDean Michael Berris public: FunctionCallTrie(const Allocators & A)325edf0f6a7SDean Michael Berris explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT 326edf0f6a7SDean Michael Berris : Nodes(*A.NodeAllocator), 327edf0f6a7SDean Michael Berris Roots(*A.RootAllocator), 3284719c524SDean Michael Berris ShadowStack(*A.ShadowStackAllocator), 329190c49bcSDean Michael Berris NodeIdPairAllocator(A.NodeIdPairAllocator), 330190c49bcSDean Michael Berris OverflowedFunctions(0) {} 331190c49bcSDean Michael Berris 332190c49bcSDean Michael Berris FunctionCallTrie() = delete; 333190c49bcSDean Michael Berris FunctionCallTrie(const FunctionCallTrie &) = delete; 334190c49bcSDean Michael Berris FunctionCallTrie &operator=(const FunctionCallTrie &) = delete; 335190c49bcSDean Michael Berris FunctionCallTrie(FunctionCallTrie && O)336190c49bcSDean Michael Berris FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT 337190c49bcSDean Michael Berris : Nodes(std::move(O.Nodes)), 338190c49bcSDean Michael Berris Roots(std::move(O.Roots)), 339190c49bcSDean Michael Berris ShadowStack(std::move(O.ShadowStack)), 340190c49bcSDean Michael Berris NodeIdPairAllocator(O.NodeIdPairAllocator), 341190c49bcSDean Michael Berris OverflowedFunctions(O.OverflowedFunctions) {} 342190c49bcSDean Michael Berris 343190c49bcSDean Michael Berris FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT { 344190c49bcSDean Michael Berris Nodes = std::move(O.Nodes); 345190c49bcSDean Michael Berris Roots = std::move(O.Roots); 346190c49bcSDean Michael Berris ShadowStack = std::move(O.ShadowStack); 347190c49bcSDean Michael Berris NodeIdPairAllocator = O.NodeIdPairAllocator; 348190c49bcSDean Michael Berris OverflowedFunctions = O.OverflowedFunctions; 349190c49bcSDean Michael Berris return *this; 350190c49bcSDean Michael Berris } 351190c49bcSDean Michael Berris ~FunctionCallTrie()352190c49bcSDean Michael Berris ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {} 353980d93d0SDean Michael Berris enterFunction(const int32_t FId,uint64_t TSC,uint16_t CPU)3541e255e7aSDean Michael Berris void enterFunction(const int32_t FId, uint64_t TSC, 3551e255e7aSDean Michael Berris uint16_t CPU) XRAY_NEVER_INSTRUMENT { 3560dd4f9f2SDean Michael Berris DCHECK_NE(FId, 0); 35725d50595SDean Michael Berris 35825d50595SDean Michael Berris // If we're already overflowed the function call stack, do not bother 35925d50595SDean Michael Berris // attempting to record any more function entries. 36025d50595SDean Michael Berris if (UNLIKELY(OverflowedFunctions)) { 36125d50595SDean Michael Berris ++OverflowedFunctions; 36225d50595SDean Michael Berris return; 36325d50595SDean Michael Berris } 36425d50595SDean Michael Berris 36525d50595SDean Michael Berris // If this is the first function we've encountered, we want to set up the 36625d50595SDean Michael Berris // node(s) and treat it as a root. 367980d93d0SDean Michael Berris if (UNLIKELY(ShadowStack.empty())) { 36825d50595SDean Michael Berris auto *NewRoot = Nodes.AppendEmplace( 36925d50595SDean Michael Berris nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId); 370980d93d0SDean Michael Berris if (UNLIKELY(NewRoot == nullptr)) 371980d93d0SDean Michael Berris return; 37225d50595SDean Michael Berris if (Roots.AppendEmplace(NewRoot) == nullptr) { 37325d50595SDean Michael Berris Nodes.trim(1); 374190c49bcSDean Michael Berris return; 37525d50595SDean Michael Berris } 376190c49bcSDean Michael Berris if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) { 37725d50595SDean Michael Berris Nodes.trim(1); 378190c49bcSDean Michael Berris Roots.trim(1); 379190c49bcSDean Michael Berris ++OverflowedFunctions; 380190c49bcSDean Michael Berris return; 381190c49bcSDean Michael Berris } 382980d93d0SDean Michael Berris return; 383980d93d0SDean Michael Berris } 384980d93d0SDean Michael Berris 38525d50595SDean Michael Berris // From this point on, we require that the stack is not empty. 38625d50595SDean Michael Berris DCHECK(!ShadowStack.empty()); 38725d50595SDean Michael Berris auto TopNode = ShadowStack.back().NodePtr; 3880dd4f9f2SDean Michael Berris DCHECK_NE(TopNode, nullptr); 389980d93d0SDean Michael Berris 39025d50595SDean Michael Berris // If we've seen this callee before, then we access that node and place that 39125d50595SDean Michael Berris // on the top of the stack. 39225d50595SDean Michael Berris auto* Callee = TopNode->Callees.find_element( 393980d93d0SDean Michael Berris [FId](const NodeIdPair &NR) { return NR.FId == FId; }); 394980d93d0SDean Michael Berris if (Callee != nullptr) { 395980d93d0SDean Michael Berris CHECK_NE(Callee->NodePtr, nullptr); 396190c49bcSDean Michael Berris if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr) 397190c49bcSDean Michael Berris ++OverflowedFunctions; 398980d93d0SDean Michael Berris return; 399980d93d0SDean Michael Berris } 400980d93d0SDean Michael Berris 401980d93d0SDean Michael Berris // This means we've never seen this stack before, create a new node here. 40225d50595SDean Michael Berris auto* NewNode = Nodes.AppendEmplace( 403190c49bcSDean Michael Berris TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId); 404980d93d0SDean Michael Berris if (UNLIKELY(NewNode == nullptr)) 405980d93d0SDean Michael Berris return; 4060dd4f9f2SDean Michael Berris DCHECK_NE(NewNode, nullptr); 407980d93d0SDean Michael Berris TopNode->Callees.AppendEmplace(NewNode, FId); 408190c49bcSDean Michael Berris if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr) 409190c49bcSDean Michael Berris ++OverflowedFunctions; 410980d93d0SDean Michael Berris return; 411980d93d0SDean Michael Berris } 412980d93d0SDean Michael Berris exitFunction(int32_t FId,uint64_t TSC,uint16_t CPU)4131e255e7aSDean Michael Berris void exitFunction(int32_t FId, uint64_t TSC, 4141e255e7aSDean Michael Berris uint16_t CPU) XRAY_NEVER_INSTRUMENT { 415190c49bcSDean Michael Berris // If we're exiting functions that have "overflowed" or don't fit into the 416190c49bcSDean Michael Berris // stack due to allocator constraints, we then decrement that count first. 417190c49bcSDean Michael Berris if (OverflowedFunctions) { 418190c49bcSDean Michael Berris --OverflowedFunctions; 419190c49bcSDean Michael Berris return; 420190c49bcSDean Michael Berris } 421190c49bcSDean Michael Berris 422980d93d0SDean Michael Berris // When we exit a function, we look up the ShadowStack to see whether we've 423980d93d0SDean Michael Berris // entered this function before. We do as little processing here as we can, 424980d93d0SDean Michael Berris // since most of the hard work would have already been done at function 425980d93d0SDean Michael Berris // entry. 426980d93d0SDean Michael Berris uint64_t CumulativeTreeTime = 0; 427190c49bcSDean Michael Berris 428980d93d0SDean Michael Berris while (!ShadowStack.empty()) { 4290dd4f9f2SDean Michael Berris const auto &Top = ShadowStack.back(); 430980d93d0SDean Michael Berris auto TopNode = Top.NodePtr; 4310dd4f9f2SDean Michael Berris DCHECK_NE(TopNode, nullptr); 4321e255e7aSDean Michael Berris 4331e255e7aSDean Michael Berris // We may encounter overflow on the TSC we're provided, which may end up 4341e255e7aSDean Michael Berris // being less than the TSC when we first entered the function. 4351e255e7aSDean Michael Berris // 4361e255e7aSDean Michael Berris // To get the accurate measurement of cycles, we need to check whether 4371e255e7aSDean Michael Berris // we've overflowed (TSC < Top.EntryTSC) and then account the difference 4381e255e7aSDean Michael Berris // between the entry TSC and the max for the TSC counter (max of uint64_t) 4391e255e7aSDean Michael Berris // then add the value of TSC. We can prove that the maximum delta we will 4401e255e7aSDean Michael Berris // get is at most the 64-bit unsigned value, since the difference between 4411e255e7aSDean Michael Berris // a TSC of 0 and a Top.EntryTSC of 1 is (numeric_limits<uint64_t>::max() 4421e255e7aSDean Michael Berris // - 1) + 1. 4431e255e7aSDean Michael Berris // 4441e255e7aSDean Michael Berris // NOTE: This assumes that TSCs are synchronised across CPUs. 4451e255e7aSDean Michael Berris // TODO: Count the number of times we've seen CPU migrations. 4461e255e7aSDean Michael Berris uint64_t LocalTime = 4471e255e7aSDean Michael Berris Top.EntryTSC > TSC 4481e255e7aSDean Michael Berris ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC 4491e255e7aSDean Michael Berris : TSC - Top.EntryTSC; 450980d93d0SDean Michael Berris TopNode->CallCount++; 451980d93d0SDean Michael Berris TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime; 452980d93d0SDean Michael Berris CumulativeTreeTime += LocalTime; 453980d93d0SDean Michael Berris ShadowStack.trim(1); 454980d93d0SDean Michael Berris 455980d93d0SDean Michael Berris // TODO: Update the histogram for the node. 4560dd4f9f2SDean Michael Berris if (TopNode->FId == FId) 457980d93d0SDean Michael Berris break; 458980d93d0SDean Michael Berris } 459980d93d0SDean Michael Berris } 460980d93d0SDean Michael Berris getRoots()461edf0f6a7SDean Michael Berris const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; } 462980d93d0SDean Michael Berris 463980d93d0SDean Michael Berris // The deepCopyInto operation will update the provided FunctionCallTrie by 464980d93d0SDean Michael Berris // re-creating the contents of this particular FunctionCallTrie in the other 465980d93d0SDean Michael Berris // FunctionCallTrie. It will do this using a Depth First Traversal from the 466980d93d0SDean Michael Berris // roots, and while doing so recreating the traversal in the provided 467980d93d0SDean Michael Berris // FunctionCallTrie. 468980d93d0SDean Michael Berris // 469980d93d0SDean Michael Berris // This operation will *not* destroy the state in `O`, and thus may cause some 470980d93d0SDean Michael Berris // duplicate entries in `O` if it is not empty. 471980d93d0SDean Michael Berris // 472980d93d0SDean Michael Berris // This function is *not* thread-safe, and may require external 473980d93d0SDean Michael Berris // synchronisation of both "this" and |O|. 474980d93d0SDean Michael Berris // 475980d93d0SDean Michael Berris // This function must *not* be called with a non-empty FunctionCallTrie |O|. deepCopyInto(FunctionCallTrie & O)476edf0f6a7SDean Michael Berris void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT { 477980d93d0SDean Michael Berris DCHECK(O.getRoots().empty()); 478980d93d0SDean Michael Berris 479980d93d0SDean Michael Berris // We then push the root into a stack, to use as the parent marker for new 480980d93d0SDean Michael Berris // nodes we push in as we're traversing depth-first down the call tree. 481980d93d0SDean Michael Berris struct NodeAndParent { 482980d93d0SDean Michael Berris FunctionCallTrie::Node *Node; 483980d93d0SDean Michael Berris FunctionCallTrie::Node *NewNode; 484980d93d0SDean Michael Berris }; 485980d93d0SDean Michael Berris using Stack = Array<NodeAndParent>; 486980d93d0SDean Michael Berris 487980d93d0SDean Michael Berris typename Stack::AllocatorType StackAllocator( 4889d6b7a5fSDean Michael Berris profilingFlags()->stack_allocator_max); 4894719c524SDean Michael Berris Stack DFSStack(StackAllocator); 490980d93d0SDean Michael Berris 4910dd4f9f2SDean Michael Berris for (const auto Root : getRoots()) { 4920dd4f9f2SDean Michael Berris // Add a node in O for this root. 4930dd4f9f2SDean Michael Berris auto NewRoot = O.Nodes.AppendEmplace( 494190c49bcSDean Michael Berris nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount, 4950dd4f9f2SDean Michael Berris Root->CumulativeLocalTime, Root->FId); 4960dd4f9f2SDean Michael Berris 4970dd4f9f2SDean Michael Berris // Because we cannot allocate more memory we should bail out right away. 4980dd4f9f2SDean Michael Berris if (UNLIKELY(NewRoot == nullptr)) 4990dd4f9f2SDean Michael Berris return; 5000dd4f9f2SDean Michael Berris 50125d50595SDean Michael Berris if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr)) 50225d50595SDean Michael Berris return; 5030dd4f9f2SDean Michael Berris 504980d93d0SDean Michael Berris // TODO: Figure out what to do if we fail to allocate any more stack 505980d93d0SDean Michael Berris // space. Maybe warn or report once? 50625d50595SDean Michael Berris if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr) 50725d50595SDean Michael Berris return; 508980d93d0SDean Michael Berris while (!DFSStack.empty()) { 509980d93d0SDean Michael Berris NodeAndParent NP = DFSStack.back(); 510980d93d0SDean Michael Berris DCHECK_NE(NP.Node, nullptr); 511980d93d0SDean Michael Berris DCHECK_NE(NP.NewNode, nullptr); 512980d93d0SDean Michael Berris DFSStack.trim(1); 513980d93d0SDean Michael Berris for (const auto Callee : NP.Node->Callees) { 514980d93d0SDean Michael Berris auto NewNode = O.Nodes.AppendEmplace( 515190c49bcSDean Michael Berris NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator), 516190c49bcSDean Michael Berris Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime, 517190c49bcSDean Michael Berris Callee.FId); 5180dd4f9f2SDean Michael Berris if (UNLIKELY(NewNode == nullptr)) 5190dd4f9f2SDean Michael Berris return; 52025d50595SDean Michael Berris if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) == 52125d50595SDean Michael Berris nullptr)) 52225d50595SDean Michael Berris return; 52325d50595SDean Michael Berris if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) == 52425d50595SDean Michael Berris nullptr)) 52525d50595SDean Michael Berris return; 526980d93d0SDean Michael Berris } 527980d93d0SDean Michael Berris } 528980d93d0SDean Michael Berris } 529980d93d0SDean Michael Berris } 530980d93d0SDean Michael Berris 531980d93d0SDean Michael Berris // The mergeInto operation will update the provided FunctionCallTrie by 532980d93d0SDean Michael Berris // traversing the current trie's roots and updating (i.e. merging) the data in 533980d93d0SDean Michael Berris // the nodes with the data in the target's nodes. If the node doesn't exist in 534980d93d0SDean Michael Berris // the provided trie, we add a new one in the right position, and inherit the 535980d93d0SDean Michael Berris // data from the original (current) trie, along with all its callees. 536980d93d0SDean Michael Berris // 537980d93d0SDean Michael Berris // This function is *not* thread-safe, and may require external 538980d93d0SDean Michael Berris // synchronisation of both "this" and |O|. mergeInto(FunctionCallTrie & O)539edf0f6a7SDean Michael Berris void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT { 540980d93d0SDean Michael Berris struct NodeAndTarget { 541980d93d0SDean Michael Berris FunctionCallTrie::Node *OrigNode; 542980d93d0SDean Michael Berris FunctionCallTrie::Node *TargetNode; 543980d93d0SDean Michael Berris }; 544980d93d0SDean Michael Berris using Stack = Array<NodeAndTarget>; 545980d93d0SDean Michael Berris typename Stack::AllocatorType StackAllocator( 5469d6b7a5fSDean Michael Berris profilingFlags()->stack_allocator_max); 5474719c524SDean Michael Berris Stack DFSStack(StackAllocator); 548980d93d0SDean Michael Berris 549980d93d0SDean Michael Berris for (const auto Root : getRoots()) { 550980d93d0SDean Michael Berris Node *TargetRoot = nullptr; 551980d93d0SDean Michael Berris auto R = O.Roots.find_element( 552980d93d0SDean Michael Berris [&](const Node *Node) { return Node->FId == Root->FId; }); 553980d93d0SDean Michael Berris if (R == nullptr) { 554190c49bcSDean Michael Berris TargetRoot = O.Nodes.AppendEmplace( 555190c49bcSDean Michael Berris nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u, 556190c49bcSDean Michael Berris Root->FId); 5570dd4f9f2SDean Michael Berris if (UNLIKELY(TargetRoot == nullptr)) 5580dd4f9f2SDean Michael Berris return; 5590dd4f9f2SDean Michael Berris 560980d93d0SDean Michael Berris O.Roots.Append(TargetRoot); 561980d93d0SDean Michael Berris } else { 562980d93d0SDean Michael Berris TargetRoot = *R; 563980d93d0SDean Michael Berris } 564980d93d0SDean Michael Berris 565190c49bcSDean Michael Berris DFSStack.AppendEmplace(Root, TargetRoot); 566980d93d0SDean Michael Berris while (!DFSStack.empty()) { 567980d93d0SDean Michael Berris NodeAndTarget NT = DFSStack.back(); 568980d93d0SDean Michael Berris DCHECK_NE(NT.OrigNode, nullptr); 569980d93d0SDean Michael Berris DCHECK_NE(NT.TargetNode, nullptr); 570980d93d0SDean Michael Berris DFSStack.trim(1); 571980d93d0SDean Michael Berris // TODO: Update the histogram as well when we have it ready. 572980d93d0SDean Michael Berris NT.TargetNode->CallCount += NT.OrigNode->CallCount; 573980d93d0SDean Michael Berris NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime; 574980d93d0SDean Michael Berris for (const auto Callee : NT.OrigNode->Callees) { 575980d93d0SDean Michael Berris auto TargetCallee = NT.TargetNode->Callees.find_element( 576980d93d0SDean Michael Berris [&](const FunctionCallTrie::NodeIdPair &C) { 577980d93d0SDean Michael Berris return C.FId == Callee.FId; 578980d93d0SDean Michael Berris }); 579980d93d0SDean Michael Berris if (TargetCallee == nullptr) { 5804719c524SDean Michael Berris auto NewTargetNode = O.Nodes.AppendEmplace( 581190c49bcSDean Michael Berris NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u, 582190c49bcSDean Michael Berris Callee.FId); 5830dd4f9f2SDean Michael Berris 5840dd4f9f2SDean Michael Berris if (UNLIKELY(NewTargetNode == nullptr)) 5850dd4f9f2SDean Michael Berris return; 5860dd4f9f2SDean Michael Berris 587980d93d0SDean Michael Berris TargetCallee = 588980d93d0SDean Michael Berris NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId); 589980d93d0SDean Michael Berris } 5900dd4f9f2SDean Michael Berris DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr); 591980d93d0SDean Michael Berris } 592980d93d0SDean Michael Berris } 593980d93d0SDean Michael Berris } 594980d93d0SDean Michael Berris } 595980d93d0SDean Michael Berris }; 596980d93d0SDean Michael Berris 597980d93d0SDean Michael Berris } // namespace __xray 598980d93d0SDean Michael Berris 599980d93d0SDean Michael Berris #endif // XRAY_FUNCTION_CALL_TRIE_H 600