//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <limits>
#include <memory> // For placement new.
#include <utility>

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function
/// and the path from the root to the node represents its stack trace. Each
/// node in the trie will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented
/// using an Array<NodeIdPair> -- each NodeIdPair instance will contain the
/// function ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ j' ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach as it takes a lot of time to compute the
/// hash every time we need to update a function's call information as we're
/// handling the entry and exit events.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact
/// and quick to update) to enable us to keep the overheads of this
/// implementation to the minimum.
class FunctionCallTrie {
public:
  struct Node;

  // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
  // standard library types in this header.
  struct NodeIdPair {
    Node *NodePtr;
    int32_t FId;
  };

  using NodeIdPairArray = Array<NodeIdPair>;
  using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;

  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
  // number of times this node actually appeared, the cumulative amount of
  // time for this particular node including its children call times, and just
  // the local time spent on this node. Each Node will have the ID of the XRay
  // instrumented function that it is associated to.
  struct Node {
    Node *Parent;
    NodeIdPairArray Callees;
    uint64_t CallCount;
    uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
    int32_t FId;

    // TODO: Include the compact histogram.
  };
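
  // An illustrative mapping (not part of the interface): for the `f -> g ->
  // j'` path in the file comment above, with hypothetical function IDs f = 1,
  // g = 2, and j = 3, the node recorded for j' would roughly carry
  //
  //   Parent = <node for g>, Callees = {}, CallCount = 10,
  //   CumulativeLocalTime = <TSC delta accumulated in j'>, FId = 3
  //
  // i.e. the counters are kept per path (per trie node), not per function ID.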

private:
  struct ShadowStackEntry {
    uint64_t EntryTSC;
    Node *NodePtr;
    uint16_t EntryCPU;
  };

  using NodeArray = Array<Node>;
  using RootArray = Array<Node *>;
  using ShadowStackArray = Array<ShadowStackEntry>;

public:
  // We collate the allocators we need into a single struct, as a convenience
  // to allow us to initialize these as a group.
  struct Allocators {
    using NodeAllocatorType = NodeArray::AllocatorType;
    using RootAllocatorType = RootArray::AllocatorType;
    using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;

    // Use hosted aligned storage members to allow for trivial move and init.
    // This also allows us to sidestep the potential-failing allocation issue.
    typename std::aligned_storage<sizeof(NodeAllocatorType),
                                  alignof(NodeAllocatorType)>::type
        NodeAllocatorStorage;
    typename std::aligned_storage<sizeof(RootAllocatorType),
                                  alignof(RootAllocatorType)>::type
        RootAllocatorStorage;
    typename std::aligned_storage<sizeof(ShadowStackAllocatorType),
                                  alignof(ShadowStackAllocatorType)>::type
        ShadowStackAllocatorStorage;
    typename std::aligned_storage<sizeof(NodeIdPairAllocatorType),
                                  alignof(NodeIdPairAllocatorType)>::type
        NodeIdPairAllocatorStorage;

    NodeAllocatorType *NodeAllocator = nullptr;
    RootAllocatorType *RootAllocator = nullptr;
    ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
    NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

    Allocators() = default;
    Allocators(const Allocators &) = delete;
    Allocators &operator=(const Allocators &) = delete;

    struct Buffers {
      BufferQueue::Buffer NodeBuffer;
      BufferQueue::Buffer RootsBuffer;
      BufferQueue::Buffer ShadowStackBuffer;
      BufferQueue::Buffer NodeIdPairBuffer;
    };

    explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage)
          NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage)
          RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(
          B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(
          B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage) NodeAllocatorType(Max);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage) RootAllocatorType(Max);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // Here we rely on the safety of memcpy'ing contents of the storage
      // members, and then pointing the source pointers to nullptr.
      internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage,
                      sizeof(NodeAllocatorType));
      internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage,
                      sizeof(RootAllocatorType));
      internal_memcpy(&ShadowStackAllocatorStorage,
                      &O.ShadowStackAllocatorStorage,
                      sizeof(ShadowStackAllocatorType));
      internal_memcpy(&NodeIdPairAllocatorStorage,
                      &O.NodeIdPairAllocatorStorage,
                      sizeof(NodeIdPairAllocatorType));

      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);

      O.NodeAllocator = nullptr;
      O.RootAllocator = nullptr;
      O.ShadowStackAllocator = nullptr;
      O.NodeIdPairAllocator = nullptr;
    }

    Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // When moving into an existing instance, we ensure that we clean up the
      // current allocators.
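      //
      // Each of the four allocators below is handled the same way: destroy
      // whatever this instance currently holds, then either placement-move
      // the source's allocator into our aligned storage or record that we now
      // hold none.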
      if (NodeAllocator)
        NodeAllocator->~NodeAllocatorType();
      if (O.NodeAllocator) {
        new (&NodeAllocatorStorage)
            NodeAllocatorType(std::move(*O.NodeAllocator));
        NodeAllocator =
            reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
        O.NodeAllocator = nullptr;
      } else {
        NodeAllocator = nullptr;
      }

      if (RootAllocator)
        RootAllocator->~RootAllocatorType();
      if (O.RootAllocator) {
        new (&RootAllocatorStorage)
            RootAllocatorType(std::move(*O.RootAllocator));
        RootAllocator =
            reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
        O.RootAllocator = nullptr;
      } else {
        RootAllocator = nullptr;
      }

      if (ShadowStackAllocator)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (O.ShadowStackAllocator) {
        new (&ShadowStackAllocatorStorage)
            ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator));
        ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
            &ShadowStackAllocatorStorage);
        O.ShadowStackAllocator = nullptr;
      } else {
        ShadowStackAllocator = nullptr;
      }

      if (NodeIdPairAllocator)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
      if (O.NodeIdPairAllocator) {
        new (&NodeIdPairAllocatorStorage)
            NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator));
        NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
            &NodeIdPairAllocatorStorage);
        O.NodeIdPairAllocator = nullptr;
      } else {
        NodeIdPairAllocator = nullptr;
      }

      return *this;
    }

    ~Allocators() XRAY_NEVER_INSTRUMENT {
      if (NodeAllocator != nullptr)
        NodeAllocator->~NodeAllocatorType();
      if (RootAllocator != nullptr)
        RootAllocator->~RootAllocatorType();
      if (ShadowStackAllocator != nullptr)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (NodeIdPairAllocator != nullptr)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
    }
  };

  static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
    return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
  }

  static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
    Allocators A(Max);
    return A;
  }

  static Allocators
  InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT {
    Allocators A(Bufs);
    return A;
  }

private:
  NodeArray Nodes;
  RootArray Roots;
  ShadowStackArray ShadowStack;
  NodeIdPairAllocatorType *NodeIdPairAllocator;
  uint32_t OverflowedFunctions;

public:
  explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
      : Nodes(*A.NodeAllocator),
        Roots(*A.RootAllocator),
        ShadowStack(*A.ShadowStackAllocator),
        NodeIdPairAllocator(A.NodeIdPairAllocator),
        OverflowedFunctions(0) {}

  FunctionCallTrie() = delete;
  FunctionCallTrie(const FunctionCallTrie &) = delete;
  FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;

  FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
      : Nodes(std::move(O.Nodes)),
        Roots(std::move(O.Roots)),
        ShadowStack(std::move(O.ShadowStack)),
        NodeIdPairAllocator(O.NodeIdPairAllocator),
        OverflowedFunctions(O.OverflowedFunctions) {}

  FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
    Nodes = std::move(O.Nodes);
    Roots = std::move(O.Roots);
    ShadowStack = std::move(O.ShadowStack);
    NodeIdPairAllocator = O.NodeIdPairAllocator;
    OverflowedFunctions = O.OverflowedFunctions;
    return *this;
  }

  ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}
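
  // A minimal usage sketch (illustrative only, not part of this interface):
  // a per-thread profiler would typically construct the trie from a set of
  // allocators that outlives it, then feed it entry/exit events. The
  // FuncId/TSC/CPU values below are placeholders for whatever the runtime's
  // event handlers receive.
  //
  //   auto Allocators = FunctionCallTrie::InitAllocators();
  //   FunctionCallTrie Trie(Allocators);
  //   Trie.enterFunction(FuncId, TSC, CPU); // on function entry
  //   ...
  //   Trie.exitFunction(FuncId, TSC, CPU);  // on the matching exit
  //
  // The Allocators instance must outlive the trie, because the trie's
  // internal arrays keep referring to the allocators they were constructed
  // with.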

  void enterFunction(const int32_t FId, uint64_t TSC,
                     uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    DCHECK_NE(FId, 0);

    // If we've already overflowed the function call stack, do not bother
    // attempting to record any more function entries.
    if (UNLIKELY(OverflowedFunctions)) {
      ++OverflowedFunctions;
      return;
    }

    // If this is the first function we've encountered, we want to set up the
    // node(s) and treat it as a root.
    if (UNLIKELY(ShadowStack.empty())) {
      auto *NewRoot = Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
      if (UNLIKELY(NewRoot == nullptr))
        return;
      if (Roots.AppendEmplace(NewRoot) == nullptr) {
        Nodes.trim(1);
        return;
      }
      if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
        Nodes.trim(1);
        Roots.trim(1);
        ++OverflowedFunctions;
        return;
      }
      return;
    }

    // From this point on, we require that the stack is not empty.
    DCHECK(!ShadowStack.empty());
    auto TopNode = ShadowStack.back().NodePtr;
    DCHECK_NE(TopNode, nullptr);

    // If we've seen this callee before, then we access that node and place it
    // on top of the stack.
    auto *Callee = TopNode->Callees.find_element(
        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
    if (Callee != nullptr) {
      CHECK_NE(Callee->NodePtr, nullptr);
      if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
        ++OverflowedFunctions;
      return;
    }

    // This means we've never seen this stack before, so create a new node
    // here.
    auto *NewNode = Nodes.AppendEmplace(
        TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
    if (UNLIKELY(NewNode == nullptr))
      return;
    DCHECK_NE(NewNode, nullptr);
    TopNode->Callees.AppendEmplace(NewNode, FId);
    if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
      ++OverflowedFunctions;
    return;
  }

  void exitFunction(int32_t FId, uint64_t TSC,
                    uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    // If we're exiting functions that have "overflowed" or don't fit into the
    // stack due to allocator constraints, we then decrement that count first.
    if (OverflowedFunctions) {
      --OverflowedFunctions;
      return;
    }

    // When we exit a function, we look up the ShadowStack to see whether
    // we've entered this function before. We do as little processing here as
    // we can, since most of the hard work would have already been done at
    // function entry.
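    //
    // CumulativeTreeTime tracks the time already attributed to deeper frames
    // popped within this same call, so that a caller popped later in the loop
    // below is only charged the remainder of its elapsed time. (Illustrative
    // arithmetic: if a deeper frame accounted for 40 ticks and the caller's
    // total elapsed time is 100 ticks, the caller's CumulativeLocalTime grows
    // by 60.)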
    uint64_t CumulativeTreeTime = 0;

    while (!ShadowStack.empty()) {
      const auto &Top = ShadowStack.back();
      auto TopNode = Top.NodePtr;
      DCHECK_NE(TopNode, nullptr);

      // We may encounter overflow on the TSC we're provided, which may end up
      // being less than the TSC when we first entered the function.
      //
      // To get the accurate measurement of cycles, we need to check whether
      // we've overflowed (TSC < Top.EntryTSC) and then account the difference
      // between the entry TSC and the max for the TSC counter (max of
      // uint64_t), then add the value of TSC. We can prove that the maximum
      // delta we will get is at most the 64-bit unsigned value, since the
      // difference between a TSC of 0 and a Top.EntryTSC of 1 is
      // (numeric_limits<uint64_t>::max() - 1) + 1.
      //
      // NOTE: This assumes that TSCs are synchronised across CPUs.
      // TODO: Count the number of times we've seen CPU migrations.
      uint64_t LocalTime =
          Top.EntryTSC > TSC
              ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC
              : TSC - Top.EntryTSC;
      TopNode->CallCount++;
      TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
      CumulativeTreeTime += LocalTime;
      ShadowStack.trim(1);

      // TODO: Update the histogram for the node.
      if (TopNode->FId == FId)
        break;
    }
  }

  const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }

  // The deepCopyInto operation will update the provided FunctionCallTrie by
  // re-creating the contents of this particular FunctionCallTrie in the other
  // FunctionCallTrie. It will do this using a Depth First Traversal from the
  // roots, and while doing so recreating the traversal in the provided
  // FunctionCallTrie.
  //
  // This operation will *not* destroy the state in `O`, and thus may cause
  // some duplicate entries in `O` if it is not empty.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  //
  // This function must *not* be called with a non-empty FunctionCallTrie |O|.
  void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    DCHECK(O.getRoots().empty());

    // We then push the root into a stack, to use as the parent marker for new
    // nodes we push in as we're traversing depth-first down the call tree.
    struct NodeAndParent {
      FunctionCallTrie::Node *Node;
      FunctionCallTrie::Node *NewNode;
    };
    using Stack = Array<NodeAndParent>;

    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      // Add a node in O for this root.
      auto NewRoot = O.Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount,
          Root->CumulativeLocalTime, Root->FId);

      // Because we cannot allocate more memory we should bail out right away.
      if (UNLIKELY(NewRoot == nullptr))
        return;

      if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr))
        return;

      // TODO: Figure out what to do if we fail to allocate any more stack
      // space. Maybe warn or report once?
      if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr)
        return;
      while (!DFSStack.empty()) {
        NodeAndParent NP = DFSStack.back();
        DCHECK_NE(NP.Node, nullptr);
        DCHECK_NE(NP.NewNode, nullptr);
        DFSStack.trim(1);
        for (const auto Callee : NP.Node->Callees) {
          auto NewNode = O.Nodes.AppendEmplace(
              NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator),
              Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
              Callee.FId);
          if (UNLIKELY(NewNode == nullptr))
            return;
          if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) ==
                       nullptr))
            return;
          if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) ==
                       nullptr))
            return;
        }
      }
    }
  }

  // The mergeInto operation will update the provided FunctionCallTrie by
  // traversing the current trie's roots and updating (i.e. merging) the data
  // in the nodes with the data in the target's nodes. If the node doesn't
  // exist in the provided trie, we add a new one in the right position, and
  // inherit the data from the original (current) trie, along with all its
  // callees.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
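  //
  // For example (illustrative only; the names below are placeholders), one
  // might aggregate several per-thread tries into a single trie that shares
  // one set of allocators:
  //
  //   FunctionCallTrie Aggregate(GlobalAllocators);
  //   for (const auto &ThreadTrie : ThreadTries)
  //     ThreadTrie.mergeInto(Aggregate);
  //
  // A node present in both tries ends up holding the sum of the CallCount and
  // CumulativeLocalTime values from the inputs.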
  void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    struct NodeAndTarget {
      FunctionCallTrie::Node *OrigNode;
      FunctionCallTrie::Node *TargetNode;
    };
    using Stack = Array<NodeAndTarget>;
    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      Node *TargetRoot = nullptr;
      auto R = O.Roots.find_element(
          [&](const Node *Node) { return Node->FId == Root->FId; });
      if (R == nullptr) {
        TargetRoot = O.Nodes.AppendEmplace(
            nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
            Root->FId);
        if (UNLIKELY(TargetRoot == nullptr))
          return;

        O.Roots.Append(TargetRoot);
      } else {
        TargetRoot = *R;
      }

      DFSStack.AppendEmplace(Root, TargetRoot);
      while (!DFSStack.empty()) {
        NodeAndTarget NT = DFSStack.back();
        DCHECK_NE(NT.OrigNode, nullptr);
        DCHECK_NE(NT.TargetNode, nullptr);
        DFSStack.trim(1);
        // TODO: Update the histogram as well when we have it ready.
        NT.TargetNode->CallCount += NT.OrigNode->CallCount;
        NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
        for (const auto Callee : NT.OrigNode->Callees) {
          auto TargetCallee = NT.TargetNode->Callees.find_element(
              [&](const FunctionCallTrie::NodeIdPair &C) {
                return C.FId == Callee.FId;
              });
          if (TargetCallee == nullptr) {
            auto NewTargetNode = O.Nodes.AppendEmplace(
                NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
                Callee.FId);

            if (UNLIKELY(NewTargetNode == nullptr))
              return;

            TargetCallee =
                NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
          }
          DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
        }
      }
    }
  }
};

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H