10b57cec5SDimitry Andric //===-- xray_function_call_trie.h ------------------------------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file is a part of XRay, a dynamic runtime instrumentation system. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric // This file defines the interface for a function call trie. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric #ifndef XRAY_FUNCTION_CALL_TRIE_H 150b57cec5SDimitry Andric #define XRAY_FUNCTION_CALL_TRIE_H 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric #include "xray_buffer_queue.h" 180b57cec5SDimitry Andric #include "xray_defs.h" 190b57cec5SDimitry Andric #include "xray_profiling_flags.h" 200b57cec5SDimitry Andric #include "xray_segmented_array.h" 210b57cec5SDimitry Andric #include <limits> 220b57cec5SDimitry Andric #include <memory> // For placement new. 230b57cec5SDimitry Andric #include <utility> 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric namespace __xray { 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric /// A FunctionCallTrie represents the stack traces of XRay instrumented 280b57cec5SDimitry Andric /// functions that we've encountered, where a node corresponds to a function and 290b57cec5SDimitry Andric /// the path from the root to the node its stack trace. Each node in the trie 300b57cec5SDimitry Andric /// will contain some useful values, including: 310b57cec5SDimitry Andric /// 320b57cec5SDimitry Andric /// * The cumulative amount of time spent in this particular node/stack. 330b57cec5SDimitry Andric /// * The number of times this stack has appeared. 340b57cec5SDimitry Andric /// * A histogram of latencies for that particular node. 350b57cec5SDimitry Andric /// 360b57cec5SDimitry Andric /// Each node in the trie will also contain a list of callees, represented using 370b57cec5SDimitry Andric /// a Array<NodeIdPair> -- each NodeIdPair instance will contain the function 380b57cec5SDimitry Andric /// ID of the callee, and a pointer to the node. 390b57cec5SDimitry Andric /// 400b57cec5SDimitry Andric /// If we visualise this data structure, we'll find the following potential 410b57cec5SDimitry Andric /// representation: 420b57cec5SDimitry Andric /// 430b57cec5SDimitry Andric /// [function id node] -> [callees] [cumulative time] 440b57cec5SDimitry Andric /// [call counter] [latency histogram] 450b57cec5SDimitry Andric /// 460b57cec5SDimitry Andric /// As an example, when we have a function in this pseudocode: 470b57cec5SDimitry Andric /// 480b57cec5SDimitry Andric /// func f(N) { 490b57cec5SDimitry Andric /// g() 500b57cec5SDimitry Andric /// h() 510b57cec5SDimitry Andric /// for i := 1..N { j() } 520b57cec5SDimitry Andric /// } 530b57cec5SDimitry Andric /// 540b57cec5SDimitry Andric /// We may end up with a trie of the following form: 550b57cec5SDimitry Andric /// 560b57cec5SDimitry Andric /// f -> [ g, h, j ] [...] [1] [...] 570b57cec5SDimitry Andric /// g -> [ ... ] [...] [1] [...] 580b57cec5SDimitry Andric /// h -> [ ... ] [...] [1] [...] 590b57cec5SDimitry Andric /// j -> [ ... ] [...] [N] [...] 600b57cec5SDimitry Andric /// 610b57cec5SDimitry Andric /// If for instance the function g() called j() like so: 620b57cec5SDimitry Andric /// 630b57cec5SDimitry Andric /// func g() { 640b57cec5SDimitry Andric /// for i := 1..10 { j() } 650b57cec5SDimitry Andric /// } 660b57cec5SDimitry Andric /// 670b57cec5SDimitry Andric /// We'll find the following updated trie: 680b57cec5SDimitry Andric /// 690b57cec5SDimitry Andric /// f -> [ g, h, j ] [...] [1] [...] 700b57cec5SDimitry Andric /// g -> [ j' ] [...] [1] [...] 710b57cec5SDimitry Andric /// h -> [ ... ] [...] [1] [...] 720b57cec5SDimitry Andric /// j -> [ ... ] [...] [N] [...] 730b57cec5SDimitry Andric /// j' -> [ ... ] [...] [10] [...] 740b57cec5SDimitry Andric /// 750b57cec5SDimitry Andric /// Note that we'll have a new node representing the path `f -> g -> j'` with 760b57cec5SDimitry Andric /// isolated data. This isolation gives us a means of representing the stack 770b57cec5SDimitry Andric /// traces as a path, as opposed to a key in a table. The alternative 780b57cec5SDimitry Andric /// implementation here would be to use a separate table for the path, and use 790b57cec5SDimitry Andric /// hashes of the path as an identifier to accumulate the information. We've 800b57cec5SDimitry Andric /// moved away from this approach as it takes a lot of time to compute the hash 810b57cec5SDimitry Andric /// every time we need to update a function's call information as we're handling 820b57cec5SDimitry Andric /// the entry and exit events. 830b57cec5SDimitry Andric /// 840b57cec5SDimitry Andric /// This approach allows us to maintain a shadow stack, which represents the 850b57cec5SDimitry Andric /// currently executing path, and on function exits quickly compute the amount 860b57cec5SDimitry Andric /// of time elapsed from the entry, then update the counters for the node 870b57cec5SDimitry Andric /// already represented in the trie. This necessitates an efficient 880b57cec5SDimitry Andric /// representation of the various data structures (the list of callees must be 890b57cec5SDimitry Andric /// cache-aware and efficient to look up, and the histogram must be compact and 900b57cec5SDimitry Andric /// quick to update) to enable us to keep the overheads of this implementation 910b57cec5SDimitry Andric /// to the minimum. 920b57cec5SDimitry Andric class FunctionCallTrie { 930b57cec5SDimitry Andric public: 940b57cec5SDimitry Andric struct Node; 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric // We use a NodeIdPair type instead of a std::pair<...> to not rely on the 970b57cec5SDimitry Andric // standard library types in this header. 980b57cec5SDimitry Andric struct NodeIdPair { 990b57cec5SDimitry Andric Node *NodePtr; 1000b57cec5SDimitry Andric int32_t FId; 1010b57cec5SDimitry Andric }; 1020b57cec5SDimitry Andric 1030b57cec5SDimitry Andric using NodeIdPairArray = Array<NodeIdPair>; 1040b57cec5SDimitry Andric using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType; 1050b57cec5SDimitry Andric 1060b57cec5SDimitry Andric // A Node in the FunctionCallTrie gives us a list of callees, the cumulative 1070b57cec5SDimitry Andric // number of times this node actually appeared, the cumulative amount of time 1080b57cec5SDimitry Andric // for this particular node including its children call times, and just the 1090b57cec5SDimitry Andric // local time spent on this node. Each Node will have the ID of the XRay 1100b57cec5SDimitry Andric // instrumented function that it is associated to. 1110b57cec5SDimitry Andric struct Node { 1120b57cec5SDimitry Andric Node *Parent; 1130b57cec5SDimitry Andric NodeIdPairArray Callees; 1140b57cec5SDimitry Andric uint64_t CallCount; 1150b57cec5SDimitry Andric uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time. 1160b57cec5SDimitry Andric int32_t FId; 1170b57cec5SDimitry Andric 1180b57cec5SDimitry Andric // TODO: Include the compact histogram. 1190b57cec5SDimitry Andric }; 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric private: 1220b57cec5SDimitry Andric struct ShadowStackEntry { 1230b57cec5SDimitry Andric uint64_t EntryTSC; 1240b57cec5SDimitry Andric Node *NodePtr; 1250b57cec5SDimitry Andric uint16_t EntryCPU; 1260b57cec5SDimitry Andric }; 1270b57cec5SDimitry Andric 1280b57cec5SDimitry Andric using NodeArray = Array<Node>; 1290b57cec5SDimitry Andric using RootArray = Array<Node *>; 1300b57cec5SDimitry Andric using ShadowStackArray = Array<ShadowStackEntry>; 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric public: 1330b57cec5SDimitry Andric // We collate the allocators we need into a single struct, as a convenience to 1340b57cec5SDimitry Andric // allow us to initialize these as a group. 1350b57cec5SDimitry Andric struct Allocators { 1360b57cec5SDimitry Andric using NodeAllocatorType = NodeArray::AllocatorType; 1370b57cec5SDimitry Andric using RootAllocatorType = RootArray::AllocatorType; 1380b57cec5SDimitry Andric using ShadowStackAllocatorType = ShadowStackArray::AllocatorType; 1390b57cec5SDimitry Andric 1400b57cec5SDimitry Andric // Use hosted aligned storage members to allow for trivial move and init. 1410b57cec5SDimitry Andric // This also allows us to sidestep the potential-failing allocation issue. 142*0fca6ea1SDimitry Andric alignas(NodeAllocatorType) std::byte 143*0fca6ea1SDimitry Andric NodeAllocatorStorage[sizeof(NodeAllocatorType)]; 144*0fca6ea1SDimitry Andric alignas(RootAllocatorType) std::byte 145*0fca6ea1SDimitry Andric RootAllocatorStorage[sizeof(RootAllocatorType)]; 146*0fca6ea1SDimitry Andric alignas(ShadowStackAllocatorType) std::byte 147*0fca6ea1SDimitry Andric ShadowStackAllocatorStorage[sizeof(ShadowStackAllocatorType)]; 148*0fca6ea1SDimitry Andric alignas(NodeIdPairAllocatorType) std::byte 149*0fca6ea1SDimitry Andric NodeIdPairAllocatorStorage[sizeof(NodeIdPairAllocatorType)]; 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric NodeAllocatorType *NodeAllocator = nullptr; 1520b57cec5SDimitry Andric RootAllocatorType *RootAllocator = nullptr; 1530b57cec5SDimitry Andric ShadowStackAllocatorType *ShadowStackAllocator = nullptr; 1540b57cec5SDimitry Andric NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr; 1550b57cec5SDimitry Andric 1560b57cec5SDimitry Andric Allocators() = default; 1570b57cec5SDimitry Andric Allocators(const Allocators &) = delete; 1580b57cec5SDimitry Andric Allocators &operator=(const Allocators &) = delete; 1590b57cec5SDimitry Andric 1600b57cec5SDimitry Andric struct Buffers { 1610b57cec5SDimitry Andric BufferQueue::Buffer NodeBuffer; 1620b57cec5SDimitry Andric BufferQueue::Buffer RootsBuffer; 1630b57cec5SDimitry Andric BufferQueue::Buffer ShadowStackBuffer; 1640b57cec5SDimitry Andric BufferQueue::Buffer NodeIdPairBuffer; 1650b57cec5SDimitry Andric }; 1660b57cec5SDimitry Andric 1670b57cec5SDimitry Andric explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT { 1680b57cec5SDimitry Andric new (&NodeAllocatorStorage) 1690b57cec5SDimitry Andric NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size); 1700b57cec5SDimitry Andric NodeAllocator = 1710b57cec5SDimitry Andric reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric new (&RootAllocatorStorage) 1740b57cec5SDimitry Andric RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size); 1750b57cec5SDimitry Andric RootAllocator = 1760b57cec5SDimitry Andric reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); 1770b57cec5SDimitry Andric 1780b57cec5SDimitry Andric new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType( 1790b57cec5SDimitry Andric B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size); 1800b57cec5SDimitry Andric ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( 1810b57cec5SDimitry Andric &ShadowStackAllocatorStorage); 1820b57cec5SDimitry Andric 1830b57cec5SDimitry Andric new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType( 1840b57cec5SDimitry Andric B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size); 1850b57cec5SDimitry Andric NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( 1860b57cec5SDimitry Andric &NodeIdPairAllocatorStorage); 1870b57cec5SDimitry Andric } 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT { 1900b57cec5SDimitry Andric new (&NodeAllocatorStorage) NodeAllocatorType(Max); 1910b57cec5SDimitry Andric NodeAllocator = 1920b57cec5SDimitry Andric reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); 1930b57cec5SDimitry Andric 1940b57cec5SDimitry Andric new (&RootAllocatorStorage) RootAllocatorType(Max); 1950b57cec5SDimitry Andric RootAllocator = 1960b57cec5SDimitry Andric reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); 1970b57cec5SDimitry Andric 1980b57cec5SDimitry Andric new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max); 1990b57cec5SDimitry Andric ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( 2000b57cec5SDimitry Andric &ShadowStackAllocatorStorage); 2010b57cec5SDimitry Andric 2020b57cec5SDimitry Andric new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max); 2030b57cec5SDimitry Andric NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( 2040b57cec5SDimitry Andric &NodeIdPairAllocatorStorage); 2050b57cec5SDimitry Andric } 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT { 2080b57cec5SDimitry Andric // Here we rely on the safety of memcpy'ing contents of the storage 2090b57cec5SDimitry Andric // members, and then pointing the source pointers to nullptr. 2100b57cec5SDimitry Andric internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage, 2110b57cec5SDimitry Andric sizeof(NodeAllocatorType)); 2120b57cec5SDimitry Andric internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage, 2130b57cec5SDimitry Andric sizeof(RootAllocatorType)); 2140b57cec5SDimitry Andric internal_memcpy(&ShadowStackAllocatorStorage, 2150b57cec5SDimitry Andric &O.ShadowStackAllocatorStorage, 2160b57cec5SDimitry Andric sizeof(ShadowStackAllocatorType)); 2170b57cec5SDimitry Andric internal_memcpy(&NodeIdPairAllocatorStorage, 2180b57cec5SDimitry Andric &O.NodeIdPairAllocatorStorage, 2190b57cec5SDimitry Andric sizeof(NodeIdPairAllocatorType)); 2200b57cec5SDimitry Andric 2210b57cec5SDimitry Andric NodeAllocator = 2220b57cec5SDimitry Andric reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); 2230b57cec5SDimitry Andric RootAllocator = 2240b57cec5SDimitry Andric reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); 2250b57cec5SDimitry Andric ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( 2260b57cec5SDimitry Andric &ShadowStackAllocatorStorage); 2270b57cec5SDimitry Andric NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( 2280b57cec5SDimitry Andric &NodeIdPairAllocatorStorage); 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric O.NodeAllocator = nullptr; 2310b57cec5SDimitry Andric O.RootAllocator = nullptr; 2320b57cec5SDimitry Andric O.ShadowStackAllocator = nullptr; 2330b57cec5SDimitry Andric O.NodeIdPairAllocator = nullptr; 2340b57cec5SDimitry Andric } 2350b57cec5SDimitry Andric 2360b57cec5SDimitry Andric Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT { 2370b57cec5SDimitry Andric // When moving into an existing instance, we ensure that we clean up the 2380b57cec5SDimitry Andric // current allocators. 2390b57cec5SDimitry Andric if (NodeAllocator) 2400b57cec5SDimitry Andric NodeAllocator->~NodeAllocatorType(); 2410b57cec5SDimitry Andric if (O.NodeAllocator) { 2420b57cec5SDimitry Andric new (&NodeAllocatorStorage) 2430b57cec5SDimitry Andric NodeAllocatorType(std::move(*O.NodeAllocator)); 2440b57cec5SDimitry Andric NodeAllocator = 2450b57cec5SDimitry Andric reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage); 2460b57cec5SDimitry Andric O.NodeAllocator = nullptr; 2470b57cec5SDimitry Andric } else { 2480b57cec5SDimitry Andric NodeAllocator = nullptr; 2490b57cec5SDimitry Andric } 2500b57cec5SDimitry Andric 2510b57cec5SDimitry Andric if (RootAllocator) 2520b57cec5SDimitry Andric RootAllocator->~RootAllocatorType(); 2530b57cec5SDimitry Andric if (O.RootAllocator) { 2540b57cec5SDimitry Andric new (&RootAllocatorStorage) 2550b57cec5SDimitry Andric RootAllocatorType(std::move(*O.RootAllocator)); 2560b57cec5SDimitry Andric RootAllocator = 2570b57cec5SDimitry Andric reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage); 2580b57cec5SDimitry Andric O.RootAllocator = nullptr; 2590b57cec5SDimitry Andric } else { 2600b57cec5SDimitry Andric RootAllocator = nullptr; 2610b57cec5SDimitry Andric } 2620b57cec5SDimitry Andric 2630b57cec5SDimitry Andric if (ShadowStackAllocator) 2640b57cec5SDimitry Andric ShadowStackAllocator->~ShadowStackAllocatorType(); 2650b57cec5SDimitry Andric if (O.ShadowStackAllocator) { 2660b57cec5SDimitry Andric new (&ShadowStackAllocatorStorage) 2670b57cec5SDimitry Andric ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator)); 2680b57cec5SDimitry Andric ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>( 2690b57cec5SDimitry Andric &ShadowStackAllocatorStorage); 2700b57cec5SDimitry Andric O.ShadowStackAllocator = nullptr; 2710b57cec5SDimitry Andric } else { 2720b57cec5SDimitry Andric ShadowStackAllocator = nullptr; 2730b57cec5SDimitry Andric } 2740b57cec5SDimitry Andric 2750b57cec5SDimitry Andric if (NodeIdPairAllocator) 2760b57cec5SDimitry Andric NodeIdPairAllocator->~NodeIdPairAllocatorType(); 2770b57cec5SDimitry Andric if (O.NodeIdPairAllocator) { 2780b57cec5SDimitry Andric new (&NodeIdPairAllocatorStorage) 2790b57cec5SDimitry Andric NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator)); 2800b57cec5SDimitry Andric NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>( 2810b57cec5SDimitry Andric &NodeIdPairAllocatorStorage); 2820b57cec5SDimitry Andric O.NodeIdPairAllocator = nullptr; 2830b57cec5SDimitry Andric } else { 2840b57cec5SDimitry Andric NodeIdPairAllocator = nullptr; 2850b57cec5SDimitry Andric } 2860b57cec5SDimitry Andric 2870b57cec5SDimitry Andric return *this; 2880b57cec5SDimitry Andric } 2890b57cec5SDimitry Andric 2900b57cec5SDimitry Andric ~Allocators() XRAY_NEVER_INSTRUMENT { 2910b57cec5SDimitry Andric if (NodeAllocator != nullptr) 2920b57cec5SDimitry Andric NodeAllocator->~NodeAllocatorType(); 2930b57cec5SDimitry Andric if (RootAllocator != nullptr) 2940b57cec5SDimitry Andric RootAllocator->~RootAllocatorType(); 2950b57cec5SDimitry Andric if (ShadowStackAllocator != nullptr) 2960b57cec5SDimitry Andric ShadowStackAllocator->~ShadowStackAllocatorType(); 2970b57cec5SDimitry Andric if (NodeIdPairAllocator != nullptr) 2980b57cec5SDimitry Andric NodeIdPairAllocator->~NodeIdPairAllocatorType(); 2990b57cec5SDimitry Andric } 3000b57cec5SDimitry Andric }; 3010b57cec5SDimitry Andric 3020b57cec5SDimitry Andric static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT { 3030b57cec5SDimitry Andric return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max); 3040b57cec5SDimitry Andric } 3050b57cec5SDimitry Andric 3060b57cec5SDimitry Andric static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT { 3070b57cec5SDimitry Andric Allocators A(Max); 3080b57cec5SDimitry Andric return A; 3090b57cec5SDimitry Andric } 3100b57cec5SDimitry Andric 3110b57cec5SDimitry Andric static Allocators 3120b57cec5SDimitry Andric InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT { 3130b57cec5SDimitry Andric Allocators A(Bufs); 3140b57cec5SDimitry Andric return A; 3150b57cec5SDimitry Andric } 3160b57cec5SDimitry Andric 3170b57cec5SDimitry Andric private: 3180b57cec5SDimitry Andric NodeArray Nodes; 3190b57cec5SDimitry Andric RootArray Roots; 3200b57cec5SDimitry Andric ShadowStackArray ShadowStack; 3210b57cec5SDimitry Andric NodeIdPairAllocatorType *NodeIdPairAllocator; 3220b57cec5SDimitry Andric uint32_t OverflowedFunctions; 3230b57cec5SDimitry Andric 3240b57cec5SDimitry Andric public: 3250b57cec5SDimitry Andric explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT 3260b57cec5SDimitry Andric : Nodes(*A.NodeAllocator), 3270b57cec5SDimitry Andric Roots(*A.RootAllocator), 3280b57cec5SDimitry Andric ShadowStack(*A.ShadowStackAllocator), 3290b57cec5SDimitry Andric NodeIdPairAllocator(A.NodeIdPairAllocator), 3300b57cec5SDimitry Andric OverflowedFunctions(0) {} 3310b57cec5SDimitry Andric 3320b57cec5SDimitry Andric FunctionCallTrie() = delete; 3330b57cec5SDimitry Andric FunctionCallTrie(const FunctionCallTrie &) = delete; 3340b57cec5SDimitry Andric FunctionCallTrie &operator=(const FunctionCallTrie &) = delete; 3350b57cec5SDimitry Andric 3360b57cec5SDimitry Andric FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT 3370b57cec5SDimitry Andric : Nodes(std::move(O.Nodes)), 3380b57cec5SDimitry Andric Roots(std::move(O.Roots)), 3390b57cec5SDimitry Andric ShadowStack(std::move(O.ShadowStack)), 3400b57cec5SDimitry Andric NodeIdPairAllocator(O.NodeIdPairAllocator), 3410b57cec5SDimitry Andric OverflowedFunctions(O.OverflowedFunctions) {} 3420b57cec5SDimitry Andric 3430b57cec5SDimitry Andric FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT { 3440b57cec5SDimitry Andric Nodes = std::move(O.Nodes); 3450b57cec5SDimitry Andric Roots = std::move(O.Roots); 3460b57cec5SDimitry Andric ShadowStack = std::move(O.ShadowStack); 3470b57cec5SDimitry Andric NodeIdPairAllocator = O.NodeIdPairAllocator; 3480b57cec5SDimitry Andric OverflowedFunctions = O.OverflowedFunctions; 3490b57cec5SDimitry Andric return *this; 3500b57cec5SDimitry Andric } 3510b57cec5SDimitry Andric 3520b57cec5SDimitry Andric ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {} 3530b57cec5SDimitry Andric 3540b57cec5SDimitry Andric void enterFunction(const int32_t FId, uint64_t TSC, 3550b57cec5SDimitry Andric uint16_t CPU) XRAY_NEVER_INSTRUMENT { 3560b57cec5SDimitry Andric DCHECK_NE(FId, 0); 3570b57cec5SDimitry Andric 3580b57cec5SDimitry Andric // If we're already overflowed the function call stack, do not bother 3590b57cec5SDimitry Andric // attempting to record any more function entries. 3600b57cec5SDimitry Andric if (UNLIKELY(OverflowedFunctions)) { 3610b57cec5SDimitry Andric ++OverflowedFunctions; 3620b57cec5SDimitry Andric return; 3630b57cec5SDimitry Andric } 3640b57cec5SDimitry Andric 3650b57cec5SDimitry Andric // If this is the first function we've encountered, we want to set up the 3660b57cec5SDimitry Andric // node(s) and treat it as a root. 3670b57cec5SDimitry Andric if (UNLIKELY(ShadowStack.empty())) { 3680b57cec5SDimitry Andric auto *NewRoot = Nodes.AppendEmplace( 3690b57cec5SDimitry Andric nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId); 3700b57cec5SDimitry Andric if (UNLIKELY(NewRoot == nullptr)) 3710b57cec5SDimitry Andric return; 3720b57cec5SDimitry Andric if (Roots.AppendEmplace(NewRoot) == nullptr) { 3730b57cec5SDimitry Andric Nodes.trim(1); 3740b57cec5SDimitry Andric return; 3750b57cec5SDimitry Andric } 3760b57cec5SDimitry Andric if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) { 3770b57cec5SDimitry Andric Nodes.trim(1); 3780b57cec5SDimitry Andric Roots.trim(1); 3790b57cec5SDimitry Andric ++OverflowedFunctions; 3800b57cec5SDimitry Andric return; 3810b57cec5SDimitry Andric } 3820b57cec5SDimitry Andric return; 3830b57cec5SDimitry Andric } 3840b57cec5SDimitry Andric 3850b57cec5SDimitry Andric // From this point on, we require that the stack is not empty. 3860b57cec5SDimitry Andric DCHECK(!ShadowStack.empty()); 3870b57cec5SDimitry Andric auto TopNode = ShadowStack.back().NodePtr; 3880b57cec5SDimitry Andric DCHECK_NE(TopNode, nullptr); 3890b57cec5SDimitry Andric 3900b57cec5SDimitry Andric // If we've seen this callee before, then we access that node and place that 3910b57cec5SDimitry Andric // on the top of the stack. 3920b57cec5SDimitry Andric auto* Callee = TopNode->Callees.find_element( 3930b57cec5SDimitry Andric [FId](const NodeIdPair &NR) { return NR.FId == FId; }); 3940b57cec5SDimitry Andric if (Callee != nullptr) { 3950b57cec5SDimitry Andric CHECK_NE(Callee->NodePtr, nullptr); 3960b57cec5SDimitry Andric if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr) 3970b57cec5SDimitry Andric ++OverflowedFunctions; 3980b57cec5SDimitry Andric return; 3990b57cec5SDimitry Andric } 4000b57cec5SDimitry Andric 4010b57cec5SDimitry Andric // This means we've never seen this stack before, create a new node here. 4020b57cec5SDimitry Andric auto* NewNode = Nodes.AppendEmplace( 4030b57cec5SDimitry Andric TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId); 4040b57cec5SDimitry Andric if (UNLIKELY(NewNode == nullptr)) 4050b57cec5SDimitry Andric return; 4060b57cec5SDimitry Andric DCHECK_NE(NewNode, nullptr); 4070b57cec5SDimitry Andric TopNode->Callees.AppendEmplace(NewNode, FId); 4080b57cec5SDimitry Andric if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr) 4090b57cec5SDimitry Andric ++OverflowedFunctions; 4100b57cec5SDimitry Andric return; 4110b57cec5SDimitry Andric } 4120b57cec5SDimitry Andric 4130b57cec5SDimitry Andric void exitFunction(int32_t FId, uint64_t TSC, 4140b57cec5SDimitry Andric uint16_t CPU) XRAY_NEVER_INSTRUMENT { 4150b57cec5SDimitry Andric // If we're exiting functions that have "overflowed" or don't fit into the 4160b57cec5SDimitry Andric // stack due to allocator constraints, we then decrement that count first. 4170b57cec5SDimitry Andric if (OverflowedFunctions) { 4180b57cec5SDimitry Andric --OverflowedFunctions; 4190b57cec5SDimitry Andric return; 4200b57cec5SDimitry Andric } 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric // When we exit a function, we look up the ShadowStack to see whether we've 4230b57cec5SDimitry Andric // entered this function before. We do as little processing here as we can, 4240b57cec5SDimitry Andric // since most of the hard work would have already been done at function 4250b57cec5SDimitry Andric // entry. 4260b57cec5SDimitry Andric uint64_t CumulativeTreeTime = 0; 4270b57cec5SDimitry Andric 4280b57cec5SDimitry Andric while (!ShadowStack.empty()) { 4290b57cec5SDimitry Andric const auto &Top = ShadowStack.back(); 4300b57cec5SDimitry Andric auto TopNode = Top.NodePtr; 4310b57cec5SDimitry Andric DCHECK_NE(TopNode, nullptr); 4320b57cec5SDimitry Andric 4330b57cec5SDimitry Andric // We may encounter overflow on the TSC we're provided, which may end up 4340b57cec5SDimitry Andric // being less than the TSC when we first entered the function. 4350b57cec5SDimitry Andric // 4360b57cec5SDimitry Andric // To get the accurate measurement of cycles, we need to check whether 4370b57cec5SDimitry Andric // we've overflowed (TSC < Top.EntryTSC) and then account the difference 4380b57cec5SDimitry Andric // between the entry TSC and the max for the TSC counter (max of uint64_t) 4390b57cec5SDimitry Andric // then add the value of TSC. We can prove that the maximum delta we will 4400b57cec5SDimitry Andric // get is at most the 64-bit unsigned value, since the difference between 4410b57cec5SDimitry Andric // a TSC of 0 and a Top.EntryTSC of 1 is (numeric_limits<uint64_t>::max() 4420b57cec5SDimitry Andric // - 1) + 1. 4430b57cec5SDimitry Andric // 4440b57cec5SDimitry Andric // NOTE: This assumes that TSCs are synchronised across CPUs. 4450b57cec5SDimitry Andric // TODO: Count the number of times we've seen CPU migrations. 4460b57cec5SDimitry Andric uint64_t LocalTime = 4470b57cec5SDimitry Andric Top.EntryTSC > TSC 4480b57cec5SDimitry Andric ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC 4490b57cec5SDimitry Andric : TSC - Top.EntryTSC; 4500b57cec5SDimitry Andric TopNode->CallCount++; 4510b57cec5SDimitry Andric TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime; 4520b57cec5SDimitry Andric CumulativeTreeTime += LocalTime; 4530b57cec5SDimitry Andric ShadowStack.trim(1); 4540b57cec5SDimitry Andric 4550b57cec5SDimitry Andric // TODO: Update the histogram for the node. 4560b57cec5SDimitry Andric if (TopNode->FId == FId) 4570b57cec5SDimitry Andric break; 4580b57cec5SDimitry Andric } 4590b57cec5SDimitry Andric } 4600b57cec5SDimitry Andric 4610b57cec5SDimitry Andric const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; } 4620b57cec5SDimitry Andric 4630b57cec5SDimitry Andric // The deepCopyInto operation will update the provided FunctionCallTrie by 4640b57cec5SDimitry Andric // re-creating the contents of this particular FunctionCallTrie in the other 4650b57cec5SDimitry Andric // FunctionCallTrie. It will do this using a Depth First Traversal from the 4660b57cec5SDimitry Andric // roots, and while doing so recreating the traversal in the provided 4670b57cec5SDimitry Andric // FunctionCallTrie. 4680b57cec5SDimitry Andric // 4690b57cec5SDimitry Andric // This operation will *not* destroy the state in `O`, and thus may cause some 4700b57cec5SDimitry Andric // duplicate entries in `O` if it is not empty. 4710b57cec5SDimitry Andric // 4720b57cec5SDimitry Andric // This function is *not* thread-safe, and may require external 4730b57cec5SDimitry Andric // synchronisation of both "this" and |O|. 4740b57cec5SDimitry Andric // 4750b57cec5SDimitry Andric // This function must *not* be called with a non-empty FunctionCallTrie |O|. 4760b57cec5SDimitry Andric void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT { 4770b57cec5SDimitry Andric DCHECK(O.getRoots().empty()); 4780b57cec5SDimitry Andric 4790b57cec5SDimitry Andric // We then push the root into a stack, to use as the parent marker for new 4800b57cec5SDimitry Andric // nodes we push in as we're traversing depth-first down the call tree. 4810b57cec5SDimitry Andric struct NodeAndParent { 4820b57cec5SDimitry Andric FunctionCallTrie::Node *Node; 4830b57cec5SDimitry Andric FunctionCallTrie::Node *NewNode; 4840b57cec5SDimitry Andric }; 4850b57cec5SDimitry Andric using Stack = Array<NodeAndParent>; 4860b57cec5SDimitry Andric 4870b57cec5SDimitry Andric typename Stack::AllocatorType StackAllocator( 4880b57cec5SDimitry Andric profilingFlags()->stack_allocator_max); 4890b57cec5SDimitry Andric Stack DFSStack(StackAllocator); 4900b57cec5SDimitry Andric 4910b57cec5SDimitry Andric for (const auto Root : getRoots()) { 4920b57cec5SDimitry Andric // Add a node in O for this root. 4930b57cec5SDimitry Andric auto NewRoot = O.Nodes.AppendEmplace( 4940b57cec5SDimitry Andric nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount, 4950b57cec5SDimitry Andric Root->CumulativeLocalTime, Root->FId); 4960b57cec5SDimitry Andric 4970b57cec5SDimitry Andric // Because we cannot allocate more memory we should bail out right away. 4980b57cec5SDimitry Andric if (UNLIKELY(NewRoot == nullptr)) 4990b57cec5SDimitry Andric return; 5000b57cec5SDimitry Andric 5010b57cec5SDimitry Andric if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr)) 5020b57cec5SDimitry Andric return; 5030b57cec5SDimitry Andric 5040b57cec5SDimitry Andric // TODO: Figure out what to do if we fail to allocate any more stack 5050b57cec5SDimitry Andric // space. Maybe warn or report once? 5060b57cec5SDimitry Andric if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr) 5070b57cec5SDimitry Andric return; 5080b57cec5SDimitry Andric while (!DFSStack.empty()) { 5090b57cec5SDimitry Andric NodeAndParent NP = DFSStack.back(); 5100b57cec5SDimitry Andric DCHECK_NE(NP.Node, nullptr); 5110b57cec5SDimitry Andric DCHECK_NE(NP.NewNode, nullptr); 5120b57cec5SDimitry Andric DFSStack.trim(1); 5130b57cec5SDimitry Andric for (const auto Callee : NP.Node->Callees) { 5140b57cec5SDimitry Andric auto NewNode = O.Nodes.AppendEmplace( 5150b57cec5SDimitry Andric NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator), 5160b57cec5SDimitry Andric Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime, 5170b57cec5SDimitry Andric Callee.FId); 5180b57cec5SDimitry Andric if (UNLIKELY(NewNode == nullptr)) 5190b57cec5SDimitry Andric return; 5200b57cec5SDimitry Andric if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) == 5210b57cec5SDimitry Andric nullptr)) 5220b57cec5SDimitry Andric return; 5230b57cec5SDimitry Andric if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) == 5240b57cec5SDimitry Andric nullptr)) 5250b57cec5SDimitry Andric return; 5260b57cec5SDimitry Andric } 5270b57cec5SDimitry Andric } 5280b57cec5SDimitry Andric } 5290b57cec5SDimitry Andric } 5300b57cec5SDimitry Andric 5310b57cec5SDimitry Andric // The mergeInto operation will update the provided FunctionCallTrie by 5320b57cec5SDimitry Andric // traversing the current trie's roots and updating (i.e. merging) the data in 5330b57cec5SDimitry Andric // the nodes with the data in the target's nodes. If the node doesn't exist in 5340b57cec5SDimitry Andric // the provided trie, we add a new one in the right position, and inherit the 5350b57cec5SDimitry Andric // data from the original (current) trie, along with all its callees. 5360b57cec5SDimitry Andric // 5370b57cec5SDimitry Andric // This function is *not* thread-safe, and may require external 5380b57cec5SDimitry Andric // synchronisation of both "this" and |O|. 5390b57cec5SDimitry Andric void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT { 5400b57cec5SDimitry Andric struct NodeAndTarget { 5410b57cec5SDimitry Andric FunctionCallTrie::Node *OrigNode; 5420b57cec5SDimitry Andric FunctionCallTrie::Node *TargetNode; 5430b57cec5SDimitry Andric }; 5440b57cec5SDimitry Andric using Stack = Array<NodeAndTarget>; 5450b57cec5SDimitry Andric typename Stack::AllocatorType StackAllocator( 5460b57cec5SDimitry Andric profilingFlags()->stack_allocator_max); 5470b57cec5SDimitry Andric Stack DFSStack(StackAllocator); 5480b57cec5SDimitry Andric 5490b57cec5SDimitry Andric for (const auto Root : getRoots()) { 5500b57cec5SDimitry Andric Node *TargetRoot = nullptr; 5510b57cec5SDimitry Andric auto R = O.Roots.find_element( 5520b57cec5SDimitry Andric [&](const Node *Node) { return Node->FId == Root->FId; }); 5530b57cec5SDimitry Andric if (R == nullptr) { 5540b57cec5SDimitry Andric TargetRoot = O.Nodes.AppendEmplace( 5550b57cec5SDimitry Andric nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u, 5560b57cec5SDimitry Andric Root->FId); 5570b57cec5SDimitry Andric if (UNLIKELY(TargetRoot == nullptr)) 5580b57cec5SDimitry Andric return; 5590b57cec5SDimitry Andric 5600b57cec5SDimitry Andric O.Roots.Append(TargetRoot); 5610b57cec5SDimitry Andric } else { 5620b57cec5SDimitry Andric TargetRoot = *R; 5630b57cec5SDimitry Andric } 5640b57cec5SDimitry Andric 5650b57cec5SDimitry Andric DFSStack.AppendEmplace(Root, TargetRoot); 5660b57cec5SDimitry Andric while (!DFSStack.empty()) { 5670b57cec5SDimitry Andric NodeAndTarget NT = DFSStack.back(); 5680b57cec5SDimitry Andric DCHECK_NE(NT.OrigNode, nullptr); 5690b57cec5SDimitry Andric DCHECK_NE(NT.TargetNode, nullptr); 5700b57cec5SDimitry Andric DFSStack.trim(1); 5710b57cec5SDimitry Andric // TODO: Update the histogram as well when we have it ready. 5720b57cec5SDimitry Andric NT.TargetNode->CallCount += NT.OrigNode->CallCount; 5730b57cec5SDimitry Andric NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime; 5740b57cec5SDimitry Andric for (const auto Callee : NT.OrigNode->Callees) { 5750b57cec5SDimitry Andric auto TargetCallee = NT.TargetNode->Callees.find_element( 5760b57cec5SDimitry Andric [&](const FunctionCallTrie::NodeIdPair &C) { 5770b57cec5SDimitry Andric return C.FId == Callee.FId; 5780b57cec5SDimitry Andric }); 5790b57cec5SDimitry Andric if (TargetCallee == nullptr) { 5800b57cec5SDimitry Andric auto NewTargetNode = O.Nodes.AppendEmplace( 5810b57cec5SDimitry Andric NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u, 5820b57cec5SDimitry Andric Callee.FId); 5830b57cec5SDimitry Andric 5840b57cec5SDimitry Andric if (UNLIKELY(NewTargetNode == nullptr)) 5850b57cec5SDimitry Andric return; 5860b57cec5SDimitry Andric 5870b57cec5SDimitry Andric TargetCallee = 5880b57cec5SDimitry Andric NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId); 5890b57cec5SDimitry Andric } 5900b57cec5SDimitry Andric DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr); 5910b57cec5SDimitry Andric } 5920b57cec5SDimitry Andric } 5930b57cec5SDimitry Andric } 5940b57cec5SDimitry Andric } 5950b57cec5SDimitry Andric }; 5960b57cec5SDimitry Andric 5970b57cec5SDimitry Andric } // namespace __xray 5980b57cec5SDimitry Andric 5990b57cec5SDimitry Andric #endif // XRAY_FUNCTION_CALL_TRIE_H 600