//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <cstddef> // For std::byte.
#include <limits>
#include <memory> // For placement new.
#include <utility>

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function
/// and the path from the root to the node represents its stack trace. Each
/// node in the trie will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented
/// using an Array<NodeIdPair> -- each NodeIdPair instance will contain the
/// function ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ j' ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach because computing the hash every time we
/// need to update a function's call information, while handling the entry and
/// exit events, takes too much time.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact and
/// quick to update) to enable us to keep the overheads of this implementation
/// to a minimum.
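///
/// As a rough usage sketch (hedged: the function IDs and TSC/CPU values below
/// are made up for illustration; only the calls themselves mirror the API
/// declared in this class):
///
///   auto A = FunctionCallTrie::InitAllocators();
///   FunctionCallTrie Trie(A);
///   Trie.enterFunction(/*FId=*/1, /*TSC=*/100, /*CPU=*/0); // enter f()
///   Trie.enterFunction(/*FId=*/2, /*TSC=*/120, /*CPU=*/0); // f() calls g()
///   Trie.exitFunction(/*FId=*/2, /*TSC=*/180, /*CPU=*/0);  // g() returns
///   Trie.exitFunction(/*FId=*/1, /*TSC=*/200, /*CPU=*/0);  // f() returns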
class FunctionCallTrie {
public:
  struct Node;

  // We use a NodeIdPair type instead of a std::pair<...> so as not to rely on
  // the standard library types in this header.
  struct NodeIdPair {
    Node *NodePtr;
    int32_t FId;
  };

  using NodeIdPairArray = Array<NodeIdPair>;
  using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;

  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
  // number of times this node actually appeared, the cumulative amount of time
  // for this particular node including its children's call times, and just the
  // local time spent on this node. Each Node will have the ID of the XRay
  // instrumented function that it is associated with.
  struct Node {
    Node *Parent;
    NodeIdPairArray Callees;
    uint64_t CallCount;
    uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
    int32_t FId;

    // TODO: Include the compact histogram.
  };

private:
  struct ShadowStackEntry {
    uint64_t EntryTSC;
    Node *NodePtr;
    uint16_t EntryCPU;
  };

  using NodeArray = Array<Node>;
  using RootArray = Array<Node *>;
  using ShadowStackArray = Array<ShadowStackEntry>;

public:
  // We collate the allocators we need into a single struct, as a convenience
  // to allow us to initialize these as a group.
  struct Allocators {
    using NodeAllocatorType = NodeArray::AllocatorType;
    using RootAllocatorType = RootArray::AllocatorType;
    using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;

    // Use hosted aligned storage members to allow for trivial move and init.
    // This also allows us to sidestep the potentially-failing allocation
    // issue.
    alignas(NodeAllocatorType) std::byte
        NodeAllocatorStorage[sizeof(NodeAllocatorType)];
    alignas(RootAllocatorType) std::byte
        RootAllocatorStorage[sizeof(RootAllocatorType)];
    alignas(ShadowStackAllocatorType) std::byte
        ShadowStackAllocatorStorage[sizeof(ShadowStackAllocatorType)];
    alignas(NodeIdPairAllocatorType) std::byte
        NodeIdPairAllocatorStorage[sizeof(NodeIdPairAllocatorType)];

    NodeAllocatorType *NodeAllocator = nullptr;
    RootAllocatorType *RootAllocator = nullptr;
    ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
    NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

    Allocators() = default;
    Allocators(const Allocators &) = delete;
    Allocators &operator=(const Allocators &) = delete;

    struct Buffers {
      BufferQueue::Buffer NodeBuffer;
      BufferQueue::Buffer RootsBuffer;
      BufferQueue::Buffer ShadowStackBuffer;
      BufferQueue::Buffer NodeIdPairBuffer;
    };

    explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage)
          NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage)
          RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(
          B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(
          B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage) NodeAllocatorType(Max);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage) RootAllocatorType(Max);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // Here we rely on the safety of memcpy'ing contents of the storage
      // members, and then pointing the source pointers to nullptr.
      internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage,
                      sizeof(NodeAllocatorType));
      internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage,
                      sizeof(RootAllocatorType));
      internal_memcpy(&ShadowStackAllocatorStorage,
                      &O.ShadowStackAllocatorStorage,
                      sizeof(ShadowStackAllocatorType));
      internal_memcpy(&NodeIdPairAllocatorStorage,
                      &O.NodeIdPairAllocatorStorage,
                      sizeof(NodeIdPairAllocatorType));

      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);

      O.NodeAllocator = nullptr;
      O.RootAllocator = nullptr;
      O.ShadowStackAllocator = nullptr;
      O.NodeIdPairAllocator = nullptr;
    }

    Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // When moving into an existing instance, we ensure that we clean up the
      // current allocators.
      if (NodeAllocator)
        NodeAllocator->~NodeAllocatorType();
      if (O.NodeAllocator) {
        new (&NodeAllocatorStorage)
            NodeAllocatorType(std::move(*O.NodeAllocator));
        NodeAllocator =
            reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
        O.NodeAllocator = nullptr;
      } else {
        NodeAllocator = nullptr;
      }

      if (RootAllocator)
        RootAllocator->~RootAllocatorType();
      if (O.RootAllocator) {
        new (&RootAllocatorStorage)
            RootAllocatorType(std::move(*O.RootAllocator));
        RootAllocator =
            reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
        O.RootAllocator = nullptr;
      } else {
        RootAllocator = nullptr;
      }

      if (ShadowStackAllocator)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (O.ShadowStackAllocator) {
        new (&ShadowStackAllocatorStorage)
            ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator));
        ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
            &ShadowStackAllocatorStorage);
        O.ShadowStackAllocator = nullptr;
      } else {
        ShadowStackAllocator = nullptr;
      }

      if (NodeIdPairAllocator)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
      if (O.NodeIdPairAllocator) {
        new (&NodeIdPairAllocatorStorage)
            NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator));
        NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
            &NodeIdPairAllocatorStorage);
        O.NodeIdPairAllocator = nullptr;
      } else {
        NodeIdPairAllocator = nullptr;
      }

      return *this;
    }

    ~Allocators() XRAY_NEVER_INSTRUMENT {
      if (NodeAllocator != nullptr)
        NodeAllocator->~NodeAllocatorType();
      if (RootAllocator != nullptr)
        RootAllocator->~RootAllocatorType();
      if (ShadowStackAllocator != nullptr)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (NodeIdPairAllocator != nullptr)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
    }
  };

  static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
    return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
  }

  static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
    Allocators A(Max);
    return A;
  }

  static Allocators
  InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT {
    Allocators A(Bufs);
    return A;
  }
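
  // A minimal sketch of wiring buffer-backed allocators into a trie (hedged:
  // how the four BufferQueue::Buffer instances are acquired is elided here,
  // since that depends on the caller's BufferQueue setup):
  //
  //   FunctionCallTrie::Allocators::Buffers Bufs;
  //   // ... populate Bufs.NodeBuffer, Bufs.RootsBuffer,
  //   // Bufs.ShadowStackBuffer, and Bufs.NodeIdPairBuffer ...
  //   auto A = FunctionCallTrie::InitAllocatorsFromBuffers(Bufs);
  //   FunctionCallTrie Trie(A);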

private:
  NodeArray Nodes;
  RootArray Roots;
  ShadowStackArray ShadowStack;
  NodeIdPairAllocatorType *NodeIdPairAllocator;
  uint32_t OverflowedFunctions;

public:
  explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
      : Nodes(*A.NodeAllocator),
        Roots(*A.RootAllocator),
        ShadowStack(*A.ShadowStackAllocator),
        NodeIdPairAllocator(A.NodeIdPairAllocator),
        OverflowedFunctions(0) {}

  FunctionCallTrie() = delete;
  FunctionCallTrie(const FunctionCallTrie &) = delete;
  FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;

  FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
      : Nodes(std::move(O.Nodes)),
        Roots(std::move(O.Roots)),
        ShadowStack(std::move(O.ShadowStack)),
        NodeIdPairAllocator(O.NodeIdPairAllocator),
        OverflowedFunctions(O.OverflowedFunctions) {}

  FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
    Nodes = std::move(O.Nodes);
    Roots = std::move(O.Roots);
    ShadowStack = std::move(O.ShadowStack);
    NodeIdPairAllocator = O.NodeIdPairAllocator;
    OverflowedFunctions = O.OverflowedFunctions;
    return *this;
  }

  ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}

  void enterFunction(const int32_t FId, uint64_t TSC,
                     uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    DCHECK_NE(FId, 0);

    // If we've already overflowed the function call stack, do not bother
    // attempting to record any more function entries.
    if (UNLIKELY(OverflowedFunctions)) {
      ++OverflowedFunctions;
      return;
    }

    // If this is the first function we've encountered, we want to set up the
    // node(s) and treat it as a root.
    if (UNLIKELY(ShadowStack.empty())) {
      auto *NewRoot = Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
      if (UNLIKELY(NewRoot == nullptr))
        return;
      if (Roots.AppendEmplace(NewRoot) == nullptr) {
        Nodes.trim(1);
        return;
      }
      if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
        Nodes.trim(1);
        Roots.trim(1);
        ++OverflowedFunctions;
        return;
      }
      return;
    }

    // From this point on, we require that the stack is not empty.
    DCHECK(!ShadowStack.empty());
    auto TopNode = ShadowStack.back().NodePtr;
    DCHECK_NE(TopNode, nullptr);

    // If we've seen this callee before, then we access that node and place it
    // on top of the stack.
    auto *Callee = TopNode->Callees.find_element(
        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
    if (Callee != nullptr) {
      CHECK_NE(Callee->NodePtr, nullptr);
      if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
        ++OverflowedFunctions;
      return;
    }

    // This means we've never seen this stack before, so we create a new node
    // here.
    auto *NewNode = Nodes.AppendEmplace(
        TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
    if (UNLIKELY(NewNode == nullptr))
      return;
    DCHECK_NE(NewNode, nullptr);
    TopNode->Callees.AppendEmplace(NewNode, FId);
    if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
      ++OverflowedFunctions;
    return;
  }

  void exitFunction(int32_t FId, uint64_t TSC,
                    uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    // If we're exiting functions that have "overflowed", i.e. that didn't fit
    // into the stack due to allocator constraints, we decrement that count
    // first.
    if (OverflowedFunctions) {
      --OverflowedFunctions;
      return;
    }

    // When we exit a function, we look up the ShadowStack to see whether we've
    // entered this function before. We do as little processing here as we can,
    // since most of the hard work would have already been done at function
    // entry.
    uint64_t CumulativeTreeTime = 0;

    while (!ShadowStack.empty()) {
      const auto &Top = ShadowStack.back();
      auto TopNode = Top.NodePtr;
      DCHECK_NE(TopNode, nullptr);

      // We may encounter overflow on the TSC we're provided, which may end up
      // being less than the TSC when we first entered the function.
      //
      // To get an accurate measurement of cycles, we check whether we've
      // overflowed (TSC < Top.EntryTSC), and if so account for the difference
      // between the entry TSC and the maximum value of the TSC counter (max of
      // uint64_t), then add the value of TSC. The delta we compute this way is
      // at most the 64-bit unsigned maximum: the difference between a TSC of 0
      // and a Top.EntryTSC of 1 comes out to
      // (numeric_limits<uint64_t>::max() - 1) + 0.
      //
      // NOTE: This assumes that TSCs are synchronised across CPUs.
      // TODO: Count the number of times we've seen CPU migrations.
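      //
      // As a worked example (illustrative numbers only): if Top.EntryTSC is
      // (max - 10) and the counter has since wrapped so that TSC is 5, the
      // expression below yields (max - (max - 10)) + 5 == 15, close to the
      // exact modular delta of 16, rather than a huge bogus value from
      // unsigned underflow.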
      uint64_t LocalTime =
          Top.EntryTSC > TSC
              ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC
              : TSC - Top.EntryTSC;
      TopNode->CallCount++;
      TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
      CumulativeTreeTime += LocalTime;
      ShadowStack.trim(1);

      // TODO: Update the histogram for the node.
      if (TopNode->FId == FId)
        break;
    }
  }

  const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }

  // The deepCopyInto operation will update the provided FunctionCallTrie by
  // re-creating the contents of this particular FunctionCallTrie in the other
  // FunctionCallTrie. It does this with a depth-first traversal from the
  // roots, recreating each visited node in the provided FunctionCallTrie.
  //
  // This operation will *not* destroy the state in `O`, and thus may cause
  // duplicate entries in `O` if it is not empty.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  //
  // This function must *not* be called with a non-empty FunctionCallTrie |O|.
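  //
  // A hedged sketch of typical use (the trie and allocator names here are
  // hypothetical):
  //
  //   FunctionCallTrie Snapshot(SnapshotAllocators);
  //   ThreadLocalTrie.deepCopyInto(Snapshot); // Snapshot must start empty.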
  void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    DCHECK(O.getRoots().empty());

    // We push each root into a stack, to use as the parent marker for new
    // nodes we push in as we're traversing depth-first down the call tree.
    struct NodeAndParent {
      FunctionCallTrie::Node *Node;
      FunctionCallTrie::Node *NewNode;
    };
    using Stack = Array<NodeAndParent>;

    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      // Add a node in O for this root.
      auto NewRoot = O.Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount,
          Root->CumulativeLocalTime, Root->FId);

      // If we cannot allocate more memory, we bail out right away.
      if (UNLIKELY(NewRoot == nullptr))
        return;

      if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr))
        return;

      // TODO: Figure out what to do if we fail to allocate any more stack
      // space. Maybe warn or report once?
      if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr)
        return;
      while (!DFSStack.empty()) {
        NodeAndParent NP = DFSStack.back();
        DCHECK_NE(NP.Node, nullptr);
        DCHECK_NE(NP.NewNode, nullptr);
        DFSStack.trim(1);
        for (const auto Callee : NP.Node->Callees) {
          auto NewNode = O.Nodes.AppendEmplace(
              NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator),
              Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
              Callee.FId);
          if (UNLIKELY(NewNode == nullptr))
            return;
          if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) ==
                       nullptr))
            return;
          if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) ==
                       nullptr))
            return;
        }
      }
    }
  }

  // The mergeInto operation will update the provided FunctionCallTrie by
  // traversing the current trie's roots and updating (i.e. merging) the data
  // in the nodes with the data in the target's nodes. If the node doesn't
  // exist in the provided trie, we add a new one in the right position, and
  // inherit the data from the original (current) trie, along with all its
  // callees.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
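  //
  // For example (hypothetical trie names; each per-thread trie folds its
  // counts into one aggregate):
  //
  //   ThreadTrieA.mergeInto(Aggregate);
  //   ThreadTrieB.mergeInto(Aggregate);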
  void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    struct NodeAndTarget {
      FunctionCallTrie::Node *OrigNode;
      FunctionCallTrie::Node *TargetNode;
    };
    using Stack = Array<NodeAndTarget>;
    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      Node *TargetRoot = nullptr;
      auto R = O.Roots.find_element(
          [&](const Node *Node) { return Node->FId == Root->FId; });
      if (R == nullptr) {
        TargetRoot = O.Nodes.AppendEmplace(
            nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
            Root->FId);
        if (UNLIKELY(TargetRoot == nullptr))
          return;

        O.Roots.Append(TargetRoot);
      } else {
        TargetRoot = *R;
      }

      DFSStack.AppendEmplace(Root, TargetRoot);
      while (!DFSStack.empty()) {
        NodeAndTarget NT = DFSStack.back();
        DCHECK_NE(NT.OrigNode, nullptr);
        DCHECK_NE(NT.TargetNode, nullptr);
        DFSStack.trim(1);
        // TODO: Update the histogram as well when we have it ready.
        NT.TargetNode->CallCount += NT.OrigNode->CallCount;
        NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
        for (const auto Callee : NT.OrigNode->Callees) {
          auto TargetCallee = NT.TargetNode->Callees.find_element(
              [&](const FunctionCallTrie::NodeIdPair &C) {
                return C.FId == Callee.FId;
              });
          if (TargetCallee == nullptr) {
            auto NewTargetNode = O.Nodes.AppendEmplace(
                NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
                Callee.FId);

            if (UNLIKELY(NewTargetNode == nullptr))
              return;

            TargetCallee =
                NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
          }
          DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
        }
      }
    }
  }
};

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H