/*===- CtxInstrProfiling.h- Contextual instrumentation-based PGO ---------===*\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|* See https://llvm.org/LICENSE.txt for license information.
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/

#ifndef CTX_PROFILE_CTXINSTRPROFILING_H_
#define CTX_PROFILE_CTXINSTRPROFILING_H_

#include "CtxInstrContextNode.h"
#include "sanitizer_common/sanitizer_mutex.h"
#include <sanitizer/common_interface_defs.h>

using namespace llvm::ctx_profile;

// Forward-declare for the one unittest checking that Arena construction zeroes
// out its allocatable space.
class ArenaTest_ZeroInit_Test;
namespace __ctx_profile {

static constexpr size_t ExpectedAlignment = 8;
// We really depend on this, see further below. We currently support x86_64.
// When we want to support other archs, we need to trace the places Alignment
// is used and adjust accordingly.
static_assert(sizeof(void *) == ExpectedAlignment);

/// Arena (bump allocator) forming a linked list. Intentionally not thread
/// safe. Allocation and de-allocation happen using sanitizer APIs. We make
/// that explicit.
class Arena final {
public:
  // When allocating a new Arena, optionally specify an existing one to append
  // to, assumed to be the last in the Arena list. We only need to support
  // appending to the arena list.
  static Arena *allocateNewArena(size_t Size, Arena *Prev = nullptr);
  static void freeArenaList(Arena *&A);

  uint64_t size() const { return Size; }

  // Allocate S bytes or return nullptr if we don't have that many available.
  char *tryBumpAllocate(size_t S) {
    if (Pos + S > Size)
      return nullptr;
    Pos += S;
    return start() + (Pos - S);
  }

  Arena *next() const { return Next; }

  // The beginning of allocatable memory.
  const char *start() const { return const_cast<Arena *>(this)->start(); }
  const char *pos() const { return start() + Pos; }

private:
  friend class ::ArenaTest_ZeroInit_Test;
  explicit Arena(uint32_t Size);
  ~Arena() = delete;

  char *start() { return reinterpret_cast<char *>(&this[1]); }

  Arena *Next = nullptr;
  uint64_t Pos = 0;
  const uint64_t Size;
};

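// A minimal usage sketch for the Arena above, for orientation only - the Arena
// is internal to the runtime, and the sizes and the `Head`/`Current` variables
// below are hypothetical:
//
//   Arena *Head = Arena::allocateNewArena(4096);
//   Arena *Current = Head;
//   // Bump-allocate 64 bytes; the space following the Arena header is 8-byte
//   // aligned and zero-initialized at construction.
//   char *Mem = Current->tryBumpAllocate(64);
//   if (!Mem) {
//     // Not enough room left: chain a new Arena after the current (last) one.
//     Current = Arena::allocateNewArena(4096, Current);
//     Mem = Current->tryBumpAllocate(64);
//   }
//   // ... eventually release the whole list via the sanitizer APIs.
//   Arena::freeArenaList(Head);
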
// The memory available for allocation follows the Arena header, and we expect
// it to be thus aligned.
static_assert(alignof(Arena) == ExpectedAlignment);

// Verify maintenance to ContextNode doesn't change this invariant, which makes
// sure the inlined vectors are appropriately aligned.
static_assert(alignof(ContextNode) == ExpectedAlignment);

/// ContextRoots are allocated by LLVM for entrypoints. LLVM is only concerned
/// with allocating and zero-initializing the global value (as in, GlobalValue)
/// for it.
struct ContextRoot {
  ContextNode *FirstNode = nullptr;
  Arena *FirstMemBlock = nullptr;
  Arena *CurrentMem = nullptr;
  // This is init-ed by the static zero initializer in LLVM.
  // Taken is used to ensure only one thread traverses the contextual graph -
  // either to read it or to write it. On the server side, the same entrypoint
  // will be entered by numerous threads, but over time, the profile aggregated
  // by collecting sequentially on one thread at a time is expected to converge
  // to the aggregate profile that may have been observable on all the threads.
  // Note that this is node-by-node aggregation, i.e. summing counters of nodes
  // at the same position in the graph, not flattening.
  // Threads that cannot lock Taken (fail TryLock) are given a "scratch
  // context" - a buffer they can clobber, safely from a memory access
  // perspective.
  //
  // Note about "scratch"-ness: we currently ignore the data written in them
  // (which is anyway clobbered). The design allows for that not to be the
  // case - because "scratch"-ness is first and foremost about not trying to
  // build subcontexts, and is captured by tainting the pointer value (pointer
  // to the memory treated as context), but right now, we drop that info.
  //
  // We could consider relaxing the requirement that only one thread at a time
  // enters, by holding a few context trees per entrypoint and then aggregating
  // them (as explained above) at the end of the profile collection - it's a
  // tradeoff between collection time and memory use: higher precision can be
  // obtained with either fewer concurrent collections but more collection
  // time, or with more concurrent collections (==more memory) and less
  // collection time. Note that concurrent collection does happen for different
  // entrypoints, regardless.
  ::__sanitizer::StaticSpinMutex Taken;

  // If (unlikely) StaticSpinMutex internals change, we need to modify the LLVM
  // instrumentation lowering side because it is responsible for allocating and
  // zero-initializing ContextRoots.
  static_assert(sizeof(Taken) == 1);
};

/// This API is exposed for testing. See the APIs below about the contract with
/// LLVM.
inline bool isScratch(const void *Ctx) {
  return (reinterpret_cast<uint64_t>(Ctx) & 1);
}

} // namespace __ctx_profile

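// To illustrate the tainting convention isScratch checks: the runtime
// implementation presumably marks a scratch buffer by setting the LSB of the
// pointer it hands out, roughly as the hypothetical helper below does (only
// isScratch above is part of this header). Consumers then use the taint bit to
// decide whether to build subcontexts or treat the memory purely as a
// clobberable scratch buffer.
//
//   ContextNode *markAsScratch(const ContextNode *Ctx) {
//     return reinterpret_cast<ContextNode *>(reinterpret_cast<uint64_t>(Ctx) |
//                                            1);
//   }
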
extern "C" {

// LLVM fills these in when lowering a llvm.instrprof.callsite intrinsic.
// Position 0 is used when the current context isn't scratch, 1 when it is.
// They are volatile because of signal handlers - we mean to specifically
// control when the data is loaded.
//
/// TLS where LLVM stores the pointer of the called value, as part of lowering
/// a llvm.instrprof.callsite
extern __thread void *volatile __llvm_ctx_profile_expected_callee[2];
/// TLS where LLVM stores the pointer inside a caller's subcontexts vector that
/// corresponds to the callsite being lowered.
extern __thread ContextNode **volatile __llvm_ctx_profile_callsite[2];

// __llvm_ctx_profile_current_context_root is exposed for unit testing,
// otherwise it's only used internally by compiler-rt/ctx_profile.
extern __thread __ctx_profile::ContextRoot
    *volatile __llvm_ctx_profile_current_context_root;

/// called by LLVM in the entry BB of an "entry point" function. The returned
/// pointer may be "tainted" - its LSB set to 1 - to indicate it's scratch.
ContextNode *__llvm_ctx_profile_start_context(__ctx_profile::ContextRoot *Root,
                                              GUID Guid, uint32_t Counters,
                                              uint32_t Callsites);

/// paired with __llvm_ctx_profile_start_context, and called at the exit of the
/// entry point function.
void __llvm_ctx_profile_release_context(__ctx_profile::ContextRoot *Root);

/// called for any function other than entry points, in the entry BB of such a
/// function. Same consideration about the LSB of the returned value as for
/// ..._start_context
ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
                                            uint32_t NrCounters,
                                            uint32_t NrCallsites);

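// A conceptual, hand-written sketch of how instrumented code is expected to
// use the contract above. This is not the actual LLVM lowering; MyRoot,
// EntryGuid, CalleeGuid, NumCounters, NumCallsites and CallsiteIdx are
// placeholders, and ContextNode::subContexts() comes from
// CtxInstrContextNode.h.
//
//   __ctx_profile::ContextRoot MyRoot; // zero-initialized global
//
//   void callee() {
//     ContextNode *Ctx = __llvm_ctx_profile_get_context(
//         reinterpret_cast<void *>(&callee), CalleeGuid, NumCounters,
//         NumCallsites);
//     // ... bump counters off Ctx (which may be scratch, i.e. LSB-tainted).
//   }
//
//   void entrypoint() {
//     ContextNode *Ctx = __llvm_ctx_profile_start_context(
//         &MyRoot, EntryGuid, NumCounters, NumCallsites);
//     // Before an instrumented callsite, publish the expected callee and the
//     // slot in this context's subcontexts vector; index 0 is used when Ctx
//     // is a real context, index 1 when it is scratch.
//     __llvm_ctx_profile_expected_callee[0] =
//         reinterpret_cast<void *>(&callee);
//     __llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[CallsiteIdx];
//     callee();
//     __llvm_ctx_profile_release_context(&MyRoot);
//   }
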
/// Prepares for collection. Currently this resets counter values but preserves
/// internal context tree structure.
void __llvm_ctx_profile_start_collection();

/// Completely free allocated memory.
void __llvm_ctx_profile_free();

/// Used to obtain the profile. The Writer is called for each root ContextNode,
/// with the ContextRoot::Taken taken. The Writer is responsible for traversing
/// the structure underneath.
/// The Writer's first parameter plays the role of closure for Writer, and is
/// what the caller of __llvm_ctx_profile_fetch passes as the Data parameter.
/// The second parameter is the root of a context tree.
bool __llvm_ctx_profile_fetch(void *Data,
                              bool (*Writer)(void *, const ContextNode &));
}
#endif // CTX_PROFILE_CTXINSTRPROFILING_H_
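
// A minimal collection-side sketch, for orientation only. ProfileSink,
// writeOneRoot and collectProfile are hypothetical; only the extern "C"
// functions above are part of the contract.
//
//   struct ProfileSink { /* wherever the serialized profile goes */ };
//
//   bool writeOneRoot(void *Data, const ContextNode &Root) {
//     auto *Sink = static_cast<ProfileSink *>(Data);
//     // Walk Root's counters and subcontexts and serialize them into *Sink.
//     (void)Sink;
//     return true; // a false return presumably signals failure to the caller
//   }
//
//   void collectProfile() {
//     __llvm_ctx_profile_start_collection(); // reset counters, keep the trees
//     // ... let the instrumented workload run for a while ...
//     ProfileSink Sink;
//     __llvm_ctx_profile_fetch(&Sink, writeOneRoot);
//   }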