xref: /freebsd-src/contrib/llvm-project/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1*0fca6ea1SDimitry Andric /*===- CtxInstrProfiling.h- Contextual instrumentation-based PGO  ---------===*\
2*0fca6ea1SDimitry Andric |*
3*0fca6ea1SDimitry Andric |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0fca6ea1SDimitry Andric |* See https://llvm.org/LICENSE.txt for license information.
5*0fca6ea1SDimitry Andric |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0fca6ea1SDimitry Andric |*
7*0fca6ea1SDimitry Andric \*===----------------------------------------------------------------------===*/
8*0fca6ea1SDimitry Andric 
9*0fca6ea1SDimitry Andric #ifndef CTX_PROFILE_CTXINSTRPROFILING_H_
10*0fca6ea1SDimitry Andric #define CTX_PROFILE_CTXINSTRPROFILING_H_
11*0fca6ea1SDimitry Andric 
12*0fca6ea1SDimitry Andric #include "CtxInstrContextNode.h"
13*0fca6ea1SDimitry Andric #include "sanitizer_common/sanitizer_mutex.h"
14*0fca6ea1SDimitry Andric #include <sanitizer/common_interface_defs.h>
15*0fca6ea1SDimitry Andric 
16*0fca6ea1SDimitry Andric using namespace llvm::ctx_profile;
17*0fca6ea1SDimitry Andric 
18*0fca6ea1SDimitry Andric // Forward-declare for the one unittest checking Arena construction zeroes out
19*0fca6ea1SDimitry Andric // its allocatable space.
20*0fca6ea1SDimitry Andric class ArenaTest_ZeroInit_Test;
21*0fca6ea1SDimitry Andric namespace __ctx_profile {
22*0fca6ea1SDimitry Andric 
// Alignment, in bytes, expected for the Arena header and for everything
// allocated out of an Arena; it equals the pointer size on the one target we
// support.
static constexpr size_t ExpectedAlignment = 8;
// We really depend on this, see further below. We currently support x86_64.
// When we want to support other archs, we need to trace the places Alignment is
// used and adjust accordingly.
static_assert(sizeof(void *) == ExpectedAlignment);
28*0fca6ea1SDimitry Andric 
29*0fca6ea1SDimitry Andric /// Arena (bump allocator) forming a linked list. Intentionally not thread safe.
30*0fca6ea1SDimitry Andric /// Allocation and de-allocation happen using sanitizer APIs. We make that
31*0fca6ea1SDimitry Andric /// explicit.
32*0fca6ea1SDimitry Andric class Arena final {
33*0fca6ea1SDimitry Andric public:
34*0fca6ea1SDimitry Andric   // When allocating a new Arena, optionally specify an existing one to append
35*0fca6ea1SDimitry Andric   // to, assumed to be the last in the Arena list. We only need to support
36*0fca6ea1SDimitry Andric   // appending to the arena list.
37*0fca6ea1SDimitry Andric   static Arena *allocateNewArena(size_t Size, Arena *Prev = nullptr);
38*0fca6ea1SDimitry Andric   static void freeArenaList(Arena *&A);
39*0fca6ea1SDimitry Andric 
40*0fca6ea1SDimitry Andric   uint64_t size() const { return Size; }
41*0fca6ea1SDimitry Andric 
42*0fca6ea1SDimitry Andric   // Allocate S bytes or return nullptr if we don't have that many available.
43*0fca6ea1SDimitry Andric   char *tryBumpAllocate(size_t S) {
44*0fca6ea1SDimitry Andric     if (Pos + S > Size)
45*0fca6ea1SDimitry Andric       return nullptr;
46*0fca6ea1SDimitry Andric     Pos += S;
47*0fca6ea1SDimitry Andric     return start() + (Pos - S);
48*0fca6ea1SDimitry Andric   }
49*0fca6ea1SDimitry Andric 
50*0fca6ea1SDimitry Andric   Arena *next() const { return Next; }
51*0fca6ea1SDimitry Andric 
52*0fca6ea1SDimitry Andric   // the beginning of allocatable memory.
53*0fca6ea1SDimitry Andric   const char *start() const { return const_cast<Arena *>(this)->start(); }
54*0fca6ea1SDimitry Andric   const char *pos() const { return start() + Pos; }
55*0fca6ea1SDimitry Andric 
56*0fca6ea1SDimitry Andric private:
57*0fca6ea1SDimitry Andric   friend class ::ArenaTest_ZeroInit_Test;
58*0fca6ea1SDimitry Andric   explicit Arena(uint32_t Size);
59*0fca6ea1SDimitry Andric   ~Arena() = delete;
60*0fca6ea1SDimitry Andric 
61*0fca6ea1SDimitry Andric   char *start() { return reinterpret_cast<char *>(&this[1]); }
62*0fca6ea1SDimitry Andric 
63*0fca6ea1SDimitry Andric   Arena *Next = nullptr;
64*0fca6ea1SDimitry Andric   uint64_t Pos = 0;
65*0fca6ea1SDimitry Andric   const uint64_t Size;
66*0fca6ea1SDimitry Andric };
67*0fca6ea1SDimitry Andric 
// The memory available for allocation follows the Arena header, and we expect
// it to be thus aligned.
static_assert(alignof(Arena) == ExpectedAlignment);

// Verify maintenance to ContextNode doesn't change this invariant, which makes
// sure the inlined vectors are appropriately aligned.
static_assert(alignof(ContextNode) == ExpectedAlignment);
75*0fca6ea1SDimitry Andric 
/// ContextRoots are allocated by LLVM for entrypoints. LLVM is only concerned
/// with allocating and zero-initializing the global value (as in, GlobalValue)
/// for it.
/// NOTE(layout): field order and sizes are part of the contract with the LLVM
/// lowering side, which zero-initializes this struct — do not reorder members.
struct ContextRoot {
  // Root node of the contextual tree collected under this entrypoint.
  ContextNode *FirstNode = nullptr;
  // Head of the arena list backing this tree's allocations.
  Arena *FirstMemBlock = nullptr;
  // Arena currently used for new allocations — the last one in the list.
  Arena *CurrentMem = nullptr;
  // This is init-ed by the static zero initializer in LLVM.
  // Taken is used to ensure only one thread traverses the contextual graph -
  // either to read it or to write it. On server side, the same entrypoint will
  // be entered by numerous threads, but over time, the profile aggregated by
  // collecting sequentially on one thread at a time is expected to converge to
  // the aggregate profile that may have been observable on all the threads.
  // Note that this is node-by-node aggregation, i.e. summing counters of nodes
  // at the same position in the graph, not flattening.
  // Threads that cannot lock Taken (fail TryLock) are given a "scratch context"
  // - a buffer they can clobber, safely from a memory access perspective.
  //
  // Note about "scratch"-ness: we currently ignore the data written in them
  // (which is anyway clobbered). The design allows for that not be the case -
  // because "scratch"-ness is first and foremost about not trying to build
  // subcontexts, and is captured by tainting the pointer value (pointer to the
  // memory treated as context), but right now, we drop that info.
  //
  // We could consider relaxing the requirement of more than one thread
  // entering by holding a few context trees per entrypoint and then aggregating
  // them (as explained above) at the end of the profile collection - it's a
  // tradeoff between collection time and memory use: higher precision can be
  // obtained with either less concurrent collections but more collection time,
  // or with more concurrent collections (==more memory) and less collection
  // time. Note that concurrent collection does happen for different
  // entrypoints, regardless.
  ::__sanitizer::StaticSpinMutex Taken;

  // If (unlikely) StaticSpinMutex internals change, we need to modify the LLVM
  // instrumentation lowering side because it is responsible for allocating and
  // zero-initializing ContextRoots.
  static_assert(sizeof(Taken) == 1);
};
115*0fca6ea1SDimitry Andric 
/// This API is exposed for testing. See the APIs below about the contract with
/// LLVM.
/// A context pointer is "scratch" when its least significant bit is set
/// (tainted); real contexts are 8-byte aligned, so the bit is otherwise free.
inline bool isScratch(const void *Ctx) {
  const auto Taint = reinterpret_cast<uint64_t>(Ctx);
  return (Taint & 1ULL) != 0;
}
121*0fca6ea1SDimitry Andric 
122*0fca6ea1SDimitry Andric } // namespace __ctx_profile
123*0fca6ea1SDimitry Andric 
extern "C" {

// LLVM fills these in when lowering a llvm.instrprof.callsite intrinsic.
// position 0 is used when the current context isn't scratch, 1 when it is. They
// are volatile because of signal handlers - we mean to specifically control
// when the data is loaded.
//
/// TLS where LLVM stores the pointer of the called value, as part of lowering a
/// llvm.instrprof.callsite
extern __thread void *volatile __llvm_ctx_profile_expected_callee[2];
/// TLS where LLVM stores the pointer inside a caller's subcontexts vector that
/// corresponds to the callsite being lowered.
extern __thread ContextNode **volatile __llvm_ctx_profile_callsite[2];

// __llvm_ctx_profile_current_context_root is exposed for unit testing,
// otherwise it's only used internally by compiler-rt/ctx_profile.
extern __thread __ctx_profile::ContextRoot
    *volatile __llvm_ctx_profile_current_context_root;

/// called by LLVM in the entry BB of a "entry point" function. The returned
/// pointer may be "tainted" - its LSB set to 1 - to indicate it's scratch.
/// (see isScratch above for the testing-side check of that taint bit)
ContextNode *__llvm_ctx_profile_start_context(__ctx_profile::ContextRoot *Root,
                                              GUID Guid, uint32_t Counters,
                                              uint32_t Callsites);

/// paired with __llvm_ctx_profile_start_context, and called at the exit of the
/// entry point function.
void __llvm_ctx_profile_release_context(__ctx_profile::ContextRoot *Root);

/// called for any other function than entry points, in the entry BB of such
/// function. Same consideration about LSB of returned value as .._start_context
ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
                                            uint32_t NrCounters,
                                            uint32_t NrCallsites);

/// Prepares for collection. Currently this resets counter values but preserves
/// internal context tree structure.
void __llvm_ctx_profile_start_collection();

/// Completely free allocated memory.
void __llvm_ctx_profile_free();

/// Used to obtain the profile. The Writer is called for each root ContextNode,
/// with the ContextRoot::Taken taken. The Writer is responsible for traversing
/// the structure underneath.
/// The Writer's first parameter plays the role of closure for Writer, and is
/// what the caller of __llvm_ctx_profile_fetch passes as the Data parameter.
/// The second parameter is the root of a context tree.
/// Returns false if any root's write was aborted (e.g. by the Writer).
bool __llvm_ctx_profile_fetch(void *Data,
                              bool (*Writer)(void *, const ContextNode &));
}
175*0fca6ea1SDimitry Andric #endif // CTX_PROFILE_CTXINSTRPROFILING_H_
176