xref: /llvm-project/llvm/lib/Support/rpmalloc/rpmalloc.h (revision 67226bad150785f64efcf53c79b7785d421fc8eb)
1 //===---------------------- rpmalloc.h ------------------*- C -*-=============//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This library provides a cross-platform lock free thread caching malloc
10 // implementation in C11.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #pragma once
15 
16 #include <stddef.h>
17 
18 #ifdef __cplusplus
19 extern "C" {
20 #endif
21 
22 #if defined(__clang__) || defined(__GNUC__) // Clang, or any compiler defining __GNUC__
23 #define RPMALLOC_EXPORT __attribute__((visibility("default")))
24 #define RPMALLOC_ALLOCATOR // empty here; only the _MSC_VER branch gives it a value
25 #if (defined(__clang_major__) && (__clang_major__ < 4)) ||                     \
26     (defined(__GNUC__) && defined(ENABLE_PRELOAD) && ENABLE_PRELOAD)
27 #define RPMALLOC_ATTRIB_MALLOC // clang < 4 or ENABLE_PRELOAD: attributes expand to nothing
28 #define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
29 #define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
30 #else // otherwise expand to the GCC-style malloc / alloc_size attributes
31 #define RPMALLOC_ATTRIB_MALLOC __attribute__((__malloc__))
32 #define RPMALLOC_ATTRIB_ALLOC_SIZE(size) __attribute__((alloc_size(size)))
33 #define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)                               \
34   __attribute__((alloc_size(count, size)))
35 #endif // clang < 4 or ENABLE_PRELOAD
36 #define RPMALLOC_CDECL
37 #elif defined(_MSC_VER) // _MSC_VER builds: __declspec annotations and explicit __cdecl
38 #define RPMALLOC_EXPORT
39 #define RPMALLOC_ALLOCATOR __declspec(allocator) __declspec(restrict)
40 #define RPMALLOC_ATTRIB_MALLOC
41 #define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
42 #define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
43 #define RPMALLOC_CDECL __cdecl
44 #else // any other compiler: every annotation macro is empty
45 #define RPMALLOC_EXPORT
46 #define RPMALLOC_ALLOCATOR
47 #define RPMALLOC_ATTRIB_MALLOC
48 #define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
49 #define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
50 #define RPMALLOC_CDECL
51 #endif // compiler detection
52 
53 //! Define RPMALLOC_CONFIGURABLE to 1 to enable configuring sizes (the page_size
54 //  and span_size fields of rpmalloc_config_t). This introduces a very small
55 //  overhead because some size calculations are no longer compile-time constants.
56 #ifndef RPMALLOC_CONFIGURABLE
57 #define RPMALLOC_CONFIGURABLE 0 // default when the build does not define it
58 #endif
59 
60 //! Define RPMALLOC_FIRST_CLASS_HEAPS to a non-zero value to enable the heap
61 //! based API (the rpmalloc_heap_* functions).
62 //  Will introduce a very small overhead to track fully allocated spans in heaps
63 #ifndef RPMALLOC_FIRST_CLASS_HEAPS
64 #define RPMALLOC_FIRST_CLASS_HEAPS 0 // default: the heap API is not declared
65 #endif
66 
67 //! Flag to rpaligned_realloc: do not preserve the block content on reallocation
68 #define RPMALLOC_NO_PRESERVE 1
69 //! Flag to rpaligned_realloc: fail and return a null pointer if the block
70 //! cannot be grown in-place,
71 //  in which case the original pointer is still valid (just like a call to
72 //  realloc which fails to allocate a new block).
73 #define RPMALLOC_GROW_OR_FAIL 2
74 
75 typedef struct rpmalloc_global_statistics_t { // output of rpmalloc_global_statistics()
76   //! Current amount of virtual memory mapped, not all of which may have been
77   //! committed (only if ENABLE_STATISTICS=1)
78   size_t mapped;
79   //! Peak amount of virtual memory mapped, not all of which may have been
80   //! committed (only if ENABLE_STATISTICS=1)
81   size_t mapped_peak;
82   //! Current amount of memory in global caches for small and medium sizes
83   //! (<32KiB)
84   size_t cached;
85   //! Current amount of memory allocated in huge allocations, i.e. larger than
86   //! LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
87   size_t huge_alloc;
88   //! Peak amount of memory allocated in huge allocations, i.e. larger than
89   //! LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
90   size_t huge_alloc_peak;
91   //! Total amount of memory mapped since initialization (only if
92   //! ENABLE_STATISTICS=1)
93   size_t mapped_total;
94   //! Total amount of memory unmapped since initialization (only if
95   //! ENABLE_STATISTICS=1)
96   size_t unmapped_total;
97 } rpmalloc_global_statistics_t;
98 
99 typedef struct rpmalloc_thread_statistics_t { // output of rpmalloc_thread_statistics()
100   //! Current number of bytes available in thread size class caches for small
101   //! and medium sizes (<32KiB)
102   size_t sizecache;
103   //! Current number of bytes available in thread span caches for small and
104   //! medium sizes (<32KiB)
105   size_t spancache;
106   //! Total number of bytes transitioned from thread cache to global cache (only
107   //! if ENABLE_STATISTICS=1)
108   size_t thread_to_global;
109   //! Total number of bytes transitioned from global cache to thread cache (only
110   //! if ENABLE_STATISTICS=1)
111   size_t global_to_thread;
112   //! Per span count statistics (only if ENABLE_STATISTICS=1)
113   struct {
114     //! Number of spans currently in use
115     size_t current;
116     //! High water mark of spans in use
117     size_t peak;
118     //! Number of spans transitioned to global cache
119     size_t to_global;
120     //! Number of spans transitioned from global cache
121     size_t from_global;
122     //! Number of spans transitioned to thread cache
123     size_t to_cache;
124     //! Number of spans transitioned from thread cache
125     size_t from_cache;
126     //! Number of spans transitioned to reserved state
127     size_t to_reserved;
128     //! Number of spans transitioned from reserved state
129     size_t from_reserved;
130     //! Number of raw memory map calls (not hitting the reserve spans but
131     //! resulting in actual OS mmap calls)
132     size_t map_calls;
133   } span_use[64]; // NOTE(review): presumably indexed by span count - confirm
134   //! Per size class statistics (only if ENABLE_STATISTICS=1)
135   struct {
136     //! Current number of allocations
137     size_t alloc_current;
138     //! Peak number of allocations
139     size_t alloc_peak;
140     //! Total number of allocations
141     size_t alloc_total;
142     //! Total number of frees
143     size_t free_total;
144     //! Number of spans transitioned to cache
145     size_t spans_to_cache;
146     //! Number of spans transitioned from cache
147     size_t spans_from_cache;
148     //! Number of spans transitioned from reserved state
149     size_t spans_from_reserved;
150     //! Number of raw memory map calls (not hitting the reserve spans but
151     //! resulting in actual OS mmap calls)
152     size_t map_calls;
153   } size_use[128]; // NOTE(review): presumably indexed by size class - confirm
154 } rpmalloc_thread_statistics_t;
155 
156 typedef struct rpmalloc_config_t { // consumed by rpmalloc_initialize_config()
157   //! Map memory pages for the given number of bytes. The returned address MUST
158   //! be aligned to the rpmalloc span size, which will always be a power of two.
159   //  Optionally the function can store an alignment offset in the offset
160   //  variable in case it performs alignment and the returned pointer is offset
161   //  from the actual start of the memory region due to this alignment. The
162   //  alignment offset will be passed to the memory unmap function. The
163   //  alignment offset MUST NOT be larger than 65535 (storable in a uint16_t);
164   //  if it is, you must use natural alignment to shift it into 16 bits.
165   //  If you set a memory_map function, you must also set a memory_unmap
166   //  function or else the default implementation will be used for both.
167   //  This function must be thread safe: it can be called by multiple threads
168   //  simultaneously.
169   void *(*memory_map)(size_t size, size_t *offset);
170   //! Unmap the memory pages starting at address and spanning the given number
171   //! of bytes. offset is the alignment offset stored by memory_map, if any.
172   //  If release is set to non-zero, the unmap is for an entire span range as
173   //  returned by a previous call to memory_map, and the entire range should
174   //  be released. The release argument holds the size of the entire span range.
175   //  If release is set to 0, the unmap is a partial decommit of a subset of the
176   //  mapped memory range. If you set a memory_unmap function, you must also set
177   //  a memory_map function or else the default implementation will be used for
178   //  both. This function must be thread safe: it can be called by multiple
179   //  threads simultaneously.
180   void (*memory_unmap)(void *address, size_t size, size_t offset,
181                        size_t release);
182   //! Called with a message when an assert fails, if asserts are enabled.
183   //! The standard assert() is used instead
184   //  if this is not set.
185   void (*error_callback)(const char *message);
186   //! Called when a call to map memory pages fails (out of memory).
187   //! If this callback is not set or returns zero, the library will return a
188   //  null pointer in the allocation call. If this callback returns non-zero the
189   //  map call will be retried. The argument passed is the number of bytes that
190   //  was requested in the map call.
191   //  Only used if the default system memory map function is used
192   //  (memory_map callback is not set).
193   int (*map_fail_callback)(size_t size);
194   //! Size of memory pages. The page size MUST be a power of two.
195   //! All memory mapping requests to memory_map will be made with size set to a
196   //  multiple of the page size.
197   //  Used if RPMALLOC_CONFIGURABLE is defined to 1, otherwise the system page
198   //  size is used.
199   size_t page_size;
200   //! Size of a span of memory blocks. MUST be a power of two in the range
201   //! [4096,262144], or 0.
202   //  Set to 0 to use the default span size.
203   //  Used if RPMALLOC_CONFIGURABLE is defined to 1.
204   size_t span_size;
205   //! Number of spans to map at each request to map new virtual memory blocks.
206   //! This can be used to minimize the system call overhead at the cost of
207   //  virtual memory address space.
208   //  The extra mapped pages will not be written until actually used, so physical
209   //  committed memory should not be affected in the default implementation.
210   //  Will be aligned to a multiple of spans that match memory page size in case
211   //  of huge pages.
212   size_t span_map_count;
213   //! Enable use of large/huge pages. If this flag is set to non-zero and
214   //! page_size is zero, the allocator will try to enable huge pages and auto
215   //  detect the configuration. If this is set to non-zero and page_size is also
216   //  non-zero, the allocator will assume huge pages have been configured and
217   //  enabled prior to initializing the allocator.
218   //  For Windows, see
219   //  https://docs.microsoft.com/en-us/windows/desktop/memory/large-page-support
220   //  For Linux, see https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
221   int enable_huge_pages;
222   //! Names given to regular page and huge page allocations, respectively, on
223   //  systems that support it, to help distinguish among anonymous regions.
224   const char *page_name;
225   const char *huge_page_name;
226 } rpmalloc_config_t;
227 
228 //! Initialize the allocator with the default configuration
229 RPMALLOC_EXPORT int rpmalloc_initialize(void);
230 
231 //! Initialize the allocator with the given configuration (see rpmalloc_config_t)
232 RPMALLOC_EXPORT int rpmalloc_initialize_config(const rpmalloc_config_t *config);
233 
234 //! Get the allocator configuration (returned as a pointer to const)
235 RPMALLOC_EXPORT const rpmalloc_config_t *rpmalloc_config(void);
236 
237 //! Finalize the allocator
238 RPMALLOC_EXPORT void rpmalloc_finalize(void);
239 
240 //! Initialize the allocator for the calling thread
241 RPMALLOC_EXPORT void rpmalloc_thread_initialize(void);
242 
243 //! Finalize the allocator for the calling thread (NOTE(review): release_caches presumably selects cache release - confirm)
244 RPMALLOC_EXPORT void rpmalloc_thread_finalize(int release_caches);
245 
246 //! Perform deferred deallocations pending for the calling thread's heap
247 RPMALLOC_EXPORT void rpmalloc_thread_collect(void);
248 
249 //! Query if the allocator is initialized for the calling thread
250 RPMALLOC_EXPORT int rpmalloc_is_thread_initialized(void);
251 
252 //! Get per-thread statistics into *stats (see rpmalloc_thread_statistics_t)
253 RPMALLOC_EXPORT void
254 rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats);
255 
256 //! Get global statistics into *stats (see rpmalloc_global_statistics_t)
257 RPMALLOC_EXPORT void
258 rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats);
259 
260 //! Dump all statistics in human readable format to file (should be a FILE*, passed as void*)
261 RPMALLOC_EXPORT void rpmalloc_dump_statistics(void *file);
262 
263 //! Allocate a memory block of at least the given size (alloc_size: parameter 1)
264 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
265 rpmalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1);
266 
267 //! Free the given memory block
268 RPMALLOC_EXPORT void rpfree(void *ptr);
269 
270 //! Allocate a zero-initialized memory block of at least num * size bytes
271 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
272 rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC
273     RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2);
274 
275 //! Reallocate the given block to at least the given size
276 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
277 rprealloc(void *ptr, size_t size) // no __malloc__: a reallocated block may hold live pointers
278     RPMALLOC_ATTRIB_ALLOC_SIZE(2);
279 
280 //! Reallocate the given block to at least the given size and alignment, with
281 //  optional control flags (RPMALLOC_NO_PRESERVE, RPMALLOC_GROW_OR_FAIL).
282 //  Alignment must be a power of two and a multiple of sizeof(void*),
283 //  and should ideally be less than memory page size. A caveat of rpmalloc
284 //  internals is that this must also be strictly less than the span size
285 //  (default 64KiB). Not marked __malloc__: the result may hold live pointers.
286 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
287 rpaligned_realloc(void *ptr, size_t alignment, size_t size, size_t oldsize,
288                   unsigned int flags)
289     RPMALLOC_ATTRIB_ALLOC_SIZE(3);
290 
291 //! Allocate a memory block of at least the given size and alignment.
292 //  Alignment must be a power of two and a multiple of sizeof(void*),
293 //  and should ideally be less than the memory page size. A caveat of rpmalloc
294 //  internals is that this must also be strictly less than the span size
295 //  (default 64KiB)
296 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
297 rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC
298     RPMALLOC_ATTRIB_ALLOC_SIZE(2);
299 
300 //! Allocate a zero-initialized memory block of at least num * size bytes with
301 //! the given alignment.
302 //  Alignment must be a power of two and a multiple of sizeof(void*),
303 //  and should ideally be less than the memory page size. A caveat of rpmalloc
304 //  internals is that this must also be strictly less than the span size
305 //  (default 64KiB)
306 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
307 rpaligned_calloc(size_t alignment, size_t num,
308                  size_t size) RPMALLOC_ATTRIB_MALLOC
309     RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
310 
311 //! Allocate a memory block of at least the given size and alignment.
312 //  Alignment must be a power of two and a multiple of sizeof(void*),
313 //  and should ideally be less than the memory page size. A caveat of rpmalloc
314 //  internals is that this must also be strictly less than the span size
315 //  (default 64KiB)
316 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
317 rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC
318     RPMALLOC_ATTRIB_ALLOC_SIZE(2);
319 
320 //! Allocate a memory block of at least the given size and alignment.
321 //  Alignment must be a power of two and a multiple of sizeof(void*),
322 //  and should ideally be less than the memory page size. A caveat of rpmalloc
323 //  internals is that this must also be strictly less than the span size
324 //  (default 64KiB). NOTE(review): presumably stores the block in *memptr - confirm.
325 RPMALLOC_EXPORT int rpposix_memalign(void **memptr, size_t alignment,
326                                      size_t size);
327 
328 //! Query the usable size of the given memory block (counted from the given
329 //! pointer to the end of the block)
330 RPMALLOC_EXPORT size_t rpmalloc_usable_size(void *ptr);
331 
332 //! Dummy empty function, used to force linker symbol inclusion
333 RPMALLOC_EXPORT void rpmalloc_linker_reference(void);
334 
335 #if RPMALLOC_FIRST_CLASS_HEAPS // the heap API below is only declared when enabled
336 
337 //! Heap type (struct heap_t is only forward-declared here)
338 typedef struct heap_t rpmalloc_heap_t;
339 
340 //! Acquire a new heap. Will reuse existing released heaps or allocate memory
341 //! for a new heap if none is available.
342 //  The heap API is implemented with the strict assumption that
343 //  only one single thread will call heap functions for a given heap at any
344 //  given time; no functions are thread safe.
345 RPMALLOC_EXPORT rpmalloc_heap_t *rpmalloc_heap_acquire(void);
346 
347 //! Release a heap (does NOT free the memory allocated by the heap, use
348 //! rpmalloc_heap_free_all before destroying the heap).
349 //  Releasing a heap will enable it to be reused by other threads. Safe to pass
350 //  a null pointer.
351 RPMALLOC_EXPORT void rpmalloc_heap_release(rpmalloc_heap_t *heap);
352 
353 //! Allocate a memory block of at least the given size using the given heap.
354 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
355 rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) RPMALLOC_ATTRIB_MALLOC
356     RPMALLOC_ATTRIB_ALLOC_SIZE(2);
357 
358 //! Allocate a memory block of at least the given size using the given heap.
359 //! The returned block will have the requested alignment.
360 //  Alignment must be a power of two
361 //  and a multiple of sizeof(void*), and should ideally be less than memory page
362 //  size. A caveat of rpmalloc internals is that this must also be strictly less
363 //  than the span size (default 64KiB).
364 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
365 rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment,
366                             size_t size) RPMALLOC_ATTRIB_MALLOC
367     RPMALLOC_ATTRIB_ALLOC_SIZE(3);
368 
369 //! Allocate a zero-initialized memory block of at least num * size bytes
370 //! using the given heap.
371 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
372 rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num,
373                      size_t size) RPMALLOC_ATTRIB_MALLOC
374     RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
375 
376 //! Allocate a zero-initialized memory block of at least num * size bytes using
377 //! the given heap. The returned
378 //  block will have the requested alignment. Alignment must either be zero, or a
379 //  power of two and a multiple of sizeof(void*), and should ideally be less
380 //  than memory page size. A caveat of rpmalloc internals is that this must also
381 //  be strictly less than the span size (default 64KiB).
382 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
383 rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment,
384                              size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC
385     RPMALLOC_ATTRIB_ALLOC_SIZE2(3, 4); // num (arg 3) * size (arg 4); arg 2 is the alignment
386 
387 //! Reallocate the given block to at least the given size. The memory block MUST
388 //! be allocated by the same heap given to this function.
389 //  Not marked __malloc__: a reallocated block may still hold live pointers.
390 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
391 rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size,
392                       unsigned int flags)
393     RPMALLOC_ATTRIB_ALLOC_SIZE(3);
394 
395 //! Reallocate the given block to at least the given size. The memory block MUST
396 //! be allocated by the same heap given to this function.
397 //  The returned block will have the requested alignment.
398 //  Alignment must be either zero, or a power of two and a
399 //  multiple of sizeof(void*), and should ideally be less than memory page size.
400 //  A caveat of rpmalloc internals is that this must also be strictly less than
401 //  the span size (default 64KiB). Not marked __malloc__ (may hold live pointers).
402 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *rpmalloc_heap_aligned_realloc(
403     rpmalloc_heap_t *heap, void *ptr, size_t alignment, size_t size,
404     unsigned int flags) RPMALLOC_ATTRIB_ALLOC_SIZE(4);
405 
406 //! Free the given memory block from the given heap. The memory block MUST be
407 //! allocated by the same heap given to this function.
408 //  (Heap functions are not thread safe; see rpmalloc_heap_acquire.)
409 RPMALLOC_EXPORT void rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr);
410 
411 //! Free all memory allocated by the heap
412 RPMALLOC_EXPORT void rpmalloc_heap_free_all(rpmalloc_heap_t *heap);
413 
414 //! Set the given heap as the current heap for the calling thread. A heap MUST
415 //! only be the current heap
416 //  for a single thread; a heap can never be shared between multiple threads.
417 //  The previous current heap for the calling thread is released to be reused by
418 //  other threads.
419 RPMALLOC_EXPORT void rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap);
420 
421 //! Returns which heap the given pointer is allocated on
422 RPMALLOC_EXPORT rpmalloc_heap_t *rpmalloc_get_heap_for_ptr(void *ptr);
423 
424 #endif // RPMALLOC_FIRST_CLASS_HEAPS
425 
426 #ifdef __cplusplus
427 }
428 #endif
429