#ifndef JEMALLOC_INTERNAL_TSD_H
#define JEMALLOC_INTERNAL_TSD_H

#include "jemalloc/internal/arena_types.h"
#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/jemalloc_internal_externs.h"
#include "jemalloc/internal/prof_types.h"
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/rtree_tsd.h"
#include "jemalloc/internal/tcache_types.h"
#include "jemalloc/internal/tcache_structs.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/witness.h"

/*
 * Thread-Specific-Data layout
 * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof ---
 * s: state
 * e: tcache_enabled
 * m: thread_allocated (config_stats)
 * f: thread_deallocated (config_stats)
 * p: prof_tdata (config_prof)
 * c: rtree_ctx (rtree cache accessed on deallocation)
 * t: tcache
 * --- data not accessed on tcache fast path: arena-related fields ---
 * d: arenas_tdata_bypass
 * r: reentrancy_level
 * x: narenas_tdata
 * i: iarena
 * a: arena
 * o: arenas_tdata
 * Loading TSD data is on the critical path of basically all malloc operations.
 * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective.
 * Use a compact layout to reduce cache footprint.
 * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+
 * |---------------------------- 1st cacheline ----------------------------|
 * | sedrxxxx mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] |
 * |---------------------------- 2nd cacheline ----------------------------|
 * | [c * 64 ........ ........ ........ ........ ........ ........ .......] |
 * |---------------------------- 3rd cacheline ----------------------------|
 * | [c * 32 ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... |
 * +-------------------------------------------------------------------------+
 * Note: the entire tcache is embedded into TSD and spans multiple cachelines.
 *
 * The last 3 members (i, a and o) before tcache aren't really needed on the
 * tcache fast path.  However, we have a number of unused tcache bins and
 * witnesses (never touched unless config_debug) at the end of tcache, so we
 * place them there to avoid breaking the cachelines and possibly paging in an
 * extra page.
 */
#ifdef JEMALLOC_JET
typedef void (*test_callback_t)(int *);
# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10
# define MALLOC_TEST_TSD \
    O(test_data, int, int) \
    O(test_callback, test_callback_t, int)
# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL
#else
# define MALLOC_TEST_TSD
# define MALLOC_TEST_TSD_INITIALIZER
#endif

/* O(name, type, nullable type) */
#define MALLOC_TSD \
    O(tcache_enabled, bool, bool) \
    O(arenas_tdata_bypass, bool, bool) \
    O(reentrancy_level, int8_t, int8_t) \
    O(narenas_tdata, uint32_t, uint32_t) \
    O(offset_state, uint64_t, uint64_t) \
    O(thread_allocated, uint64_t, uint64_t) \
    O(thread_deallocated, uint64_t, uint64_t) \
    O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \
    O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \
    O(iarena, arena_t *, arena_t *) \
    O(arena, arena_t *, arena_t *) \
    O(arenas_tdata, arena_tdata_t *, arena_tdata_t *) \
    O(tcache, tcache_t, tcache_t) \
    O(witness_tsd, witness_tsd_t, witness_tsdn_t) \
    MALLOC_TEST_TSD

#define TSD_INITIALIZER { \
    tsd_state_uninitialized, \
    TCACHE_ENABLED_ZERO_INITIALIZER, \
    false, \
    0, \
    0, \
    0, \
    0, \
    0, \
    NULL, \
    RTREE_CTX_ZERO_INITIALIZER, \
    NULL, \
    NULL, \
    NULL, \
    TCACHE_ZERO_INITIALIZER, \
    WITNESS_TSD_INITIALIZER \
    MALLOC_TEST_TSD_INITIALIZER \
}

enum {
        tsd_state_nominal = 0, /* Common case --> jnz. */
        tsd_state_nominal_slow = 1, /* Initialized but on slow path. */
        /* The above 2 nominal states should be lower values. */
        tsd_state_nominal_max = 1, /* Used for comparison only. */
        tsd_state_minimal_initialized = 2,
        tsd_state_purgatory = 3,
        tsd_state_reincarnated = 4,
        tsd_state_uninitialized = 5
};

/* Manually limit tsd_state_t to a single byte. */
typedef uint8_t tsd_state_t;
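
/*
 * Illustrative note: each O(name, type, nullable type) entry in MALLOC_TSD
 * above is expanded several times below, once into a struct field and once
 * per accessor flavor.  For example, O(thread_allocated, uint64_t, uint64_t)
 * yields roughly:
 *
 *   uint64_t use_a_getter_or_setter_instead_thread_allocated;  (struct field)
 *   uint64_t *tsd_thread_allocatedp_get_unsafe(tsd_t *tsd);
 *   uint64_t *tsd_thread_allocatedp_get(tsd_t *tsd);
 *   uint64_t *tsdn_thread_allocatedp_get(tsdn_t *tsdn);
 *   uint64_t tsd_thread_allocated_get(tsd_t *tsd);
 *   void tsd_thread_allocated_set(tsd_t *tsd, uint64_t val);
 */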

/* The actual tsd. */
struct tsd_s {
        /*
         * The contents should be treated as totally opaque outside the tsd
         * module.  Access any thread-local state through the getters and
         * setters below.
         */
        tsd_state_t state;
#define O(n, t, nt) \
        t use_a_getter_or_setter_instead_##n;
MALLOC_TSD
#undef O
};

/*
 * Wrapper around tsd_t that makes it possible to avoid implicit conversion
 * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be
 * explicitly converted to tsd_t, which is non-nullable.
 */
struct tsdn_s {
        tsd_t tsd;
};
#define TSDN_NULL ((tsdn_t *)0)
JEMALLOC_ALWAYS_INLINE tsdn_t *
tsd_tsdn(tsd_t *tsd) {
        return (tsdn_t *)tsd;
}

JEMALLOC_ALWAYS_INLINE bool
tsdn_null(const tsdn_t *tsdn) {
        return tsdn == NULL;
}

JEMALLOC_ALWAYS_INLINE tsd_t *
tsdn_tsd(tsdn_t *tsdn) {
        assert(!tsdn_null(tsdn));

        return &tsdn->tsd;
}

void *malloc_tsd_malloc(size_t size);
void malloc_tsd_dalloc(void *wrapper);
void malloc_tsd_cleanup_register(bool (*f)(void));
tsd_t *malloc_tsd_boot0(void);
void malloc_tsd_boot1(void);
void tsd_cleanup(void *arg);
tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal);
void tsd_slow_update(tsd_t *tsd);

/*
 * We put the platform-specific data declarations and inlines into their own
 * header files to avoid cluttering this file.  They define tsd_boot0,
 * tsd_boot1, tsd_boot, tsd_booted_get, tsd_get_allocates, tsd_get, and
 * tsd_set.
 */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
#include "jemalloc/internal/tsd_malloc_thread_cleanup.h"
#elif (defined(JEMALLOC_TLS))
#include "jemalloc/internal/tsd_tls.h"
#elif (defined(_WIN32))
#include "jemalloc/internal/tsd_win.h"
#else
#include "jemalloc/internal/tsd_generic.h"
#endif

/*
 * tsd_foop_get_unsafe(tsd) returns a pointer to the thread-local instance of
 * foo.  This omits some safety checks, and so can be used during tsd
 * initialization and cleanup.
 */
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE t * \
tsd_##n##p_get_unsafe(tsd_t *tsd) { \
        return &tsd->use_a_getter_or_setter_instead_##n; \
}
MALLOC_TSD
#undef O
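
/*
 * Usage sketch (illustrative only; example_stats_merge is a hypothetical
 * helper, not part of jemalloc): internal code that may run before TSD is
 * available typically takes a nullable tsdn_t * and checks it before
 * converting, e.g.:
 *
 *   void example_stats_merge(tsdn_t *tsdn) {
 *           if (tsdn_null(tsdn)) {
 *                   return;
 *           }
 *           tsd_t *tsd = tsdn_tsd(tsdn);
 *           ...
 *   }
 *
 * Callers that hold a known-good tsd_t * go the other way with tsd_tsdn(tsd).
 */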

/*
 * tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo.
 */
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE t * \
tsd_##n##p_get(tsd_t *tsd) { \
        assert(tsd->state == tsd_state_nominal || \
            tsd->state == tsd_state_nominal_slow || \
            tsd->state == tsd_state_reincarnated || \
            tsd->state == tsd_state_minimal_initialized); \
        return tsd_##n##p_get_unsafe(tsd); \
}
MALLOC_TSD
#undef O

/*
 * tsdn_foop_get(tsdn) returns either the thread-local instance of foo (if tsdn
 * isn't NULL), or NULL (if tsdn is NULL), cast to the nullable pointer type.
 */
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE nt * \
tsdn_##n##p_get(tsdn_t *tsdn) { \
        if (tsdn_null(tsdn)) { \
                return NULL; \
        } \
        tsd_t *tsd = tsdn_tsd(tsdn); \
        return (nt *)tsd_##n##p_get(tsd); \
}
MALLOC_TSD
#undef O

/* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. */
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE t \
tsd_##n##_get(tsd_t *tsd) { \
        return *tsd_##n##p_get(tsd); \
}
MALLOC_TSD
#undef O
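
/*
 * Example (illustrative only): the generated value getters read per-thread
 * state by name, e.g. the stats counters declared in MALLOC_TSD:
 *
 *   uint64_t allocated = tsd_thread_allocated_get(tsd);
 *   uint64_t deallocated = tsd_thread_deallocated_get(tsd);
 */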

/*
 * tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val.
 */
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE void \
tsd_##n##_set(tsd_t *tsd, t val) { \
        assert(tsd->state != tsd_state_reincarnated && \
            tsd->state != tsd_state_minimal_initialized); \
        *tsd_##n##p_get(tsd) = val; \
}
MALLOC_TSD
#undef O

JEMALLOC_ALWAYS_INLINE void
tsd_assert_fast(tsd_t *tsd) {
        assert(!malloc_slow && tsd_tcache_enabled_get(tsd) &&
            tsd_reentrancy_level_get(tsd) == 0);
}

JEMALLOC_ALWAYS_INLINE bool
tsd_fast(tsd_t *tsd) {
        bool fast = (tsd->state == tsd_state_nominal);
        if (fast) {
                tsd_assert_fast(tsd);
        }

        return fast;
}

JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_fetch_impl(bool init, bool minimal) {
        tsd_t *tsd = tsd_get(init);

        if (!init && tsd_get_allocates() && tsd == NULL) {
                return NULL;
        }
        assert(tsd != NULL);

        if (unlikely(tsd->state != tsd_state_nominal)) {
                return tsd_fetch_slow(tsd, minimal);
        }
        assert(tsd_fast(tsd));
        tsd_assert_fast(tsd);

        return tsd;
}

/* Get a minimal TSD that requires no cleanup.  See comments in free(). */
JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_fetch_min(void) {
        return tsd_fetch_impl(true, true);
}
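
/*
 * Typical call pattern (illustrative sketch, not a complete fast path):
 * public entry points fetch TSD once and branch on tsd_fast(), roughly:
 *
 *   tsd_t *tsd = tsd_fetch();
 *   if (tsd_fast(tsd)) {
 *           ... take the tcache fast path ...
 *   } else {
 *           ... fall back to the slow path ...
 *   }
 */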

/* For internal background threads use only. */
JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_internal_fetch(void) {
        tsd_t *tsd = tsd_fetch_min();
        /* Use reincarnated state to prevent full initialization. */
        tsd->state = tsd_state_reincarnated;

        return tsd;
}

JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_fetch(void) {
        return tsd_fetch_impl(true, false);
}

static inline bool
tsd_nominal(tsd_t *tsd) {
        return (tsd->state <= tsd_state_nominal_max);
}

JEMALLOC_ALWAYS_INLINE tsdn_t *
tsdn_fetch(void) {
        if (!tsd_booted_get()) {
                return NULL;
        }

        return tsd_tsdn(tsd_fetch_impl(false, false));
}

JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
tsd_rtree_ctx(tsd_t *tsd) {
        return tsd_rtree_ctxp_get(tsd);
}

JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) {
        /*
         * If tsd cannot be accessed, initialize the fallback rtree_ctx and
         * return a pointer to it.
         */
        if (unlikely(tsdn_null(tsdn))) {
                rtree_ctx_data_init(fallback);
                return fallback;
        }
        return tsd_rtree_ctx(tsdn_tsd(tsdn));
}

#endif /* JEMALLOC_INTERNAL_TSD_H */