1 #include "jemalloc/internal/jemalloc_preamble.h" 2 #include "jemalloc/internal/jemalloc_internal_includes.h" 3 4 #include "jemalloc/internal/assert.h" 5 #include "jemalloc/internal/san.h" 6 #include "jemalloc/internal/mutex.h" 7 #include "jemalloc/internal/rtree.h" 8 9 /******************************************************************************/ 10 /* Data. */ 11 12 /* TSD_INITIALIZER triggers "-Wmissing-field-initializer" */ 13 JEMALLOC_DIAGNOSTIC_PUSH 14 JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS 15 16 #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP 17 JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER; 18 JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false; 19 bool tsd_booted = false; 20 #elif (defined(JEMALLOC_TLS)) 21 JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER; 22 pthread_key_t tsd_tsd; 23 bool tsd_booted = false; 24 #elif (defined(_WIN32)) 25 DWORD tsd_tsd; 26 tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER}; 27 bool tsd_booted = false; 28 #else 29 30 /* 31 * This contains a mutex, but it's pretty convenient to allow the mutex code to 32 * have a dependency on tsd. So we define the struct here, and only refer to it 33 * by pointer in the header. 34 */ 35 struct tsd_init_head_s { 36 ql_head(tsd_init_block_t) blocks; 37 malloc_mutex_t lock; 38 }; 39 40 pthread_key_t tsd_tsd; 41 tsd_init_head_t tsd_init_head = { 42 ql_head_initializer(blocks), 43 #ifndef __lint__ 44 // XXX: broken lint 45 MALLOC_MUTEX_INITIALIZER 46 #endif 47 }; 48 49 tsd_wrapper_t tsd_boot_wrapper = { 50 false, 51 TSD_INITIALIZER 52 }; 53 bool tsd_booted = false; 54 #endif 55 56 JEMALLOC_DIAGNOSTIC_POP 57 58 /******************************************************************************/ 59 60 /* A list of all the tsds in the nominal state. */ 61 typedef ql_head(tsd_t) tsd_list_t; 62 static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds); 63 static malloc_mutex_t tsd_nominal_tsds_lock; 64 65 /* How many slow-path-enabling features are turned on. */ 66 static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0); 67 68 static bool 69 tsd_in_nominal_list(tsd_t *tsd) { 70 tsd_t *tsd_list; 71 bool found = false; 72 /* 73 * We don't know that tsd is nominal; it might not be safe to get data 74 * out of it here. 75 */ 76 malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock); 77 ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) { 78 if (tsd == tsd_list) { 79 found = true; 80 break; 81 } 82 } 83 malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock); 84 return found; 85 } 86 87 static void 88 tsd_add_nominal(tsd_t *tsd) { 89 assert(!tsd_in_nominal_list(tsd)); 90 assert(tsd_state_get(tsd) <= tsd_state_nominal_max); 91 ql_elm_new(tsd, TSD_MANGLE(tsd_link)); 92 malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); 93 ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link)); 94 malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); 95 } 96 97 static void 98 tsd_remove_nominal(tsd_t *tsd) { 99 assert(tsd_in_nominal_list(tsd)); 100 assert(tsd_state_get(tsd) <= tsd_state_nominal_max); 101 malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); 102 ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link)); 103 malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); 104 } 105 106 static void 107 tsd_force_recompute(tsdn_t *tsdn) { 108 /* 109 * The stores to tsd->state here need to synchronize with the exchange 110 * in tsd_slow_update. 111 */ 112 atomic_fence(ATOMIC_RELEASE); 113 malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock); 114 tsd_t *remote_tsd; 115 ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) { 116 assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED) 117 <= tsd_state_nominal_max); 118 tsd_atomic_store(&remote_tsd->state, 119 tsd_state_nominal_recompute, ATOMIC_RELAXED); 120 /* See comments in te_recompute_fast_threshold(). */ 121 atomic_fence(ATOMIC_SEQ_CST); 122 te_next_event_fast_set_non_nominal(remote_tsd); 123 } 124 malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock); 125 } 126 127 void 128 tsd_global_slow_inc(tsdn_t *tsdn) { 129 atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED); 130 /* 131 * We unconditionally force a recompute, even if the global slow count 132 * was already positive. If we didn't, then it would be possible for us 133 * to return to the user, have the user synchronize externally with some 134 * other thread, and then have that other thread not have picked up the 135 * update yet (since the original incrementing thread might still be 136 * making its way through the tsd list). 137 */ 138 tsd_force_recompute(tsdn); 139 } 140 141 void tsd_global_slow_dec(tsdn_t *tsdn) { 142 atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED); 143 /* See the note in ..._inc(). */ 144 tsd_force_recompute(tsdn); 145 } 146 147 static bool 148 tsd_local_slow(tsd_t *tsd) { 149 return !tsd_tcache_enabled_get(tsd) 150 || tsd_reentrancy_level_get(tsd) > 0; 151 } 152 153 bool 154 tsd_global_slow(void) { 155 return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0; 156 } 157 158 /******************************************************************************/ 159 160 static uint8_t 161 tsd_state_compute(tsd_t *tsd) { 162 if (!tsd_nominal(tsd)) { 163 return tsd_state_get(tsd); 164 } 165 /* We're in *a* nominal state; but which one? */ 166 if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) { 167 return tsd_state_nominal_slow; 168 } else { 169 return tsd_state_nominal; 170 } 171 } 172 173 void 174 tsd_slow_update(tsd_t *tsd) { 175 uint8_t old_state; 176 do { 177 uint8_t new_state = tsd_state_compute(tsd); 178 old_state = tsd_atomic_exchange(&tsd->state, new_state, 179 ATOMIC_ACQUIRE); 180 } while (old_state == tsd_state_nominal_recompute); 181 182 te_recompute_fast_threshold(tsd); 183 } 184 185 void 186 tsd_state_set(tsd_t *tsd, uint8_t new_state) { 187 /* Only the tsd module can change the state *to* recompute. */ 188 assert(new_state != tsd_state_nominal_recompute); 189 uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED); 190 if (old_state > tsd_state_nominal_max) { 191 /* 192 * Not currently in the nominal list, but it might need to be 193 * inserted there. 194 */ 195 assert(!tsd_in_nominal_list(tsd)); 196 tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED); 197 if (new_state <= tsd_state_nominal_max) { 198 tsd_add_nominal(tsd); 199 } 200 } else { 201 /* 202 * We're currently nominal. If the new state is non-nominal, 203 * great; we take ourselves off the list and just enter the new 204 * state. 205 */ 206 assert(tsd_in_nominal_list(tsd)); 207 if (new_state > tsd_state_nominal_max) { 208 tsd_remove_nominal(tsd); 209 tsd_atomic_store(&tsd->state, new_state, 210 ATOMIC_RELAXED); 211 } else { 212 /* 213 * This is the tricky case. We're transitioning from 214 * one nominal state to another. The caller can't know 215 * about any races that are occurring at the same time, 216 * so we always have to recompute no matter what. 217 */ 218 tsd_slow_update(tsd); 219 } 220 } 221 te_recompute_fast_threshold(tsd); 222 } 223 224 static void 225 tsd_prng_state_init(tsd_t *tsd) { 226 /* 227 * A nondeterministic seed based on the address of tsd reduces 228 * the likelihood of lockstep non-uniform cache index 229 * utilization among identical concurrent processes, but at the 230 * cost of test repeatability. For debug builds, instead use a 231 * deterministic seed. 232 */ 233 *tsd_prng_statep_get(tsd) = config_debug ? 0 : 234 (uint64_t)(uintptr_t)tsd; 235 } 236 237 static bool 238 tsd_data_init(tsd_t *tsd) { 239 /* 240 * We initialize the rtree context first (before the tcache), since the 241 * tcache initialization depends on it. 242 */ 243 rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); 244 tsd_prng_state_init(tsd); 245 tsd_te_init(tsd); /* event_init may use the prng state above. */ 246 tsd_san_init(tsd); 247 return tsd_tcache_enabled_data_init(tsd); 248 } 249 250 static void 251 assert_tsd_data_cleanup_done(tsd_t *tsd) { 252 assert(!tsd_nominal(tsd)); 253 assert(!tsd_in_nominal_list(tsd)); 254 assert(*tsd_arenap_get_unsafe(tsd) == NULL); 255 assert(*tsd_iarenap_get_unsafe(tsd) == NULL); 256 assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false); 257 assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL); 258 } 259 260 static bool 261 tsd_data_init_nocleanup(tsd_t *tsd) { 262 assert(tsd_state_get(tsd) == tsd_state_reincarnated || 263 tsd_state_get(tsd) == tsd_state_minimal_initialized); 264 /* 265 * During reincarnation, there is no guarantee that the cleanup function 266 * will be called (deallocation may happen after all tsd destructors). 267 * We set up tsd in a way that no cleanup is needed. 268 */ 269 rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); 270 *tsd_tcache_enabledp_get_unsafe(tsd) = false; 271 *tsd_reentrancy_levelp_get(tsd) = 1; 272 tsd_prng_state_init(tsd); 273 tsd_te_init(tsd); /* event_init may use the prng state above. */ 274 tsd_san_init(tsd); 275 assert_tsd_data_cleanup_done(tsd); 276 277 return false; 278 } 279 280 tsd_t * 281 tsd_fetch_slow(tsd_t *tsd, bool minimal) { 282 assert(!tsd_fast(tsd)); 283 284 if (tsd_state_get(tsd) == tsd_state_nominal_slow) { 285 /* 286 * On slow path but no work needed. Note that we can't 287 * necessarily *assert* that we're slow, because we might be 288 * slow because of an asynchronous modification to global state, 289 * which might be asynchronously modified *back*. 290 */ 291 } else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) { 292 tsd_slow_update(tsd); 293 } else if (tsd_state_get(tsd) == tsd_state_uninitialized) { 294 if (!minimal) { 295 if (tsd_booted) { 296 tsd_state_set(tsd, tsd_state_nominal); 297 tsd_slow_update(tsd); 298 /* Trigger cleanup handler registration. */ 299 tsd_set(tsd); 300 tsd_data_init(tsd); 301 } 302 } else { 303 tsd_state_set(tsd, tsd_state_minimal_initialized); 304 tsd_set(tsd); 305 tsd_data_init_nocleanup(tsd); 306 } 307 } else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) { 308 if (!minimal) { 309 /* Switch to fully initialized. */ 310 tsd_state_set(tsd, tsd_state_nominal); 311 assert(*tsd_reentrancy_levelp_get(tsd) >= 1); 312 (*tsd_reentrancy_levelp_get(tsd))--; 313 tsd_slow_update(tsd); 314 tsd_data_init(tsd); 315 } else { 316 assert_tsd_data_cleanup_done(tsd); 317 } 318 } else if (tsd_state_get(tsd) == tsd_state_purgatory) { 319 tsd_state_set(tsd, tsd_state_reincarnated); 320 tsd_set(tsd); 321 tsd_data_init_nocleanup(tsd); 322 } else { 323 assert(tsd_state_get(tsd) == tsd_state_reincarnated); 324 } 325 326 return tsd; 327 } 328 329 void * 330 malloc_tsd_malloc(size_t size) { 331 return a0malloc(CACHELINE_CEILING(size)); 332 } 333 334 void 335 malloc_tsd_dalloc(void *wrapper) { 336 a0dalloc(wrapper); 337 } 338 339 __BEGIN_DECLS 340 void _malloc_thread_cleanup(void); 341 __END_DECLS 342 343 #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) 344 static unsigned ncleanups; 345 static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; 346 347 #ifndef _WIN32 348 JEMALLOC_EXPORT 349 #endif 350 void 351 _malloc_thread_cleanup(void) { 352 bool pending[MALLOC_TSD_CLEANUPS_MAX], again; 353 unsigned i; 354 355 for (i = 0; i < ncleanups; i++) { 356 pending[i] = true; 357 } 358 359 do { 360 again = false; 361 for (i = 0; i < ncleanups; i++) { 362 if (pending[i]) { 363 pending[i] = cleanups[i](); 364 if (pending[i]) { 365 again = true; 366 } 367 } 368 } 369 } while (again); 370 } 371 372 #ifndef _WIN32 373 JEMALLOC_EXPORT 374 #endif 375 void 376 _malloc_tsd_cleanup_register(bool (*f)(void)) { 377 assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX); 378 cleanups[ncleanups] = f; 379 ncleanups++; 380 } 381 382 #endif 383 384 static void 385 tsd_do_data_cleanup(tsd_t *tsd) { 386 prof_tdata_cleanup(tsd); 387 iarena_cleanup(tsd); 388 arena_cleanup(tsd); 389 tcache_cleanup(tsd); 390 witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd)); 391 *tsd_reentrancy_levelp_get(tsd) = 1; 392 } 393 394 void 395 tsd_cleanup(void *arg) { 396 tsd_t *tsd = (tsd_t *)arg; 397 398 switch (tsd_state_get(tsd)) { 399 case tsd_state_uninitialized: 400 /* Do nothing. */ 401 break; 402 case tsd_state_minimal_initialized: 403 /* This implies the thread only did free() in its life time. */ 404 /* Fall through. */ 405 case tsd_state_reincarnated: 406 /* 407 * Reincarnated means another destructor deallocated memory 408 * after the destructor was called. Cleanup isn't required but 409 * is still called for testing and completeness. 410 */ 411 assert_tsd_data_cleanup_done(tsd); 412 JEMALLOC_FALLTHROUGH; 413 case tsd_state_nominal: 414 case tsd_state_nominal_slow: 415 tsd_do_data_cleanup(tsd); 416 tsd_state_set(tsd, tsd_state_purgatory); 417 tsd_set(tsd); 418 break; 419 case tsd_state_purgatory: 420 /* 421 * The previous time this destructor was called, we set the 422 * state to tsd_state_purgatory so that other destructors 423 * wouldn't cause re-creation of the tsd. This time, do 424 * nothing, and do not request another callback. 425 */ 426 break; 427 default: 428 not_reached(); 429 } 430 #ifdef JEMALLOC_JET 431 test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd); 432 int *data = tsd_test_datap_get_unsafe(tsd); 433 if (test_callback != NULL) { 434 test_callback(data); 435 } 436 #endif 437 } 438 439 tsd_t * 440 malloc_tsd_boot0(void) { 441 tsd_t *tsd; 442 443 #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) 444 ncleanups = 0; 445 #endif 446 if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock", 447 WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) { 448 return NULL; 449 } 450 if (tsd_boot0()) { 451 return NULL; 452 } 453 tsd = tsd_fetch(); 454 return tsd; 455 } 456 457 void 458 malloc_tsd_boot1(void) { 459 tsd_boot1(); 460 tsd_t *tsd = tsd_fetch(); 461 /* malloc_slow has been set properly. Update tsd_slow. */ 462 tsd_slow_update(tsd); 463 } 464 465 #ifdef _WIN32 466 static BOOL WINAPI 467 _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { 468 switch (fdwReason) { 469 #ifdef JEMALLOC_LAZY_LOCK 470 case DLL_THREAD_ATTACH: 471 isthreaded = true; 472 break; 473 #endif 474 case DLL_THREAD_DETACH: 475 _malloc_thread_cleanup(); 476 break; 477 default: 478 break; 479 } 480 return true; 481 } 482 483 /* 484 * We need to be able to say "read" here (in the "pragma section"), but have 485 * hooked "read". We won't read for the rest of the file, so we can get away 486 * with unhooking. 487 */ 488 #ifdef read 489 # undef read 490 #endif 491 492 #ifdef _MSC_VER 493 # ifdef _M_IX86 494 # pragma comment(linker, "/INCLUDE:__tls_used") 495 # pragma comment(linker, "/INCLUDE:_tls_callback") 496 # else 497 # pragma comment(linker, "/INCLUDE:_tls_used") 498 # pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) ) 499 # endif 500 # pragma section(".CRT$XLY",long,read) 501 #endif 502 JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) 503 BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, 504 DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; 505 #endif 506 507 #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ 508 !defined(_WIN32)) 509 void * 510 tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) { 511 pthread_t self = pthread_self(); 512 tsd_init_block_t *iter; 513 514 /* Check whether this thread has already inserted into the list. */ 515 malloc_mutex_lock(TSDN_NULL, &head->lock); 516 ql_foreach(iter, &head->blocks, link) { 517 if (iter->thread == self) { 518 malloc_mutex_unlock(TSDN_NULL, &head->lock); 519 return iter->data; 520 } 521 } 522 /* Insert block into list. */ 523 ql_elm_new(block, link); 524 block->thread = self; 525 ql_tail_insert(&head->blocks, block, link); 526 malloc_mutex_unlock(TSDN_NULL, &head->lock); 527 return NULL; 528 } 529 530 void 531 tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { 532 malloc_mutex_lock(TSDN_NULL, &head->lock); 533 ql_remove(&head->blocks, block, link); 534 malloc_mutex_unlock(TSDN_NULL, &head->lock); 535 } 536 #endif 537 538 void 539 tsd_prefork(tsd_t *tsd) { 540 malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); 541 } 542 543 void 544 tsd_postfork_parent(tsd_t *tsd) { 545 malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); 546 } 547 548 void 549 tsd_postfork_child(tsd_t *tsd) { 550 malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); 551 ql_new(&tsd_nominal_tsds); 552 553 if (tsd_state_get(tsd) <= tsd_state_nominal_max) { 554 tsd_add_nominal(tsd); 555 } 556 } 557