#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/san.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree.h"

/******************************************************************************/
/* Data. */

/* TSD_INITIALIZER triggers "-Wmissing-field-initializers" */
JEMALLOC_DIAGNOSTIC_PUSH
JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS

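/*
 * Pick a TSD back-end at configure time: a dedicated thread-exit cleanup
 * hook plus TLS, TLS paired with a pthread key (for the destructor), native
 * Win32 TLS, or pthread keys alone, with a boot wrapper for use before the
 * key has been created.
 */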
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false;
bool tsd_booted = false;
#elif (defined(JEMALLOC_TLS))
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
pthread_key_t tsd_tsd;
bool tsd_booted = false;
#elif (defined(_WIN32))
DWORD tsd_tsd;
tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
bool tsd_booted = false;
#else

/*
 * This contains a mutex, but it's pretty convenient to allow the mutex code to
 * have a dependency on tsd.  So we define the struct here, and only refer to it
 * by pointer in the header.
 */
struct tsd_init_head_s {
	ql_head(tsd_init_block_t) blocks;
	malloc_mutex_t lock;
};

pthread_key_t tsd_tsd;
tsd_init_head_t	tsd_init_head = {
	ql_head_initializer(blocks),
#ifndef __lint__
	// XXX: broken lint
	MALLOC_MUTEX_INITIALIZER
#endif
};

tsd_wrapper_t tsd_boot_wrapper = {
	false,
	TSD_INITIALIZER
};
bool tsd_booted = false;
#endif

JEMALLOC_DIAGNOSTIC_POP

/******************************************************************************/

/* A list of all the tsds in the nominal state. */
typedef ql_head(tsd_t) tsd_list_t;
static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds);
static malloc_mutex_t tsd_nominal_tsds_lock;

/* How many slow-path-enabling features are turned on. */
static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0);

static bool
tsd_in_nominal_list(tsd_t *tsd) {
	tsd_t *tsd_list;
	bool found = false;
	/*
	 * We don't know that tsd is nominal; it might not be safe to get data
	 * out of it here.
	 */
	malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock);
	ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		if (tsd == tsd_list) {
			found = true;
			break;
		}
	}
	malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock);
	return found;
}

static void
tsd_add_nominal(tsd_t *tsd) {
	assert(!tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	ql_elm_new(tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

static void
tsd_remove_nominal(tsd_t *tsd) {
	assert(tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

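/*
 * Flag every tsd in the nominal list so that its owning thread re-runs
 * tsd_slow_update() on its next fetch, picking up any change to the global
 * slow state.
 */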
static void
tsd_force_recompute(tsdn_t *tsdn) {
	/*
	 * The stores to tsd->state here need to synchronize with the exchange
	 * in tsd_slow_update.
	 */
	atomic_fence(ATOMIC_RELEASE);
	malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock);
	tsd_t *remote_tsd;
	ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED)
		    <= tsd_state_nominal_max);
		tsd_atomic_store(&remote_tsd->state,
		    tsd_state_nominal_recompute, ATOMIC_RELAXED);
		/* See comments in te_recompute_fast_threshold(). */
		atomic_fence(ATOMIC_SEQ_CST);
		te_next_event_fast_set_non_nominal(remote_tsd);
	}
	malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock);
}

void
tsd_global_slow_inc(tsdn_t *tsdn) {
	atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/*
	 * We unconditionally force a recompute, even if the global slow count
	 * was already positive.  If we didn't, then it would be possible for us
	 * to return to the user, have the user synchronize externally with some
	 * other thread, and then have that other thread not have picked up the
	 * update yet (since the original incrementing thread might still be
	 * making its way through the tsd list).
	 */
	tsd_force_recompute(tsdn);
}

void
tsd_global_slow_dec(tsdn_t *tsdn) {
	atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/* See the note in ..._inc(). */
	tsd_force_recompute(tsdn);
}

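/*
 * Per-thread reasons to stay on the slow path: the tcache is disabled, or
 * we're inside a reentrant allocation.
 */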
static bool
tsd_local_slow(tsd_t *tsd) {
	return !tsd_tcache_enabled_get(tsd)
	    || tsd_reentrancy_level_get(tsd) > 0;
}

bool
tsd_global_slow(void) {
	return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0;
}

/******************************************************************************/

static uint8_t
tsd_state_compute(tsd_t *tsd) {
	if (!tsd_nominal(tsd)) {
		return tsd_state_get(tsd);
	}
	/* We're in *a* nominal state; but which one? */
	if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) {
		return tsd_state_nominal_slow;
	} else {
		return tsd_state_nominal;
	}
}

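/*
 * Recompute this thread's state and publish it.  The exchange loop retries
 * for as long as some other thread concurrently re-flags us as
 * tsd_state_nominal_recompute (via tsd_force_recompute()), so no recompute
 * request can be lost.
 */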
void
tsd_slow_update(tsd_t *tsd) {
	uint8_t old_state;
	do {
		uint8_t new_state = tsd_state_compute(tsd);
		old_state = tsd_atomic_exchange(&tsd->state, new_state,
		    ATOMIC_ACQUIRE);
	} while (old_state == tsd_state_nominal_recompute);

	te_recompute_fast_threshold(tsd);
}

void
tsd_state_set(tsd_t *tsd, uint8_t new_state) {
	/* Only the tsd module can change the state *to* recompute. */
	assert(new_state != tsd_state_nominal_recompute);
	uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED);
	if (old_state > tsd_state_nominal_max) {
		/*
		 * Not currently in the nominal list, but it might need to be
		 * inserted there.
		 */
		assert(!tsd_in_nominal_list(tsd));
		tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED);
		if (new_state <= tsd_state_nominal_max) {
			tsd_add_nominal(tsd);
		}
	} else {
		/*
		 * We're currently nominal.  If the new state is non-nominal,
		 * great; we take ourselves off the list and just enter the new
		 * state.
		 */
		assert(tsd_in_nominal_list(tsd));
		if (new_state > tsd_state_nominal_max) {
			tsd_remove_nominal(tsd);
			tsd_atomic_store(&tsd->state, new_state,
			    ATOMIC_RELAXED);
		} else {
			/*
			 * This is the tricky case.  We're transitioning from
			 * one nominal state to another.  The caller can't know
			 * about any races that are occurring at the same time,
			 * so we always have to recompute no matter what.
			 */
			tsd_slow_update(tsd);
		}
	}
	te_recompute_fast_threshold(tsd);
}

static void
tsd_prng_state_init(tsd_t *tsd) {
	/*
	 * A nondeterministic seed based on the address of tsd reduces
	 * the likelihood of lockstep non-uniform cache index
	 * utilization among identical concurrent processes, but at the
	 * cost of test repeatability.  For debug builds, instead use a
	 * deterministic seed.
	 */
	*tsd_prng_statep_get(tsd) = config_debug ? 0 :
	    (uint64_t)(uintptr_t)tsd;
}

static bool
tsd_data_init(tsd_t *tsd) {
	/*
	 * We initialize the rtree context first (before the tcache), since the
	 * tcache initialization depends on it.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	return tsd_tcache_enabled_data_init(tsd);
}

static void
assert_tsd_data_cleanup_done(tsd_t *tsd) {
	assert(!tsd_nominal(tsd));
	assert(!tsd_in_nominal_list(tsd));
	assert(*tsd_arenap_get_unsafe(tsd) == NULL);
	assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
	assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
	assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
}

static bool
tsd_data_init_nocleanup(tsd_t *tsd) {
	assert(tsd_state_get(tsd) == tsd_state_reincarnated ||
	    tsd_state_get(tsd) == tsd_state_minimal_initialized);
	/*
	 * During reincarnation, there is no guarantee that the cleanup function
	 * will be called (deallocation may happen after all tsd destructors).
	 * We set up tsd in a way that no cleanup is needed.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	*tsd_tcache_enabledp_get_unsafe(tsd) = false;
	*tsd_reentrancy_levelp_get(tsd) = 1;
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	assert_tsd_data_cleanup_done(tsd);

	return false;
}

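/*
 * Slow-path counterpart of tsd_fetch(): handles every non-fast state,
 * i.e. first-time initialization, upgrading a minimal tsd to a full one,
 * pending recompute requests, and reincarnation after the destructor has
 * already run.
 */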
tsd_t *
tsd_fetch_slow(tsd_t *tsd, bool minimal) {
	assert(!tsd_fast(tsd));

	if (tsd_state_get(tsd) == tsd_state_nominal_slow) {
		/*
		 * On slow path but no work needed.  Note that we can't
		 * necessarily *assert* that we're slow, because we might be
		 * slow because of an asynchronous modification to global state,
		 * which might be asynchronously modified *back*.
		 */
	} else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) {
		tsd_slow_update(tsd);
	} else if (tsd_state_get(tsd) == tsd_state_uninitialized) {
		if (!minimal) {
			if (tsd_booted) {
				tsd_state_set(tsd, tsd_state_nominal);
				tsd_slow_update(tsd);
				/* Trigger cleanup handler registration. */
				tsd_set(tsd);
				tsd_data_init(tsd);
			}
		} else {
			tsd_state_set(tsd, tsd_state_minimal_initialized);
			tsd_set(tsd);
			tsd_data_init_nocleanup(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) {
		if (!minimal) {
			/* Switch to fully initialized. */
			tsd_state_set(tsd, tsd_state_nominal);
			assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
			(*tsd_reentrancy_levelp_get(tsd))--;
			tsd_slow_update(tsd);
			tsd_data_init(tsd);
		} else {
			assert_tsd_data_cleanup_done(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_purgatory) {
		tsd_state_set(tsd, tsd_state_reincarnated);
		tsd_set(tsd);
		tsd_data_init_nocleanup(tsd);
	} else {
		assert(tsd_state_get(tsd) == tsd_state_reincarnated);
	}

	return tsd;
}

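/*
 * TSD wrapper allocation goes through the arena-0 bootstrap allocator, since
 * it has to work before tsd itself is usable; sizes are rounded up to a
 * cacheline to avoid false sharing.
 */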
void *
malloc_tsd_malloc(size_t size) {
	return a0malloc(CACHELINE_CEILING(size));
}

void
malloc_tsd_dalloc(void *wrapper) {
	a0dalloc(wrapper);
}

__BEGIN_DECLS
void _malloc_thread_cleanup(void);
__END_DECLS

#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
static unsigned ncleanups;
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];

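/*
 * Run the registered cleanups until a fixed point: a cleanup returns true to
 * remain pending and be invoked on another pass, e.g. if it touched state
 * that a cleanup run earlier in the pass has to tear down again.
 */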
#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_thread_cleanup(void) {
	bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
	unsigned i;

	for (i = 0; i < ncleanups; i++) {
		pending[i] = true;
	}

	do {
		again = false;
		for (i = 0; i < ncleanups; i++) {
			if (pending[i]) {
				pending[i] = cleanups[i]();
				if (pending[i]) {
					again = true;
				}
			}
		}
	} while (again);
}

#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_tsd_cleanup_register(bool (*f)(void)) {
	assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
	cleanups[ncleanups] = f;
	ncleanups++;
}

#endif

static void
tsd_do_data_cleanup(tsd_t *tsd) {
	prof_tdata_cleanup(tsd);
	iarena_cleanup(tsd);
	arena_cleanup(tsd);
	tcache_cleanup(tsd);
	witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
	*tsd_reentrancy_levelp_get(tsd) = 1;
}

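/* Destructor, run at thread exit by whichever TSD back-end is in use. */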
void
tsd_cleanup(void *arg) {
	tsd_t *tsd = (tsd_t *)arg;

	switch (tsd_state_get(tsd)) {
	case tsd_state_uninitialized:
		/* Do nothing. */
		break;
	case tsd_state_minimal_initialized:
		/* This implies the thread only did free() in its lifetime. */
		/* Fall through. */
	case tsd_state_reincarnated:
		/*
		 * Reincarnated means another destructor deallocated memory
		 * after this tsd's destructor was called.  Cleanup isn't
		 * required but is still called for testing and completeness.
		 */
		assert_tsd_data_cleanup_done(tsd);
		JEMALLOC_FALLTHROUGH;
	case tsd_state_nominal:
	case tsd_state_nominal_slow:
		tsd_do_data_cleanup(tsd);
		tsd_state_set(tsd, tsd_state_purgatory);
		tsd_set(tsd);
		break;
	case tsd_state_purgatory:
		/*
		 * The previous time this destructor was called, we set the
		 * state to tsd_state_purgatory so that other destructors
		 * wouldn't cause re-creation of the tsd.  This time, do
		 * nothing, and do not request another callback.
		 */
		break;
	default:
		not_reached();
	}
#ifdef JEMALLOC_JET
	test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd);
	int *data = tsd_test_datap_get_unsafe(tsd);
	if (test_callback != NULL) {
		test_callback(data);
	}
#endif
}

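/*
 * Boot phase 0: initialize the nominal-list lock and the TSD back-end, then
 * fetch (and thereby initialize) the booting thread's tsd.
 */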
tsd_t *
malloc_tsd_boot0(void) {
	tsd_t *tsd;

#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
	ncleanups = 0;
#endif
	if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock",
	    WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) {
		return NULL;
	}
	if (tsd_boot0()) {
		return NULL;
	}
	tsd = tsd_fetch();
	return tsd;
}

void
malloc_tsd_boot1(void) {
	tsd_boot1();
	tsd_t *tsd = tsd_fetch();
	/* malloc_slow has been set properly.  Update tsd_slow. */
	tsd_slow_update(tsd);
}

#ifdef _WIN32
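/*
 * Windows has no pthread key destructors; instead this TLS callback, wired
 * into the image's TLS directory below, runs the registered cleanups on
 * every DLL_THREAD_DETACH.
 */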
static BOOL WINAPI
_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
	switch (fdwReason) {
#ifdef JEMALLOC_LAZY_LOCK
	case DLL_THREAD_ATTACH:
		isthreaded = true;
		break;
#endif
	case DLL_THREAD_DETACH:
		_malloc_thread_cleanup();
		break;
	default:
		break;
	}
	return true;
}

/*
 * We need to be able to say "read" here (in the "pragma section"), but have
 * hooked "read".  We won't read for the rest of the file, so we can get away
 * with unhooking.
 */
#ifdef read
#  undef read
#endif

#ifdef _MSC_VER
#  ifdef _M_IX86
#    pragma comment(linker, "/INCLUDE:__tls_used")
#    pragma comment(linker, "/INCLUDE:_tls_callback")
#  else
#    pragma comment(linker, "/INCLUDE:_tls_used")
#    pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback))
#  endif
#  pragma section(".CRT$XLY",long,read)
#endif
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
BOOL	(WINAPI *const tls_callback)(HINSTANCE hinstDLL,
    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
#endif

#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
    !defined(_WIN32))
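/*
 * With pthread keys alone, tsd initialization can recurse (setting up the
 * wrapper may itself allocate, which needs tsd).  Each initializing thread
 * publishes an init block; on re-entry it finds its own block and returns
 * the in-progress data instead of recursing indefinitely.
 */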
void *
tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) {
	pthread_t self = pthread_self();
	tsd_init_block_t *iter;

	/* Check whether this thread has already inserted into the list. */
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_foreach(iter, &head->blocks, link) {
		if (iter->thread == self) {
			malloc_mutex_unlock(TSDN_NULL, &head->lock);
			return iter->data;
		}
	}
	/* Insert block into list. */
	ql_elm_new(block, link);
	block->thread = self;
	ql_tail_insert(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
	return NULL;
}

void
tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) {
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_remove(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
}
#endif

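/*
 * Fork hooks: the nominal-list lock is held across fork(), and the child
 * resets the list, since only the forking thread survives into it.
 */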
void
tsd_prefork(tsd_t *tsd) {
	malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

void
tsd_postfork_parent(tsd_t *tsd) {
	malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

void
tsd_postfork_child(tsd_t *tsd) {
	malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_new(&tsd_nominal_tsds);

	if (tsd_state_get(tsd) <= tsd_state_nominal_max) {
		tsd_add_nominal(tsd);
	}
}
557