xref: /netbsd-src/external/bsd/jemalloc/dist/src/base.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/extent_mmap.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/sz.h"

/*
 * In auto mode, arenas switch to huge pages for the base allocator on the
 * second base block.  a0 switches to thp on the 5th block (after 20 megabytes
 * of metadata), since more metadata (e.g. rtree nodes) come from a0's base.
 */

#define BASE_AUTO_THP_THRESHOLD    2
#define BASE_AUTO_THP_THRESHOLD_A0 5

/******************************************************************************/
/* Data. */

static base_t *b0;

metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT;

const char *metadata_thp_mode_names[] = {
	"disabled",
	"auto",
	"always"
};

/******************************************************************************/

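/*
 * Whether metadata mappings should be explicitly madvise()d for (no)hugepage:
 * requires the metadata_thp option to be enabled and the system-wide THP mode
 * observed at startup to be the default one.
 */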
static inline bool
metadata_thp_madvise(void) {
	return (metadata_thp_enabled() &&
	    (init_system_thp_mode == thp_mode_default));
}

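/*
 * Map a new metadata block of the given size (a HUGEPAGE multiple), using
 * either the internal mmap-based allocator or the user-supplied extent hooks.
 */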
static void *
base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) {
	void *addr;
	bool zero = true;
	bool commit = true;

	/* Use huge page sizes and alignment regardless of opt_metadata_thp. */
	assert(size == HUGEPAGE_CEILING(size));
	size_t alignment = HUGEPAGE;
	if (ehooks_are_default(ehooks)) {
		addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
		if (have_madvise_huge && addr) {
			pages_set_thp_state(addr, size);
		}
	} else {
		addr = ehooks_alloc(tsdn, ehooks, NULL, size, alignment, &zero,
		    &commit);
	}

	return addr;
}

static void
base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr,
    size_t size) {
	/*
	 * Cascade through dalloc, decommit, purge_forced, and purge_lazy,
	 * stopping at first success.  This cascade is performed for consistency
	 * with the cascade in extent_dalloc_wrapper() because an application's
	 * custom hooks may not support e.g. dalloc.  This function is only ever
	 * called as a side effect of arena destruction, so although it might
	 * seem pointless to do anything besides dalloc here, the application
	 * may in fact want the end state of all associated virtual memory to be
	 * in some consistent-but-allocated state.
	 */
	if (ehooks_are_default(ehooks)) {
		if (!extent_dalloc_mmap(addr, size)) {
			goto label_done;
		}
		if (!pages_decommit(addr, size)) {
			goto label_done;
		}
		if (!pages_purge_forced(addr, size)) {
			goto label_done;
		}
		if (!pages_purge_lazy(addr, size)) {
			goto label_done;
		}
		/* Nothing worked.  This should never happen. */
		not_reached();
	} else {
		if (!ehooks_dalloc(tsdn, ehooks, addr, size, true)) {
			goto label_done;
		}
		if (!ehooks_decommit(tsdn, ehooks, addr, size, 0, size)) {
			goto label_done;
		}
		if (!ehooks_purge_forced(tsdn, ehooks, addr, size, 0, size)) {
			goto label_done;
		}
		if (!ehooks_purge_lazy(tsdn, ehooks, addr, size, 0, size)) {
			goto label_done;
		}
		/* Nothing worked.  That's the application's problem. */
	}
label_done:
	if (metadata_thp_madvise()) {
		/* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
		    (size & HUGEPAGE_MASK) == 0);
		pages_nohuge(addr, size);
	}
}

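/*
 * Initialize a bump-allocator edata covering [addr, addr + size), assigning it
 * the next extent serial number.
 */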
static void
base_edata_init(size_t *extent_sn_next, edata_t *edata, void *addr,
    size_t size) {
	size_t sn;

	sn = *extent_sn_next;
	(*extent_sn_next)++;

	edata_binit(edata, addr, size, sn);
}

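/*
 * Number of blocks currently linked into the base, optionally counting a block
 * that is about to be added.
 */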
static size_t
base_get_num_blocks(base_t *base, bool with_new_block) {
	base_block_t *b = base->blocks;
	assert(b != NULL);

	size_t n_blocks = with_new_block ? 2 : 1;
	while (b->next != NULL) {
		n_blocks++;
		b = b->next;
	}

	return n_blocks;
}

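/*
 * In "auto" mode, switch the base over to huge pages once enough blocks have
 * accumulated: madvise the already-allocated blocks and record the switch so
 * that subsequently allocated blocks are madvised in base_block_alloc().
 */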
static void
base_auto_thp_switch(tsdn_t *tsdn, base_t *base) {
	assert(opt_metadata_thp == metadata_thp_auto);
	malloc_mutex_assert_owner(tsdn, &base->mtx);
	if (base->auto_thp_switched) {
		return;
	}
	/* Called when adding a new block. */
	bool should_switch;
	if (base_ind_get(base) != 0) {
		should_switch = (base_get_num_blocks(base, true) ==
		    BASE_AUTO_THP_THRESHOLD);
	} else {
		should_switch = (base_get_num_blocks(base, true) ==
		    BASE_AUTO_THP_THRESHOLD_A0);
	}
	if (!should_switch) {
		return;
	}

	base->auto_thp_switched = true;
	assert(!config_stats || base->n_thp == 0);
	/* Make the initial blocks THP lazily. */
	base_block_t *block = base->blocks;
	while (block != NULL) {
		assert((block->size & HUGEPAGE_MASK) == 0);
		pages_huge(block, block->size);
		if (config_stats) {
			base->n_thp += HUGEPAGE_CEILING(block->size -
			    edata_bsize_get(&block->edata)) >> LG_HUGEPAGE;
		}
		block = block->next;
		assert(block == NULL || (base_ind_get(base) == 0));
	}
}

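/*
 * Carve an aligned region of the requested size off the front of *edata,
 * returning its address and shrinking the edata to the remainder.  *gap_size
 * receives the padding skipped to reach the alignment; e.g. with the edata at
 * an address ending in 0x...30 and alignment 64, the gap would be 16 bytes.
 */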
static void *
base_extent_bump_alloc_helper(edata_t *edata, size_t *gap_size, size_t size,
    size_t alignment) {
	void *ret;

	assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM));
	assert(size == ALIGNMENT_CEILING(size, alignment));

	*gap_size = ALIGNMENT_CEILING((uintptr_t)edata_addr_get(edata),
	    alignment) - (uintptr_t)edata_addr_get(edata);
	ret = (void *)((uintptr_t)edata_addr_get(edata) + *gap_size);
	assert(edata_bsize_get(edata) >= *gap_size + size);
	edata_binit(edata, (void *)((uintptr_t)edata_addr_get(edata) +
	    *gap_size + size), edata_bsize_get(edata) - *gap_size - size,
	    edata_sn_get(edata));
	return ret;
}

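/*
 * Bookkeeping after a bump allocation: reinsert the edata remainder (if any)
 * into the avail heap for the largest size class that still fits in it, and
 * update the allocated/resident/n_thp statistics.
 */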
static void
base_extent_bump_alloc_post(base_t *base, edata_t *edata, size_t gap_size,
    void *addr, size_t size) {
	if (edata_bsize_get(edata) > 0) {
		/*
		 * Compute the index for the largest size class that does not
		 * exceed extent's size.
		 */
		szind_t index_floor =
		    sz_size2index(edata_bsize_get(edata) + 1) - 1;
		edata_heap_insert(&base->avail[index_floor], edata);
	}

	if (config_stats) {
		base->allocated += size;
		/*
		 * Add one PAGE to base_resident for every page boundary that is
		 * crossed by the new allocation. Adjust n_thp similarly when
		 * metadata_thp is enabled.
		 */
		base->resident += PAGE_CEILING((uintptr_t)addr + size) -
		    PAGE_CEILING((uintptr_t)addr - gap_size);
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		if (metadata_thp_madvise() && (opt_metadata_thp ==
		    metadata_thp_always || base->auto_thp_switched)) {
			base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size)
			    - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >>
			    LG_HUGEPAGE;
			assert(base->mapped >= base->n_thp << LG_HUGEPAGE);
		}
	}
}

static void *
base_extent_bump_alloc(base_t *base, edata_t *edata, size_t size,
    size_t alignment) {
	void *ret;
	size_t gap_size;

	ret = base_extent_bump_alloc_helper(edata, &gap_size, size, alignment);
	base_extent_bump_alloc_post(base, edata, gap_size, ret, size);
	return ret;
}

/*
 * Allocate a block of virtual memory that is large enough to start with a
 * base_block_t header, followed by an object of specified size and alignment.
 * On success a pointer to the initialized base_block_t header is returned.
 */
static base_block_t *
base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind,
    pszind_t *pind_last, size_t *extent_sn_next, size_t size,
    size_t alignment) {
	alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
	size_t usize = ALIGNMENT_CEILING(size, alignment);
	size_t header_size = sizeof(base_block_t);
	size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) -
	    header_size;
	/*
	 * Create increasingly larger blocks in order to limit the total number
	 * of disjoint virtual memory ranges.  Choose the next size in the page
	 * size class series (skipping size classes that are not a multiple of
	 * HUGEPAGE), or a size large enough to satisfy the requested size and
	 * alignment, whichever is larger.
	 */
	size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size
	    + usize));
	pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) ?
	    *pind_last + 1 : *pind_last;
	size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next));
	size_t block_size = (min_block_size > next_block_size) ? min_block_size
	    : next_block_size;
	base_block_t *block = (base_block_t *)base_map(tsdn, ehooks, ind,
	    block_size);
	if (block == NULL) {
		return NULL;
	}

	if (metadata_thp_madvise()) {
		void *addr = (void *)block;
		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
		    (block_size & HUGEPAGE_MASK) == 0);
		if (opt_metadata_thp == metadata_thp_always) {
			pages_huge(addr, block_size);
		} else if (opt_metadata_thp == metadata_thp_auto &&
		    base != NULL) {
			/* base != NULL indicates this is not a new base. */
			malloc_mutex_lock(tsdn, &base->mtx);
			base_auto_thp_switch(tsdn, base);
			if (base->auto_thp_switched) {
				pages_huge(addr, block_size);
			}
			malloc_mutex_unlock(tsdn, &base->mtx);
		}
	}

	*pind_last = sz_psz2ind(block_size);
	block->size = block_size;
	block->next = NULL;
	assert(block_size >= header_size);
	base_edata_init(extent_sn_next, &block->edata,
	    (void *)((uintptr_t)block + header_size), block_size - header_size);
	return block;
}

/*
 * Allocate an extent that is at least as large as specified size, with
 * specified alignment.
 */
static edata_t *
base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
	malloc_mutex_assert_owner(tsdn, &base->mtx);

	ehooks_t *ehooks = base_ehooks_get_for_metadata(base);
	/*
	 * Drop mutex during base_block_alloc(), because an extent hook will be
	 * called.
	 */
	malloc_mutex_unlock(tsdn, &base->mtx);
	base_block_t *block = base_block_alloc(tsdn, base, ehooks,
	    base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
	    alignment);
	malloc_mutex_lock(tsdn, &base->mtx);
	if (block == NULL) {
		return NULL;
	}
	block->next = base->blocks;
	base->blocks = block;
	if (config_stats) {
		base->allocated += sizeof(base_block_t);
		base->resident += PAGE_CEILING(sizeof(base_block_t));
		base->mapped += block->size;
		if (metadata_thp_madvise() &&
		    !(opt_metadata_thp == metadata_thp_auto
		      && !base->auto_thp_switched)) {
			assert(base->n_thp > 0);
			base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >>
			    LG_HUGEPAGE;
		}
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
	}
	return &block->edata;
}

base_t *
b0get(void) {
	return b0;
}

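/*
 * Create a new base.  The base_t itself is carved out of the first block,
 * which is bootstrapped with temporary stack ehooks before base->ehooks can be
 * initialized; on failure the block is unmapped and NULL is returned.
 */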
base_t *
base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks,
    bool metadata_use_hooks) {
	pszind_t pind_last = 0;
	size_t extent_sn_next = 0;

	/*
	 * The base will contain the ehooks eventually, but it itself is
	 * allocated using them.  So we use some stack ehooks to bootstrap its
	 * memory, and then initialize the ehooks within the base_t.
	 */
	ehooks_t fake_ehooks;
	ehooks_init(&fake_ehooks, metadata_use_hooks ?
	    (extent_hooks_t *)__UNCONST(extent_hooks) :
	    (extent_hooks_t *)__UNCONST(&ehooks_default_extent_hooks), ind);

	base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind,
	    &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
	if (block == NULL) {
		return NULL;
	}

	size_t gap_size;
	size_t base_alignment = CACHELINE;
	size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment);
	base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata,
	    &gap_size, base_size, base_alignment);
	ehooks_init(&base->ehooks, (extent_hooks_t *)__UNCONST(extent_hooks), ind);
	ehooks_init(&base->ehooks_base, metadata_use_hooks ?
	    (extent_hooks_t *)__UNCONST(extent_hooks) :
	    (extent_hooks_t *)__UNCONST(&ehooks_default_extent_hooks), ind);
	if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE,
	    malloc_mutex_rank_exclusive)) {
		base_unmap(tsdn, &fake_ehooks, ind, block, block->size);
		return NULL;
	}
	base->pind_last = pind_last;
	base->extent_sn_next = extent_sn_next;
	base->blocks = block;
	base->auto_thp_switched = false;
	for (szind_t i = 0; i < SC_NSIZES; i++) {
		edata_heap_new(&base->avail[i]);
	}
	if (config_stats) {
		base->allocated = sizeof(base_block_t);
		base->resident = PAGE_CEILING(sizeof(base_block_t));
		base->mapped = block->size;
		base->n_thp = (opt_metadata_thp == metadata_thp_always) &&
		    metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t))
		    >> LG_HUGEPAGE : 0;
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
	}
	base_extent_bump_alloc_post(base, &block->edata, gap_size, base,
	    base_size);

	return base;
}

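/*
 * Unmap every block owned by the base.  The base_t (and its mutex) lives
 * inside the oldest block, so the base must not be used once this returns.
 */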
void
base_delete(tsdn_t *tsdn, base_t *base) {
	ehooks_t *ehooks = base_ehooks_get_for_metadata(base);
	base_block_t *next = base->blocks;
	do {
		base_block_t *block = next;
		next = block->next;
		base_unmap(tsdn, ehooks, base_ind_get(base), block,
		    block->size);
	} while (next != NULL);
}

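/*
 * base->ehooks holds the extent hooks visible to the arena, while ehooks_base
 * holds the hooks actually used to map and unmap metadata blocks (the default
 * hooks when metadata_use_hooks is false).
 */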
ehooks_t *
base_ehooks_get(base_t *base) {
	return &base->ehooks;
}

ehooks_t *
base_ehooks_get_for_metadata(base_t *base) {
	return &base->ehooks_base;
}

extent_hooks_t *
base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) {
	extent_hooks_t *old_extent_hooks =
	    ehooks_get_extent_hooks_ptr(&base->ehooks);
	ehooks_init(&base->ehooks, extent_hooks, ehooks_ind_get(&base->ehooks));
	return old_extent_hooks;
}

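/*
 * Satisfy an allocation from an existing extent, searching the avail heaps
 * upward from the smallest size class guaranteed to fit the aligned request;
 * only if none is found is a new block mapped.  If esn is non-NULL it receives
 * the extent serial number of the extent used.
 */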
static void *
base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment,
    size_t *esn) {
	alignment = QUANTUM_CEILING(alignment);
	size_t usize = ALIGNMENT_CEILING(size, alignment);
	size_t asize = usize + alignment - QUANTUM;

	edata_t *edata = NULL;
	malloc_mutex_lock(tsdn, &base->mtx);
	for (szind_t i = sz_size2index(asize); i < SC_NSIZES; i++) {
		edata = edata_heap_remove_first(&base->avail[i]);
		if (edata != NULL) {
			/* Use existing space. */
			break;
		}
	}
	if (edata == NULL) {
		/* Try to allocate more space. */
		edata = base_extent_alloc(tsdn, base, usize, alignment);
	}
	void *ret;
	if (edata == NULL) {
		ret = NULL;
		goto label_return;
	}

	ret = base_extent_bump_alloc(base, edata, usize, alignment);
	if (esn != NULL) {
		*esn = (size_t)edata_sn_get(edata);
	}
label_return:
	malloc_mutex_unlock(tsdn, &base->mtx);
	return ret;
}

/*
 * base_alloc() returns zeroed memory, which is always demand-zeroed for the
 * auto arenas, in order to make multi-page sparse data structures such as radix
 * tree nodes efficient with respect to physical memory usage.  Upon success a
 * pointer to at least size bytes with specified alignment is returned.  Note
 * that size is rounded up to the nearest multiple of alignment to avoid false
 * sharing.
 */
void *
base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
	return base_alloc_impl(tsdn, base, size, alignment, NULL);
}

edata_t *
base_alloc_edata(tsdn_t *tsdn, base_t *base) {
	size_t esn;
	edata_t *edata = base_alloc_impl(tsdn, base, sizeof(edata_t),
	    EDATA_ALIGNMENT, &esn);
	if (edata == NULL) {
		return NULL;
	}
	edata_esn_set(edata, esn);
	return edata;
}

void
base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
    size_t *mapped, size_t *n_thp) {
	cassert(config_stats);

	malloc_mutex_lock(tsdn, &base->mtx);
	assert(base->allocated <= base->resident);
	assert(base->resident <= base->mapped);
	*allocated = base->allocated;
	*resident = base->resident;
	*mapped = base->mapped;
	*n_thp = base->n_thp;
	malloc_mutex_unlock(tsdn, &base->mtx);
}

void
base_prefork(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_prefork(tsdn, &base->mtx);
}

void
base_postfork_parent(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_postfork_parent(tsdn, &base->mtx);
}

void
base_postfork_child(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_postfork_child(tsdn, &base->mtx);
}

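/* Create the global base b0; returns true on error. */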
bool
base_boot(tsdn_t *tsdn) {
	b0 = base_new(tsdn, 0, (extent_hooks_t *)__UNCONST(&ehooks_default_extent_hooks),
	    /* metadata_use_hooks */ true);
	return (b0 == NULL);
}