xref: /netbsd-src/lib/libc/stdlib/jemalloc.c (revision 6b3ee451d673206ed73793bef5f2afc789a14c6b)
1 /*	$NetBSD: jemalloc.c,v 1.64 2023/12/13 23:53:50 mrg Exp $	*/
2 
3 /*-
4  * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
5  * Copyright (C) 2023 Andrew Doran <ad@NetBSD.org>.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice(s), this list of conditions and the following disclaimer as
13  *    the first lines of this file unmodified other than the possible
14  *    addition of one or more copyright notices.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice(s), this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
21  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
27  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
29  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  *******************************************************************************
33  *
34  * This allocator implementation is designed to provide scalable performance
35  * for multi-threaded programs on multi-processor systems.  The following
36  * features are included for this purpose:
37  *
38  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
39  *     contention and cache sloshing.
40  *
41  *   + Cache line sharing between arenas is avoided for internal data
42  *     structures.
43  *
44  *   + Memory is managed in chunks and runs (chunks can be split into runs),
45  *     rather than as individual pages.  This provides a constant-time
46  *     mechanism for associating allocations with particular arenas.
47  *
48  * Allocation requests are rounded up to the nearest size class, and no record
49  * of the original request size is maintained.  Allocations are broken into
50  * categories according to size class.  Assuming runtime defaults, 4 kB pages
51  * and a 16 byte quantum, the size classes in each category are as follows:
52  *
53  *   |=====================================|
54  *   | Category | Subcategory    |    Size |
55  *   |=====================================|
56  *   | Small    | Tiny           |       2 |
57  *   |          |                |       4 |
58  *   |          |                |       8 |
59  *   |          |----------------+---------|
60  *   |          | Quantum-spaced |      16 |
61  *   |          |                |      32 |
62  *   |          |                |      48 |
63  *   |          |                |     ... |
64  *   |          |                |     480 |
65  *   |          |                |     496 |
66  *   |          |                |     512 |
67  *   |          |----------------+---------|
68  *   |          | Sub-page       |    1 kB |
69  *   |          |                |    2 kB |
70  *   |=====================================|
71  *   | Large                     |    4 kB |
72  *   |                           |    8 kB |
73  *   |                           |   12 kB |
74  *   |                           |     ... |
75  *   |                           | 1012 kB |
76  *   |                           | 1016 kB |
77  *   |                           | 1020 kB |
78  *   |=====================================|
79  *   | Huge                      |    1 MB |
80  *   |                           |    2 MB |
81  *   |                           |    3 MB |
82  *   |                           |     ... |
83  *   |=====================================|
84  *
85  * A different mechanism is used for each category:
86  *
87  *   Small : Each size class is segregated into its own set of runs.  Each run
88  *           maintains a bitmap of which regions are free/allocated.
89  *
90  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
91  *           in the associated arena chunk header maps.
92  *
93  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
94  *          Metadata are stored in a separate red-black tree.
95  *
96  *******************************************************************************
97  */
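/*
 * Worked example (illustrative, assuming the runtime defaults above: 16 byte
 * quantum, 4 kB pages, 1 MB chunks):
 *
 *   malloc(7)        -> tiny,           rounded up to 8 bytes
 *   malloc(100)      -> quantum-spaced, rounded up to 112 bytes
 *   malloc(3000)     -> large,          rounded up to 4 kB (a dedicated run)
 *   malloc(5 << 20)  -> huge,           5 MB, a dedicated set of chunks
 *
 * Because the original request size is not recorded, the rounded-up size is
 * what the allocator reports and operates on from then on.
 */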
98 
99 /* LINTLIBRARY */
100 
101 /*
102  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
103  * defaults the A and J runtime options to off.  These settings are appropriate
104  * for production systems.
105  */
106 #define MALLOC_PRODUCTION
107 
108 #ifndef MALLOC_PRODUCTION
109 #  define MALLOC_DEBUG
110 #endif
111 
112 #include <sys/cdefs.h>
113 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
114 __RCSID("$NetBSD: jemalloc.c,v 1.64 2023/12/13 23:53:50 mrg Exp $");
115 
116 #include "namespace.h"
117 #include <sys/mman.h>
118 #include <sys/param.h>
119 #include <sys/time.h>
120 #include <sys/types.h>
121 #include <sys/sysctl.h>
122 #include <sys/rbtree.h>
123 #include <sys/uio.h>
124 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
125 
126 #include <errno.h>
127 #include <limits.h>
128 #include <pthread.h>
129 #include <sched.h>
130 #include <stdarg.h>
131 #include <stdbool.h>
132 #include <stdio.h>
133 #include <stdint.h>
134 #include <stdlib.h>
135 #include <string.h>
136 #include <strings.h>
137 #include <unistd.h>
138 #include <malloc.h>
139 
140 #include <reentrant.h>
141 #include "extern.h"
142 
143 #define STRERROR_R(a, b, c)	strerror_r_ss(a, b, c);
144 
145 /* MALLOC_STATS enables statistics calculation. */
146 #ifndef MALLOC_PRODUCTION
147 #  define MALLOC_STATS
148 #endif
149 
150 #ifdef MALLOC_DEBUG
151 #  ifdef NDEBUG
152 #    undef NDEBUG
153 #  endif
154 #else
155 #  ifndef NDEBUG
156 #    define NDEBUG
157 #  endif
158 #endif
159 #include <assert.h>
160 
161 #ifdef MALLOC_DEBUG
162    /* Disable inlining to make debugging easier. */
163 #  define inline
164 #endif
165 
166 /*
167  * Compare two pointers of 64/32 bit width and produce a ternary 32-bit
 168  * indicator without using conditionals, while maintaining the expected
 169  * ordering: [negative, equal, positive].
 170  *
 171  * XXX it depends on two's complement arithmetic.
172  * XXX maybe should be a built-in for rbtree?
173  */
174 static inline int
175 ptrcmp(const void *pa, const void *pb)
176 {
177 #ifdef _LP64
178 	const uintptr_t a = (uintptr_t)pa, b = (uintptr_t)pb;
179 	const uintptr_t diff = a - b;
180 	assert(((a | b) & 1) == 0);
181 	return (int)(diff >> 32) | ((unsigned)diff >> 1);
182 #else
183 	return (intptr_t)pa - (intptr_t)pb;
184 #endif
185 }
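/*
 * Explanatory note on the _LP64 branch above: the assert requires both
 * pointers to be at least 2-byte aligned, so bit 0 of the difference is
 * always zero; the ">> 1" therefore loses no information and guarantees that
 * the low half can never set the sign bit of the result, while the high half
 * of the 64-bit difference supplies the sign for distances that do not fit
 * in 32 bits.  E.g. (illustrative):
 *
 *   pa == pb          -> diff == 0                  -> 0
 *   pa == pb + 0x10   -> diff == 0x10               -> 0x8 (positive)
 *   pa == pb - 0x10   -> high word == 0xffffffff    -> negative
 */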
186 
187 /* Size of stack-allocated buffer passed to strerror_r(). */
188 #define	STRERROR_BUF		64
189 
190 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
191 
192 /*
193  * If you touch the TINY_MIN_2POW definition for any architecture, please
194  * make sure to adjust the corresponding definition for JEMALLOC_TINY_MIN_2POW
195  * in the gcc 4.8 tree in dist/gcc/tree-ssa-ccp.c and verify that a native
196  * gcc is still buildable!
197  */
198 
199 #ifdef __i386__
200 #  define QUANTUM_2POW_MIN	4
201 #  define SIZEOF_PTR_2POW	2
202 #  define USE_BRK
203 #endif
204 #ifdef __ia64__
205 #  define QUANTUM_2POW_MIN	4
206 #  define SIZEOF_PTR_2POW	3
207 #endif
208 #ifdef __aarch64__
209 #  define QUANTUM_2POW_MIN	4
210 #  define SIZEOF_PTR_2POW	3
211 #  define TINY_MIN_2POW		3
212 #endif
213 #ifdef __alpha__
214 #  define QUANTUM_2POW_MIN	4
215 #  define SIZEOF_PTR_2POW	3
216 #  define TINY_MIN_2POW		3
217 #endif
218 #ifdef __sparc64__
219 #  define QUANTUM_2POW_MIN	4
220 #  define SIZEOF_PTR_2POW	3
221 #  define TINY_MIN_2POW		3
222 #endif
223 #ifdef __amd64__
224 #  define QUANTUM_2POW_MIN	4
225 #  define SIZEOF_PTR_2POW	3
226 #  define TINY_MIN_2POW		3
227 #endif
228 #ifdef __arm__
229 #  define QUANTUM_2POW_MIN	3
230 #  define SIZEOF_PTR_2POW	2
231 #  define USE_BRK
232 #  ifdef __ARM_EABI__
233 #    define TINY_MIN_2POW	3
234 #  endif
235 #endif
236 #ifdef __powerpc__
237 #  define QUANTUM_2POW_MIN	4
238 #  define SIZEOF_PTR_2POW	2
239 #  define USE_BRK
240 #  define TINY_MIN_2POW		3
241 #endif
242 #if defined(__sparc__) && !defined(__sparc64__)
243 #  define QUANTUM_2POW_MIN	4
244 #  define SIZEOF_PTR_2POW	2
245 #  define USE_BRK
246 #endif
247 #ifdef __or1k__
248 #  define QUANTUM_2POW_MIN	4
249 #  define SIZEOF_PTR_2POW	2
250 #  define USE_BRK
251 #endif
252 #ifdef __vax__
253 #  define QUANTUM_2POW_MIN	4
254 #  define SIZEOF_PTR_2POW	2
255 #  define USE_BRK
256 #endif
257 #ifdef __sh__
258 #  define QUANTUM_2POW_MIN	4
259 #  define SIZEOF_PTR_2POW	2
260 #  define USE_BRK
261 #endif
262 #ifdef __m68k__
263 #  define QUANTUM_2POW_MIN	4
264 #  define SIZEOF_PTR_2POW	2
265 #  define USE_BRK
266 #endif
267 #if defined(__mips__)
268 #  ifdef _LP64
269 #    define SIZEOF_PTR_2POW	3
270 #    define TINY_MIN_2POW	3
271 #  else
272 #    define SIZEOF_PTR_2POW	2
273 #  endif
274 #  define QUANTUM_2POW_MIN	4
275 #  define USE_BRK
276 #endif
277 #if defined(__riscv__)
278 #  ifdef _LP64
279 #    define SIZEOF_PTR_2POW	3
280 #    define TINY_MIN_2POW	3
281 #  else
282 #    define SIZEOF_PTR_2POW	2
283 #  endif
284 #  define QUANTUM_2POW_MIN	4
285 #  define USE_BRK
286 #endif
287 #ifdef __hppa__
288 #  define QUANTUM_2POW_MIN	4
289 #  define TINY_MIN_2POW		4
290 #  define SIZEOF_PTR_2POW	2
291 #  define USE_BRK
292 #endif
293 
294 #define	SIZEOF_PTR		(1 << SIZEOF_PTR_2POW)
295 
296 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
297 #ifndef SIZEOF_INT_2POW
298 #  define SIZEOF_INT_2POW	2
299 #endif
300 
301 /*
302  * Size and alignment of memory chunks that are allocated by the OS's virtual
303  * memory system.
304  */
305 #define	CHUNK_2POW_DEFAULT	20
306 
307 /*
308  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
309  * so over-estimates are okay (up to a point), but under-estimates will
310  * negatively affect performance.
311  */
312 #define	CACHELINE_2POW		6
313 #define	CACHELINE		((size_t)(1 << CACHELINE_2POW))
314 
315 /* Smallest size class to support. */
316 #ifndef TINY_MIN_2POW
317 #define	TINY_MIN_2POW		2
318 #endif
319 
320 /*
321  * Maximum size class that is a multiple of the quantum, but not (necessarily)
322  * a power of 2.  Above this size, allocations are rounded up to the nearest
323  * power of 2.
324  */
325 #define	SMALL_MAX_2POW_DEFAULT	9
326 #define	SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)
327 
328 /*
329  * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
330  * as small as possible such that this setting is still honored, without
331  * violating other constraints.  The goal is to make runs as small as possible
332  * without exceeding a per run external fragmentation threshold.
333  *
334  * We use binary fixed point math for overhead computations, where the binary
335  * point is implicitly RUN_BFP bits to the left.
336  *
337  * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
338  * honored for some/all object sizes, since there is one bit of header overhead
339  * per object (plus a constant).  This constraint is relaxed (ignored) for runs
340  * that are so small that the per-region overhead is greater than:
341  *
 342  *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
343  */
344 #define RUN_BFP			12
345 /*                              \/   Implicit binary fixed point. */
346 #define RUN_MAX_OVRHD		0x0000003dU
347 #define RUN_MAX_OVRHD_RELAX	0x00001800U
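/*
 * For example (illustrative): with RUN_BFP == 12, RUN_MAX_OVRHD == 0x3d is
 * 61/4096, i.e. roughly 1.5% maximum header overhead per run, while
 * RUN_MAX_OVRHD_RELAX == 0x1800 is 6144/4096, i.e. 150% in the same
 * fixed-point scale.
 */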
348 
349 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
350 #define RUN_MAX_SMALL_2POW	15
351 #define RUN_MAX_SMALL		(1 << RUN_MAX_SMALL_2POW)
352 
353 /******************************************************************************/
354 
355 #define	malloc_mutex_t	mutex_t
356 
357 /* Set to true once the allocator has been initialized. */
358 static bool malloc_initialized = false;
359 
360 #ifdef _REENTRANT
361 /* Used to avoid initialization races. */
362 static mutex_t init_lock = MUTEX_INITIALIZER;
363 #endif
364 
365 /******************************************************************************/
366 /*
367  * Statistics data structures.
368  */
369 
370 #ifdef MALLOC_STATS
371 
372 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
373 struct malloc_bin_stats_s {
374 	/*
375 	 * Number of allocation requests that corresponded to the size of this
376 	 * bin.
377 	 */
378 	uint64_t	nrequests;
379 
380 	/* Total number of runs created for this bin's size class. */
381 	uint64_t	nruns;
382 
383 	/*
384 	 * Total number of runs reused by extracting them from the runs tree for
385 	 * this bin's size class.
386 	 */
387 	uint64_t	reruns;
388 
389 	/* High-water mark for this bin. */
390 	unsigned long	highruns;
391 
392 	/* Current number of runs in this bin. */
393 	unsigned long	curruns;
394 };
395 
396 typedef struct arena_stats_s arena_stats_t;
397 struct arena_stats_s {
398 	/* Number of bytes currently mapped. */
399 	size_t		mapped;
400 
401 	/* Per-size-category statistics. */
402 	size_t		allocated_small;
403 	uint64_t	nmalloc_small;
404 	uint64_t	ndalloc_small;
405 
406 	size_t		allocated_large;
407 	uint64_t	nmalloc_large;
408 	uint64_t	ndalloc_large;
409 };
410 
411 typedef struct chunk_stats_s chunk_stats_t;
412 struct chunk_stats_s {
413 	/* Number of chunks that were allocated. */
414 	uint64_t	nchunks;
415 
416 	/* High-water mark for number of chunks allocated. */
417 	unsigned long	highchunks;
418 
419 	/*
420 	 * Current number of chunks allocated.  This value isn't maintained for
421 	 * any other purpose, so keep track of it in order to be able to set
422 	 * highchunks.
423 	 */
424 	unsigned long	curchunks;
425 };
426 
427 #endif /* #ifdef MALLOC_STATS */
428 
429 /******************************************************************************/
430 /*
431  * Chunk data structures.
432  */
433 
434 /* Tree of chunks. */
435 typedef struct chunk_node_s chunk_node_t;
436 struct chunk_node_s {
437 	/* Linkage for the chunk tree. */
438 	rb_node_t link;
439 
440 	/*
441 	 * Pointer to the chunk that this tree node is responsible for.  In some
442 	 * (but certainly not all) cases, this data structure is placed at the
443 	 * beginning of the corresponding chunk, so this field may point to this
444 	 * node.
445 	 */
446 	void	*chunk;
447 
448 	/* Total chunk size. */
449 	size_t	size;
450 };
451 typedef struct chunk_tree_s chunk_tree_t;
452 
453 static int chunk_comp(void *, const void *, const void *);
454 
455 static const rb_tree_ops_t chunk_tree_ops = {
456 	.rbto_compare_nodes = chunk_comp,
457 	.rbto_compare_key = chunk_comp,
458 	.rbto_node_offset = offsetof(struct chunk_node_s, link),
459 };
460 
461 /******************************************************************************/
462 /*
463  * Arena data structures.
464  */
465 
466 typedef struct arena_s arena_t;
467 typedef struct arena_bin_s arena_bin_t;
468 
469 typedef struct arena_chunk_map_s arena_chunk_map_t;
470 struct arena_chunk_map_s {
471 	/* Number of pages in run. */
472 	uint32_t	npages;
473 	/*
474 	 * Position within run.  For a free run, this is POS_FREE for the first
475 	 * and last pages.  The POS_FREE special value makes it possible to
476 	 * quickly coalesce free runs.
477 	 *
478 	 * This is the limiting factor for chunksize; there can be at most 2^31
479 	 * pages in a run.
480 	 */
481 #define POS_FREE ((uint32_t)0xffffffffU)
482 	uint32_t	pos;
483 };
484 
485 /* Arena chunk header. */
486 typedef struct arena_chunk_s arena_chunk_t;
487 struct arena_chunk_s {
488 	/* Linkage for the arena's chunk tree. */
489 	rb_node_t link;
490 
491 	/* Arena that owns the chunk. */
492 	arena_t *arena;
493 
494 	/*
495 	 * Number of pages in use.  This is maintained in order to make
496 	 * detection of empty chunks fast.
497 	 */
498 	uint32_t pages_used;
499 
500 	/*
501 	 * Every time a free run larger than this value is created/coalesced,
502 	 * this value is increased.  The only way that the value decreases is if
503 	 * arena_run_alloc() fails to find a free run as large as advertised by
504 	 * this value.
505 	 */
506 	uint32_t max_frun_npages;
507 
508 	/*
509 	 * Every time a free run that starts at an earlier page than this value
510 	 * is created/coalesced, this value is decreased.  It is reset in a
511 	 * similar fashion to max_frun_npages.
512 	 */
513 	uint32_t min_frun_ind;
514 
515 	/*
516 	 * Map of pages within chunk that keeps track of free/large/small.  For
517 	 * free runs, only the map entries for the first and last pages are
518 	 * kept up to date, so that free runs can be quickly coalesced.
519 	 */
520 	arena_chunk_map_t map[1]; /* Dynamically sized. */
521 };
522 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
523 
524 static int arena_chunk_comp(void *, const void *, const void *);
525 
526 static const rb_tree_ops_t arena_chunk_tree_ops = {
527 	.rbto_compare_nodes = arena_chunk_comp,
528 	.rbto_compare_key = arena_chunk_comp,
529 	.rbto_node_offset = offsetof(struct arena_chunk_s, link),
530 };
531 
532 typedef struct arena_run_s arena_run_t;
533 struct arena_run_s {
534 	/* Linkage for run trees. */
535 	rb_node_t	link;
536 
537 #ifdef MALLOC_DEBUG
538 	uint32_t	magic;
539 #  define ARENA_RUN_MAGIC 0x384adf93
540 #endif
541 
542 	/* Bin this run is associated with. */
543 	arena_bin_t	*bin;
544 
545 	/* Index of first element that might have a free region. */
546 	unsigned	regs_minelm;
547 
548 	/* Number of free regions in run. */
549 	unsigned	nfree;
550 
551 	/* Bitmask of in-use regions (0: in use, 1: free). */
552 	unsigned	regs_mask[1]; /* Dynamically sized. */
553 };
554 typedef struct arena_run_tree_s arena_run_tree_t;
555 
556 static int arena_run_comp(void *, const void *, const void *);
557 
558 static const rb_tree_ops_t arena_run_tree_ops = {
559 	.rbto_compare_nodes = arena_run_comp,
560 	.rbto_compare_key = arena_run_comp,
561 	.rbto_node_offset = offsetof(struct arena_run_s, link),
562 };
563 
564 struct arena_bin_s {
565 	/*
566 	 * Current run being used to service allocations of this bin's size
567 	 * class.
568 	 */
569 	arena_run_t	*runcur;
570 
571 	/*
572 	 * Tree of non-full runs.  This tree is used when looking for an
573 	 * existing run when runcur is no longer usable.  We choose the
574 	 * non-full run that is lowest in memory; this policy tends to keep
575 	 * objects packed well, and it can also help reduce the number of
576 	 * almost-empty chunks.
577 	 */
578 	rb_tree_t	runs;
579 
580 	/* Size of regions in a run for this bin's size class. */
581 	size_t		reg_size;
582 
583 	/* Total size of a run for this bin's size class. */
584 	size_t		run_size;
585 
586 	/* Total number of regions in a run for this bin's size class. */
587 	uint32_t	nregs;
588 
589 	/* Number of elements in a run's regs_mask for this bin's size class. */
590 	uint32_t	regs_mask_nelms;
591 
592 	/* Offset of first region in a run for this bin's size class. */
593 	uint32_t	reg0_offset;
594 
595 #ifdef MALLOC_STATS
596 	/* Bin statistics. */
597 	malloc_bin_stats_t stats;
598 #endif
599 };
600 
601 struct arena_s {
602 #ifdef MALLOC_DEBUG
603 	uint32_t		magic;
604 #  define ARENA_MAGIC 0x947d3d24
605 #endif
606 
607 	/* All operations on this arena require that mtx be locked. */
608 	malloc_mutex_t		mtx;
609 
610 #ifdef MALLOC_STATS
611 	arena_stats_t		stats;
612 #endif
613 
614 	/*
615 	 * Tree of chunks this arena manages.
616 	 */
617 	rb_tree_t	chunks;
618 
619 	/*
620 	 * In order to avoid rapid chunk allocation/deallocation when an arena
621 	 * oscillates right on the cusp of needing a new chunk, cache the most
622 	 * recently freed chunk.  This caching is disabled by opt_hint.
623 	 *
624 	 * There is one spare chunk per arena, rather than one spare total, in
625 	 * order to avoid interactions between multiple threads that could make
626 	 * a single spare inadequate.
627 	 */
628 	arena_chunk_t *spare;
629 
630 	/*
631 	 * bins is used to store rings of free regions of the following sizes,
632 	 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
633 	 *
634 	 *   bins[i] | size |
635 	 *   --------+------+
636 	 *        0  |    2 |
637 	 *        1  |    4 |
638 	 *        2  |    8 |
639 	 *   --------+------+
640 	 *        3  |   16 |
641 	 *        4  |   32 |
642 	 *        5  |   48 |
643 	 *        6  |   64 |
644 	 *           :      :
645 	 *           :      :
646 	 *       33  |  496 |
647 	 *       34  |  512 |
648 	 *   --------+------+
649 	 *       35  | 1024 |
650 	 *       36  | 2048 |
651 	 *   --------+------+
652 	 */
653 	arena_bin_t		bins[1]; /* Dynamically sized. */
654 };
655 
656 /******************************************************************************/
657 /*
658  * Data.
659  */
660 
661 /* Number of CPUs. */
662 static unsigned		ncpus;
663 
664 /* VM page size. */
665 static size_t		pagesize;
666 static size_t		pagesize_mask;
667 static int		pagesize_2pow;
668 
669 /* Various bin-related settings. */
670 static size_t		bin_maxclass; /* Max size class for bins. */
671 static unsigned		ntbins; /* Number of (2^n)-spaced tiny bins. */
672 static unsigned		nqbins; /* Number of quantum-spaced bins. */
673 static unsigned		nsbins; /* Number of (2^n)-spaced sub-page bins. */
674 static size_t		small_min;
675 static size_t		small_max;
676 
677 /* Various quantum-related settings. */
678 static size_t		quantum;
679 static size_t		quantum_mask; /* (quantum - 1). */
680 
681 /* Various chunk-related settings. */
682 static size_t		chunksize;
683 static size_t		chunksize_mask; /* (chunksize - 1). */
684 static int		chunksize_2pow;
685 static unsigned		chunk_npages;
686 static unsigned		arena_chunk_header_npages;
687 static size_t		arena_maxclass; /* Max size class for arenas. */
688 
689 /********/
690 /*
691  * Chunks.
692  */
693 
694 #ifdef _REENTRANT
695 /* Protects chunk-related data structures. */
696 static malloc_mutex_t	chunks_mtx;
697 #endif
698 
699 /* Tree of chunks that are stand-alone huge allocations. */
700 static rb_tree_t	huge;
701 
702 #ifdef USE_BRK
703 /*
704  * Try to use brk for chunk-size allocations, due to address space constraints.
705  */
706 /*
707  * Protects sbrk() calls.  This must be separate from chunks_mtx, since
708  * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
709  * could cause recursive lock acquisition).
710  */
711 #ifdef _REENTRANT
712 static malloc_mutex_t	brk_mtx;
713 #endif
714 /* Result of first sbrk(0) call. */
715 static void		*brk_base;
716 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
717 static void		*brk_prev;
718 /* Current upper limit on brk addresses. */
719 static void		*brk_max;
720 #endif
721 
722 #ifdef MALLOC_STATS
723 /* Huge allocation statistics. */
724 static uint64_t		huge_nmalloc;
725 static uint64_t		huge_ndalloc;
726 static uint64_t		huge_nralloc;
727 static size_t		huge_allocated;
728 #endif
729 
730 /*
731  * Tree of chunks that were previously allocated.  This is used when allocating
732  * chunks, in an attempt to re-use address space.
733  */
734 static rb_tree_t	old_chunks;
735 
736 /****************************/
737 /*
738  * base (internal allocation).
739  */
740 
741 /*
742  * Current pages that are being used for internal memory allocations.  These
743  * pages are carved up in cacheline-size quanta, so that there is no chance of
744  * false cache line sharing.
745  */
746 static void		*base_pages;
747 static void		*base_next_addr;
748 static void		*base_past_addr; /* Addr immediately past base_pages. */
749 static chunk_node_t	*base_chunk_nodes; /* LIFO cache of chunk nodes. */
750 #ifdef _REENTRANT
751 static malloc_mutex_t	base_mtx;
752 #endif
753 #ifdef MALLOC_STATS
754 static size_t		base_mapped;
755 #endif
756 
757 /********/
758 /*
759  * Arenas.
760  */
761 
762 /*
763  * Arenas that are used to service external requests.  Not all elements of the
764  * arenas array are necessarily used; arenas are created lazily as needed.
765  */
766 static arena_t		**arenas;
767 #ifdef _REENTRANT
768 static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */
769 #endif
770 
771 #ifdef MALLOC_STATS
772 /* Chunk statistics. */
773 static chunk_stats_t	stats_chunks;
774 #endif
775 
776 /*******************************/
777 /*
778  * Runtime configuration options.
779  */
780 const char	*_malloc_options;
781 
782 #ifndef MALLOC_PRODUCTION
783 static bool	opt_abort = true;
784 static bool	opt_junk = true;
785 #else
786 static bool	opt_abort = false;
787 static bool	opt_junk = false;
788 #endif
789 static bool	opt_hint = false;
790 static bool	opt_print_stats = false;
791 static int	opt_quantum_2pow = QUANTUM_2POW_MIN;
792 static int	opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
793 static int	opt_chunk_2pow = CHUNK_2POW_DEFAULT;
794 static bool	opt_utrace = false;
795 static bool	opt_sysv = false;
796 static bool	opt_xmalloc = false;
797 static bool	opt_zero = false;
798 
799 typedef struct {
800 	void	*p;
801 	size_t	s;
802 	void	*r;
803 } malloc_utrace_t;
804 
805 /* Sprinkle branch hints for the compiler and CPU. */
806 #define	OPT(a)		__predict_false(opt_##a)
807 #define	NOT_OPT(a)	__predict_true(!opt_##a)
808 
809 /* Trace malloc/free for ktrace/kdump. */
810 #define	UTRACE(a, b, c)							\
811 	if (OPT(utrace)) {						\
812 		malloc_utrace_t ut;					\
813 		ut.p = a;						\
814 		ut.s = b;						\
815 		ut.r = c;						\
816 		utrace("malloc", &ut, sizeof(ut));			\
817 	}
818 
819 /******************************************************************************/
820 /*
821  * Begin function prototypes for non-inline static functions.
822  */
823 
824 static void	wrtmessage(const char *p1, const char *p2, const char *p3,
825 		const char *p4);
826 #ifdef MALLOC_STATS
827 static void	malloc_printf(const char *format, ...);
828 #endif
829 static char	*size_t2s(size_t x, char *s);
830 static bool	base_pages_alloc(size_t minsize);
831 static void	*base_alloc(size_t size);
832 static chunk_node_t *base_chunk_node_alloc(void);
833 static void	base_chunk_node_dealloc(chunk_node_t *node);
834 #ifdef MALLOC_STATS
835 static void	stats_print(arena_t *arena);
836 #endif
837 static void	*pages_map(void *addr, size_t size);
838 static void	*pages_map_align(void *addr, size_t size, int align);
839 static void	pages_unmap(void *addr, size_t size);
840 static void	*chunk_alloc(size_t size);
841 static void	chunk_dealloc(void *chunk, size_t size);
842 static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
843 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
844 static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
845 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
846 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
847 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
848 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
849 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
850 static void	*arena_malloc(arena_t *arena, size_t size);
851 static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
852     size_t alloc_size);
853 static size_t	arena_salloc(const void *ptr);
854 static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
855 static void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
856 static void	arena_new(arena_t *arena);
857 static arena_t	*arenas_extend(void);
858 static void	*huge_malloc(size_t size);
859 static void	*huge_palloc(size_t alignment, size_t size);
860 static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
861 static void	huge_dalloc(void *ptr);
862 static void	*imalloc(size_t size);
863 static void	*ipalloc(size_t alignment, size_t size);
864 static void	*icalloc(size_t size);
865 static size_t	isalloc(const void *ptr);
866 static void	*iralloc(void *ptr, size_t size);
867 static void	idalloc(void *ptr);
868 static void	malloc_print_stats(void);
869 static bool	malloc_init_hard(void);
870 
871 /*
872  * End function prototypes.
873  */
874 /******************************************************************************/
875 /*
876  * Begin mutex.
877  */
878 
879 #define	malloc_mutex_init(m)	mutex_init(m, NULL)
880 #define	malloc_mutex_lock(m)	mutex_lock(m)
881 #define	malloc_mutex_unlock(m)	mutex_unlock(m)
882 
883 /*
884  * End mutex.
885  */
886 /******************************************************************************/
887 /*
888  * Begin Utility functions/macros.
889  */
890 
891 /* Return the chunk address for allocation address a. */
892 #define	CHUNK_ADDR2BASE(a)						\
893 	((void *)((uintptr_t)(a) & ~chunksize_mask))
894 
895 /* Return the chunk offset of address a. */
896 #define	CHUNK_ADDR2OFFSET(a)						\
897 	((size_t)((uintptr_t)(a) & chunksize_mask))
898 
899 /* Return the smallest chunk multiple that is >= s. */
900 #define	CHUNK_CEILING(s)						\
901 	(((s) + chunksize_mask) & ~chunksize_mask)
902 
903 /* Return the smallest cacheline multiple that is >= s. */
904 #define	CACHELINE_CEILING(s)						\
905 	(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
906 
907 /* Return the smallest quantum multiple that is >= a. */
908 #define	QUANTUM_CEILING(a)						\
909 	(((a) + quantum_mask) & ~quantum_mask)
910 
911 /* Return the smallest pagesize multiple that is >= s. */
912 #define	PAGE_CEILING(s)							\
913 	(((s) + pagesize_mask) & ~pagesize_mask)
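/*
 * Example (illustrative): with 4 kB pages, pagesize_mask == 0xfff, so
 * PAGE_CEILING(0x1001) == 0x2000 and PAGE_CEILING(0x1000) == 0x1000.  The
 * CHUNK_, CACHELINE_ and QUANTUM_ ceiling macros follow the same
 * mask-and-round pattern with their respective power-of-two sizes.
 */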
914 
915 /* Compute the smallest power of 2 that is >= x. */
916 static inline size_t
917 pow2_ceil(size_t x)
918 {
919 
920 	x--;
921 	x |= x >> 1;
922 	x |= x >> 2;
923 	x |= x >> 4;
924 	x |= x >> 8;
925 	x |= x >> 16;
926 #if (SIZEOF_PTR == 8)
927 	x |= x >> 32;
928 #endif
929 	x++;
930 	return (x);
931 }
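/*
 * E.g. (illustrative) pow2_ceil(5): 5 - 1 == 0b100; the successive ORs smear
 * the top bit downward, giving 0b111; adding 1 yields 8.  Powers of two are
 * returned unchanged, e.g. pow2_ceil(8) == 8.
 */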
932 
933 static void
934 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
935 {
936 
937 	write(STDERR_FILENO, p1, strlen(p1));
938 	write(STDERR_FILENO, p2, strlen(p2));
939 	write(STDERR_FILENO, p3, strlen(p3));
940 	write(STDERR_FILENO, p4, strlen(p4));
941 }
942 
943 void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
944 	    const char *p4) = wrtmessage;
945 
946 #ifdef MALLOC_STATS
947 /*
948  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
949  */
950 static void
951 malloc_printf(const char *format, ...)
952 {
953 	char buf[4096];
954 	va_list ap;
955 
956 	va_start(ap, format);
957 	vsnprintf(buf, sizeof(buf), format, ap);
958 	va_end(ap);
959 	_malloc_message(buf, "", "", "");
960 }
961 #endif
962 
963 /*
964  * We don't want to depend on vsnprintf() for production builds, since that can
965  * cause unnecessary bloat for static binaries.  size_t2s() provides minimal
966  * integer printing functionality, so that malloc_printf() use can be limited to
967  * MALLOC_STATS code.
968  */
969 #define UMAX2S_BUFSIZE	21
970 static char *
971 size_t2s(size_t x, char *s)
972 {
973 	unsigned i;
974 
975 	/* Make sure UMAX2S_BUFSIZE is large enough. */
976 	/* LINTED */
977 	assert(sizeof(size_t) <= 8);
978 
979 	i = UMAX2S_BUFSIZE - 1;
980 	s[i] = '\0';
981 	do {
982 		i--;
983 		s[i] = "0123456789"[(int)x % 10];
984 		x /= (uintmax_t)10LL;
985 	} while (x > 0);
986 
987 	return (&s[i]);
988 }
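/*
 * size_t2s() writes the digits at the end of the caller-supplied buffer and
 * returns a pointer to the first digit.  Illustrative use:
 *
 *	char buf[UMAX2S_BUFSIZE];
 *	const char *s = size_t2s(1024, buf);	(s now points at "1024")
 */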
989 
990 /******************************************************************************/
991 
992 static bool
993 base_pages_alloc(size_t minsize)
994 {
995 	size_t csize = 0;
996 
997 #ifdef USE_BRK
998 	/*
999 	 * Do special brk allocation here, since base allocations don't need to
1000 	 * be chunk-aligned.
1001 	 */
1002 	if (brk_prev != (void *)-1) {
1003 		void *brk_cur;
1004 		intptr_t incr;
1005 
1006 		if (minsize != 0)
1007 			csize = CHUNK_CEILING(minsize);
1008 
1009 		malloc_mutex_lock(&brk_mtx);
1010 		do {
1011 			/* Get the current end of brk. */
1012 			brk_cur = sbrk(0);
1013 
1014 			/*
1015 			 * Calculate how much padding is necessary to
1016 			 * chunk-align the end of brk.  Don't worry about
1017 			 * brk_cur not being chunk-aligned though.
1018 			 */
1019 			incr = (intptr_t)chunksize
1020 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1021 			assert(incr >= 0);
1022 			if ((size_t)incr < minsize)
1023 				incr += csize;
1024 
1025 			brk_prev = sbrk(incr);
1026 			if (brk_prev == brk_cur) {
1027 				/* Success. */
1028 				malloc_mutex_unlock(&brk_mtx);
1029 				base_pages = brk_cur;
1030 				base_next_addr = base_pages;
1031 				base_past_addr = (void *)((uintptr_t)base_pages
1032 				    + incr);
1033 #ifdef MALLOC_STATS
1034 				base_mapped += incr;
1035 #endif
1036 				return (false);
1037 			}
1038 		} while (brk_prev != (void *)-1);
1039 		malloc_mutex_unlock(&brk_mtx);
1040 	}
1041 	if (minsize == 0) {
1042 		/*
1043 		 * Failure during initialization doesn't matter, so avoid
1044 		 * falling through to the mmap-based page mapping code.
1045 		 */
1046 		return (true);
1047 	}
1048 #endif
1049 	assert(minsize != 0);
1050 	csize = PAGE_CEILING(minsize);
1051 	base_pages = pages_map(NULL, csize);
1052 	if (base_pages == NULL)
1053 		return (true);
1054 	base_next_addr = base_pages;
1055 	base_past_addr = (void *)((uintptr_t)base_pages + csize);
1056 #ifdef MALLOC_STATS
1057 	base_mapped += csize;
1058 #endif
1059 	return (false);
1060 }
1061 
1062 static void *
1063 base_alloc(size_t size)
1064 {
1065 	void *ret;
1066 	size_t csize;
1067 
1068 	/* Round size up to nearest multiple of the cacheline size. */
1069 	csize = CACHELINE_CEILING(size);
1070 
1071 	malloc_mutex_lock(&base_mtx);
1072 
1073 	/* Make sure there's enough space for the allocation. */
1074 	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1075 		if (base_pages_alloc(csize)) {
1076 			ret = NULL;
1077 			goto RETURN;
1078 		}
1079 	}
1080 
1081 	/* Allocate. */
1082 	ret = base_next_addr;
1083 	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1084 
1085 RETURN:
1086 	malloc_mutex_unlock(&base_mtx);
1087 	return (ret);
1088 }
1089 
1090 static chunk_node_t *
1091 base_chunk_node_alloc(void)
1092 {
1093 	chunk_node_t *ret;
1094 
1095 	malloc_mutex_lock(&base_mtx);
1096 	if (base_chunk_nodes != NULL) {
1097 		ret = base_chunk_nodes;
1098 		/* LINTED */
1099 		base_chunk_nodes = *(chunk_node_t **)ret;
1100 		malloc_mutex_unlock(&base_mtx);
1101 	} else {
1102 		malloc_mutex_unlock(&base_mtx);
1103 		ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1104 	}
1105 
1106 	return (ret);
1107 }
1108 
1109 static void
1110 base_chunk_node_dealloc(chunk_node_t *node)
1111 {
1112 
1113 	malloc_mutex_lock(&base_mtx);
1114 	/* LINTED */
1115 	*(chunk_node_t **)node = base_chunk_nodes;
1116 	base_chunk_nodes = node;
1117 	malloc_mutex_unlock(&base_mtx);
1118 }
1119 
1120 /******************************************************************************/
1121 
1122 #ifdef MALLOC_STATS
1123 static void
1124 stats_print(arena_t *arena)
1125 {
1126 	const unsigned minusone = (unsigned)-1;
1127 	unsigned i, gap_start;
1128 
1129 	malloc_printf(
1130 	    "          allocated/mapped            nmalloc      ndalloc\n");
1131 
1132 	malloc_printf("small: %12zu %-12s %12llu %12llu\n",
1133 	    arena->stats.allocated_small, "", arena->stats.nmalloc_small,
1134 	    arena->stats.ndalloc_small);
1135 	malloc_printf("large: %12zu %-12s %12llu %12llu\n",
1136 	    arena->stats.allocated_large, "", arena->stats.nmalloc_large,
1137 	    arena->stats.ndalloc_large);
1138 	malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
1139 	    arena->stats.allocated_small + arena->stats.allocated_large,
1140 	    arena->stats.mapped,
1141 	    arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1142 	    arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1143 
1144 	malloc_printf("bins:     bin   size regs pgs  requests   newruns"
1145 	    "    reruns maxruns curruns\n");
1146 	for (i = 0, gap_start = minusone; i < ntbins + nqbins + nsbins; i++) {
1147 		if (arena->bins[i].stats.nrequests == 0) {
1148 			if (gap_start == minusone)
1149 				gap_start = i;
1150 		} else {
1151 			if (gap_start != minusone) {
1152 				if (i > gap_start + 1) {
1153 					/* Gap of more than one size class. */
1154 					malloc_printf("[%u..%u]\n",
1155 					    gap_start, i - 1);
1156 				} else {
1157 					/* Gap of one size class. */
1158 					malloc_printf("[%u]\n", gap_start);
1159 				}
1160 				gap_start = minusone;
1161 			}
1162 			malloc_printf(
1163 			    "%13u %1s %4u %4u %3u %9llu %9llu"
1164 			    " %9llu %7lu %7lu\n",
1165 			    i,
1166 			    i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1167 			    arena->bins[i].reg_size,
1168 			    arena->bins[i].nregs,
1169 			    arena->bins[i].run_size >> pagesize_2pow,
1170 			    arena->bins[i].stats.nrequests,
1171 			    arena->bins[i].stats.nruns,
1172 			    arena->bins[i].stats.reruns,
1173 			    arena->bins[i].stats.highruns,
1174 			    arena->bins[i].stats.curruns);
1175 		}
1176 	}
1177 	if (gap_start != minusone) {
1178 		if (i > gap_start + 1) {
1179 			/* Gap of more than one size class. */
1180 			malloc_printf("[%u..%u]\n", gap_start, i - 1);
1181 		} else {
1182 			/* Gap of one size class. */
1183 			malloc_printf("[%u]\n", gap_start);
1184 		}
1185 	}
1186 }
1187 #endif
1188 
1189 /*
1190  * End Utility functions/macros.
1191  */
1192 /******************************************************************************/
1193 /*
1194  * Begin chunk management functions.
1195  */
1196 
1197 static int
1198 chunk_comp(void *context, const void *va, const void *vb)
1199 {
1200 	const chunk_node_t *a = va, *b = vb;
1201 
1202 	assert(a != NULL);
1203 	assert(b != NULL);
1204 
1205 	return ptrcmp(a->chunk, b->chunk);
1206 }
1207 
1208 static void *
1209 pages_map_align(void *addr, size_t size, int align)
1210 {
1211 	void *ret;
1212 
1213 	/*
1214 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
1215 	 * of existing mappings, and we only want to create new mappings.
1216 	 */
1217 	ret = mmap(addr, size, PROT_READ | PROT_WRITE,
1218 	    MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(align), -1, 0);
1219 	assert(ret != NULL);
1220 
1221 	if (ret == MAP_FAILED)
1222 		ret = NULL;
1223 	else if (addr != NULL && ret != addr) {
1224 		/*
1225 		 * We succeeded in mapping memory, but not in the right place.
1226 		 */
1227 		if (munmap(ret, size) == -1) {
1228 			char buf[STRERROR_BUF];
1229 
1230 			STRERROR_R(errno, buf, sizeof(buf));
1231 			_malloc_message(getprogname(),
1232 			    ": (malloc) Error in munmap(): ", buf, "\n");
1233 			if (OPT(abort))
1234 				abort();
1235 		}
1236 		ret = NULL;
1237 	}
1238 
1239 	assert(ret == NULL || (addr == NULL && ret != addr)
1240 	    || (addr != NULL && ret == addr));
1241 	return (ret);
1242 }
1243 
1244 static void *
1245 pages_map(void *addr, size_t size)
1246 {
1247 
1248 	return pages_map_align(addr, size, 0);
1249 }
1250 
1251 static void
1252 pages_unmap(void *addr, size_t size)
1253 {
1254 
1255 	if (munmap(addr, size) == -1) {
1256 		char buf[STRERROR_BUF];
1257 
1258 		STRERROR_R(errno, buf, sizeof(buf));
1259 		_malloc_message(getprogname(),
1260 		    ": (malloc) Error in munmap(): ", buf, "\n");
1261 		if (OPT(abort))
1262 			abort();
1263 	}
1264 }
1265 
1266 static void *
1267 chunk_alloc(size_t size)
1268 {
1269 	void *ret, *chunk;
1270 	chunk_node_t *tchunk, *delchunk;
1271 
1272 	assert(size != 0);
1273 	assert((size & chunksize_mask) == 0);
1274 
1275 	malloc_mutex_lock(&chunks_mtx);
1276 
1277 	if (size == chunksize) {
1278 		/*
1279 		 * Check for address ranges that were previously chunks and try
1280 		 * to use them.
1281 		 */
1282 
1283 		tchunk = RB_TREE_MIN(&old_chunks);
1284 		while (tchunk != NULL) {
1285 			/* Found an address range.  Try to recycle it. */
1286 
1287 			chunk = tchunk->chunk;
1288 			delchunk = tchunk;
1289 			tchunk = RB_TREE_NEXT(&old_chunks, delchunk);
1290 
1291 			/* Remove delchunk from the tree. */
1292 			rb_tree_remove_node(&old_chunks, delchunk);
1293 			base_chunk_node_dealloc(delchunk);
1294 
1295 #ifdef USE_BRK
1296 			if ((uintptr_t)chunk >= (uintptr_t)brk_base
1297 			    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1298 				/* Re-use a previously freed brk chunk. */
1299 				ret = chunk;
1300 				goto RETURN;
1301 			}
1302 #endif
1303 			if ((ret = pages_map(chunk, size)) != NULL) {
1304 				/* Success. */
1305 				goto RETURN;
1306 			}
1307 		}
1308 	}
1309 
1310 	/*
1311 	 * Try to over-allocate, but allow the OS to place the allocation
1312 	 * anywhere.  Beware of size_t wrap-around.
1313 	 */
1314 	if (size + chunksize > size) {
1315 		if ((ret = pages_map_align(NULL, size, chunksize_2pow))
1316 		    != NULL) {
1317 			goto RETURN;
1318 		}
1319 	}
1320 
1321 #ifdef USE_BRK
1322 	/*
1323 	 * Try to create allocations in brk, in order to make full use of
1324 	 * limited address space.
1325 	 */
1326 	if (brk_prev != (void *)-1) {
1327 		void *brk_cur;
1328 		intptr_t incr;
1329 
1330 		/*
1331 		 * The loop is necessary to recover from races with other
1332 		 * threads that are using brk for something other than malloc.
1333 		 */
1334 		malloc_mutex_lock(&brk_mtx);
1335 		do {
1336 			/* Get the current end of brk. */
1337 			brk_cur = sbrk(0);
1338 
1339 			/*
1340 			 * Calculate how much padding is necessary to
1341 			 * chunk-align the end of brk.
1342 			 */
1343 			incr = (intptr_t)size
1344 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1345 			if (incr == (intptr_t)size) {
1346 				ret = brk_cur;
1347 			} else {
1348 				ret = (void *)((intptr_t)brk_cur + incr);
1349 				incr += size;
1350 			}
1351 
1352 			brk_prev = sbrk(incr);
1353 			if (brk_prev == brk_cur) {
1354 				/* Success. */
1355 				malloc_mutex_unlock(&brk_mtx);
1356 				brk_max = (void *)((intptr_t)ret + size);
1357 				goto RETURN;
1358 			}
1359 		} while (brk_prev != (void *)-1);
1360 		malloc_mutex_unlock(&brk_mtx);
1361 	}
1362 #endif
1363 
1364 	/* All strategies for allocation failed. */
1365 	ret = NULL;
1366 RETURN:
1367 	if (ret != NULL) {
1368 		chunk_node_t key;
1369 		/*
1370 		 * Clean out any entries in old_chunks that overlap with the
1371 		 * memory we just allocated.
1372 		 */
1373 		key.chunk = ret;
1374 		tchunk = rb_tree_find_node_geq(&old_chunks, &key);
1375 		while (tchunk != NULL
1376 		    && (uintptr_t)tchunk->chunk >= (uintptr_t)ret
1377 		    && (uintptr_t)tchunk->chunk < (uintptr_t)ret + size) {
1378 			delchunk = tchunk;
1379 			tchunk = RB_TREE_NEXT(&old_chunks, delchunk);
1380 			rb_tree_remove_node(&old_chunks, delchunk);
1381 			base_chunk_node_dealloc(delchunk);
1382 		}
1383 
1384 	}
1385 #ifdef MALLOC_STATS
1386 	if (ret != NULL) {
1387 		stats_chunks.nchunks += (size / chunksize);
1388 		stats_chunks.curchunks += (size / chunksize);
1389 	}
1390 	if (stats_chunks.curchunks > stats_chunks.highchunks)
1391 		stats_chunks.highchunks = stats_chunks.curchunks;
1392 #endif
1393 	malloc_mutex_unlock(&chunks_mtx);
1394 
1395 	assert(CHUNK_ADDR2BASE(ret) == ret);
1396 	return (ret);
1397 }
1398 
1399 static void
1400 chunk_dealloc(void *chunk, size_t size)
1401 {
1402 	chunk_node_t *node;
1403 
1404 	assert(chunk != NULL);
1405 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
1406 	assert(size != 0);
1407 	assert((size & chunksize_mask) == 0);
1408 
1409 	malloc_mutex_lock(&chunks_mtx);
1410 
1411 #ifdef USE_BRK
1412 	if ((uintptr_t)chunk >= (uintptr_t)brk_base
1413 	    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1414 		void *brk_cur;
1415 
1416 		malloc_mutex_lock(&brk_mtx);
1417 		/* Get the current end of brk. */
1418 		brk_cur = sbrk(0);
1419 
1420 		/*
1421 		 * Try to shrink the data segment if this chunk is at the end
1422 		 * of the data segment.  The sbrk() call here is subject to a
1423 		 * race condition with threads that use brk(2) or sbrk(2)
1424 		 * directly, but the alternative would be to leak memory for
1425 		 * the sake of poorly designed multi-threaded programs.
1426 		 */
1427 		if (brk_cur == brk_max
1428 		    && (void *)((uintptr_t)chunk + size) == brk_max
1429 		    && sbrk(-(intptr_t)size) == brk_max) {
1430 			malloc_mutex_unlock(&brk_mtx);
1431 			if (brk_prev == brk_max) {
1432 				/* Success. */
1433 				brk_prev = (void *)((intptr_t)brk_max
1434 				    - (intptr_t)size);
1435 				brk_max = brk_prev;
1436 			}
1437 		} else {
1438 			size_t offset;
1439 
1440 			malloc_mutex_unlock(&brk_mtx);
1441 			madvise(chunk, size, MADV_FREE);
1442 
1443 			/*
1444 			 * Iteratively create records of each chunk-sized
1445 			 * memory region that 'chunk' is comprised of, so that
1446 			 * the address range can be recycled if memory usage
1447 			 * increases later on.
1448 			 */
1449 			for (offset = 0; offset < size; offset += chunksize) {
1450 				node = base_chunk_node_alloc();
1451 				if (node == NULL)
1452 					break;
1453 
1454 				node->chunk = (void *)((uintptr_t)chunk
1455 				    + (uintptr_t)offset);
1456 				node->size = chunksize;
1457 				rb_tree_insert_node(&old_chunks, node);
1458 			}
1459 		}
1460 	} else {
1461 #endif
1462 		pages_unmap(chunk, size);
1463 
1464 		/*
1465 		 * Make a record of the chunk's address, so that the address
1466 		 * range can be recycled if memory usage increases later on.
1467 		 * Don't bother to create entries if (size > chunksize), since
1468 		 * doing so could cause scalability issues for truly gargantuan
1469 		 * objects (many gigabytes or larger).
1470 		 */
1471 		if (size == chunksize) {
1472 			node = base_chunk_node_alloc();
1473 			if (node != NULL) {
1474 				node->chunk = (void *)(uintptr_t)chunk;
1475 				node->size = chunksize;
1476 				rb_tree_insert_node(&old_chunks, node);
1477 			}
1478 		}
1479 #ifdef USE_BRK
1480 	}
1481 #endif
1482 
1483 #ifdef MALLOC_STATS
1484 	stats_chunks.curchunks -= (size / chunksize);
1485 #endif
1486 	malloc_mutex_unlock(&chunks_mtx);
1487 }
1488 
1489 /*
1490  * End chunk management functions.
1491  */
1492 /******************************************************************************/
1493 /*
1494  * Begin arena.
1495  */
1496 
1497 /*
1498  * Choose a per-CPU arena.
1499  */
1500 static __noinline arena_t *
1501 choose_arena_hard(void)
1502 {
1503 
1504 	assert(arenas[0] != NULL);
1505 
1506 	malloc_mutex_lock(&arenas_mtx);
1507 	for (unsigned i = 1; i < ncpus; i++)
1508 		if (arenas[i] == NULL)
1509 			arenas[i] = arenas_extend();
1510 	malloc_mutex_unlock(&arenas_mtx);
1511 
1512 	return arenas[thr_curcpu()];
1513 }
1514 
1515 static inline arena_t *
1516 choose_arena(void)
1517 {
1518 	arena_t *arena;
1519 
1520 	/* NB: when libpthread is absent, thr_curcpu() always returns zero. */
1521 	arena = arenas[thr_curcpu()];
1522 	if (__predict_true(arena != NULL))
1523 		return arena;
1524 
1525 	return choose_arena_hard();
1526 }
1527 
1528 static int
1529 arena_chunk_comp(void *context, const void *va, const void *vb)
1530 {
1531 	const arena_chunk_t *a = va, *b = vb;
1532 	int diff;
1533 
1534 	assert(a != NULL);
1535 	assert(b != NULL);
1536 
1537 	if ((diff = a->max_frun_npages - b->max_frun_npages) != 0)
1538 		return diff;
1539 	return ptrcmp(a, b);
1540 }
1541 
1542 static int
1543 arena_run_comp(void *context, const void *a, const void *b)
1544 {
1545 
1546 	assert(a != NULL);
1547 	assert(b != NULL);
1548 
1549 	return ptrcmp(a, b);
1550 }
1551 
1552 static inline void *
1553 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1554 {
1555 	void *ret;
1556 	unsigned i, mask, bit, regind;
1557 
1558 	assert(run->magic == ARENA_RUN_MAGIC);
1559 	assert(run->regs_minelm < bin->regs_mask_nelms);
1560 
1561 	/*
1562 	 * Move the first check outside the loop, so that run->regs_minelm can
1563 	 * be updated unconditionally, without the possibility of updating it
1564 	 * multiple times.
1565 	 */
1566 	i = run->regs_minelm;
1567 	mask = run->regs_mask[i];
1568 	if (mask != 0) {
1569 		/* Usable allocation found. */
1570 		bit = ffs((int)mask) - 1;
1571 
1572 		regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1573 		ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1574 		    + (bin->reg_size * regind));
1575 
1576 		/* Clear bit. */
1577 		mask ^= (1U << bit);
1578 		run->regs_mask[i] = mask;
1579 
1580 		return (ret);
1581 	}
1582 
1583 	for (i++; i < bin->regs_mask_nelms; i++) {
1584 		mask = run->regs_mask[i];
1585 		if (mask != 0) {
1586 			/* Usable allocation found. */
1587 			bit = ffs((int)mask) - 1;
1588 
1589 			regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1590 			ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1591 			    + (bin->reg_size * regind));
1592 
1593 			/* Clear bit. */
1594 			mask ^= (1U << bit);
1595 			run->regs_mask[i] = mask;
1596 
1597 			/*
1598 			 * Make a note that nothing before this element
1599 			 * contains a free region.
1600 			 */
1601 			run->regs_minelm = i; /* Low payoff: + (mask == 0); */
1602 
1603 			return (ret);
1604 		}
1605 	}
1606 	/* Not reached. */
1607 	/* LINTED */
1608 	assert(0);
1609 	return (NULL);
1610 }
1611 
1612 static inline void
1613 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1614 {
1615 	/*
1616 	 * To divide by a number D that is not a power of two we multiply
1617 	 * by (2^21 / D) and then right shift by 21 positions.
1618 	 *
1619 	 *   X / D
1620 	 *
1621 	 * becomes
1622 	 *
1623 	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
1624 	 */
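	/*
	 * Worked example (illustrative, 16-byte quantum): for D == 48,
	 * size_invs[(48 >> 4) - 3] == SIZE_INV(3) == (2^21 / 48) + 1 == 43691,
	 * so X / 48 becomes (X * 43691) >> 21; e.g. (96 * 43691) >> 21 == 2.
	 */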
1625 #define SIZE_INV_SHIFT 21
1626 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
1627 	static const unsigned size_invs[] = {
1628 	    SIZE_INV(3),
1629 	    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1630 	    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1631 	    SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1632 	    SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1633 	    SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1634 	    SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1635 	    SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1636 #if (QUANTUM_2POW_MIN < 4)
1637 	    ,
1638 	    SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1639 	    SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1640 	    SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1641 	    SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1642 	    SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1643 	    SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1644 	    SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1645 	    SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1646 #endif
1647 	};
1648 	unsigned diff, regind, elm, bit;
1649 
1650 	/* LINTED */
1651 	assert(run->magic == ARENA_RUN_MAGIC);
1652 	assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1653 	    >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1654 
1655 	/*
1656 	 * Avoid doing division with a variable divisor if possible.  Using
1657 	 * actual division here can reduce allocator throughput by over 20%!
1658 	 */
1659 	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1660 	if ((size & (size - 1)) == 0) {
1661 		/*
1662 		 * log2_table allows fast division of a power of two in the
1663 		 * [1..128] range.
1664 		 *
1665 		 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1666 		 */
1667 		static const unsigned char log2_table[] = {
1668 		    0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1669 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1670 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1671 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1672 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1673 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1674 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1675 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1676 		};
1677 
1678 		if (size <= 128)
1679 			regind = (diff >> log2_table[size - 1]);
1680 		else if (size <= 32768)
1681 			regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1682 		else {
1683 			/*
1684 			 * The page size is too large for us to use the lookup
1685 			 * table.  Use real division.
1686 			 */
1687 			regind = (unsigned)(diff / size);
1688 		}
1689 	} else if (size <= (((sizeof(size_invs) / sizeof(unsigned)) + 2)
1690 	    << QUANTUM_2POW_MIN)) {
1691 		regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1692 		regind >>= SIZE_INV_SHIFT;
1693 	} else {
1694 		/*
1695 		 * size_invs isn't large enough to handle this size class, so
1696 		 * calculate regind using actual division.  This only happens
1697 		 * if the user increases small_max via the 'S' runtime
1698 		 * configuration option.
1699 		 */
1700 		regind = (unsigned)(diff / size);
1701 	};
1702 	assert(diff == regind * size);
1703 	assert(regind < bin->nregs);
1704 
1705 	elm = regind >> (SIZEOF_INT_2POW + 3);
1706 	if (elm < run->regs_minelm)
1707 		run->regs_minelm = elm;
1708 	bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1709 	assert((run->regs_mask[elm] & (1U << bit)) == 0);
1710 	run->regs_mask[elm] |= (1U << bit);
1711 #undef SIZE_INV
1712 #undef SIZE_INV_SHIFT
1713 }
1714 
1715 static void
1716 arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
1717 {
1718 	arena_chunk_t *chunk;
1719 	unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
1720 	unsigned i;
1721 
1722 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1723 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1724 	    >> pagesize_2pow);
1725 	total_pages = chunk->map[run_ind].npages;
1726 	need_pages = (unsigned)(size >> pagesize_2pow);
1727 	assert(need_pages <= total_pages);
1728 	rem_pages = total_pages - need_pages;
1729 
1730 	/* Split enough pages from the front of run to fit allocation size. */
1731 	map_offset = run_ind;
1732 	for (i = 0; i < need_pages; i++) {
1733 		chunk->map[map_offset + i].npages = need_pages;
1734 		chunk->map[map_offset + i].pos = i;
1735 	}
1736 
1737 	/* Keep track of trailing unused pages for later use. */
1738 	if (rem_pages > 0) {
1739 		/* Update map for trailing pages. */
1740 		map_offset += need_pages;
1741 		chunk->map[map_offset].npages = rem_pages;
1742 		chunk->map[map_offset].pos = POS_FREE;
1743 		chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
1744 		chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
1745 	}
1746 
1747 	chunk->pages_used += need_pages;
1748 }
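/*
 * For example (illustrative): splitting a 3 page allocation off a 10 page
 * free run leaves the first three map entries with npages == 3 and
 * pos == 0..2, while the trailing 7 free pages are described only by their
 * first and last map entries (npages == 7, pos == POS_FREE), which is all
 * that free-run coalescing needs.
 */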
1749 
1750 static arena_chunk_t *
1751 arena_chunk_alloc(arena_t *arena)
1752 {
1753 	arena_chunk_t *chunk;
1754 
1755 	if (arena->spare != NULL) {
1756 		chunk = arena->spare;
1757 		arena->spare = NULL;
1758 
1759 		rb_tree_insert_node(&arena->chunks, chunk);
1760 	} else {
1761 		chunk = (arena_chunk_t *)chunk_alloc(chunksize);
1762 		if (chunk == NULL)
1763 			return (NULL);
1764 #ifdef MALLOC_STATS
1765 		arena->stats.mapped += chunksize;
1766 #endif
1767 
1768 		chunk->arena = arena;
1769 
1770 		/*
1771 		 * Claim that no pages are in use, since the header is merely
1772 		 * overhead.
1773 		 */
1774 		chunk->pages_used = 0;
1775 
1776 		chunk->max_frun_npages = chunk_npages -
1777 		    arena_chunk_header_npages;
1778 		chunk->min_frun_ind = arena_chunk_header_npages;
1779 
1780 		/*
1781 		 * Initialize enough of the map to support one maximal free run.
1782 		 */
1783 		chunk->map[arena_chunk_header_npages].npages = chunk_npages -
1784 		    arena_chunk_header_npages;
1785 		chunk->map[arena_chunk_header_npages].pos = POS_FREE;
1786 		chunk->map[chunk_npages - 1].npages = chunk_npages -
1787 		    arena_chunk_header_npages;
1788 		chunk->map[chunk_npages - 1].pos = POS_FREE;
1789 
1790 		rb_tree_insert_node(&arena->chunks, chunk);
1791 	}
1792 
1793 	return (chunk);
1794 }
1795 
1796 static void
1797 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
1798 {
1799 
1800 	/*
1801 	 * Remove chunk from the chunk tree, regardless of whether this chunk
1802 	 * will be cached, so that the arena does not use it.
1803 	 */
1804 	rb_tree_remove_node(&chunk->arena->chunks, chunk);
1805 
1806 	if (NOT_OPT(hint)) {
1807 		if (arena->spare != NULL) {
1808 			chunk_dealloc((void *)arena->spare, chunksize);
1809 #ifdef MALLOC_STATS
1810 			arena->stats.mapped -= chunksize;
1811 #endif
1812 		}
1813 		arena->spare = chunk;
1814 	} else {
1815 		assert(arena->spare == NULL);
1816 		chunk_dealloc((void *)chunk, chunksize);
1817 #ifdef MALLOC_STATS
1818 		arena->stats.mapped -= chunksize;
1819 #endif
1820 	}
1821 }
1822 
1823 static arena_run_t *
1824 arena_run_alloc(arena_t *arena, size_t size)
1825 {
1826 	arena_chunk_t *chunk;
1827 	arena_run_t *run;
1828 	unsigned need_npages;
1829 
1830 	assert(size <= (chunksize - (arena_chunk_header_npages <<
1831 	    pagesize_2pow)));
1832 	assert((size & pagesize_mask) == 0);
1833 
1834 	/*
1835 	 * Search through the arena chunk tree for a large enough free run.
1836 	 * Tree order ensures that any exact fit is picked immediately;
1837 	 * otherwise the lowest-addressed chunk of the next larger size is used.
1838 	 */
1839 	need_npages = (unsigned)(size >> pagesize_2pow);
1840 	/* LINTED */
1841 	for (;;) {
1842 		rb_node_t *node = arena->chunks.rbt_root;
1843 		chunk = NULL;
1844 		while (!RB_SENTINEL_P(node)) {
1845 			assert(offsetof(struct arena_chunk_s, link) == 0);
1846 			arena_chunk_t *chunk_tmp = (arena_chunk_t *)node;
1847 			if (chunk_tmp->max_frun_npages == need_npages) {
1848 				chunk = chunk_tmp;
1849 				break;
1850 			}
1851 			if (chunk_tmp->max_frun_npages < need_npages) {
1852 				node = node->rb_nodes[1];
1853 				continue;
1854 			}
1855 			chunk = chunk_tmp;
1856 			node = node->rb_nodes[0];
1857 		}
1858 		if (chunk == NULL)
1859 			break;
1860 		/*
1861 		 * At this point, the chunk must have a cached run size large
1862 		 * enough to fit the allocation.
1863 		 */
1864 		assert(need_npages <= chunk->max_frun_npages);
1865 		{
1866 			arena_chunk_map_t *mapelm;
1867 			unsigned i;
1868 			unsigned max_frun_npages = 0;
1869 			unsigned min_frun_ind = chunk_npages;
1870 
1871 			assert(chunk->min_frun_ind >=
1872 			    arena_chunk_header_npages);
1873 			for (i = chunk->min_frun_ind; i < chunk_npages;) {
1874 				mapelm = &chunk->map[i];
1875 				if (mapelm->pos == POS_FREE) {
1876 					if (mapelm->npages >= need_npages) {
1877 						run = (arena_run_t *)
1878 						    ((uintptr_t)chunk + (i <<
1879 						    pagesize_2pow));
1880 						/* Update page map. */
1881 						arena_run_split(arena, run,
1882 						    size);
1883 						return (run);
1884 					}
1885 					if (mapelm->npages >
1886 					    max_frun_npages) {
1887 						max_frun_npages =
1888 						    mapelm->npages;
1889 					}
1890 					if (i < min_frun_ind) {
1891 						min_frun_ind = i;
1892 						if (i < chunk->min_frun_ind)
1893 							chunk->min_frun_ind = i;
1894 					}
1895 				}
1896 				i += mapelm->npages;
1897 			}
1898 			/*
1899 			 * Search failure.  Reset cached chunk->max_frun_npages.
1900 			 * chunk->min_frun_ind was already reset above (if
1901 			 * necessary).
1902 			 */
1903 			rb_tree_remove_node(&arena->chunks, chunk);
1904 			chunk->max_frun_npages = max_frun_npages;
1905 			rb_tree_insert_node(&arena->chunks, chunk);
1906 		}
1907 	}
1908 
1909 	/*
1910 	 * No usable runs.  Create a new chunk from which to allocate the run.
1911 	 */
1912 	chunk = arena_chunk_alloc(arena);
1913 	if (chunk == NULL)
1914 		return (NULL);
1915 	run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
1916 	    pagesize_2pow));
1917 	/* Update page map. */
1918 	arena_run_split(arena, run, size);
1919 	return (run);
1920 }
1921 
1922 static void
1923 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
1924 {
1925 	arena_chunk_t *chunk;
1926 	unsigned run_ind, run_pages;
1927 
1928 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1929 
1930 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1931 	    >> pagesize_2pow);
1932 	assert(run_ind >= arena_chunk_header_npages);
1933 	assert(run_ind < (chunksize >> pagesize_2pow));
1934 	run_pages = (unsigned)(size >> pagesize_2pow);
1935 	assert(run_pages == chunk->map[run_ind].npages);
1936 
1937 	/* Subtract pages from count of pages used in chunk. */
1938 	chunk->pages_used -= run_pages;
1939 
1940 	/* Mark run as deallocated. */
1941 	assert(chunk->map[run_ind].npages == run_pages);
1942 	chunk->map[run_ind].pos = POS_FREE;
1943 	assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
1944 	chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
1945 
1946 	/*
1947 	 * Tell the kernel that we don't need the data in this run, but only if
1948 	 * requested via runtime configuration.
1949 	 */
1950 	if (OPT(hint))
1951 		madvise(run, size, MADV_FREE);
1952 
1953 	/* Try to coalesce with neighboring runs. */
1954 	if (run_ind > arena_chunk_header_npages &&
1955 	    chunk->map[run_ind - 1].pos == POS_FREE) {
1956 		unsigned prev_npages;
1957 
1958 		/* Coalesce with previous run. */
1959 		prev_npages = chunk->map[run_ind - 1].npages;
1960 		run_ind -= prev_npages;
1961 		assert(chunk->map[run_ind].npages == prev_npages);
1962 		assert(chunk->map[run_ind].pos == POS_FREE);
1963 		run_pages += prev_npages;
1964 
1965 		chunk->map[run_ind].npages = run_pages;
1966 		assert(chunk->map[run_ind].pos == POS_FREE);
1967 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
1968 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
1969 	}
1970 
1971 	if (run_ind + run_pages < chunk_npages &&
1972 	    chunk->map[run_ind + run_pages].pos == POS_FREE) {
1973 		unsigned next_npages;
1974 
1975 		/* Coalesce with next run. */
1976 		next_npages = chunk->map[run_ind + run_pages].npages;
1977 		run_pages += next_npages;
1978 		assert(chunk->map[run_ind + run_pages - 1].npages ==
1979 		    next_npages);
1980 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
1981 
1982 		chunk->map[run_ind].npages = run_pages;
1983 		chunk->map[run_ind].pos = POS_FREE;
1984 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
1985 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
1986 	}
1987 
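	/*
	 * Example of the coalescing above (hypothetical numbers): freeing a
	 * 2-page run whose 3-page predecessor and 4-page successor are both
	 * free produces a single 9-page free run; only its first and last map
	 * entries need npages updated, since only the boundary entries of a
	 * free run are ever consulted.
	 */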
1988 	if (chunk->map[run_ind].npages > chunk->max_frun_npages) {
1989 		rb_tree_remove_node(&arena->chunks, chunk);
1990 		chunk->max_frun_npages = chunk->map[run_ind].npages;
1991 		rb_tree_insert_node(&arena->chunks, chunk);
1992 	}
1993 	if (run_ind < chunk->min_frun_ind)
1994 		chunk->min_frun_ind = run_ind;
1995 
1996 	/* Deallocate chunk if it is now completely unused. */
1997 	if (chunk->pages_used == 0)
1998 		arena_chunk_dealloc(arena, chunk);
1999 }
2000 
2001 static arena_run_t *
2002 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2003 {
2004 	arena_run_t *run;
2005 	unsigned i, remainder;
2006 
2007 	/* Look for a usable run. */
2008 	if ((run = RB_TREE_MIN(&bin->runs)) != NULL) {
2009 		/* run is guaranteed to have available space. */
2010 		rb_tree_remove_node(&bin->runs, run);
2011 #ifdef MALLOC_STATS
2012 		bin->stats.reruns++;
2013 #endif
2014 		return (run);
2015 	}
2016 	/* No existing runs have any space available. */
2017 
2018 	/* Allocate a new run. */
2019 	run = arena_run_alloc(arena, bin->run_size);
2020 	if (run == NULL)
2021 		return (NULL);
2022 
2023 	/* Initialize run internals. */
2024 	run->bin = bin;
2025 
2026 	for (i = 0; i < bin->regs_mask_nelms; i++)
2027 		run->regs_mask[i] = UINT_MAX;
2028 	remainder = bin->nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1);
2029 	if (remainder != 0) {
2030 		/* The last element has spare bits that need to be unset. */
2031 		run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2032 		    - remainder));
2033 	}
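	/*
	 * For example (assuming 32-bit ints, i.e. SIZEOF_INT_2POW == 2): with
	 * nregs == 70, remainder == 70 & 31 == 6, so the trailing mask is
	 * UINT_MAX >> 26 == 0x3f, leaving exactly six "free" bits set.
	 */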
2034 
2035 	run->regs_minelm = 0;
2036 
2037 	run->nfree = bin->nregs;
2038 #ifdef MALLOC_DEBUG
2039 	run->magic = ARENA_RUN_MAGIC;
2040 #endif
2041 
2042 #ifdef MALLOC_STATS
2043 	bin->stats.nruns++;
2044 	bin->stats.curruns++;
2045 	if (bin->stats.curruns > bin->stats.highruns)
2046 		bin->stats.highruns = bin->stats.curruns;
2047 #endif
2048 	return (run);
2049 }
2050 
2051 /* bin->runcur must have space available before this function is called. */
2052 static inline void *
2053 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2054 {
2055 	void *ret;
2056 
2057 	assert(run->magic == ARENA_RUN_MAGIC);
2058 	assert(run->nfree > 0);
2059 
2060 	ret = arena_run_reg_alloc(run, bin);
2061 	assert(ret != NULL);
2062 	run->nfree--;
2063 
2064 	return (ret);
2065 }
2066 
2067 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2068 static void *
2069 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2070 {
2071 
2072 	bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2073 	if (bin->runcur == NULL)
2074 		return (NULL);
2075 	assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2076 	assert(bin->runcur->nfree > 0);
2077 
2078 	return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2079 }
2080 
2081 /*
2082  * Calculate bin->run_size such that it meets the following constraints:
2083  *
2084  *   *) bin->run_size >= min_run_size
2085  *   *) bin->run_size <= arena_maxclass
2086  *   *) bin->run_size <= RUN_MAX_SMALL
2087  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2088  *
2089  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2090  * also calculated here, since these settings are all interdependent.
2091  */
2092 static size_t
2093 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
2094 {
2095 	size_t try_run_size, good_run_size;
2096 	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
2097 	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
2098 
2099 	assert(min_run_size >= pagesize);
2100 	assert(min_run_size <= arena_maxclass);
2101 	assert(min_run_size <= RUN_MAX_SMALL);
2102 
2103 	/*
2104 	 * Calculate known-valid settings before entering the run_size
2105 	 * expansion loop, so that the first part of the loop always copies
2106 	 * valid settings.
2107 	 *
2108 	 * The do..while loop iteratively reduces the number of regions until
2109 	 * the run header and the regions no longer overlap.  A closed formula
2110 	 * would be quite messy, since there is an interdependency between the
2111 	 * header's mask length and the number of regions.
2112 	 */
2113 	try_run_size = min_run_size;
2114 	try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2115 	    bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
2116 	do {
2117 		try_nregs--;
2118 		try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2119 		    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
2120 		try_reg0_offset = (unsigned)(try_run_size -
2121 		    (try_nregs * bin->reg_size));
2122 	} while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
2123 	    > try_reg0_offset);
2124 
2125 	/* run_size expansion loop. */
2126 	do {
2127 		/*
2128 		 * Copy valid settings before trying more aggressive settings.
2129 		 */
2130 		good_run_size = try_run_size;
2131 		good_nregs = try_nregs;
2132 		good_mask_nelms = try_mask_nelms;
2133 		good_reg0_offset = try_reg0_offset;
2134 
2135 		/* Try more aggressive settings. */
2136 		try_run_size += pagesize;
2137 		try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2138 		    bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
2139 		do {
2140 			try_nregs--;
2141 			try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2142 			    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ?
2143 			    1 : 0);
2144 			try_reg0_offset = (unsigned)(try_run_size - (try_nregs *
2145 			    bin->reg_size));
2146 		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
2147 		    (try_mask_nelms - 1)) > try_reg0_offset);
2148 	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
2149 	    && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
2150 	    && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
2151 
2152 	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
2153 	    <= good_reg0_offset);
2154 	assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
2155 
2156 	/* Copy final settings. */
2157 	bin->run_size = good_run_size;
2158 	bin->nregs = good_nregs;
2159 	bin->regs_mask_nelms = good_mask_nelms;
2160 	bin->reg0_offset = good_reg0_offset;
2161 
2162 	return (good_run_size);
2163 }
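/*
 * In other words, the expansion loop above keeps adding pages to the
 * candidate run size while the header fraction reg0_offset / run_size,
 * expressed as a binary fixed-point value with RUN_BFP fractional bits,
 * still exceeds RUN_MAX_OVRHD (unless the relaxation clause for large
 * reg_size applies), and nregs is the largest count for which the run
 * header, including its regs_mask array, still fits in front of region 0.
 */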
2164 
2165 static void *
2166 arena_malloc(arena_t *arena, size_t size)
2167 {
2168 	void *ret;
2169 
2170 	assert(arena != NULL);
2171 	assert(arena->magic == ARENA_MAGIC);
2172 	assert(size != 0);
2173 	assert(QUANTUM_CEILING(size) <= arena_maxclass);
2174 
2175 	if (size <= bin_maxclass) {
2176 		arena_bin_t *bin;
2177 		arena_run_t *run;
2178 
2179 		/* Small allocation. */
2180 
2181 		if (size < small_min) {
2182 			/* Tiny. */
2183 			size = pow2_ceil(size);
2184 			bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
2185 			    1)))];
2186 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2187 			/*
2188 			 * Bin calculation is always correct, but we may need
2189 			 * to fix size for the purposes of assertions and/or
2190 			 * stats accuracy.
2191 			 */
2192 			if (size < (1 << TINY_MIN_2POW))
2193 				size = (1 << TINY_MIN_2POW);
2194 #endif
2195 		} else if (size <= small_max) {
2196 			/* Quantum-spaced. */
2197 			size = QUANTUM_CEILING(size);
2198 			bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2199 			    - 1];
2200 		} else {
2201 			/* Sub-page. */
2202 			size = pow2_ceil(size);
2203 			bin = &arena->bins[ntbins + nqbins
2204 			    + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
2205 		}
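		/*
		 * For example, with the default 16 byte quantum a 100 byte
		 * request is rounded up to 112 and served from
		 * bins[ntbins + 112/16 - 1], whose reg_size is 112, so the
		 * assertion below holds.
		 */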
2206 		assert(size == bin->reg_size);
2207 
2208 		malloc_mutex_lock(&arena->mtx);
2209 		if ((run = bin->runcur) != NULL && run->nfree > 0)
2210 			ret = arena_bin_malloc_easy(arena, bin, run);
2211 		else
2212 			ret = arena_bin_malloc_hard(arena, bin);
2213 
2214 		if (ret == NULL) {
2215 			malloc_mutex_unlock(&arena->mtx);
2216 			return (NULL);
2217 		}
2218 
2219 #ifdef MALLOC_STATS
2220 		bin->stats.nrequests++;
2221 		arena->stats.nmalloc_small++;
2222 		arena->stats.allocated_small += size;
2223 #endif
2224 	} else {
2225 		/* Large allocation. */
2226 		size = PAGE_CEILING(size);
2227 		malloc_mutex_lock(&arena->mtx);
2228 		ret = (void *)arena_run_alloc(arena, size);
2229 		if (ret == NULL) {
2230 			malloc_mutex_unlock(&arena->mtx);
2231 			return (NULL);
2232 		}
2233 #ifdef MALLOC_STATS
2234 		arena->stats.nmalloc_large++;
2235 		arena->stats.allocated_large += size;
2236 #endif
2237 	}
2238 
2239 	malloc_mutex_unlock(&arena->mtx);
2240 
2241 	if (OPT(junk))
2242 		memset(ret, 0xa5, size);
2243 	else if (OPT(zero))
2244 		memset(ret, 0, size);
2245 	return (ret);
2246 }
2247 
2248 static inline void
2249 arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
2250     unsigned npages)
2251 {
2252 	unsigned i;
2253 
2254 	assert(npages > 0);
2255 
2256 	/*
2257 	 * Modify the map such that arena_run_dalloc() sees the run as
2258 	 * separately allocated.
2259 	 */
2260 	for (i = 0; i < npages; i++) {
2261 		chunk->map[pageind + i].npages = npages;
2262 		chunk->map[pageind + i].pos = i;
2263 	}
2264 	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
2265 	    pagesize_2pow)), npages << pagesize_2pow);
2266 }
2267 
2268 /* Only handles large allocations that require more than page alignment. */
2269 static void *
2270 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
2271 {
2272 	void *ret;
2273 	size_t offset;
2274 	arena_chunk_t *chunk;
2275 	unsigned pageind, i, npages;
2276 
2277 	assert((size & pagesize_mask) == 0);
2278 	assert((alignment & pagesize_mask) == 0);
2279 
2280 	npages = (unsigned)(size >> pagesize_2pow);
2281 
2282 	malloc_mutex_lock(&arena->mtx);
2283 	ret = (void *)arena_run_alloc(arena, alloc_size);
2284 	if (ret == NULL) {
2285 		malloc_mutex_unlock(&arena->mtx);
2286 		return (NULL);
2287 	}
2288 
2289 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
2290 
2291 	offset = (uintptr_t)ret & (alignment - 1);
2292 	assert((offset & pagesize_mask) == 0);
2293 	assert(offset < alloc_size);
2294 	if (offset == 0) {
2295 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2296 		    pagesize_2pow);
2297 
2298 		/* Update the map for the run to be kept. */
2299 		for (i = 0; i < npages; i++) {
2300 			chunk->map[pageind + i].npages = npages;
2301 			assert(chunk->map[pageind + i].pos == i);
2302 		}
2303 
2304 		/* Trim trailing space. */
2305 		arena_palloc_trim(arena, chunk, pageind + npages,
2306 		    (unsigned)((alloc_size - size) >> pagesize_2pow));
2307 	} else {
2308 		size_t leadsize, trailsize;
2309 
2310 		leadsize = alignment - offset;
2311 		ret = (void *)((uintptr_t)ret + leadsize);
2312 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2313 		    pagesize_2pow);
2314 
2315 		/* Update the map for the run to be kept. */
2316 		for (i = 0; i < npages; i++) {
2317 			chunk->map[pageind + i].npages = npages;
2318 			chunk->map[pageind + i].pos = i;
2319 		}
2320 
2321 		/* Trim leading space. */
2322 		arena_palloc_trim(arena, chunk,
2323 		    (unsigned)(pageind - (leadsize >> pagesize_2pow)),
2324 		    (unsigned)(leadsize >> pagesize_2pow));
2325 
2326 		trailsize = alloc_size - leadsize - size;
2327 		if (trailsize != 0) {
2328 			/* Trim trailing space. */
2329 			assert(trailsize < alloc_size);
2330 			arena_palloc_trim(arena, chunk, pageind + npages,
2331 			    (unsigned)(trailsize >> pagesize_2pow));
2332 		}
2333 	}
2334 
2335 #ifdef MALLOC_STATS
2336 	arena->stats.nmalloc_large++;
2337 	arena->stats.allocated_large += size;
2338 #endif
2339 	malloc_mutex_unlock(&arena->mtx);
2340 
2341 	if (OPT(junk))
2342 		memset(ret, 0xa5, size);
2343 	else if (OPT(zero))
2344 		memset(ret, 0, size);
2345 	return (ret);
2346 }
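/*
 * A worked example with hypothetical numbers (4 kB pages, 16 kB alignment,
 * 8 kB size, 20 kB alloc_size): if arena_run_alloc() returns a run 12 kB into
 * the chunk, offset == 12 kB, leadsize == 4 kB, so one leading page is
 * trimmed, the 8 kB allocation starts at the next 16 kB boundary, and the
 * remaining 8 kB of the over-sized run is trimmed from the tail.
 */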
2347 
2348 /* Return the size of the allocation pointed to by ptr. */
2349 static size_t
2350 arena_salloc(const void *ptr)
2351 {
2352 	size_t ret;
2353 	arena_chunk_t *chunk;
2354 	arena_chunk_map_t *mapelm;
2355 	unsigned pageind;
2356 
2357 	assert(ptr != NULL);
2358 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2359 
2360 	/*
2361 	 * No arena data structures that we query here can change in a way that
2362 	 * affects this function, so we don't need to lock.
2363 	 */
2364 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2365 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2366 	    pagesize_2pow);
2367 	mapelm = &chunk->map[pageind];
2368 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2369 	    pagesize_2pow)) {
2370 		arena_run_t *run;
2371 
2372 		pageind -= mapelm->pos;
2373 
2374 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2375 		    pagesize_2pow));
2376 		assert(run->magic == ARENA_RUN_MAGIC);
2377 		ret = run->bin->reg_size;
2378 	} else
2379 		ret = mapelm->npages << pagesize_2pow;
2380 
2381 	return (ret);
2382 }
2383 
2384 static void *
2385 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2386 {
2387 	void *ret;
2388 
2389 	/* Avoid moving the allocation if the size class would not change. */
2390 	if (size < small_min) {
2391 		if (oldsize < small_min &&
2392 		    ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
2393 		    == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
2394 			goto IN_PLACE;
2395 	} else if (size <= small_max) {
2396 		if (oldsize >= small_min && oldsize <= small_max &&
2397 		    (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2398 		    == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2399 			goto IN_PLACE;
2400 	} else {
2401 		/*
2402 		 * We make no attempt to resize runs here, though it would be
2403 		 * possible to do so.
2404 		 */
2405 		if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
2406 			goto IN_PLACE;
2407 	}
2408 
2409 	/*
2410 	 * If we get here, then size and oldsize are different enough that we
2411 	 * need to use a different size class.  In that case, fall back to
2412 	 * allocating new space and copying.
2413 	 */
2414 	ret = arena_malloc(choose_arena(), size);
2415 	if (ret == NULL)
2416 		return (NULL);
2417 
2418 	/* Junk/zero-filling were already done by arena_malloc(). */
2419 	if (size < oldsize)
2420 		memcpy(ret, ptr, size);
2421 	else
2422 		memcpy(ret, ptr, oldsize);
2423 	idalloc(ptr);
2424 	return (ret);
2425 IN_PLACE:
2426 	if (OPT(junk) && size < oldsize)
2427 		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
2428 	else if (OPT(zero) && size > oldsize)
2429 		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
2430 	return (ptr);
2431 }
2432 
2433 static void
2434 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2435 {
2436 	unsigned pageind;
2437 	arena_chunk_map_t *mapelm;
2438 	size_t size;
2439 
2440 	assert(arena != NULL);
2441 	assert(arena->magic == ARENA_MAGIC);
2442 	assert(chunk->arena == arena);
2443 	assert(ptr != NULL);
2444 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2445 
2446 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2447 	    pagesize_2pow);
2448 	mapelm = &chunk->map[pageind];
2449 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2450 	    pagesize_2pow)) {
2451 		arena_run_t *run;
2452 		arena_bin_t *bin;
2453 
2454 		/* Small allocation. */
2455 
2456 		pageind -= mapelm->pos;
2457 
2458 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2459 		    pagesize_2pow));
2460 		assert(run->magic == ARENA_RUN_MAGIC);
2461 		bin = run->bin;
2462 		size = bin->reg_size;
2463 
2464 		if (OPT(junk))
2465 			memset(ptr, 0x5a, size);
2466 
2467 		malloc_mutex_lock(&arena->mtx);
2468 		arena_run_reg_dalloc(run, bin, ptr, size);
2469 		run->nfree++;
2470 
2471 		if (run->nfree == bin->nregs) {
2472 			/* Deallocate run. */
2473 			if (run == bin->runcur)
2474 				bin->runcur = NULL;
2475 			else if (bin->nregs != 1) {
2476 				/*
2477 				 * This block's conditional is necessary because
2478 				 * if the run only contains one region, then it
2479 				 * never gets inserted into the non-full runs
2480 				 * tree.
2481 				 */
2482 				rb_tree_remove_node(&bin->runs, run);
2483 			}
2484 #ifdef MALLOC_DEBUG
2485 			run->magic = 0;
2486 #endif
2487 			arena_run_dalloc(arena, run, bin->run_size);
2488 #ifdef MALLOC_STATS
2489 			bin->stats.curruns--;
2490 #endif
2491 		} else if (run->nfree == 1 && run != bin->runcur) {
2492 			/*
2493 			 * Make sure that bin->runcur always refers to the
2494 			 * lowest non-full run, if one exists.
2495 			 */
2496 			if (bin->runcur == NULL)
2497 				bin->runcur = run;
2498 			else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
2499 				/* Switch runcur. */
2500 				if (bin->runcur->nfree > 0) {
2501 					/* Insert runcur. */
2502 					rb_tree_insert_node(&bin->runs, bin->runcur);
2503 				}
2504 				bin->runcur = run;
2505 			} else {
2506 				rb_tree_insert_node(&bin->runs, run);
2507 			}
2508 		}
2509 #ifdef MALLOC_STATS
2510 		arena->stats.allocated_small -= size;
2511 		arena->stats.ndalloc_small++;
2512 #endif
2513 	} else {
2514 		/* Large allocation. */
2515 
2516 		size = mapelm->npages << pagesize_2pow;
2517 		assert((((uintptr_t)ptr) & pagesize_mask) == 0);
2518 
2519 		if (OPT(junk))
2520 			memset(ptr, 0x5a, size);
2521 
2522 		malloc_mutex_lock(&arena->mtx);
2523 		arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2524 #ifdef MALLOC_STATS
2525 		arena->stats.allocated_large -= size;
2526 		arena->stats.ndalloc_large++;
2527 #endif
2528 	}
2529 
2530 	malloc_mutex_unlock(&arena->mtx);
2531 }
2532 
2533 static void
2534 arena_new(arena_t *arena)
2535 {
2536 	unsigned i;
2537 	arena_bin_t *bin;
2538 	size_t prev_run_size;
2539 
2540 	malloc_mutex_init(&arena->mtx);
2541 
2542 #ifdef MALLOC_STATS
2543 	memset(&arena->stats, 0, sizeof(arena_stats_t));
2544 #endif
2545 
2546 	/* Initialize chunks. */
2547 	rb_tree_init(&arena->chunks, &arena_chunk_tree_ops);
2548 	arena->spare = NULL;
2549 
2550 	/* Initialize bins. */
2551 	prev_run_size = pagesize;
2552 
2553 	/* (2^n)-spaced tiny bins. */
2554 	for (i = 0; i < ntbins; i++) {
2555 		bin = &arena->bins[i];
2556 		bin->runcur = NULL;
2557 		rb_tree_init(&bin->runs, &arena_run_tree_ops);
2558 
2559 		bin->reg_size = (1 << (TINY_MIN_2POW + i));
2560 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2561 
2562 #ifdef MALLOC_STATS
2563 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2564 #endif
2565 	}
2566 
2567 	/* Quantum-spaced bins. */
2568 	for (; i < ntbins + nqbins; i++) {
2569 		bin = &arena->bins[i];
2570 		bin->runcur = NULL;
2571 		rb_tree_init(&bin->runs, &arena_run_tree_ops);
2572 
2573 		bin->reg_size = quantum * (i - ntbins + 1);
2574 /*
2575 		pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2576 */
2577 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2578 
2579 #ifdef MALLOC_STATS
2580 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2581 #endif
2582 	}
2583 
2584 	/* (2^n)-spaced sub-page bins. */
2585 	for (; i < ntbins + nqbins + nsbins; i++) {
2586 		bin = &arena->bins[i];
2587 		bin->runcur = NULL;
2588 		rb_tree_init(&bin->runs, &arena_run_tree_ops);
2589 
2590 		bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2591 
2592 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2593 
2594 #ifdef MALLOC_STATS
2595 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2596 #endif
2597 	}
2598 
2599 #ifdef MALLOC_DEBUG
2600 	arena->magic = ARENA_MAGIC;
2601 #endif
2602 }
2603 
2604 /* Create a new arena and insert it into the arenas array at index ind. */
2605 static arena_t *
2606 arenas_extend(void)
2607 {
2608 	arena_t *ret;
2609 
2610 	/* Allocate enough space for trailing bins. */
2611 	ret = (arena_t *)base_alloc(sizeof(arena_t)
2612 	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2613 	if (ret != NULL) {
2614 		arena_new(ret);
2615 		return (ret);
2616 	}
2617 	/* Only reached if there is an OOM error. */
2618 
2619 	/*
2620 	 * OOM here is quite inconvenient to propagate, since dealing with it
2621 	 * would require a check for failure in the fast path.  Instead, punt
2622 	 * by using arenas[0].  In practice, this is an extremely unlikely
2623 	 * failure.
2624 	 */
2625 	_malloc_message(getprogname(),
2626 	    ": (malloc) Error initializing arena\n", "", "");
2627 	if (OPT(abort))
2628 		abort();
2629 
2630 	return (arenas[0]);
2631 }
2632 
2633 /*
2634  * End arena.
2635  */
2636 /******************************************************************************/
2637 /*
2638  * Begin general internal functions.
2639  */
2640 
2641 static void *
2642 huge_malloc(size_t size)
2643 {
2644 	void *ret;
2645 	size_t csize;
2646 	chunk_node_t *node;
2647 
2648 	/* Allocate one or more contiguous chunks for this request. */
2649 
2650 	csize = CHUNK_CEILING(size);
2651 	if (csize == 0) {
2652 		/* size is large enough to cause size_t wrap-around. */
2653 		return (NULL);
2654 	}
2655 
2656 	/* Allocate a chunk node with which to track the chunk. */
2657 	node = base_chunk_node_alloc();
2658 	if (node == NULL)
2659 		return (NULL);
2660 
2661 	ret = chunk_alloc(csize);
2662 	if (ret == NULL) {
2663 		base_chunk_node_dealloc(node);
2664 		return (NULL);
2665 	}
2666 
2667 	/* Insert node into huge. */
2668 	node->chunk = ret;
2669 	node->size = csize;
2670 
2671 	malloc_mutex_lock(&chunks_mtx);
2672 	rb_tree_insert_node(&huge, node);
2673 #ifdef MALLOC_STATS
2674 	huge_nmalloc++;
2675 	huge_allocated += csize;
2676 #endif
2677 	malloc_mutex_unlock(&chunks_mtx);
2678 
2679 	if (OPT(junk))
2680 		memset(ret, 0xa5, csize);
2681 	else if (OPT(zero))
2682 		memset(ret, 0, csize);
2683 
2684 	return (ret);
2685 }
2686 
2687 /* Only handles large allocations that require more than chunk alignment. */
2688 static void *
2689 huge_palloc(size_t alignment, size_t size)
2690 {
2691 	void *ret;
2692 	size_t alloc_size, chunk_size, offset;
2693 	chunk_node_t *node;
2694 
2695 	/*
2696 	 * This allocation requires alignment that is even larger than chunk
2697 	 * alignment.  This means that huge_malloc() isn't good enough.
2698 	 *
2699 	 * Allocate almost twice as many chunks as are demanded by the size or
2700 	 * alignment, in order to assure the alignment can be achieved, then
2701 	 * unmap leading and trailing chunks.
2702 	 */
2703 	assert(alignment >= chunksize);
2704 
2705 	chunk_size = CHUNK_CEILING(size);
2706 
2707 	if (size >= alignment)
2708 		alloc_size = chunk_size + alignment - chunksize;
2709 	else
2710 		alloc_size = (alignment << 1) - chunksize;
2711 
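	/*
	 * For illustration (hypothetical numbers): with size <= chunksize and
	 * alignment == 2 * chunksize, alloc_size is 3 chunks.  Since mappings
	 * are chunk-aligned, any 3-chunk region contains an address aligned
	 * to 2 * chunksize with at least one full chunk after it, so the
	 * leading and trailing excess can simply be unmapped below.
	 */
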
2712 	/* Allocate a chunk node with which to track the chunk. */
2713 	node = base_chunk_node_alloc();
2714 	if (node == NULL)
2715 		return (NULL);
2716 
2717 	ret = chunk_alloc(alloc_size);
2718 	if (ret == NULL) {
2719 		base_chunk_node_dealloc(node);
2720 		return (NULL);
2721 	}
2722 
2723 	offset = (uintptr_t)ret & (alignment - 1);
2724 	assert((offset & chunksize_mask) == 0);
2725 	assert(offset < alloc_size);
2726 	if (offset == 0) {
2727 		/* Trim trailing space. */
2728 		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
2729 		    - chunk_size);
2730 	} else {
2731 		size_t trailsize;
2732 
2733 		/* Trim leading space. */
2734 		chunk_dealloc(ret, alignment - offset);
2735 
2736 		ret = (void *)((uintptr_t)ret + (alignment - offset));
2737 
2738 		trailsize = alloc_size - (alignment - offset) - chunk_size;
2739 		if (trailsize != 0) {
2740 		    /* Trim trailing space. */
2741 		    assert(trailsize < alloc_size);
2742 		    chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
2743 			trailsize);
2744 		}
2745 	}
2746 
2747 	/* Insert node into huge. */
2748 	node->chunk = ret;
2749 	node->size = chunk_size;
2750 
2751 	malloc_mutex_lock(&chunks_mtx);
2752 	rb_tree_insert_node(&huge, node);
2753 #ifdef MALLOC_STATS
2754 	huge_nmalloc++;
2755 	huge_allocated += chunk_size;
2756 #endif
2757 	malloc_mutex_unlock(&chunks_mtx);
2758 
2759 	if (OPT(junk))
2760 		memset(ret, 0xa5, chunk_size);
2761 	else if (OPT(zero))
2762 		memset(ret, 0, chunk_size);
2763 
2764 	return (ret);
2765 }
2766 
2767 static void *
2768 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2769 {
2770 	void *ret;
2771 
2772 	/* Avoid moving the allocation if the size class would not change. */
2773 	if (oldsize > arena_maxclass &&
2774 	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
2775 		if (OPT(junk) && size < oldsize) {
2776 			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
2777 			    - size);
2778 		} else if (OPT(zero) && size > oldsize) {
2779 			memset((void *)((uintptr_t)ptr + oldsize), 0, size
2780 			    - oldsize);
2781 		}
2782 		return (ptr);
2783 	}
2784 
2785 	if (CHUNK_ADDR2BASE(ptr) == ptr
2786 #ifdef USE_BRK
2787 	    && ((uintptr_t)ptr < (uintptr_t)brk_base
2788 	    || (uintptr_t)ptr >= (uintptr_t)brk_max)
2789 #endif
2790 	    ) {
2791 		chunk_node_t *node, key;
2792 		void *newptr;
2793 		size_t oldcsize;
2794 		size_t newcsize;
2795 
2796 		newcsize = CHUNK_CEILING(size);
2797 		oldcsize = CHUNK_CEILING(oldsize);
2798 		assert(oldcsize != newcsize);
2799 		if (newcsize == 0) {
2800 			/* size_t wrap-around */
2801 			return (NULL);
2802 		}
2803 
2804 		/*
2805 		 * Remove the old region from the tree now.  If mremap()
2806 		 * returns the region to the system, another thread may
2807 		 * map it for the same huge allocation and insert it into
2808 		 * the tree before we acquire the mutex again.
2809 		 */
2810 		malloc_mutex_lock(&chunks_mtx);
2811 		key.chunk = __UNCONST(ptr);
2812 		node = rb_tree_find_node(&huge, &key);
2813 		assert(node != NULL);
2814 		assert(node->chunk == ptr);
2815 		assert(node->size == oldcsize);
2816 		rb_tree_remove_node(&huge, node);
2817 		malloc_mutex_unlock(&chunks_mtx);
2818 
2819 		newptr = mremap(ptr, oldcsize, NULL, newcsize,
2820 		    MAP_ALIGNED(chunksize_2pow));
2821 		if (newptr == MAP_FAILED) {
2822 			/* We still own the old region. */
2823 			malloc_mutex_lock(&chunks_mtx);
2824 			rb_tree_insert_node(&huge, node);
2825 			malloc_mutex_unlock(&chunks_mtx);
2826 		} else {
2827 			assert(CHUNK_ADDR2BASE(newptr) == newptr);
2828 
2829 			/* Insert new or resized old region. */
2830 			malloc_mutex_lock(&chunks_mtx);
2831 			node->size = newcsize;
2832 			node->chunk = newptr;
2833 			rb_tree_insert_node(&huge, node);
2834 #ifdef MALLOC_STATS
2835 			huge_nralloc++;
2836 			huge_allocated += newcsize - oldcsize;
2837 			if (newcsize > oldcsize) {
2838 				stats_chunks.curchunks +=
2839 				    (newcsize - oldcsize) / chunksize;
2840 				if (stats_chunks.curchunks >
2841 				    stats_chunks.highchunks)
2842 					stats_chunks.highchunks =
2843 					    stats_chunks.curchunks;
2844 			} else {
2845 				stats_chunks.curchunks -=
2846 				    (oldcsize - newcsize) / chunksize;
2847 			}
2848 #endif
2849 			malloc_mutex_unlock(&chunks_mtx);
2850 
2851 			if (OPT(junk) && size < oldsize) {
2852 				memset((void *)((uintptr_t)newptr + size), 0x5a,
2853 				    newcsize - size);
2854 			} else if (OPT(zero) && size > oldsize) {
2855 				memset((void *)((uintptr_t)newptr + oldsize), 0,
2856 				    size - oldsize);
2857 			}
2858 			return (newptr);
2859 		}
2860 	}
2861 
2862 	/*
2863 	 * If we get here, then size and oldsize are different enough that we
2864 	 * need to use a different size class.  In that case, fall back to
2865 	 * allocating new space and copying.
2866 	 */
2867 	ret = huge_malloc(size);
2868 	if (ret == NULL)
2869 		return (NULL);
2870 
2871 	if (CHUNK_ADDR2BASE(ptr) == ptr) {
2872 		/* The old allocation is a chunk. */
2873 		if (size < oldsize)
2874 			memcpy(ret, ptr, size);
2875 		else
2876 			memcpy(ret, ptr, oldsize);
2877 	} else {
2878 		/* The old allocation is a region. */
2879 		assert(oldsize < size);
2880 		memcpy(ret, ptr, oldsize);
2881 	}
2882 	idalloc(ptr);
2883 	return (ret);
2884 }
2885 
2886 static void
2887 huge_dalloc(void *ptr)
2888 {
2889 	chunk_node_t key;
2890 	chunk_node_t *node;
2891 
2892 	malloc_mutex_lock(&chunks_mtx);
2893 
2894 	/* Extract from tree of huge allocations. */
2895 	key.chunk = ptr;
2896 	node = rb_tree_find_node(&huge, &key);
2897 	assert(node != NULL);
2898 	assert(node->chunk == ptr);
2899 	rb_tree_remove_node(&huge, node);
2900 
2901 #ifdef MALLOC_STATS
2902 	huge_ndalloc++;
2903 	huge_allocated -= node->size;
2904 #endif
2905 
2906 	malloc_mutex_unlock(&chunks_mtx);
2907 
2908 	/* Unmap chunk. */
2909 #ifdef USE_BRK
2910 	if (OPT(junk))
2911 		memset(node->chunk, 0x5a, node->size);
2912 #endif
2913 	chunk_dealloc(node->chunk, node->size);
2914 
2915 	base_chunk_node_dealloc(node);
2916 }
2917 
2918 static void *
2919 imalloc(size_t size)
2920 {
2921 	void *ret;
2922 
2923 	assert(size != 0);
2924 
2925 	if (size <= arena_maxclass)
2926 		ret = arena_malloc(choose_arena(), size);
2927 	else
2928 		ret = huge_malloc(size);
2929 
2930 	return (ret);
2931 }
2932 
2933 static void *
2934 ipalloc(size_t alignment, size_t size)
2935 {
2936 	void *ret;
2937 	size_t ceil_size;
2938 
2939 	/*
2940 	 * Round size up to the nearest multiple of alignment.
2941 	 *
2942 	 * This done, we can take advantage of the fact that for each small
2943 	 * size class, every object is aligned at the smallest power of two
2944 	 * that is non-zero in the base two representation of the size.  For
2945 	 * example:
2946 	 *
2947 	 *   Size |   Base 2 | Minimum alignment
2948 	 *   -----+----------+------------------
2949 	 *     96 |  1100000 |  32
2950 	 *    144 | 10100000 |  32
2951 	 *    192 | 11000000 |  64
2952 	 *
2953 	 * Depending on runtime settings, it is possible that arena_malloc()
2954 	 * will further round up to a power of two, but that never causes
2955 	 * correctness issues.
2956 	 */
2957 	ceil_size = (size + (alignment - 1)) & (-alignment);
2958 	/*
2959 	 * (ceil_size < size) protects against the combination of maximal
2960 	 * alignment and size greater than maximal alignment.
2961 	 */
2962 	if (ceil_size < size) {
2963 		/* size_t overflow. */
2964 		return (NULL);
2965 	}
2966 
2967 	if (ceil_size <= pagesize || (alignment <= pagesize
2968 	    && ceil_size <= arena_maxclass))
2969 		ret = arena_malloc(choose_arena(), ceil_size);
2970 	else {
2971 		size_t run_size;
2972 
2973 		/*
2974 		 * We can't achieve sub-page alignment, so round up alignment
2975 		 * permanently; it makes later calculations simpler.
2976 		 */
2977 		alignment = PAGE_CEILING(alignment);
2978 		ceil_size = PAGE_CEILING(size);
2979 		/*
2980 		 * (ceil_size < size) protects against very large sizes within
2981 		 * pagesize of SIZE_T_MAX.
2982 		 *
2983 		 * (ceil_size + alignment < ceil_size) protects against the
2984 		 * combination of maximal alignment and ceil_size large enough
2985 		 * to cause overflow.  This is similar to the first overflow
2986 		 * check above, but it needs to be repeated due to the new
2987 		 * ceil_size value, which may now be *equal* to maximal
2988 		 * alignment, whereas before we only detected overflow if the
2989 		 * original size was *greater* than maximal alignment.
2990 		 */
2991 		if (ceil_size < size || ceil_size + alignment < ceil_size) {
2992 			/* size_t overflow. */
2993 			return (NULL);
2994 		}
2995 
2996 		/*
2997 		 * Calculate the size of the over-size run that arena_palloc()
2998 		 * would need to allocate in order to guarantee the alignment.
2999 		 */
3000 		if (ceil_size >= alignment)
3001 			run_size = ceil_size + alignment - pagesize;
3002 		else {
3003 			/*
3004 			 * It is possible that (alignment << 1) will cause
3005 			 * overflow, but it doesn't matter because we also
3006 			 * subtract pagesize, which in the case of overflow
3007 			 * leaves us with a very large run_size.  That causes
3008 			 * the first conditional below to fail, which means
3009 			 * that the bogus run_size value never gets used for
3010 			 * anything important.
3011 			 */
3012 			run_size = (alignment << 1) - pagesize;
3013 		}
3014 
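		/*
		 * Why ceil_size + alignment - pagesize suffices: runs are
		 * page-aligned, and among any alignment / pagesize
		 * consecutive page boundaries exactly one is aligned, so an
		 * over-sized run of that length always contains an aligned
		 * address with at least ceil_size usable bytes after it.
		 */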
3015 		if (run_size <= arena_maxclass) {
3016 			ret = arena_palloc(choose_arena(), alignment, ceil_size,
3017 			    run_size);
3018 		} else if (alignment <= chunksize)
3019 			ret = huge_malloc(ceil_size);
3020 		else
3021 			ret = huge_palloc(alignment, ceil_size);
3022 	}
3023 
3024 	assert(((uintptr_t)ret & (alignment - 1)) == 0);
3025 	return (ret);
3026 }
3027 
3028 static void *
3029 icalloc(size_t size)
3030 {
3031 	void *ret;
3032 
3033 	if (size <= arena_maxclass) {
3034 		ret = arena_malloc(choose_arena(), size);
3035 		if (ret == NULL)
3036 			return (NULL);
3037 		memset(ret, 0, size);
3038 	} else {
3039 		/*
3040 		 * The virtual memory system provides zero-filled pages, so
3041 		 * there is no need to do so manually, unless opt_junk is
3042 		 * enabled, in which case huge_malloc() fills huge allocations
3043 		 * with junk.
3044 		 */
3045 		ret = huge_malloc(size);
3046 		if (ret == NULL)
3047 			return (NULL);
3048 
3049 		if (OPT(junk))
3050 			memset(ret, 0, size);
3051 #ifdef USE_BRK
3052 		else if ((uintptr_t)ret >= (uintptr_t)brk_base
3053 		    && (uintptr_t)ret < (uintptr_t)brk_max) {
3054 			/*
3055 			 * This may be a re-used brk chunk.  Therefore, zero
3056 			 * the memory.
3057 			 */
3058 			memset(ret, 0, size);
3059 		}
3060 #endif
3061 	}
3062 
3063 	return (ret);
3064 }
3065 
3066 static size_t
3067 isalloc(const void *ptr)
3068 {
3069 	size_t ret;
3070 	arena_chunk_t *chunk;
3071 
3072 	assert(ptr != NULL);
3073 
3074 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3075 	if (chunk != ptr) {
3076 		/* Region. */
3077 		assert(chunk->arena->magic == ARENA_MAGIC);
3078 
3079 		ret = arena_salloc(ptr);
3080 	} else {
3081 		chunk_node_t *node, key;
3082 
3083 		/* Chunk (huge allocation). */
3084 
3085 		malloc_mutex_lock(&chunks_mtx);
3086 
3087 		/* Extract from tree of huge allocations. */
3088 		key.chunk = __UNCONST(ptr);
3089 		node = rb_tree_find_node(&huge, &key);
3090 		assert(node != NULL);
3091 
3092 		ret = node->size;
3093 
3094 		malloc_mutex_unlock(&chunks_mtx);
3095 	}
3096 
3097 	return (ret);
3098 }
3099 
3100 static void *
3101 iralloc(void *ptr, size_t size)
3102 {
3103 	void *ret;
3104 	size_t oldsize;
3105 
3106 	assert(ptr != NULL);
3107 	assert(size != 0);
3108 
3109 	oldsize = isalloc(ptr);
3110 
3111 	if (size <= arena_maxclass)
3112 		ret = arena_ralloc(ptr, size, oldsize);
3113 	else
3114 		ret = huge_ralloc(ptr, size, oldsize);
3115 
3116 	return (ret);
3117 }
3118 
3119 static void
3120 idalloc(void *ptr)
3121 {
3122 	arena_chunk_t *chunk;
3123 
3124 	assert(ptr != NULL);
3125 
3126 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3127 	if (chunk != ptr) {
3128 		/* Region. */
3129 		arena_dalloc(chunk->arena, chunk, ptr);
3130 	} else
3131 		huge_dalloc(ptr);
3132 }
3133 
3134 static void
3135 malloc_print_stats(void)
3136 {
3137 
3138 	if (OPT(print_stats)) {
3139 		char s[UMAX2S_BUFSIZE];
3140 		_malloc_message("___ Begin malloc statistics ___\n", "", "",
3141 		    "");
3142 		_malloc_message("Assertions ",
3143 #ifdef NDEBUG
3144 		    "disabled",
3145 #else
3146 		    "enabled",
3147 #endif
3148 		    "\n", "");
3149 		_malloc_message("Boolean MALLOC_OPTIONS: ",
3150 		    opt_abort ? "A" : "a",
3151 		    opt_junk ? "J" : "j",
3152 		    opt_hint ? "H" : "h");
3153 		_malloc_message(OPT(utrace) ? "PU" : "Pu",
3154 		    opt_sysv ? "V" : "v",
3155 		    opt_xmalloc ? "X" : "x",
3156 		    opt_zero ? "Z\n" : "z\n");
3157 
3158 		_malloc_message("CPUs: ", size_t2s(ncpus, s), "\n", "");
3159 		_malloc_message("Pointer size: ", size_t2s(sizeof(void *), s),
3160 		    "\n", "");
3161 		_malloc_message("Quantum size: ", size_t2s(quantum, s), "\n", "");
3162 		_malloc_message("Max small size: ", size_t2s(small_max, s), "\n",
3163 		    "");
3164 
3165 		_malloc_message("Chunk size: ", size_t2s(chunksize, s), "", "");
3166 		_malloc_message(" (2^", size_t2s((size_t)opt_chunk_2pow, s),
3167 		    ")\n", "");
3168 
3169 #ifdef MALLOC_STATS
3170 		{
3171 			size_t allocated, mapped;
3172 			unsigned i;
3173 			arena_t *arena;
3174 
3175 			/* Calculate and print allocated/mapped stats. */
3176 
3177 			/* arenas. */
3178 			for (i = 0, allocated = 0; i < ncpus; i++) {
3179 				if (arenas[i] != NULL) {
3180 					malloc_mutex_lock(&arenas[i]->mtx);
3181 					allocated +=
3182 					    arenas[i]->stats.allocated_small;
3183 					allocated +=
3184 					    arenas[i]->stats.allocated_large;
3185 					malloc_mutex_unlock(&arenas[i]->mtx);
3186 				}
3187 			}
3188 
3189 			/* huge/base. */
3190 			malloc_mutex_lock(&chunks_mtx);
3191 			allocated += huge_allocated;
3192 			mapped = stats_chunks.curchunks * chunksize;
3193 			malloc_mutex_unlock(&chunks_mtx);
3194 
3195 			malloc_mutex_lock(&base_mtx);
3196 			mapped += base_mapped;
3197 			malloc_mutex_unlock(&base_mtx);
3198 
3199 			malloc_printf("Allocated: %zu, mapped: %zu\n",
3200 			    allocated, mapped);
3201 
3202 			/* Print chunk stats. */
3203 			{
3204 				chunk_stats_t chunks_stats;
3205 
3206 				malloc_mutex_lock(&chunks_mtx);
3207 				chunks_stats = stats_chunks;
3208 				malloc_mutex_unlock(&chunks_mtx);
3209 
3210 				malloc_printf("chunks: nchunks   "
3211 				    "highchunks    curchunks\n");
3212 				malloc_printf("  %13llu%13lu%13lu\n",
3213 				    chunks_stats.nchunks,
3214 				    chunks_stats.highchunks,
3215 				    chunks_stats.curchunks);
3216 			}
3217 
3218 			/* Print huge allocation stats. */
3219 			malloc_printf(
3220 			    "huge: nmalloc      ndalloc      "
3221 			    "nralloc    allocated\n");
3222 			malloc_printf(" %12llu %12llu %12llu %12zu\n",
3223 			    huge_nmalloc, huge_ndalloc, huge_nralloc,
3224 			    huge_allocated);
3225 
3226 			/* Print stats for each arena. */
3227 			for (i = 0; i < ncpus; i++) {
3228 				arena = arenas[i];
3229 				if (arena != NULL) {
3230 					malloc_printf(
3231 					    "\narenas[%u] @ %p\n", i, arena);
3232 					malloc_mutex_lock(&arena->mtx);
3233 					stats_print(arena);
3234 					malloc_mutex_unlock(&arena->mtx);
3235 				}
3236 			}
3237 		}
3238 #endif /* #ifdef MALLOC_STATS */
3239 		_malloc_message("--- End malloc statistics ---\n", "", "", "");
3240 	}
3241 }
3242 
3243 /*
3244  * libpthread might call malloc(3), so the malloc implementation has to take
3245  * pains to avoid infinite recursion during initialization.
3246  */
3247 static inline bool
3248 malloc_init(void)
3249 {
3250 
3251 	if (__predict_false(malloc_initialized == false))
3252 		return (malloc_init_hard());
3253 
3254 	return (false);
3255 }
3256 
3257 static bool
3258 malloc_init_hard(void)
3259 {
3260 	unsigned i, j;
3261 	ssize_t linklen;
3262 	char buf[PATH_MAX + 1];
3263 	const char *opts = "";
3264 	int serrno;
3265 
3266 	malloc_mutex_lock(&init_lock);
3267 	if (malloc_initialized) {
3268 		/*
3269 		 * Another thread initialized the allocator before this one
3270 		 * acquired init_lock.
3271 		 */
3272 		malloc_mutex_unlock(&init_lock);
3273 		return (false);
3274 	}
3275 
3276 	serrno = errno;
3277 	/* Get number of CPUs. */
3278 	{
3279 		int mib[2];
3280 		size_t len;
3281 
3282 		mib[0] = CTL_HW;
3283 		mib[1] = HW_NCPU;
3284 		len = sizeof(ncpus);
3285 		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3286 			/* Error. */
3287 			ncpus = 1;
3288 		}
3289 	}
3290 
3291 	/* Get page size. */
3292 	{
3293 		long result;
3294 
3295 		result = getpagesize();
3296 		assert(result != -1);
3297 		pagesize = (unsigned) result;
3298 
3299 		/*
3300 		 * We assume that pagesize is a power of 2 when calculating
3301 		 * pagesize_mask and pagesize_2pow.
3302 		 */
3303 		assert(((result - 1) & result) == 0);
3304 		pagesize_mask = result - 1;
3305 		pagesize_2pow = ffs((int)result) - 1;
3306 	}
3307 
3308 	for (i = 0; i < 3; i++) {
3309 		/* Get runtime configuration. */
3310 		switch (i) {
3311 		case 0:
3312 			if ((linklen = readlink("/etc/malloc.conf", buf,
3313 			    sizeof(buf) - 1)) != -1) {
3314 				/*
3315 				 * Use the contents of the "/etc/malloc.conf"
3316 				 * symbolic link's name.
3317 				 */
3318 				buf[linklen] = '\0';
3319 				opts = buf;
3320 			} else {
3321 				/* No configuration specified. */
3322 				buf[0] = '\0';
3323 				opts = buf;
3324 			}
3325 			break;
3326 		case 1:
3327 			if ((opts = getenv("MALLOC_OPTIONS")) != NULL &&
3328 			    issetugid() == 0) {
3329 				/*
3330 				 * Do nothing; opts is already initialized to
3331 				 * the value of the MALLOC_OPTIONS environment
3332 				 * variable.
3333 				 */
3334 			} else {
3335 				/* No configuration specified. */
3336 				buf[0] = '\0';
3337 				opts = buf;
3338 			}
3339 			break;
3340 		case 2:
3341 			if (_malloc_options != NULL) {
3342 				/*
3343 				 * Use options that were compiled into the program.
3344 				 */
3345 				opts = _malloc_options;
3346 			} else {
3347 				/* No configuration specified. */
3348 				buf[0] = '\0';
3349 				opts = buf;
3350 			}
3351 			break;
3352 		default:
3353 			/* NOTREACHED */
3354 			/* LINTED */
3355 			assert(false);
3356 		}
3357 
3358 		for (j = 0; opts[j] != '\0'; j++) {
3359 			switch (opts[j]) {
3360 			case 'a':
3361 				opt_abort = false;
3362 				break;
3363 			case 'A':
3364 				opt_abort = true;
3365 				break;
3366 			case 'h':
3367 				opt_hint = false;
3368 				break;
3369 			case 'H':
3370 				opt_hint = true;
3371 				break;
3372 			case 'j':
3373 				opt_junk = false;
3374 				break;
3375 			case 'J':
3376 				opt_junk = true;
3377 				break;
3378 			case 'k':
3379 				/*
3380 				 * Chunks always require at least one header
3381 				 * page, so chunks can never be smaller than
3382 				 * two pages.
3383 				 */
3384 				if (opt_chunk_2pow > pagesize_2pow + 1)
3385 					opt_chunk_2pow--;
3386 				break;
3387 			case 'K':
3388 				if (opt_chunk_2pow + 1 <
3389 				    (int)(sizeof(size_t) << 3))
3390 					opt_chunk_2pow++;
3391 				break;
3392 			case 'p':
3393 				opt_print_stats = false;
3394 				break;
3395 			case 'P':
3396 				opt_print_stats = true;
3397 				break;
3398 			case 'q':
3399 				if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3400 					opt_quantum_2pow--;
3401 				break;
3402 			case 'Q':
3403 				if (opt_quantum_2pow < pagesize_2pow - 1)
3404 					opt_quantum_2pow++;
3405 				break;
3406 			case 's':
3407 				if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3408 					opt_small_max_2pow--;
3409 				break;
3410 			case 'S':
3411 				if (opt_small_max_2pow < pagesize_2pow - 1)
3412 					opt_small_max_2pow++;
3413 				break;
3414 			case 'u':
3415 				opt_utrace = false;
3416 				break;
3417 			case 'U':
3418 				opt_utrace = true;
3419 				break;
3420 			case 'v':
3421 				opt_sysv = false;
3422 				break;
3423 			case 'V':
3424 				opt_sysv = true;
3425 				break;
3426 			case 'x':
3427 				opt_xmalloc = false;
3428 				break;
3429 			case 'X':
3430 				opt_xmalloc = true;
3431 				break;
3432 			case 'z':
3433 				opt_zero = false;
3434 				break;
3435 			case 'Z':
3436 				opt_zero = true;
3437 				break;
3438 			default: {
3439 				char cbuf[2];
3440 
3441 				cbuf[0] = opts[j];
3442 				cbuf[1] = '\0';
3443 				_malloc_message(getprogname(),
3444 				    ": (malloc) Unsupported character in "
3445 				    "malloc options: '", cbuf, "'\n");
3446 			}
3447 			}
3448 		}
3449 	}
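	/*
	 * For example, MALLOC_OPTIONS="AJ" enables both abort-on-error ('A')
	 * and junk filling ('J'); sources parsed later in the loop above (the
	 * compiled-in _malloc_options string is last) override earlier ones
	 * flag by flag.
	 */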
3450 	errno = serrno;
3451 
3452 	/* Take care to call atexit() only once. */
3453 	if (OPT(print_stats)) {
3454 		/* Print statistics at exit. */
3455 		atexit(malloc_print_stats);
3456 	}
3457 
3458 	/* Set variables according to the value of opt_small_max_2pow. */
3459 	if (opt_small_max_2pow < opt_quantum_2pow)
3460 		opt_small_max_2pow = opt_quantum_2pow;
3461 	small_max = (1U << opt_small_max_2pow);
3462 
3463 	/* Set bin-related variables. */
3464 	bin_maxclass = (pagesize >> 1);
3465 	assert(opt_quantum_2pow >= TINY_MIN_2POW);
3466 	ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW);
3467 	assert(ntbins <= (unsigned)opt_quantum_2pow);
3468 	nqbins = (unsigned)(small_max >> opt_quantum_2pow);
3469 	nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1);
3470 
3471 	/* Set variables according to the value of opt_quantum_2pow. */
3472 	quantum = (1 << opt_quantum_2pow);
3473 	quantum_mask = quantum - 1;
3474 	if (ntbins > 0)
3475 		small_min = (quantum >> 1) + 1;
3476 	else
3477 		small_min = 1;
3478 	assert(small_min <= quantum);
3479 
3480 	/* Set variables according to the value of opt_chunk_2pow. */
3481 	chunksize = (1LU << opt_chunk_2pow);
3482 	chunksize_mask = chunksize - 1;
3483 	chunksize_2pow = (unsigned)opt_chunk_2pow;
3484 	chunk_npages = (unsigned)(chunksize >> pagesize_2pow);
3485 	{
3486 		unsigned header_size;
3487 
3488 		header_size = (unsigned)(sizeof(arena_chunk_t) +
3489 		    (sizeof(arena_chunk_map_t) * (chunk_npages - 1)));
3490 		arena_chunk_header_npages = (header_size >> pagesize_2pow);
3491 		if ((header_size & pagesize_mask) != 0)
3492 			arena_chunk_header_npages++;
3493 	}
3494 	arena_maxclass = chunksize - (arena_chunk_header_npages <<
3495 	    pagesize_2pow);
3496 
3497 	UTRACE(0, 0, 0);
3498 
3499 #ifdef MALLOC_STATS
3500 	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3501 #endif
3502 
3503 	/* Various sanity checks that regard configuration. */
3504 	assert(quantum >= sizeof(void *));
3505 	assert(quantum <= pagesize);
3506 	assert(chunksize >= pagesize);
3507 	assert(quantum * 4 <= chunksize);
3508 
3509 	/* Initialize chunks data. */
3510 	malloc_mutex_init(&chunks_mtx);
3511 	rb_tree_init(&huge, &chunk_tree_ops);
3512 #ifdef USE_BRK
3513 	malloc_mutex_init(&brk_mtx);
3514 	brk_base = sbrk(0);
3515 	brk_prev = brk_base;
3516 	brk_max = brk_base;
3517 #endif
3518 #ifdef MALLOC_STATS
3519 	huge_nmalloc = 0;
3520 	huge_ndalloc = 0;
3521 	huge_nralloc = 0;
3522 	huge_allocated = 0;
3523 #endif
3524 	rb_tree_init(&old_chunks, &chunk_tree_ops);
3525 
3526 	/* Initialize base allocation data structures. */
3527 #ifdef MALLOC_STATS
3528 	base_mapped = 0;
3529 #endif
3530 #ifdef USE_BRK
3531 	/*
3532 	 * Allocate a base chunk here, since it doesn't actually have to be
3533 	 * chunk-aligned.  Doing this before allocating any other chunks allows
3534 	 * the use of space that would otherwise be wasted.
3535 	 */
3536 	base_pages_alloc(0);
3537 #endif
3538 	base_chunk_nodes = NULL;
3539 	malloc_mutex_init(&base_mtx);
3540 
3541 	/* Allocate and initialize arenas. */
3542 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * ncpus);
3543 	if (arenas == NULL) {
3544 		malloc_mutex_unlock(&init_lock);
3545 		return (true);
3546 	}
3547 	/*
3548 	 * Zero the array.  In practice, this should always be pre-zeroed,
3549 	 * since it was just mmap()ed, but let's be sure.
3550 	 */
3551 	memset(arenas, 0, sizeof(arena_t *) * ncpus);
3552 
3553 	/*
3554 	 * Initialize one arena here.  The rest are lazily created in
3555 	 * arena_choose_hard().
3556 	 */
3557 	if ((arenas[0] = arenas_extend()) == NULL) {
3558 		malloc_mutex_unlock(&init_lock);
3559 		return (true);
3560 	}
3561 
3562 	malloc_mutex_init(&arenas_mtx);
3563 
3564 	malloc_initialized = true;
3565 	malloc_mutex_unlock(&init_lock);
3566 	return (false);
3567 }
3568 
3569 /*
3570  * End general internal functions.
3571  */
3572 /******************************************************************************/
3573 /*
3574  * Begin malloc(3)-compatible functions.
3575  */
3576 
3577 void *
3578 malloc(size_t size)
3579 {
3580 	void *ret;
3581 
3582 	if (__predict_false(malloc_init())) {
3583 		ret = NULL;
3584 		goto RETURN;
3585 	}
3586 
3587 	if (__predict_false(size == 0)) {
3588 		if (NOT_OPT(sysv))
3589 			size = 1;
3590 		else {
3591 			ret = NULL;
3592 			goto RETURN;
3593 		}
3594 	}
3595 
3596 	ret = imalloc(size);
3597 
3598 RETURN:
3599 	if (__predict_false(ret == NULL)) {
3600 		if (OPT(xmalloc)) {
3601 			_malloc_message(getprogname(),
3602 			    ": (malloc) Error in malloc(): out of memory\n", "",
3603 			    "");
3604 			abort();
3605 		}
3606 		errno = ENOMEM;
3607 	}
3608 
3609 	UTRACE(0, size, ret);
3610 	return (ret);
3611 }
3612 
3613 int
3614 posix_memalign(void **memptr, size_t alignment, size_t size)
3615 {
3616 	int ret;
3617 	void *result;
3618 
3619 	if (__predict_false(malloc_init()))
3620 		result = NULL;
3621 	else {
3622 		/* Make sure that alignment is a large enough power of 2. */
3623 		if (((alignment - 1) & alignment) != 0
3624 		    || alignment < sizeof(void *)) {
3625 			if (OPT(xmalloc)) {
3626 				_malloc_message(getprogname(),
3627 				    ": (malloc) Error in posix_memalign(): "
3628 				    "invalid alignment\n", "", "");
3629 				abort();
3630 			}
3631 			result = NULL;
3632 			ret = EINVAL;
3633 			goto RETURN;
3634 		}
3635 
3636 		if (size == 0) {
3637 			if (NOT_OPT(sysv))
3638 				size = 1;
3639 			else {
3640 				result = NULL;
3641 				ret = 0;
3642 				goto RETURN;
3643 			}
3644 		}
3645 		result = ipalloc(alignment, size);
3646 	}
3647 
3648 	if (__predict_false(result == NULL)) {
3649 		if (OPT(xmalloc)) {
3650 			_malloc_message(getprogname(),
3651 			": (malloc) Error in posix_memalign(): out of memory\n",
3652 			"", "");
3653 			abort();
3654 		}
3655 		ret = ENOMEM;
3656 		goto RETURN;
3657 	}
3658 
3659 	*memptr = result;
3660 	ret = 0;
3661 
3662 RETURN:
3663 	UTRACE(0, size, result);
3664 	return (ret);
3665 }
3666 
3667 void *
3668 calloc(size_t num, size_t size)
3669 {
3670 	void *ret;
3671 	size_t num_size;
3672 
3673 	if (__predict_false(malloc_init())) {
3674 		num_size = 0;
3675 		ret = NULL;
3676 		goto RETURN;
3677 	}
3678 
3679 	num_size = num * size;
3680 	if (__predict_false(num_size == 0)) {
3681 		if (NOT_OPT(sysv) && ((num == 0) || (size == 0)))
3682 			num_size = 1;
3683 		else {
3684 			ret = NULL;
3685 			goto RETURN;
3686 		}
3687 	/*
3688 	 * Try to avoid division here.  We know that it isn't possible to
3689 	 * overflow during multiplication if neither operand uses any of the
3690 	 * most significant half of the bits in a size_t.
3691 	 */
3692 	} else if ((unsigned long long)((num | size) &
3693 	   ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
3694 	   (num_size / size != num)) {
3695 		/* size_t overflow. */
3696 		ret = NULL;
3697 		goto RETURN;
3698 	}
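	/*
	 * For example, with a 64-bit size_t the mask above is
	 * 0xffffffff00000000; if neither num nor size has any of those bits
	 * set, both operands are below 2^32 and their product cannot exceed
	 * SIZE_T_MAX, so the division is skipped entirely.
	 */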
3699 
3700 	ret = icalloc(num_size);
3701 
3702 RETURN:
3703 	if (__predict_false(ret == NULL)) {
3704 		if (OPT(xmalloc)) {
3705 			_malloc_message(getprogname(),
3706 			    ": (malloc) Error in calloc(): out of memory\n", "",
3707 			    "");
3708 			abort();
3709 		}
3710 		errno = ENOMEM;
3711 	}
3712 
3713 	UTRACE(0, num_size, ret);
3714 	return (ret);
3715 }
3716 
3717 void *
3718 realloc(void *ptr, size_t size)
3719 {
3720 	void *ret;
3721 
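	/*
	 * realloc(NULL, size) behaves like malloc(size).  With the "sysv"
	 * option, realloc(ptr, 0) frees ptr and returns NULL; otherwise a
	 * zero-byte request is treated as a 1-byte request.
	 */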
3722 	if (__predict_false(size == 0)) {
3723 		if (NOT_OPT(sysv))
3724 			size = 1;
3725 		else {
3726 			if (ptr != NULL)
3727 				idalloc(ptr);
3728 			ret = NULL;
3729 			goto RETURN;
3730 		}
3731 	}
3732 
3733 	if (__predict_true(ptr != NULL)) {
3734 		assert(malloc_initialized);
3735 
3736 		ret = iralloc(ptr, size);
3737 
3738 		if (__predict_false(ret == NULL)) {
3739 			if (OPT(xmalloc)) {
3740 				_malloc_message(getprogname(),
3741 				    ": (malloc) Error in realloc(): out of "
3742 				    "memory\n", "", "");
3743 				abort();
3744 			}
3745 			errno = ENOMEM;
3746 		}
3747 	} else {
3748 		if (__predict_false(malloc_init()))
3749 			ret = NULL;
3750 		else
3751 			ret = imalloc(size);
3752 
3753 		if (__predict_false(ret == NULL)) {
3754 			if (OPT(xmalloc)) {
3755 				_malloc_message(getprogname(),
3756 				    ": (malloc) Error in realloc(): out of "
3757 				    "memory\n", "", "");
3758 				abort();
3759 			}
3760 			errno = ENOMEM;
3761 		}
3762 	}
3763 
3764 RETURN:
3765 	UTRACE(ptr, size, ret);
3766 	return (ret);
3767 }
3768 
3769 void
3770 free(void *ptr)
3771 {
3772 
3773 	UTRACE(ptr, 0, 0);
3774 	if (__predict_true(ptr != NULL)) {
3775 		assert(malloc_initialized);
3776 
3777 		idalloc(ptr);
3778 	}
3779 }
3780 
3781 /*
3782  * End malloc(3)-compatible functions.
3783  */
3784 /******************************************************************************/
3785 /*
3786  * Begin non-standard functions.
3787  */
3788 size_t
3789 malloc_usable_size(const void *ptr)
3790 {
3791 
3792 	assert(ptr != NULL);
3793 
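	/*
	 * The value reported by isalloc() is the size of the region actually
	 * backing ptr, which may be larger than the size originally
	 * requested.
	 */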
3794 	return (isalloc(ptr));
3795 }
3796 
3797 /*
3798  * End non-standard functions.
3799  */
3800 /******************************************************************************/
3801 /*
3802  * Begin library-private functions, used by threading libraries for protection
3803  * of malloc during fork().  These functions are only called if the program is
3804  * running in threaded mode, so there is no need to check whether the program
3805  * is threaded here.
3806  */
3807 
3808 void
3809 _malloc_prefork(void)
3810 {
3811 	unsigned i;
3812 
3813 	/* Acquire all mutexes in a safe order. */
3814 	malloc_mutex_lock(&init_lock);
3815 	malloc_mutex_lock(&arenas_mtx);
3816 	for (i = 0; i < ncpus; i++) {
3817 		if (arenas[i] != NULL)
3818 			malloc_mutex_lock(&arenas[i]->mtx);
3819 	}
3820 	malloc_mutex_lock(&chunks_mtx);
3821 	malloc_mutex_lock(&base_mtx);
3822 #ifdef USE_BRK
3823 	malloc_mutex_lock(&brk_mtx);
3824 #endif
3825 }
3826 
3827 void
3828 _malloc_postfork(void)
3829 {
3830 	unsigned i;
3831 
3832 	/* Release all mutexes, now that fork() has completed. */
3833 #ifdef USE_BRK
3834 	malloc_mutex_unlock(&brk_mtx);
3835 #endif
3836 	malloc_mutex_unlock(&base_mtx);
3837 	malloc_mutex_unlock(&chunks_mtx);
3838 
3839 	for (i = ncpus; i-- > 0; ) {
3840 		if (arenas[i] != NULL)
3841 			malloc_mutex_unlock(&arenas[i]->mtx);
3842 	}
3843 	malloc_mutex_unlock(&arenas_mtx);
3844 	malloc_mutex_unlock(&init_lock);
3845 }
3846 
3847 void
3848 _malloc_postfork_child(void)
3849 {
3850 	unsigned i;
3851 
3852 	/* Reinitialize all mutexes, now that fork() has completed. */
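	/*
	 * Only the forking thread exists in the child, so the mutexes are
	 * reset to a pristine state rather than unlocked, discarding any
	 * waiter state left behind by threads that no longer exist.
	 */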
3853 #ifdef USE_BRK
3854 	malloc_mutex_init(&brk_mtx);
3855 #endif
3856 	malloc_mutex_init(&base_mtx);
3857 	malloc_mutex_init(&chunks_mtx);
3858 
3859 	for (i = ncpus; i-- > 0; ) {
3860 		if (arenas[i] != NULL)
3861 			malloc_mutex_init(&arenas[i]->mtx);
3862 	}
3863 	malloc_mutex_init(&arenas_mtx);
3864 	malloc_mutex_init(&init_lock);
3865 }
3866 
3867 /*
3868  * End library-private functions.
3869  */
3870 /******************************************************************************/
3871