xref: /netbsd-src/lib/libc/stdlib/jemalloc.c (revision 1b37b234c2d1e7ce7b90329ab4771937deb7fe4b)
1 /*	$NetBSD: jemalloc.c,v 1.63 2023/10/14 19:39:33 ad Exp $	*/
2 
3 /*-
4  * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
5  * Copyright (C) 2023 Andrew Doran <ad@NetBSD.org>.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice(s), this list of conditions and the following disclaimer as
13  *    the first lines of this file unmodified other than the possible
14  *    addition of one or more copyright notices.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice(s), this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
21  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
27  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
29  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  *******************************************************************************
33  *
34  * This allocator implementation is designed to provide scalable performance
35  * for multi-threaded programs on multi-processor systems.  The following
36  * features are included for this purpose:
37  *
38  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
39  *     contention and cache sloshing.
40  *
41  *   + Cache line sharing between arenas is avoided for internal data
42  *     structures.
43  *
44  *   + Memory is managed in chunks and runs (chunks can be split into runs),
45  *     rather than as individual pages.  This provides a constant-time
46  *     mechanism for associating allocations with particular arenas.
47  *
48  * Allocation requests are rounded up to the nearest size class, and no record
49  * of the original request size is maintained.  Allocations are broken into
50  * categories according to size class.  Assuming runtime defaults, 4 kB pages
51  * and a 16 byte quantum, the size classes in each category are as follows:
52  *
53  *   |=====================================|
54  *   | Category | Subcategory    |    Size |
55  *   |=====================================|
56  *   | Small    | Tiny           |       2 |
57  *   |          |                |       4 |
58  *   |          |                |       8 |
59  *   |          |----------------+---------|
60  *   |          | Quantum-spaced |      16 |
61  *   |          |                |      32 |
62  *   |          |                |      48 |
63  *   |          |                |     ... |
64  *   |          |                |     480 |
65  *   |          |                |     496 |
66  *   |          |                |     512 |
67  *   |          |----------------+---------|
68  *   |          | Sub-page       |    1 kB |
69  *   |          |                |    2 kB |
70  *   |=====================================|
71  *   | Large                     |    4 kB |
72  *   |                           |    8 kB |
73  *   |                           |   12 kB |
74  *   |                           |     ... |
75  *   |                           | 1012 kB |
76  *   |                           | 1016 kB |
77  *   |                           | 1020 kB |
78  *   |=====================================|
79  *   | Huge                      |    1 MB |
80  *   |                           |    2 MB |
81  *   |                           |    3 MB |
82  *   |                           |     ... |
83  *   |=====================================|
84  *
85  * A different mechanism is used for each category:
86  *
87  *   Small : Each size class is segregated into its own set of runs.  Each run
88  *           maintains a bitmap of which regions are free/allocated.
89  *
90  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
91  *           in the associated arena chunk header maps.
92  *
93  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
94  *          Metadata are stored in a separate red-black tree.
95  *
96  *******************************************************************************
97  */
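
/*
 * For illustration (assuming the defaults above: 4 kB pages, 16 byte
 * quantum, 1 MB chunks): a 100 byte request is served from the 112 byte
 * quantum-spaced class, a 5000 byte request is backed by an 8 kB large
 * run, and a 3 MB request becomes a huge allocation of three chunks.
 */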
98 
99 /* LINTLIBRARY */
100 
101 /*
102  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
103  * defaults the A and J runtime options to off.  These settings are appropriate
104  * for production systems.
105  */
106 #define MALLOC_PRODUCTION
107 
108 #ifndef MALLOC_PRODUCTION
109 #  define MALLOC_DEBUG
110 #endif
111 
112 #include <sys/cdefs.h>
113 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
114 __RCSID("$NetBSD: jemalloc.c,v 1.63 2023/10/14 19:39:33 ad Exp $");
115 
116 #include "namespace.h"
117 #include <sys/mman.h>
118 #include <sys/param.h>
119 #include <sys/time.h>
120 #include <sys/types.h>
121 #include <sys/sysctl.h>
122 #include <sys/rbtree.h>
123 #include <sys/uio.h>
124 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
125 
126 #include <errno.h>
127 #include <limits.h>
128 #include <pthread.h>
129 #include <sched.h>
130 #include <stdarg.h>
131 #include <stdbool.h>
132 #include <stdio.h>
133 #include <stdint.h>
134 #include <stdlib.h>
135 #include <string.h>
136 #include <strings.h>
137 #include <unistd.h>
138 
139 #include <reentrant.h>
140 #include "extern.h"
141 
142 #define STRERROR_R(a, b, c)	strerror_r_ss(a, b, c);
143 
144 /* MALLOC_STATS enables statistics calculation. */
145 #ifndef MALLOC_PRODUCTION
146 #  define MALLOC_STATS
147 #endif
148 
149 #ifdef MALLOC_DEBUG
150 #  ifdef NDEBUG
151 #    undef NDEBUG
152 #  endif
153 #else
154 #  ifndef NDEBUG
155 #    define NDEBUG
156 #  endif
157 #endif
158 #include <assert.h>
159 
160 #ifdef MALLOC_DEBUG
161    /* Disable inlining to make debugging easier. */
162 #  define inline
163 #endif
164 
165 /*
166  * Compare two pointers (64- or 32-bit wide) and, without using
167  * conditionals, produce a ternary 32-bit indicator that maintains the
168  * expected ordering: [negative, equal, positive].
169  *
170  * XXX it depends on two's complement arithmetic.
171  * XXX maybe should be a built-in for rbtree?
172  */
173 static inline int
174 ptrcmp(const void *pa, const void *pb)
175 {
176 #ifdef _LP64
177 	const uintptr_t a = (uintptr_t)pa, b = (uintptr_t)pb;
178 	const uintptr_t diff = a - b;
179 	assert(((a | b) & 1) == 0);
180 	return (int)(diff >> 32) | ((unsigned)diff >> 1);
181 #else
182 	return (intptr_t)pa - (intptr_t)pb;
183 #endif
184 }
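
/*
 * Worked example of the _LP64 branch above (illustrative only): with
 * a = 0x2000 and b = 0x1000, diff = 0x1000; the high 32 bits are zero and
 * (unsigned)diff >> 1 = 0x800, so the result is positive.  With the
 * operands swapped, diff wraps to 0xfffffffffffff000; the high 32 bits
 * are all ones, so the OR yields a negative result.  The low word is
 * shifted right by one so that it can never appear negative; discarding
 * bit 0 is safe only because both pointers are even (hence the assert).
 */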
185 
186 /* Size of stack-allocated buffer passed to strerror_r(). */
187 #define	STRERROR_BUF		64
188 
189 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
190 
191 /*
192  * If you touch the TINY_MIN_2POW definition for any architecture, please
193  * make sure to adjust the corresponding definition for JEMALLOC_TINY_MIN_2POW
194  * in the gcc 4.8 tree in dist/gcc/tree-ssa-ccp.c and verify that a native
195  * gcc is still buildable!
196  */
197 
198 #ifdef __i386__
199 #  define QUANTUM_2POW_MIN	4
200 #  define SIZEOF_PTR_2POW	2
201 #  define USE_BRK
202 #endif
203 #ifdef __ia64__
204 #  define QUANTUM_2POW_MIN	4
205 #  define SIZEOF_PTR_2POW	3
206 #endif
207 #ifdef __aarch64__
208 #  define QUANTUM_2POW_MIN	4
209 #  define SIZEOF_PTR_2POW	3
210 #  define TINY_MIN_2POW		3
211 #endif
212 #ifdef __alpha__
213 #  define QUANTUM_2POW_MIN	4
214 #  define SIZEOF_PTR_2POW	3
215 #  define TINY_MIN_2POW		3
216 #endif
217 #ifdef __sparc64__
218 #  define QUANTUM_2POW_MIN	4
219 #  define SIZEOF_PTR_2POW	3
220 #  define TINY_MIN_2POW		3
221 #endif
222 #ifdef __amd64__
223 #  define QUANTUM_2POW_MIN	4
224 #  define SIZEOF_PTR_2POW	3
225 #  define TINY_MIN_2POW		3
226 #endif
227 #ifdef __arm__
228 #  define QUANTUM_2POW_MIN	3
229 #  define SIZEOF_PTR_2POW	2
230 #  define USE_BRK
231 #  ifdef __ARM_EABI__
232 #    define TINY_MIN_2POW	3
233 #  endif
234 #endif
235 #ifdef __powerpc__
236 #  define QUANTUM_2POW_MIN	4
237 #  define SIZEOF_PTR_2POW	2
238 #  define USE_BRK
239 #  define TINY_MIN_2POW		3
240 #endif
241 #if defined(__sparc__) && !defined(__sparc64__)
242 #  define QUANTUM_2POW_MIN	4
243 #  define SIZEOF_PTR_2POW	2
244 #  define USE_BRK
245 #endif
246 #ifdef __or1k__
247 #  define QUANTUM_2POW_MIN	4
248 #  define SIZEOF_PTR_2POW	2
249 #  define USE_BRK
250 #endif
251 #ifdef __vax__
252 #  define QUANTUM_2POW_MIN	4
253 #  define SIZEOF_PTR_2POW	2
254 #  define USE_BRK
255 #endif
256 #ifdef __sh__
257 #  define QUANTUM_2POW_MIN	4
258 #  define SIZEOF_PTR_2POW	2
259 #  define USE_BRK
260 #endif
261 #ifdef __m68k__
262 #  define QUANTUM_2POW_MIN	4
263 #  define SIZEOF_PTR_2POW	2
264 #  define USE_BRK
265 #endif
266 #if defined(__mips__)
267 #  ifdef _LP64
268 #    define SIZEOF_PTR_2POW	3
269 #    define TINY_MIN_2POW	3
270 #  else
271 #    define SIZEOF_PTR_2POW	2
272 #  endif
273 #  define QUANTUM_2POW_MIN	4
274 #  define USE_BRK
275 #endif
276 #if defined(__riscv__)
277 #  ifdef _LP64
278 #    define SIZEOF_PTR_2POW	3
279 #    define TINY_MIN_2POW	3
280 #  else
281 #    define SIZEOF_PTR_2POW	2
282 #  endif
283 #  define QUANTUM_2POW_MIN	4
284 #  define USE_BRK
285 #endif
286 #ifdef __hppa__
287 #  define QUANTUM_2POW_MIN	4
288 #  define TINY_MIN_2POW		4
289 #  define SIZEOF_PTR_2POW	2
290 #  define USE_BRK
291 #endif
292 
293 #define	SIZEOF_PTR		(1 << SIZEOF_PTR_2POW)
294 
295 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
296 #ifndef SIZEOF_INT_2POW
297 #  define SIZEOF_INT_2POW	2
298 #endif
299 
300 /*
301  * Size and alignment of memory chunks that are allocated by the OS's virtual
302  * memory system.
303  */
304 #define	CHUNK_2POW_DEFAULT	20
305 
306 /*
307  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
308  * so over-estimates are okay (up to a point), but under-estimates will
309  * negatively affect performance.
310  */
311 #define	CACHELINE_2POW		6
312 #define	CACHELINE		((size_t)(1 << CACHELINE_2POW))
313 
314 /* Smallest size class to support. */
315 #ifndef TINY_MIN_2POW
316 #define	TINY_MIN_2POW		2
317 #endif
318 
319 /*
320  * Maximum size class that is a multiple of the quantum, but not (necessarily)
321  * a power of 2.  Above this size, allocations are rounded up to the nearest
322  * power of 2.
323  */
324 #define	SMALL_MAX_2POW_DEFAULT	9
325 #define	SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)
326 
327 /*
328  * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
329  * as small as possible such that this setting is still honored, without
330  * violating other constraints.  The goal is to make runs as small as possible
331  * without exceeding a per run external fragmentation threshold.
332  *
333  * We use binary fixed point math for overhead computations, where the binary
334  * point is implicitly RUN_BFP bits to the left.
335  *
336  * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
337  * honored for some/all object sizes, since there is one bit of header overhead
338  * per object (plus a constant).  This constraint is relaxed (ignored) for runs
339  * that are so small that the per-region overhead is greater than:
340  *
341  *   RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP))
342  */
343 #define RUN_BFP			12
344 /*                              \/   Implicit binary fixed point. */
345 #define RUN_MAX_OVRHD		0x0000003dU
346 #define RUN_MAX_OVRHD_RELAX	0x00001800U
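
/*
 * For illustration: with RUN_BFP == 12, RUN_MAX_OVRHD == 0x3d corresponds
 * to 61/4096, i.e. a desired maximum of roughly 1.5% header overhead per
 * run.
 */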
347 
348 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
349 #define RUN_MAX_SMALL_2POW	15
350 #define RUN_MAX_SMALL		(1 << RUN_MAX_SMALL_2POW)
351 
352 /******************************************************************************/
353 
354 #define	malloc_mutex_t	mutex_t
355 
356 /* Set to true once the allocator has been initialized. */
357 static bool malloc_initialized = false;
358 
359 #ifdef _REENTRANT
360 /* Used to avoid initialization races. */
361 static mutex_t init_lock = MUTEX_INITIALIZER;
362 #endif
363 
364 /******************************************************************************/
365 /*
366  * Statistics data structures.
367  */
368 
369 #ifdef MALLOC_STATS
370 
371 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
372 struct malloc_bin_stats_s {
373 	/*
374 	 * Number of allocation requests that corresponded to the size of this
375 	 * bin.
376 	 */
377 	uint64_t	nrequests;
378 
379 	/* Total number of runs created for this bin's size class. */
380 	uint64_t	nruns;
381 
382 	/*
383 	 * Total number of runs reused by extracting them from the runs tree for
384 	 * this bin's size class.
385 	 */
386 	uint64_t	reruns;
387 
388 	/* High-water mark for this bin. */
389 	unsigned long	highruns;
390 
391 	/* Current number of runs in this bin. */
392 	unsigned long	curruns;
393 };
394 
395 typedef struct arena_stats_s arena_stats_t;
396 struct arena_stats_s {
397 	/* Number of bytes currently mapped. */
398 	size_t		mapped;
399 
400 	/* Per-size-category statistics. */
401 	size_t		allocated_small;
402 	uint64_t	nmalloc_small;
403 	uint64_t	ndalloc_small;
404 
405 	size_t		allocated_large;
406 	uint64_t	nmalloc_large;
407 	uint64_t	ndalloc_large;
408 };
409 
410 typedef struct chunk_stats_s chunk_stats_t;
411 struct chunk_stats_s {
412 	/* Number of chunks that were allocated. */
413 	uint64_t	nchunks;
414 
415 	/* High-water mark for number of chunks allocated. */
416 	unsigned long	highchunks;
417 
418 	/*
419 	 * Current number of chunks allocated.  This value isn't maintained for
420 	 * any other purpose, so keep track of it in order to be able to set
421 	 * highchunks.
422 	 */
423 	unsigned long	curchunks;
424 };
425 
426 #endif /* #ifdef MALLOC_STATS */
427 
428 /******************************************************************************/
429 /*
430  * Chunk data structures.
431  */
432 
433 /* Tree of chunks. */
434 typedef struct chunk_node_s chunk_node_t;
435 struct chunk_node_s {
436 	/* Linkage for the chunk tree. */
437 	rb_node_t link;
438 
439 	/*
440 	 * Pointer to the chunk that this tree node is responsible for.  In some
441 	 * (but certainly not all) cases, this data structure is placed at the
442 	 * beginning of the corresponding chunk, so this field may point to this
443 	 * node.
444 	 */
445 	void	*chunk;
446 
447 	/* Total chunk size. */
448 	size_t	size;
449 };
450 typedef struct chunk_tree_s chunk_tree_t;
451 
452 static int chunk_comp(void *, const void *, const void *);
453 
454 static const rb_tree_ops_t chunk_tree_ops = {
455 	.rbto_compare_nodes = chunk_comp,
456 	.rbto_compare_key = chunk_comp,
457 	.rbto_node_offset = offsetof(struct chunk_node_s, link),
458 };
459 
460 /******************************************************************************/
461 /*
462  * Arena data structures.
463  */
464 
465 typedef struct arena_s arena_t;
466 typedef struct arena_bin_s arena_bin_t;
467 
468 typedef struct arena_chunk_map_s arena_chunk_map_t;
469 struct arena_chunk_map_s {
470 	/* Number of pages in run. */
471 	uint32_t	npages;
472 	/*
473 	 * Position within run.  For a free run, this is POS_FREE for the first
474 	 * and last pages.  The POS_FREE special value makes it possible to
475 	 * quickly coalesce free runs.
476 	 *
477 	 * This is the limiting factor for chunksize; there can be at most 2^31
478 	 * pages in a run.
479 	 */
480 #define POS_FREE ((uint32_t)0xffffffffU)
481 	uint32_t	pos;
482 };
483 
484 /* Arena chunk header. */
485 typedef struct arena_chunk_s arena_chunk_t;
486 struct arena_chunk_s {
487 	/* Linkage for the arena's chunk tree. */
488 	rb_node_t link;
489 
490 	/* Arena that owns the chunk. */
491 	arena_t *arena;
492 
493 	/*
494 	 * Number of pages in use.  This is maintained in order to make
495 	 * detection of empty chunks fast.
496 	 */
497 	uint32_t pages_used;
498 
499 	/*
500 	 * Every time a free run larger than this value is created/coalesced,
501 	 * this value is increased.  The only way that the value decreases is if
502 	 * arena_run_alloc() fails to find a free run as large as advertised by
503 	 * this value.
504 	 */
505 	uint32_t max_frun_npages;
506 
507 	/*
508 	 * Every time a free run that starts at an earlier page than this value
509 	 * is created/coalesced, this value is decreased.  It is reset in a
510 	 * similar fashion to max_frun_npages.
511 	 */
512 	uint32_t min_frun_ind;
513 
514 	/*
515 	 * Map of pages within chunk that keeps track of free/large/small.  For
516 	 * free runs, only the map entries for the first and last pages are
517 	 * kept up to date, so that free runs can be quickly coalesced.
518 	 */
519 	arena_chunk_map_t map[1]; /* Dynamically sized. */
520 };
521 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
522 
523 static int arena_chunk_comp(void *, const void *, const void *);
524 
525 static const rb_tree_ops_t arena_chunk_tree_ops = {
526 	.rbto_compare_nodes = arena_chunk_comp,
527 	.rbto_compare_key = arena_chunk_comp,
528 	.rbto_node_offset = offsetof(struct arena_chunk_s, link),
529 };
530 
531 typedef struct arena_run_s arena_run_t;
532 struct arena_run_s {
533 	/* Linkage for run trees. */
534 	rb_node_t	link;
535 
536 #ifdef MALLOC_DEBUG
537 	uint32_t	magic;
538 #  define ARENA_RUN_MAGIC 0x384adf93
539 #endif
540 
541 	/* Bin this run is associated with. */
542 	arena_bin_t	*bin;
543 
544 	/* Index of first element that might have a free region. */
545 	unsigned	regs_minelm;
546 
547 	/* Number of free regions in run. */
548 	unsigned	nfree;
549 
550 	/* Bitmask of in-use regions (0: in use, 1: free). */
551 	unsigned	regs_mask[1]; /* Dynamically sized. */
552 };
553 typedef struct arena_run_tree_s arena_run_tree_t;
554 
555 static int arena_run_comp(void *, const void *, const void *);
556 
557 static const rb_tree_ops_t arena_run_tree_ops = {
558 	.rbto_compare_nodes = arena_run_comp,
559 	.rbto_compare_key = arena_run_comp,
560 	.rbto_node_offset = offsetof(struct arena_run_s, link),
561 };
562 
563 struct arena_bin_s {
564 	/*
565 	 * Current run being used to service allocations of this bin's size
566 	 * class.
567 	 */
568 	arena_run_t	*runcur;
569 
570 	/*
571 	 * Tree of non-full runs.  This tree is used when looking for an
572 	 * existing run when runcur is no longer usable.  We choose the
573 	 * non-full run that is lowest in memory; this policy tends to keep
574 	 * objects packed well, and it can also help reduce the number of
575 	 * almost-empty chunks.
576 	 */
577 	rb_tree_t	runs;
578 
579 	/* Size of regions in a run for this bin's size class. */
580 	size_t		reg_size;
581 
582 	/* Total size of a run for this bin's size class. */
583 	size_t		run_size;
584 
585 	/* Total number of regions in a run for this bin's size class. */
586 	uint32_t	nregs;
587 
588 	/* Number of elements in a run's regs_mask for this bin's size class. */
589 	uint32_t	regs_mask_nelms;
590 
591 	/* Offset of first region in a run for this bin's size class. */
592 	uint32_t	reg0_offset;
593 
594 #ifdef MALLOC_STATS
595 	/* Bin statistics. */
596 	malloc_bin_stats_t stats;
597 #endif
598 };
599 
600 struct arena_s {
601 #ifdef MALLOC_DEBUG
602 	uint32_t		magic;
603 #  define ARENA_MAGIC 0x947d3d24
604 #endif
605 
606 	/* All operations on this arena require that mtx be locked. */
607 	malloc_mutex_t		mtx;
608 
609 #ifdef MALLOC_STATS
610 	arena_stats_t		stats;
611 #endif
612 
613 	/*
614 	 * Tree of chunks this arena manages.
615 	 */
616 	rb_tree_t	chunks;
617 
618 	/*
619 	 * In order to avoid rapid chunk allocation/deallocation when an arena
620 	 * oscillates right on the cusp of needing a new chunk, cache the most
621 	 * recently freed chunk.  This caching is disabled by opt_hint.
622 	 *
623 	 * There is one spare chunk per arena, rather than one spare total, in
624 	 * order to avoid interactions between multiple threads that could make
625 	 * a single spare inadequate.
626 	 */
627 	arena_chunk_t *spare;
628 
629 	/*
630 	 * bins is used to store rings of free regions of the following sizes,
631 	 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
632 	 *
633 	 *   bins[i] | size |
634 	 *   --------+------+
635 	 *        0  |    2 |
636 	 *        1  |    4 |
637 	 *        2  |    8 |
638 	 *   --------+------+
639 	 *        3  |   16 |
640 	 *        4  |   32 |
641 	 *        5  |   48 |
642 	 *        6  |   64 |
643 	 *           :      :
644 	 *           :      :
645 	 *       33  |  496 |
646 	 *       34  |  512 |
647 	 *   --------+------+
648 	 *       35  | 1024 |
649 	 *       36  | 2048 |
650 	 *   --------+------+
651 	 */
652 	arena_bin_t		bins[1]; /* Dynamically sized. */
653 };
654 
655 /******************************************************************************/
656 /*
657  * Data.
658  */
659 
660 /* Number of CPUs. */
661 static unsigned		ncpus;
662 
663 /* VM page size. */
664 static size_t		pagesize;
665 static size_t		pagesize_mask;
666 static int		pagesize_2pow;
667 
668 /* Various bin-related settings. */
669 static size_t		bin_maxclass; /* Max size class for bins. */
670 static unsigned		ntbins; /* Number of (2^n)-spaced tiny bins. */
671 static unsigned		nqbins; /* Number of quantum-spaced bins. */
672 static unsigned		nsbins; /* Number of (2^n)-spaced sub-page bins. */
673 static size_t		small_min;
674 static size_t		small_max;
675 
676 /* Various quantum-related settings. */
677 static size_t		quantum;
678 static size_t		quantum_mask; /* (quantum - 1). */
679 
680 /* Various chunk-related settings. */
681 static size_t		chunksize;
682 static size_t		chunksize_mask; /* (chunksize - 1). */
683 static int		chunksize_2pow;
684 static unsigned		chunk_npages;
685 static unsigned		arena_chunk_header_npages;
686 static size_t		arena_maxclass; /* Max size class for arenas. */
687 
688 /********/
689 /*
690  * Chunks.
691  */
692 
693 #ifdef _REENTRANT
694 /* Protects chunk-related data structures. */
695 static malloc_mutex_t	chunks_mtx;
696 #endif
697 
698 /* Tree of chunks that are stand-alone huge allocations. */
699 static rb_tree_t	huge;
700 
701 #ifdef USE_BRK
702 /*
703  * Try to use brk for chunk-size allocations, due to address space constraints.
704  */
705 /*
706  * Protects sbrk() calls.  This must be separate from chunks_mtx, since
707  * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
708  * could cause recursive lock acquisition).
709  */
710 #ifdef _REENTRANT
711 static malloc_mutex_t	brk_mtx;
712 #endif
713 /* Result of first sbrk(0) call. */
714 static void		*brk_base;
715 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
716 static void		*brk_prev;
717 /* Current upper limit on brk addresses. */
718 static void		*brk_max;
719 #endif
720 
721 #ifdef MALLOC_STATS
722 /* Huge allocation statistics. */
723 static uint64_t		huge_nmalloc;
724 static uint64_t		huge_ndalloc;
725 static uint64_t		huge_nralloc;
726 static size_t		huge_allocated;
727 #endif
728 
729 /*
730  * Tree of chunks that were previously allocated.  This is used when allocating
731  * chunks, in an attempt to re-use address space.
732  */
733 static rb_tree_t	old_chunks;
734 
735 /****************************/
736 /*
737  * base (internal allocation).
738  */
739 
740 /*
741  * Current pages that are being used for internal memory allocations.  These
742  * pages are carved up in cacheline-size quanta, so that there is no chance of
743  * false cache line sharing.
744  */
745 static void		*base_pages;
746 static void		*base_next_addr;
747 static void		*base_past_addr; /* Addr immediately past base_pages. */
748 static chunk_node_t	*base_chunk_nodes; /* LIFO cache of chunk nodes. */
749 #ifdef _REENTRANT
750 static malloc_mutex_t	base_mtx;
751 #endif
752 #ifdef MALLOC_STATS
753 static size_t		base_mapped;
754 #endif
755 
756 /********/
757 /*
758  * Arenas.
759  */
760 
761 /*
762  * Arenas that are used to service external requests.  Not all elements of the
763  * arenas array are necessarily used; arenas are created lazily as needed.
764  */
765 static arena_t		**arenas;
766 #ifdef _REENTRANT
767 static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */
768 #endif
769 
770 #ifdef MALLOC_STATS
771 /* Chunk statistics. */
772 static chunk_stats_t	stats_chunks;
773 #endif
774 
775 /*******************************/
776 /*
777  * Runtime configuration options.
778  */
779 const char	*_malloc_options;
780 
781 #ifndef MALLOC_PRODUCTION
782 static bool	opt_abort = true;
783 static bool	opt_junk = true;
784 #else
785 static bool	opt_abort = false;
786 static bool	opt_junk = false;
787 #endif
788 static bool	opt_hint = false;
789 static bool	opt_print_stats = false;
790 static int	opt_quantum_2pow = QUANTUM_2POW_MIN;
791 static int	opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
792 static int	opt_chunk_2pow = CHUNK_2POW_DEFAULT;
793 static bool	opt_utrace = false;
794 static bool	opt_sysv = false;
795 static bool	opt_xmalloc = false;
796 static bool	opt_zero = false;
797 
798 typedef struct {
799 	void	*p;
800 	size_t	s;
801 	void	*r;
802 } malloc_utrace_t;
803 
804 /* Sprinkle branch hints for the compiler and CPU. */
805 #define	OPT(a)		__predict_false(opt_##a)
806 #define	NOT_OPT(a)	__predict_true(!opt_##a)
807 
808 /* Trace malloc/free for ktrace/kdump. */
809 #define	UTRACE(a, b, c)							\
810 	if (OPT(utrace)) {						\
811 		malloc_utrace_t ut;					\
812 		ut.p = a;						\
813 		ut.s = b;						\
814 		ut.r = c;						\
815 		utrace("malloc", &ut, sizeof(ut));			\
816 	}
817 
818 /******************************************************************************/
819 /*
820  * Begin function prototypes for non-inline static functions.
821  */
822 
823 static void	wrtmessage(const char *p1, const char *p2, const char *p3,
824 		const char *p4);
825 #ifdef MALLOC_STATS
826 static void	malloc_printf(const char *format, ...);
827 #endif
828 static char	*size_t2s(size_t x, char *s);
829 static bool	base_pages_alloc(size_t minsize);
830 static void	*base_alloc(size_t size);
831 static chunk_node_t *base_chunk_node_alloc(void);
832 static void	base_chunk_node_dealloc(chunk_node_t *node);
833 #ifdef MALLOC_STATS
834 static void	stats_print(arena_t *arena);
835 #endif
836 static void	*pages_map(void *addr, size_t size);
837 static void	*pages_map_align(void *addr, size_t size, int align);
838 static void	pages_unmap(void *addr, size_t size);
839 static void	*chunk_alloc(size_t size);
840 static void	chunk_dealloc(void *chunk, size_t size);
841 static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
842 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
843 static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
844 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
845 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
846 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
847 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
848 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
849 static void	*arena_malloc(arena_t *arena, size_t size);
850 static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
851     size_t alloc_size);
852 static size_t	arena_salloc(const void *ptr);
853 static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
854 static void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
855 static void	arena_new(arena_t *arena);
856 static arena_t	*arenas_extend(void);
857 static void	*huge_malloc(size_t size);
858 static void	*huge_palloc(size_t alignment, size_t size);
859 static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
860 static void	huge_dalloc(void *ptr);
861 static void	*imalloc(size_t size);
862 static void	*ipalloc(size_t alignment, size_t size);
863 static void	*icalloc(size_t size);
864 static size_t	isalloc(const void *ptr);
865 static void	*iralloc(void *ptr, size_t size);
866 static void	idalloc(void *ptr);
867 static void	malloc_print_stats(void);
868 static bool	malloc_init_hard(void);
869 
870 /*
871  * End function prototypes.
872  */
873 /******************************************************************************/
874 /*
875  * Begin mutex.
876  */
877 
878 #define	malloc_mutex_init(m)	mutex_init(m, NULL)
879 #define	malloc_mutex_lock(m)	mutex_lock(m)
880 #define	malloc_mutex_unlock(m)	mutex_unlock(m)
881 
882 /*
883  * End mutex.
884  */
885 /******************************************************************************/
886 /*
887  * Begin Utility functions/macros.
888  */
889 
890 /* Return the chunk address for allocation address a. */
891 #define	CHUNK_ADDR2BASE(a)						\
892 	((void *)((uintptr_t)(a) & ~chunksize_mask))
893 
894 /* Return the chunk offset of address a. */
895 #define	CHUNK_ADDR2OFFSET(a)						\
896 	((size_t)((uintptr_t)(a) & chunksize_mask))
897 
898 /* Return the smallest chunk multiple that is >= s. */
899 #define	CHUNK_CEILING(s)						\
900 	(((s) + chunksize_mask) & ~chunksize_mask)
901 
902 /* Return the smallest cacheline multiple that is >= s. */
903 #define	CACHELINE_CEILING(s)						\
904 	(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
905 
906 /* Return the smallest quantum multiple that is >= a. */
907 #define	QUANTUM_CEILING(a)						\
908 	(((a) + quantum_mask) & ~quantum_mask)
909 
910 /* Return the smallest pagesize multiple that is >= s. */
911 #define	PAGE_CEILING(s)							\
912 	(((s) + pagesize_mask) & ~pagesize_mask)
913 
914 /* Compute the smallest power of 2 that is >= x. */
915 static inline size_t
916 pow2_ceil(size_t x)
917 {
918 
919 	x--;
920 	x |= x >> 1;
921 	x |= x >> 2;
922 	x |= x >> 4;
923 	x |= x >> 8;
924 	x |= x >> 16;
925 #if (SIZEOF_PTR == 8)
926 	x |= x >> 32;
927 #endif
928 	x++;
929 	return (x);
930 }
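
/*
 * Example: pow2_ceil(5) first computes 4 (0b100); the shift/OR cascade
 * smears the top bit downward to give 7 (0b111), and the final increment
 * yields 8.  Powers of two map to themselves, e.g. pow2_ceil(16) == 16.
 */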
931 
932 static void
933 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
934 {
935 
936 	write(STDERR_FILENO, p1, strlen(p1));
937 	write(STDERR_FILENO, p2, strlen(p2));
938 	write(STDERR_FILENO, p3, strlen(p3));
939 	write(STDERR_FILENO, p4, strlen(p4));
940 }
941 
942 void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
943 	    const char *p4) = wrtmessage;
944 
945 #ifdef MALLOC_STATS
946 /*
947  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
948  */
949 static void
950 malloc_printf(const char *format, ...)
951 {
952 	char buf[4096];
953 	va_list ap;
954 
955 	va_start(ap, format);
956 	vsnprintf(buf, sizeof(buf), format, ap);
957 	va_end(ap);
958 	_malloc_message(buf, "", "", "");
959 }
960 #endif
961 
962 /*
963  * We don't want to depend on vsnprintf() for production builds, since that can
964  * cause unnecessary bloat for static binaries.  size_t2s() provides minimal
965  * integer printing functionality, so that malloc_printf() use can be limited to
966  * MALLOC_STATS code.
967  */
968 #define UMAX2S_BUFSIZE	21
969 static char *
970 size_t2s(size_t x, char *s)
971 {
972 	unsigned i;
973 
974 	/* Make sure UMAX2S_BUFSIZE is large enough. */
975 	/* LINTED */
976 	assert(sizeof(size_t) <= 8);
977 
978 	i = UMAX2S_BUFSIZE - 1;
979 	s[i] = '\0';
980 	do {
981 		i--;
982 		s[i] = "0123456789"[(int)x % 10];
983 		x /= (uintmax_t)10LL;
984 	} while (x > 0);
985 
986 	return (&s[i]);
987 }
988 
989 /******************************************************************************/
990 
991 static bool
992 base_pages_alloc(size_t minsize)
993 {
994 	size_t csize = 0;
995 
996 #ifdef USE_BRK
997 	/*
998 	 * Do special brk allocation here, since base allocations don't need to
999 	 * be chunk-aligned.
1000 	 */
1001 	if (brk_prev != (void *)-1) {
1002 		void *brk_cur;
1003 		intptr_t incr;
1004 
1005 		if (minsize != 0)
1006 			csize = CHUNK_CEILING(minsize);
1007 
1008 		malloc_mutex_lock(&brk_mtx);
1009 		do {
1010 			/* Get the current end of brk. */
1011 			brk_cur = sbrk(0);
1012 
1013 			/*
1014 			 * Calculate how much padding is necessary to
1015 			 * chunk-align the end of brk.  Don't worry about
1016 			 * brk_cur not being chunk-aligned though.
1017 			 */
1018 			incr = (intptr_t)chunksize
1019 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1020 			assert(incr >= 0);
1021 			if ((size_t)incr < minsize)
1022 				incr += csize;
1023 
1024 			brk_prev = sbrk(incr);
1025 			if (brk_prev == brk_cur) {
1026 				/* Success. */
1027 				malloc_mutex_unlock(&brk_mtx);
1028 				base_pages = brk_cur;
1029 				base_next_addr = base_pages;
1030 				base_past_addr = (void *)((uintptr_t)base_pages
1031 				    + incr);
1032 #ifdef MALLOC_STATS
1033 				base_mapped += incr;
1034 #endif
1035 				return (false);
1036 			}
1037 		} while (brk_prev != (void *)-1);
1038 		malloc_mutex_unlock(&brk_mtx);
1039 	}
1040 	if (minsize == 0) {
1041 		/*
1042 		 * Failure during initialization doesn't matter, so avoid
1043 		 * falling through to the mmap-based page mapping code.
1044 		 */
1045 		return (true);
1046 	}
1047 #endif
1048 	assert(minsize != 0);
1049 	csize = PAGE_CEILING(minsize);
1050 	base_pages = pages_map(NULL, csize);
1051 	if (base_pages == NULL)
1052 		return (true);
1053 	base_next_addr = base_pages;
1054 	base_past_addr = (void *)((uintptr_t)base_pages + csize);
1055 #ifdef MALLOC_STATS
1056 	base_mapped += csize;
1057 #endif
1058 	return (false);
1059 }
1060 
1061 static void *
1062 base_alloc(size_t size)
1063 {
1064 	void *ret;
1065 	size_t csize;
1066 
1067 	/* Round size up to nearest multiple of the cacheline size. */
1068 	csize = CACHELINE_CEILING(size);
1069 
1070 	malloc_mutex_lock(&base_mtx);
1071 
1072 	/* Make sure there's enough space for the allocation. */
1073 	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1074 		if (base_pages_alloc(csize)) {
1075 			ret = NULL;
1076 			goto RETURN;
1077 		}
1078 	}
1079 
1080 	/* Allocate. */
1081 	ret = base_next_addr;
1082 	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1083 
1084 RETURN:
1085 	malloc_mutex_unlock(&base_mtx);
1086 	return (ret);
1087 }
1088 
1089 static chunk_node_t *
1090 base_chunk_node_alloc(void)
1091 {
1092 	chunk_node_t *ret;
1093 
1094 	malloc_mutex_lock(&base_mtx);
1095 	if (base_chunk_nodes != NULL) {
1096 		ret = base_chunk_nodes;
1097 		/* LINTED */
1098 		base_chunk_nodes = *(chunk_node_t **)ret;
1099 		malloc_mutex_unlock(&base_mtx);
1100 	} else {
1101 		malloc_mutex_unlock(&base_mtx);
1102 		ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1103 	}
1104 
1105 	return (ret);
1106 }
1107 
1108 static void
1109 base_chunk_node_dealloc(chunk_node_t *node)
1110 {
1111 
1112 	malloc_mutex_lock(&base_mtx);
1113 	/* LINTED */
1114 	*(chunk_node_t **)node = base_chunk_nodes;
1115 	base_chunk_nodes = node;
1116 	malloc_mutex_unlock(&base_mtx);
1117 }
1118 
1119 /******************************************************************************/
1120 
1121 #ifdef MALLOC_STATS
1122 static void
1123 stats_print(arena_t *arena)
1124 {
1125 	const unsigned minusone = (unsigned)-1;
1126 	unsigned i, gap_start;
1127 
1128 	malloc_printf(
1129 	    "          allocated/mapped            nmalloc      ndalloc\n");
1130 
1131 	malloc_printf("small: %12zu %-12s %12llu %12llu\n",
1132 	    arena->stats.allocated_small, "", arena->stats.nmalloc_small,
1133 	    arena->stats.ndalloc_small);
1134 	malloc_printf("large: %12zu %-12s %12llu %12llu\n",
1135 	    arena->stats.allocated_large, "", arena->stats.nmalloc_large,
1136 	    arena->stats.ndalloc_large);
1137 	malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
1138 	    arena->stats.allocated_small + arena->stats.allocated_large,
1139 	    arena->stats.mapped,
1140 	    arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1141 	    arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1142 
1143 	malloc_printf("bins:     bin   size regs pgs  requests   newruns"
1144 	    "    reruns maxruns curruns\n");
1145 	for (i = 0, gap_start = minusone; i < ntbins + nqbins + nsbins; i++) {
1146 		if (arena->bins[i].stats.nrequests == 0) {
1147 			if (gap_start == minusone)
1148 				gap_start = i;
1149 		} else {
1150 			if (gap_start != minusone) {
1151 				if (i > gap_start + 1) {
1152 					/* Gap of more than one size class. */
1153 					malloc_printf("[%u..%u]\n",
1154 					    gap_start, i - 1);
1155 				} else {
1156 					/* Gap of one size class. */
1157 					malloc_printf("[%u]\n", gap_start);
1158 				}
1159 				gap_start = minusone;
1160 			}
1161 			malloc_printf(
1162 			    "%13u %1s %4u %4u %3u %9llu %9llu"
1163 			    " %9llu %7lu %7lu\n",
1164 			    i,
1165 			    i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1166 			    arena->bins[i].reg_size,
1167 			    arena->bins[i].nregs,
1168 			    arena->bins[i].run_size >> pagesize_2pow,
1169 			    arena->bins[i].stats.nrequests,
1170 			    arena->bins[i].stats.nruns,
1171 			    arena->bins[i].stats.reruns,
1172 			    arena->bins[i].stats.highruns,
1173 			    arena->bins[i].stats.curruns);
1174 		}
1175 	}
1176 	if (gap_start != minusone) {
1177 		if (i > gap_start + 1) {
1178 			/* Gap of more than one size class. */
1179 			malloc_printf("[%u..%u]\n", gap_start, i - 1);
1180 		} else {
1181 			/* Gap of one size class. */
1182 			malloc_printf("[%u]\n", gap_start);
1183 		}
1184 	}
1185 }
1186 #endif
1187 
1188 /*
1189  * End Utility functions/macros.
1190  */
1191 /******************************************************************************/
1192 /*
1193  * Begin chunk management functions.
1194  */
1195 
1196 static int
1197 chunk_comp(void *context, const void *va, const void *vb)
1198 {
1199 	const chunk_node_t *a = va, *b = vb;
1200 
1201 	assert(a != NULL);
1202 	assert(b != NULL);
1203 
1204 	return ptrcmp(a->chunk, b->chunk);
1205 }
1206 
1207 static void *
1208 pages_map_align(void *addr, size_t size, int align)
1209 {
1210 	void *ret;
1211 
1212 	/*
1213 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
1214 	 * of existing mappings, and we only want to create new mappings.
1215 	 */
1216 	ret = mmap(addr, size, PROT_READ | PROT_WRITE,
1217 	    MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(align), -1, 0);
1218 	assert(ret != NULL);
1219 
1220 	if (ret == MAP_FAILED)
1221 		ret = NULL;
1222 	else if (addr != NULL && ret != addr) {
1223 		/*
1224 		 * We succeeded in mapping memory, but not in the right place.
1225 		 */
1226 		if (munmap(ret, size) == -1) {
1227 			char buf[STRERROR_BUF];
1228 
1229 			STRERROR_R(errno, buf, sizeof(buf));
1230 			_malloc_message(getprogname(),
1231 			    ": (malloc) Error in munmap(): ", buf, "\n");
1232 			if (OPT(abort))
1233 				abort();
1234 		}
1235 		ret = NULL;
1236 	}
1237 
1238 	assert(ret == NULL || (addr == NULL && ret != addr)
1239 	    || (addr != NULL && ret == addr));
1240 	return (ret);
1241 }
1242 
1243 static void *
1244 pages_map(void *addr, size_t size)
1245 {
1246 
1247 	return pages_map_align(addr, size, 0);
1248 }
1249 
1250 static void
1251 pages_unmap(void *addr, size_t size)
1252 {
1253 
1254 	if (munmap(addr, size) == -1) {
1255 		char buf[STRERROR_BUF];
1256 
1257 		STRERROR_R(errno, buf, sizeof(buf));
1258 		_malloc_message(getprogname(),
1259 		    ": (malloc) Error in munmap(): ", buf, "\n");
1260 		if (OPT(abort))
1261 			abort();
1262 	}
1263 }
1264 
1265 static void *
1266 chunk_alloc(size_t size)
1267 {
1268 	void *ret, *chunk;
1269 	chunk_node_t *tchunk, *delchunk;
1270 
1271 	assert(size != 0);
1272 	assert((size & chunksize_mask) == 0);
1273 
1274 	malloc_mutex_lock(&chunks_mtx);
1275 
1276 	if (size == chunksize) {
1277 		/*
1278 		 * Check for address ranges that were previously chunks and try
1279 		 * to use them.
1280 		 */
1281 
1282 		tchunk = RB_TREE_MIN(&old_chunks);
1283 		while (tchunk != NULL) {
1284 			/* Found an address range.  Try to recycle it. */
1285 
1286 			chunk = tchunk->chunk;
1287 			delchunk = tchunk;
1288 			tchunk = RB_TREE_NEXT(&old_chunks, delchunk);
1289 
1290 			/* Remove delchunk from the tree. */
1291 			rb_tree_remove_node(&old_chunks, delchunk);
1292 			base_chunk_node_dealloc(delchunk);
1293 
1294 #ifdef USE_BRK
1295 			if ((uintptr_t)chunk >= (uintptr_t)brk_base
1296 			    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1297 				/* Re-use a previously freed brk chunk. */
1298 				ret = chunk;
1299 				goto RETURN;
1300 			}
1301 #endif
1302 			if ((ret = pages_map(chunk, size)) != NULL) {
1303 				/* Success. */
1304 				goto RETURN;
1305 			}
1306 		}
1307 	}
1308 
1309 	/*
1310 	 * Try to over-allocate, but allow the OS to place the allocation
1311 	 * anywhere.  Beware of size_t wrap-around.
1312 	 */
1313 	if (size + chunksize > size) {
1314 		if ((ret = pages_map_align(NULL, size, chunksize_2pow))
1315 		    != NULL) {
1316 			goto RETURN;
1317 		}
1318 	}
1319 
1320 #ifdef USE_BRK
1321 	/*
1322 	 * Try to create allocations in brk, in order to make full use of
1323 	 * limited address space.
1324 	 */
1325 	if (brk_prev != (void *)-1) {
1326 		void *brk_cur;
1327 		intptr_t incr;
1328 
1329 		/*
1330 		 * The loop is necessary to recover from races with other
1331 		 * threads that are using brk for something other than malloc.
1332 		 */
1333 		malloc_mutex_lock(&brk_mtx);
1334 		do {
1335 			/* Get the current end of brk. */
1336 			brk_cur = sbrk(0);
1337 
1338 			/*
1339 			 * Calculate how much padding is necessary to
1340 			 * chunk-align the end of brk.
1341 			 */
1342 			incr = (intptr_t)size
1343 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1344 			if (incr == (intptr_t)size) {
1345 				ret = brk_cur;
1346 			} else {
1347 				ret = (void *)((intptr_t)brk_cur + incr);
1348 				incr += size;
1349 			}
1350 
1351 			brk_prev = sbrk(incr);
1352 			if (brk_prev == brk_cur) {
1353 				/* Success. */
1354 				malloc_mutex_unlock(&brk_mtx);
1355 				brk_max = (void *)((intptr_t)ret + size);
1356 				goto RETURN;
1357 			}
1358 		} while (brk_prev != (void *)-1);
1359 		malloc_mutex_unlock(&brk_mtx);
1360 	}
1361 #endif
1362 
1363 	/* All strategies for allocation failed. */
1364 	ret = NULL;
1365 RETURN:
1366 	if (ret != NULL) {
1367 		chunk_node_t key;
1368 		/*
1369 		 * Clean out any entries in old_chunks that overlap with the
1370 		 * memory we just allocated.
1371 		 */
1372 		key.chunk = ret;
1373 		tchunk = rb_tree_find_node_geq(&old_chunks, &key);
1374 		while (tchunk != NULL
1375 		    && (uintptr_t)tchunk->chunk >= (uintptr_t)ret
1376 		    && (uintptr_t)tchunk->chunk < (uintptr_t)ret + size) {
1377 			delchunk = tchunk;
1378 			tchunk = RB_TREE_NEXT(&old_chunks, delchunk);
1379 			rb_tree_remove_node(&old_chunks, delchunk);
1380 			base_chunk_node_dealloc(delchunk);
1381 		}
1382 
1383 	}
1384 #ifdef MALLOC_STATS
1385 	if (ret != NULL) {
1386 		stats_chunks.nchunks += (size / chunksize);
1387 		stats_chunks.curchunks += (size / chunksize);
1388 	}
1389 	if (stats_chunks.curchunks > stats_chunks.highchunks)
1390 		stats_chunks.highchunks = stats_chunks.curchunks;
1391 #endif
1392 	malloc_mutex_unlock(&chunks_mtx);
1393 
1394 	assert(CHUNK_ADDR2BASE(ret) == ret);
1395 	return (ret);
1396 }
1397 
1398 static void
1399 chunk_dealloc(void *chunk, size_t size)
1400 {
1401 	chunk_node_t *node;
1402 
1403 	assert(chunk != NULL);
1404 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
1405 	assert(size != 0);
1406 	assert((size & chunksize_mask) == 0);
1407 
1408 	malloc_mutex_lock(&chunks_mtx);
1409 
1410 #ifdef USE_BRK
1411 	if ((uintptr_t)chunk >= (uintptr_t)brk_base
1412 	    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1413 		void *brk_cur;
1414 
1415 		malloc_mutex_lock(&brk_mtx);
1416 		/* Get the current end of brk. */
1417 		brk_cur = sbrk(0);
1418 
1419 		/*
1420 		 * Try to shrink the data segment if this chunk is at the end
1421 		 * of the data segment.  The sbrk() call here is subject to a
1422 		 * race condition with threads that use brk(2) or sbrk(2)
1423 		 * directly, but the alternative would be to leak memory for
1424 		 * the sake of poorly designed multi-threaded programs.
1425 		 */
1426 		if (brk_cur == brk_max
1427 		    && (void *)((uintptr_t)chunk + size) == brk_max
1428 		    && sbrk(-(intptr_t)size) == brk_max) {
1429 			malloc_mutex_unlock(&brk_mtx);
1430 			if (brk_prev == brk_max) {
1431 				/* Success. */
1432 				brk_prev = (void *)((intptr_t)brk_max
1433 				    - (intptr_t)size);
1434 				brk_max = brk_prev;
1435 			}
1436 		} else {
1437 			size_t offset;
1438 
1439 			malloc_mutex_unlock(&brk_mtx);
1440 			madvise(chunk, size, MADV_FREE);
1441 
1442 			/*
1443 			 * Iteratively create records of each chunk-sized
1444 			 * memory region that 'chunk' is comprised of, so that
1445 			 * the address range can be recycled if memory usage
1446 			 * increases later on.
1447 			 */
1448 			for (offset = 0; offset < size; offset += chunksize) {
1449 				node = base_chunk_node_alloc();
1450 				if (node == NULL)
1451 					break;
1452 
1453 				node->chunk = (void *)((uintptr_t)chunk
1454 				    + (uintptr_t)offset);
1455 				node->size = chunksize;
1456 				rb_tree_insert_node(&old_chunks, node);
1457 			}
1458 		}
1459 	} else {
1460 #endif
1461 		pages_unmap(chunk, size);
1462 
1463 		/*
1464 		 * Make a record of the chunk's address, so that the address
1465 		 * range can be recycled if memory usage increases later on.
1466 		 * Don't bother to create entries if (size > chunksize), since
1467 		 * doing so could cause scalability issues for truly gargantuan
1468 		 * objects (many gigabytes or larger).
1469 		 */
1470 		if (size == chunksize) {
1471 			node = base_chunk_node_alloc();
1472 			if (node != NULL) {
1473 				node->chunk = (void *)(uintptr_t)chunk;
1474 				node->size = chunksize;
1475 				rb_tree_insert_node(&old_chunks, node);
1476 			}
1477 		}
1478 #ifdef USE_BRK
1479 	}
1480 #endif
1481 
1482 #ifdef MALLOC_STATS
1483 	stats_chunks.curchunks -= (size / chunksize);
1484 #endif
1485 	malloc_mutex_unlock(&chunks_mtx);
1486 }
1487 
1488 /*
1489  * End chunk management functions.
1490  */
1491 /******************************************************************************/
1492 /*
1493  * Begin arena.
1494  */
1495 
1496 /*
1497  * Choose a per-CPU arena.
1498  */
1499 static __noinline arena_t *
1500 choose_arena_hard(void)
1501 {
1502 
1503 	assert(arenas[0] != NULL);
1504 
1505 	malloc_mutex_lock(&arenas_mtx);
1506 	for (unsigned i = 1; i < ncpus; i++)
1507 		if (arenas[i] == NULL)
1508 			arenas[i] = arenas_extend();
1509 	malloc_mutex_unlock(&arenas_mtx);
1510 
1511 	return arenas[thr_curcpu()];
1512 }
1513 
1514 static inline arena_t *
1515 choose_arena(void)
1516 {
1517 	arena_t *arena;
1518 
1519 	/* NB: when libpthread is absent, thr_curcpu() always returns zero. */
1520 	arena = arenas[thr_curcpu()];
1521 	if (__predict_true(arena != NULL))
1522 		return arena;
1523 
1524 	return choose_arena_hard();
1525 }
1526 
1527 static int
1528 arena_chunk_comp(void *context, const void *va, const void *vb)
1529 {
1530 	const arena_chunk_t *a = va, *b = vb;
1531 	int diff;
1532 
1533 	assert(a != NULL);
1534 	assert(b != NULL);
1535 
1536 	if ((diff = a->max_frun_npages - b->max_frun_npages) != 0)
1537 		return diff;
1538 	return ptrcmp(a, b);
1539 }
1540 
1541 static int
1542 arena_run_comp(void *context, const void *a, const void *b)
1543 {
1544 
1545 	assert(a != NULL);
1546 	assert(b != NULL);
1547 
1548 	return ptrcmp(a, b);
1549 }
1550 
1551 static inline void *
1552 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1553 {
1554 	void *ret;
1555 	unsigned i, mask, bit, regind;
1556 
1557 	assert(run->magic == ARENA_RUN_MAGIC);
1558 	assert(run->regs_minelm < bin->regs_mask_nelms);
1559 
1560 	/*
1561 	 * Move the first check outside the loop, so that run->regs_minelm can
1562 	 * be updated unconditionally, without the possibility of updating it
1563 	 * multiple times.
1564 	 */
1565 	i = run->regs_minelm;
1566 	mask = run->regs_mask[i];
1567 	if (mask != 0) {
1568 		/* Usable allocation found. */
1569 		bit = ffs((int)mask) - 1;
1570 
1571 		regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1572 		ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1573 		    + (bin->reg_size * regind));
1574 
1575 		/* Clear bit. */
1576 		mask ^= (1U << bit);
1577 		run->regs_mask[i] = mask;
1578 
1579 		return (ret);
1580 	}
1581 
1582 	for (i++; i < bin->regs_mask_nelms; i++) {
1583 		mask = run->regs_mask[i];
1584 		if (mask != 0) {
1585 			/* Usable allocation found. */
1586 			bit = ffs((int)mask) - 1;
1587 
1588 			regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1589 			ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1590 			    + (bin->reg_size * regind));
1591 
1592 			/* Clear bit. */
1593 			mask ^= (1U << bit);
1594 			run->regs_mask[i] = mask;
1595 
1596 			/*
1597 			 * Make a note that nothing before this element
1598 			 * contains a free region.
1599 			 */
1600 			run->regs_minelm = i; /* Low payoff: + (mask == 0); */
1601 
1602 			return (ret);
1603 		}
1604 	}
1605 	/* Not reached. */
1606 	/* LINTED */
1607 	assert(0);
1608 	return (NULL);
1609 }
1610 
1611 static inline void
1612 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1613 {
1614 	/*
1615 	 * To divide by a number D that is not a power of two we multiply
1616 	 * by (2^21 / D) and then right shift by 21 positions.
1617 	 *
1618 	 *   X / D
1619 	 *
1620 	 * becomes
1621 	 *
1622 	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
1623 	 */
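	/*
	 * Worked example (illustrative, assuming QUANTUM_2POW_MIN == 4):
	 * for D == 48, SIZE_INV(3) == (1 << 21) / 48 + 1 == 43691, so
	 * X == 96 gives (96 * 43691) >> 21 == 4194336 >> 21 == 2, matching
	 * 96 / 48.  The "+ 1" rounds the reciprocal up so that the
	 * truncating shift does not round down for the offsets that occur.
	 */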
1624 #define SIZE_INV_SHIFT 21
1625 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
1626 	static const unsigned size_invs[] = {
1627 	    SIZE_INV(3),
1628 	    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1629 	    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1630 	    SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1631 	    SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1632 	    SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1633 	    SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1634 	    SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1635 #if (QUANTUM_2POW_MIN < 4)
1636 	    ,
1637 	    SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1638 	    SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1639 	    SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1640 	    SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1641 	    SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1642 	    SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1643 	    SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1644 	    SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1645 #endif
1646 	};
1647 	unsigned diff, regind, elm, bit;
1648 
1649 	/* LINTED */
1650 	assert(run->magic == ARENA_RUN_MAGIC);
1651 	assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1652 	    >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1653 
1654 	/*
1655 	 * Avoid doing division with a variable divisor if possible.  Using
1656 	 * actual division here can reduce allocator throughput by over 20%!
1657 	 */
1658 	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1659 	if ((size & (size - 1)) == 0) {
1660 		/*
1661 		 * log2_table allows fast division by a power of two in the
1662 		 * [1..128] range.
1663 		 *
1664 		 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1665 		 */
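		/*
		 * Example: for size == 16, log2_table[15] == 4, so
		 * regind == diff >> 4; for size == 256 the second branch
		 * below uses 8 + log2_table[0] == 8, i.e. diff >> 8.
		 */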
1666 		static const unsigned char log2_table[] = {
1667 		    0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1668 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1669 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1670 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1671 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1672 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1673 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1674 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1675 		};
1676 
1677 		if (size <= 128)
1678 			regind = (diff >> log2_table[size - 1]);
1679 		else if (size <= 32768)
1680 			regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1681 		else {
1682 			/*
1683 			 * The region size is too large for us to use the lookup
1684 			 * table.  Use real division.
1685 			 */
1686 			regind = (unsigned)(diff / size);
1687 		}
1688 	} else if (size <= (((sizeof(size_invs) / sizeof(unsigned)) + 2)
1689 	    << QUANTUM_2POW_MIN)) {
1690 		regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1691 		regind >>= SIZE_INV_SHIFT;
1692 	} else {
1693 		/*
1694 		 * size_invs isn't large enough to handle this size class, so
1695 		 * calculate regind using actual division.  This only happens
1696 		 * if the user increases small_max via the 'S' runtime
1697 		 * configuration option.
1698 		 */
1699 		regind = (unsigned)(diff / size);
1700 	};
1701 	assert(diff == regind * size);
1702 	assert(regind < bin->nregs);
1703 
1704 	elm = regind >> (SIZEOF_INT_2POW + 3);
1705 	if (elm < run->regs_minelm)
1706 		run->regs_minelm = elm;
1707 	bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1708 	assert((run->regs_mask[elm] & (1U << bit)) == 0);
1709 	run->regs_mask[elm] |= (1U << bit);
1710 #undef SIZE_INV
1711 #undef SIZE_INV_SHIFT
1712 }
1713 
1714 static void
1715 arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
1716 {
1717 	arena_chunk_t *chunk;
1718 	unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
1719 	unsigned i;
1720 
1721 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1722 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1723 	    >> pagesize_2pow);
1724 	total_pages = chunk->map[run_ind].npages;
1725 	need_pages = (unsigned)(size >> pagesize_2pow);
1726 	assert(need_pages <= total_pages);
1727 	rem_pages = total_pages - need_pages;
1728 
1729 	/* Split enough pages from the front of run to fit allocation size. */
1730 	map_offset = run_ind;
1731 	for (i = 0; i < need_pages; i++) {
1732 		chunk->map[map_offset + i].npages = need_pages;
1733 		chunk->map[map_offset + i].pos = i;
1734 	}
1735 
1736 	/* Keep track of trailing unused pages for later use. */
1737 	if (rem_pages > 0) {
1738 		/* Update map for trailing pages. */
1739 		map_offset += need_pages;
1740 		chunk->map[map_offset].npages = rem_pages;
1741 		chunk->map[map_offset].pos = POS_FREE;
1742 		chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
1743 		chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
1744 	}
1745 
1746 	chunk->pages_used += need_pages;
1747 }
1748 
1749 static arena_chunk_t *
1750 arena_chunk_alloc(arena_t *arena)
1751 {
1752 	arena_chunk_t *chunk;
1753 
1754 	if (arena->spare != NULL) {
1755 		chunk = arena->spare;
1756 		arena->spare = NULL;
1757 
1758 		rb_tree_insert_node(&arena->chunks, chunk);
1759 	} else {
1760 		chunk = (arena_chunk_t *)chunk_alloc(chunksize);
1761 		if (chunk == NULL)
1762 			return (NULL);
1763 #ifdef MALLOC_STATS
1764 		arena->stats.mapped += chunksize;
1765 #endif
1766 
1767 		chunk->arena = arena;
1768 
1769 		/*
1770 		 * Claim that no pages are in use, since the header is merely
1771 		 * overhead.
1772 		 */
1773 		chunk->pages_used = 0;
1774 
1775 		chunk->max_frun_npages = chunk_npages -
1776 		    arena_chunk_header_npages;
1777 		chunk->min_frun_ind = arena_chunk_header_npages;
1778 
1779 		/*
1780 		 * Initialize enough of the map to support one maximal free run.
1781 		 */
1782 		chunk->map[arena_chunk_header_npages].npages = chunk_npages -
1783 		    arena_chunk_header_npages;
1784 		chunk->map[arena_chunk_header_npages].pos = POS_FREE;
1785 		chunk->map[chunk_npages - 1].npages = chunk_npages -
1786 		    arena_chunk_header_npages;
1787 		chunk->map[chunk_npages - 1].pos = POS_FREE;
1788 
1789 		rb_tree_insert_node(&arena->chunks, chunk);
1790 	}
1791 
1792 	return (chunk);
1793 }
1794 
1795 static void
1796 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
1797 {
1798 
1799 	/*
1800 	 * Remove chunk from the chunk tree, regardless of whether this chunk
1801 	 * will be cached, so that the arena does not use it.
1802 	 */
1803 	rb_tree_remove_node(&chunk->arena->chunks, chunk);
1804 
1805 	if (NOT_OPT(hint)) {
1806 		if (arena->spare != NULL) {
1807 			chunk_dealloc((void *)arena->spare, chunksize);
1808 #ifdef MALLOC_STATS
1809 			arena->stats.mapped -= chunksize;
1810 #endif
1811 		}
1812 		arena->spare = chunk;
1813 	} else {
1814 		assert(arena->spare == NULL);
1815 		chunk_dealloc((void *)chunk, chunksize);
1816 #ifdef MALLOC_STATS
1817 		arena->stats.mapped -= chunksize;
1818 #endif
1819 	}
1820 }
1821 
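/*
 * Allocate a run of 'size' bytes (a multiple of the page size).  Existing
 * chunks are searched first, using each chunk's cached maximum free run size
 * to skip chunks that cannot possibly satisfy the request; if none can, a
 * new chunk is allocated.
 */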
1822 static arena_run_t *
1823 arena_run_alloc(arena_t *arena, size_t size)
1824 {
1825 	arena_chunk_t *chunk;
1826 	arena_run_t *run;
1827 	unsigned need_npages;
1828 
1829 	assert(size <= (chunksize - (arena_chunk_header_npages <<
1830 	    pagesize_2pow)));
1831 	assert((size & pagesize_mask) == 0);
1832 
1833 	/*
1834 	 * Search through the arena chunk tree for a large enough free run.
1835 	 * Tree order ensures that an exact fit is picked immediately;
1836 	 * otherwise the lowest-addressed chunk of the next larger size is used.
1837 	 */
1838 	need_npages = (unsigned)(size >> pagesize_2pow);
1839 	/* LINTED */
1840 	for (;;) {
1841 		rb_node_t *node = arena->chunks.rbt_root;
1842 		chunk = NULL;
1843 		while (!RB_SENTINEL_P(node)) {
1844 			assert(offsetof(struct arena_chunk_s, link) == 0);
1845 			arena_chunk_t *chunk_tmp = (arena_chunk_t *)node;
1846 			if (chunk_tmp->max_frun_npages == need_npages) {
1847 				chunk = chunk_tmp;
1848 				break;
1849 			}
1850 			if (chunk_tmp->max_frun_npages < need_npages) {
1851 				node = node->rb_nodes[1];
1852 				continue;
1853 			}
1854 			chunk = chunk_tmp;
1855 			node = node->rb_nodes[0];
1856 		}
1857 		if (chunk == NULL)
1858 			break;
1859 		/*
1860 		 * At this point, the chunk must have a cached run size large
1861 		 * enough to fit the allocation.
1862 		 */
1863 		assert(need_npages <= chunk->max_frun_npages);
1864 		{
1865 			arena_chunk_map_t *mapelm;
1866 			unsigned i;
1867 			unsigned max_frun_npages = 0;
1868 			unsigned min_frun_ind = chunk_npages;
1869 
1870 			assert(chunk->min_frun_ind >=
1871 			    arena_chunk_header_npages);
1872 			for (i = chunk->min_frun_ind; i < chunk_npages;) {
1873 				mapelm = &chunk->map[i];
1874 				if (mapelm->pos == POS_FREE) {
1875 					if (mapelm->npages >= need_npages) {
1876 						run = (arena_run_t *)
1877 						    ((uintptr_t)chunk + (i <<
1878 						    pagesize_2pow));
1879 						/* Update page map. */
1880 						arena_run_split(arena, run,
1881 						    size);
1882 						return (run);
1883 					}
1884 					if (mapelm->npages >
1885 					    max_frun_npages) {
1886 						max_frun_npages =
1887 						    mapelm->npages;
1888 					}
1889 					if (i < min_frun_ind) {
1890 						min_frun_ind = i;
1891 						if (i < chunk->min_frun_ind)
1892 							chunk->min_frun_ind = i;
1893 					}
1894 				}
1895 				i += mapelm->npages;
1896 			}
1897 			/*
1898 			 * Search failure.  Reset cached chunk->max_frun_npages.
1899 			 * chunk->min_frun_ind was already reset above (if
1900 			 * necessary).
1901 			 */
1902 			rb_tree_remove_node(&arena->chunks, chunk);
1903 			chunk->max_frun_npages = max_frun_npages;
1904 			rb_tree_insert_node(&arena->chunks, chunk);
1905 		}
1906 	}
1907 
1908 	/*
1909 	 * No usable runs.  Create a new chunk from which to allocate the run.
1910 	 */
1911 	chunk = arena_chunk_alloc(arena);
1912 	if (chunk == NULL)
1913 		return (NULL);
1914 	run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
1915 	    pagesize_2pow));
1916 	/* Update page map. */
1917 	arena_run_split(arena, run, size);
1918 	return (run);
1919 }
1920 
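/*
 * Return the run at 'run' (spanning 'size' bytes) to its chunk's free page
 * map, coalescing it with adjacent free runs and deallocating the chunk if
 * no pages remain in use.
 */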
1921 static void
1922 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
1923 {
1924 	arena_chunk_t *chunk;
1925 	unsigned run_ind, run_pages;
1926 
1927 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1928 
1929 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1930 	    >> pagesize_2pow);
1931 	assert(run_ind >= arena_chunk_header_npages);
1932 	assert(run_ind < (chunksize >> pagesize_2pow));
1933 	run_pages = (unsigned)(size >> pagesize_2pow);
1934 	assert(run_pages == chunk->map[run_ind].npages);
1935 
1936 	/* Subtract pages from count of pages used in chunk. */
1937 	chunk->pages_used -= run_pages;
1938 
1939 	/* Mark run as deallocated. */
1940 	assert(chunk->map[run_ind].npages == run_pages);
1941 	chunk->map[run_ind].pos = POS_FREE;
1942 	assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
1943 	chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
1944 
1945 	/*
1946 	 * Tell the kernel that we don't need the data in this run, but only if
1947 	 * requested via runtime configuration.
1948 	 */
1949 	if (OPT(hint))
1950 		madvise(run, size, MADV_FREE);
1951 
1952 	/* Try to coalesce with neighboring runs. */
1953 	if (run_ind > arena_chunk_header_npages &&
1954 	    chunk->map[run_ind - 1].pos == POS_FREE) {
1955 		unsigned prev_npages;
1956 
1957 		/* Coalesce with previous run. */
1958 		prev_npages = chunk->map[run_ind - 1].npages;
1959 		run_ind -= prev_npages;
1960 		assert(chunk->map[run_ind].npages == prev_npages);
1961 		assert(chunk->map[run_ind].pos == POS_FREE);
1962 		run_pages += prev_npages;
1963 
1964 		chunk->map[run_ind].npages = run_pages;
1965 		assert(chunk->map[run_ind].pos == POS_FREE);
1966 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
1967 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
1968 	}
1969 
1970 	if (run_ind + run_pages < chunk_npages &&
1971 	    chunk->map[run_ind + run_pages].pos == POS_FREE) {
1972 		unsigned next_npages;
1973 
1974 		/* Coalesce with next run. */
1975 		next_npages = chunk->map[run_ind + run_pages].npages;
1976 		run_pages += next_npages;
1977 		assert(chunk->map[run_ind + run_pages - 1].npages ==
1978 		    next_npages);
1979 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
1980 
1981 		chunk->map[run_ind].npages = run_pages;
1982 		chunk->map[run_ind].pos = POS_FREE;
1983 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
1984 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
1985 	}
1986 
1987 	if (chunk->map[run_ind].npages > chunk->max_frun_npages) {
1988 		rb_tree_remove_node(&arena->chunks, chunk);
1989 		chunk->max_frun_npages = chunk->map[run_ind].npages;
1990 		rb_tree_insert_node(&arena->chunks, chunk);
1991 	}
1992 	if (run_ind < chunk->min_frun_ind)
1993 		chunk->min_frun_ind = run_ind;
1994 
1995 	/* Deallocate chunk if it is now completely unused. */
1996 	if (chunk->pages_used == 0)
1997 		arena_chunk_dealloc(arena, chunk);
1998 }
1999 
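/*
 * Get a run with at least one free region for 'bin', preferring an existing
 * run from the bin's non-full run tree and otherwise carving a fresh run out
 * of the arena and initializing its region bitmask.
 */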
2000 static arena_run_t *
2001 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2002 {
2003 	arena_run_t *run;
2004 	unsigned i, remainder;
2005 
2006 	/* Look for a usable run. */
2007 	if ((run = RB_TREE_MIN(&bin->runs)) != NULL) {
2008 		/* run is guaranteed to have available space. */
2009 		rb_tree_remove_node(&bin->runs, run);
2010 #ifdef MALLOC_STATS
2011 		bin->stats.reruns++;
2012 #endif
2013 		return (run);
2014 	}
2015 	/* No existing runs have any space available. */
2016 
2017 	/* Allocate a new run. */
2018 	run = arena_run_alloc(arena, bin->run_size);
2019 	if (run == NULL)
2020 		return (NULL);
2021 
2022 	/* Initialize run internals. */
2023 	run->bin = bin;
2024 
2025 	for (i = 0; i < bin->regs_mask_nelms; i++)
2026 		run->regs_mask[i] = UINT_MAX;
2027 	remainder = bin->nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1);
2028 	if (remainder != 0) {
2029 		/* The last element has spare bits that need to be unset. */
2030 		run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2031 		    - remainder));
2032 	}
2033 
2034 	run->regs_minelm = 0;
2035 
2036 	run->nfree = bin->nregs;
2037 #ifdef MALLOC_DEBUG
2038 	run->magic = ARENA_RUN_MAGIC;
2039 #endif
2040 
2041 #ifdef MALLOC_STATS
2042 	bin->stats.nruns++;
2043 	bin->stats.curruns++;
2044 	if (bin->stats.curruns > bin->stats.highruns)
2045 		bin->stats.highruns = bin->stats.curruns;
2046 #endif
2047 	return (run);
2048 }
2049 
2050 /* bin->runcur must have space available before this function is called. */
2051 static inline void *
2052 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2053 {
2054 	void *ret;
2055 
2056 	assert(run->magic == ARENA_RUN_MAGIC);
2057 	assert(run->nfree > 0);
2058 
2059 	ret = arena_run_reg_alloc(run, bin);
2060 	assert(ret != NULL);
2061 	run->nfree--;
2062 
2063 	return (ret);
2064 }
2065 
2066 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2067 static void *
2068 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2069 {
2070 
2071 	bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2072 	if (bin->runcur == NULL)
2073 		return (NULL);
2074 	assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2075 	assert(bin->runcur->nfree > 0);
2076 
2077 	return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2078 }
2079 
2080 /*
2081  * Calculate bin->run_size such that it meets the following constraints:
2082  *
2083  *   *) bin->run_size >= min_run_size
2084  *   *) bin->run_size <= arena_maxclass
2085  *   *) bin->run_size <= RUN_MAX_SMALL
2086  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2087  *
2088  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2089  * also calculated here, since these settings are all interdependent.
2090  */
2091 static size_t
2092 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
2093 {
2094 	size_t try_run_size, good_run_size;
2095 	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
2096 	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
2097 
2098 	assert(min_run_size >= pagesize);
2099 	assert(min_run_size <= arena_maxclass);
2100 	assert(min_run_size <= RUN_MAX_SMALL);
2101 
2102 	/*
2103 	 * Calculate known-valid settings before entering the run_size
2104 	 * expansion loop, so that the first part of the loop always copies
2105 	 * valid settings.
2106 	 *
2107 	 * The do..while loop iteratively reduces the number of regions until
2108 	 * the run header and the regions no longer overlap.  A closed formula
2109 	 * would be quite messy, since there is an interdependency between the
2110 	 * header's mask length and the number of regions.
2111 	 */
2112 	try_run_size = min_run_size;
2113 	try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2114 	    bin->reg_size) + 1); /* Counteract try_nregs-- in loop. */
2115 	do {
2116 		try_nregs--;
2117 		try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2118 		    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
2119 		try_reg0_offset = (unsigned)(try_run_size -
2120 		    (try_nregs * bin->reg_size));
2121 	} while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
2122 	    > try_reg0_offset);
2123 
2124 	/* run_size expansion loop. */
2125 	do {
2126 		/*
2127 		 * Copy valid settings before trying more aggressive settings.
2128 		 */
2129 		good_run_size = try_run_size;
2130 		good_nregs = try_nregs;
2131 		good_mask_nelms = try_mask_nelms;
2132 		good_reg0_offset = try_reg0_offset;
2133 
2134 		/* Try more aggressive settings. */
2135 		try_run_size += pagesize;
2136 		try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2137 		    bin->reg_size) + 1); /* Counteract try_nregs-- in loop. */
2138 		do {
2139 			try_nregs--;
2140 			try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2141 			    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ?
2142 			    1 : 0);
2143 			try_reg0_offset = (unsigned)(try_run_size - (try_nregs *
2144 			    bin->reg_size));
2145 		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
2146 		    (try_mask_nelms - 1)) > try_reg0_offset);
2147 	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
2148 	    && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
2149 	    && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
2150 
2151 	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
2152 	    <= good_reg0_offset);
2153 	assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
2154 
2155 	/* Copy final settings. */
2156 	bin->run_size = good_run_size;
2157 	bin->nregs = good_nregs;
2158 	bin->regs_mask_nelms = good_mask_nelms;
2159 	bin->reg0_offset = good_reg0_offset;
2160 
2161 	return (good_run_size);
2162 }
2163 
2164 static void *
2165 arena_malloc(arena_t *arena, size_t size)
2166 {
2167 	void *ret;
2168 
2169 	assert(arena != NULL);
2170 	assert(arena->magic == ARENA_MAGIC);
2171 	assert(size != 0);
2172 	assert(QUANTUM_CEILING(size) <= arena_maxclass);
2173 
2174 	if (size <= bin_maxclass) {
2175 		arena_bin_t *bin;
2176 		arena_run_t *run;
2177 
2178 		/* Small allocation. */
2179 
2180 		if (size < small_min) {
2181 			/* Tiny. */
2182 			size = pow2_ceil(size);
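			/*
			 * For a power-of-two size, ffs() of the shifted size
			 * yields the tiny bin index: bins[0] serves regions of
			 * (1 << TINY_MIN_2POW) bytes and each successive bin
			 * doubles the region size.
			 */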
2183 			bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
2184 			    1)))];
2185 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2186 			/*
2187 			 * Bin calculation is always correct, but we may need
2188 			 * to fix size for the purposes of assertions and/or
2189 			 * stats accuracy.
2190 			 */
2191 			if (size < (1 << TINY_MIN_2POW))
2192 				size = (1 << TINY_MIN_2POW);
2193 #endif
2194 		} else if (size <= small_max) {
2195 			/* Quantum-spaced. */
2196 			size = QUANTUM_CEILING(size);
2197 			bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2198 			    - 1];
2199 		} else {
2200 			/* Sub-page. */
2201 			size = pow2_ceil(size);
2202 			bin = &arena->bins[ntbins + nqbins
2203 			    + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
2204 		}
2205 		assert(size == bin->reg_size);
2206 
2207 		malloc_mutex_lock(&arena->mtx);
2208 		if ((run = bin->runcur) != NULL && run->nfree > 0)
2209 			ret = arena_bin_malloc_easy(arena, bin, run);
2210 		else
2211 			ret = arena_bin_malloc_hard(arena, bin);
2212 
2213 		if (ret == NULL) {
2214 			malloc_mutex_unlock(&arena->mtx);
2215 			return (NULL);
2216 		}
2217 
2218 #ifdef MALLOC_STATS
2219 		bin->stats.nrequests++;
2220 		arena->stats.nmalloc_small++;
2221 		arena->stats.allocated_small += size;
2222 #endif
2223 	} else {
2224 		/* Large allocation. */
2225 		size = PAGE_CEILING(size);
2226 		malloc_mutex_lock(&arena->mtx);
2227 		ret = (void *)arena_run_alloc(arena, size);
2228 		if (ret == NULL) {
2229 			malloc_mutex_unlock(&arena->mtx);
2230 			return (NULL);
2231 		}
2232 #ifdef MALLOC_STATS
2233 		arena->stats.nmalloc_large++;
2234 		arena->stats.allocated_large += size;
2235 #endif
2236 	}
2237 
2238 	malloc_mutex_unlock(&arena->mtx);
2239 
2240 	if (OPT(junk))
2241 		memset(ret, 0xa5, size);
2242 	else if (OPT(zero))
2243 		memset(ret, 0, size);
2244 	return (ret);
2245 }
2246 
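/*
 * Release 'npages' pages starting at 'pageind' from an over-sized aligned
 * run by marking them as a separate run and immediately deallocating it.
 */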
2247 static inline void
2248 arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
2249     unsigned npages)
2250 {
2251 	unsigned i;
2252 
2253 	assert(npages > 0);
2254 
2255 	/*
2256 	 * Modify the map such that arena_run_dalloc() sees the run as
2257 	 * separately allocated.
2258 	 */
2259 	for (i = 0; i < npages; i++) {
2260 		chunk->map[pageind + i].npages = npages;
2261 		chunk->map[pageind + i].pos = i;
2262 	}
2263 	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
2264 	    pagesize_2pow)), npages << pagesize_2pow);
2265 }
2266 
2267 /* Only handles large allocations that require more than page alignment. */
2268 static void *
2269 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
2270 {
2271 	void *ret;
2272 	size_t offset;
2273 	arena_chunk_t *chunk;
2274 	unsigned pageind, i, npages;
2275 
2276 	assert((size & pagesize_mask) == 0);
2277 	assert((alignment & pagesize_mask) == 0);
2278 
2279 	npages = (unsigned)(size >> pagesize_2pow);
2280 
2281 	malloc_mutex_lock(&arena->mtx);
2282 	ret = (void *)arena_run_alloc(arena, alloc_size);
2283 	if (ret == NULL) {
2284 		malloc_mutex_unlock(&arena->mtx);
2285 		return (NULL);
2286 	}
2287 
2288 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
2289 
2290 	offset = (uintptr_t)ret & (alignment - 1);
2291 	assert((offset & pagesize_mask) == 0);
2292 	assert(offset < alloc_size);
2293 	if (offset == 0) {
2294 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2295 		    pagesize_2pow);
2296 
2297 		/* Update the map for the run to be kept. */
2298 		for (i = 0; i < npages; i++) {
2299 			chunk->map[pageind + i].npages = npages;
2300 			assert(chunk->map[pageind + i].pos == i);
2301 		}
2302 
2303 		/* Trim trailing space. */
2304 		arena_palloc_trim(arena, chunk, pageind + npages,
2305 		    (unsigned)((alloc_size - size) >> pagesize_2pow));
2306 	} else {
2307 		size_t leadsize, trailsize;
2308 
2309 		leadsize = alignment - offset;
2310 		ret = (void *)((uintptr_t)ret + leadsize);
2311 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2312 		    pagesize_2pow);
2313 
2314 		/* Update the map for the run to be kept. */
2315 		for (i = 0; i < npages; i++) {
2316 			chunk->map[pageind + i].npages = npages;
2317 			chunk->map[pageind + i].pos = i;
2318 		}
2319 
2320 		/* Trim leading space. */
2321 		arena_palloc_trim(arena, chunk,
2322 		    (unsigned)(pageind - (leadsize >> pagesize_2pow)),
2323 		    (unsigned)(leadsize >> pagesize_2pow));
2324 
2325 		trailsize = alloc_size - leadsize - size;
2326 		if (trailsize != 0) {
2327 			/* Trim trailing space. */
2328 			assert(trailsize < alloc_size);
2329 			arena_palloc_trim(arena, chunk, pageind + npages,
2330 			    (unsigned)(trailsize >> pagesize_2pow));
2331 		}
2332 	}
2333 
2334 #ifdef MALLOC_STATS
2335 	arena->stats.nmalloc_large++;
2336 	arena->stats.allocated_large += size;
2337 #endif
2338 	malloc_mutex_unlock(&arena->mtx);
2339 
2340 	if (OPT(junk))
2341 		memset(ret, 0xa5, size);
2342 	else if (OPT(zero))
2343 		memset(ret, 0, size);
2344 	return (ret);
2345 }
2346 
2347 /* Return the size of the allocation pointed to by ptr. */
2348 static size_t
2349 arena_salloc(const void *ptr)
2350 {
2351 	size_t ret;
2352 	arena_chunk_t *chunk;
2353 	arena_chunk_map_t *mapelm;
2354 	unsigned pageind;
2355 
2356 	assert(ptr != NULL);
2357 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2358 
2359 	/*
2360 	 * No arena data structures that we query here can change in a way that
2361 	 * affects this function, so we don't need to lock.
2362 	 */
2363 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2364 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2365 	    pagesize_2pow);
2366 	mapelm = &chunk->map[pageind];
2367 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2368 	    pagesize_2pow)) {
2369 		arena_run_t *run;
2370 
2371 		pageind -= mapelm->pos;
2372 
2373 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2374 		    pagesize_2pow));
2375 		assert(run->magic == ARENA_RUN_MAGIC);
2376 		ret = run->bin->reg_size;
2377 	} else
2378 		ret = mapelm->npages << pagesize_2pow;
2379 
2380 	return (ret);
2381 }
2382 
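/*
 * Resize the allocation at 'ptr' from 'oldsize' to 'size'.  The region is
 * reused in place when both sizes fall in the same size class; otherwise a
 * new allocation is made and the data copied.
 */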
2383 static void *
2384 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2385 {
2386 	void *ret;
2387 
2388 	/* Avoid moving the allocation if the size class would not change. */
2389 	if (size < small_min) {
2390 		if (oldsize < small_min &&
2391 		    ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
2392 		    == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
2393 			goto IN_PLACE;
2394 	} else if (size <= small_max) {
2395 		if (oldsize >= small_min && oldsize <= small_max &&
2396 		    (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2397 		    == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2398 			goto IN_PLACE;
2399 	} else {
2400 		/*
2401 		 * We make no attempt to resize runs here, though it would be
2402 		 * possible to do so.
2403 		 */
2404 		if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
2405 			goto IN_PLACE;
2406 	}
2407 
2408 	/*
2409 	 * If we get here, then size and oldsize are different enough that we
2410 	 * need to use a different size class.  In that case, fall back to
2411 	 * allocating new space and copying.
2412 	 */
2413 	ret = arena_malloc(choose_arena(), size);
2414 	if (ret == NULL)
2415 		return (NULL);
2416 
2417 	/* Junk/zero-filling were already done by arena_malloc(). */
2418 	if (size < oldsize)
2419 		memcpy(ret, ptr, size);
2420 	else
2421 		memcpy(ret, ptr, oldsize);
2422 	idalloc(ptr);
2423 	return (ret);
2424 IN_PLACE:
2425 	if (OPT(junk) && size < oldsize)
2426 		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
2427 	else if (OPT(zero) && size > oldsize)
2428 		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
2429 	return (ptr);
2430 }
2431 
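/*
 * Free the small or large allocation at 'ptr', which lies within 'chunk'
 * and is owned by 'arena'.
 */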
2432 static void
2433 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2434 {
2435 	unsigned pageind;
2436 	arena_chunk_map_t *mapelm;
2437 	size_t size;
2438 
2439 	assert(arena != NULL);
2440 	assert(arena->magic == ARENA_MAGIC);
2441 	assert(chunk->arena == arena);
2442 	assert(ptr != NULL);
2443 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2444 
2445 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2446 	    pagesize_2pow);
2447 	mapelm = &chunk->map[pageind];
2448 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2449 	    pagesize_2pow)) {
2450 		arena_run_t *run;
2451 		arena_bin_t *bin;
2452 
2453 		/* Small allocation. */
2454 
2455 		pageind -= mapelm->pos;
2456 
2457 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2458 		    pagesize_2pow));
2459 		assert(run->magic == ARENA_RUN_MAGIC);
2460 		bin = run->bin;
2461 		size = bin->reg_size;
2462 
2463 		if (OPT(junk))
2464 			memset(ptr, 0x5a, size);
2465 
2466 		malloc_mutex_lock(&arena->mtx);
2467 		arena_run_reg_dalloc(run, bin, ptr, size);
2468 		run->nfree++;
2469 
2470 		if (run->nfree == bin->nregs) {
2471 			/* Deallocate run. */
2472 			if (run == bin->runcur)
2473 				bin->runcur = NULL;
2474 			else if (bin->nregs != 1) {
2475 				/*
2476 				 * This block's conditional is necessary because
2477 				 * if the run only contains one region, then it
2478 				 * never gets inserted into the non-full runs
2479 				 * tree.
2480 				 */
2481 				rb_tree_remove_node(&bin->runs, run);
2482 			}
2483 #ifdef MALLOC_DEBUG
2484 			run->magic = 0;
2485 #endif
2486 			arena_run_dalloc(arena, run, bin->run_size);
2487 #ifdef MALLOC_STATS
2488 			bin->stats.curruns--;
2489 #endif
2490 		} else if (run->nfree == 1 && run != bin->runcur) {
2491 			/*
2492 			 * Make sure that bin->runcur always refers to the
2493 			 * lowest non-full run, if one exists.
2494 			 */
2495 			if (bin->runcur == NULL)
2496 				bin->runcur = run;
2497 			else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
2498 				/* Switch runcur. */
2499 				if (bin->runcur->nfree > 0) {
2500 					/* Insert runcur. */
2501 					rb_tree_insert_node(&bin->runs, bin->runcur);
2502 				}
2503 				bin->runcur = run;
2504 			} else {
2505 				rb_tree_insert_node(&bin->runs, run);
2506 			}
2507 		}
2508 #ifdef MALLOC_STATS
2509 		arena->stats.allocated_small -= size;
2510 		arena->stats.ndalloc_small++;
2511 #endif
2512 	} else {
2513 		/* Large allocation. */
2514 
2515 		size = mapelm->npages << pagesize_2pow;
2516 		assert((((uintptr_t)ptr) & pagesize_mask) == 0);
2517 
2518 		if (OPT(junk))
2519 			memset(ptr, 0x5a, size);
2520 
2521 		malloc_mutex_lock(&arena->mtx);
2522 		arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2523 #ifdef MALLOC_STATS
2524 		arena->stats.allocated_large -= size;
2525 		arena->stats.ndalloc_large++;
2526 #endif
2527 	}
2528 
2529 	malloc_mutex_unlock(&arena->mtx);
2530 }
2531 
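/*
 * Initialize an arena: its mutex, its chunk tree, and one bin for each tiny,
 * quantum-spaced, and sub-page size class.
 */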
2532 static void
2533 arena_new(arena_t *arena)
2534 {
2535 	unsigned i;
2536 	arena_bin_t *bin;
2537 	size_t prev_run_size;
2538 
2539 	malloc_mutex_init(&arena->mtx);
2540 
2541 #ifdef MALLOC_STATS
2542 	memset(&arena->stats, 0, sizeof(arena_stats_t));
2543 #endif
2544 
2545 	/* Initialize chunks. */
2546 	rb_tree_init(&arena->chunks, &arena_chunk_tree_ops);
2547 	arena->spare = NULL;
2548 
2549 	/* Initialize bins. */
2550 	prev_run_size = pagesize;
2551 
2552 	/* (2^n)-spaced tiny bins. */
2553 	for (i = 0; i < ntbins; i++) {
2554 		bin = &arena->bins[i];
2555 		bin->runcur = NULL;
2556 		rb_tree_init(&bin->runs, &arena_run_tree_ops);
2557 
2558 		bin->reg_size = (1 << (TINY_MIN_2POW + i));
2559 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2560 
2561 #ifdef MALLOC_STATS
2562 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2563 #endif
2564 	}
2565 
2566 	/* Quantum-spaced bins. */
2567 	for (; i < ntbins + nqbins; i++) {
2568 		bin = &arena->bins[i];
2569 		bin->runcur = NULL;
2570 		rb_tree_init(&bin->runs, &arena_run_tree_ops);
2571 
2572 		bin->reg_size = quantum * (i - ntbins + 1);
2573 /*
2574 		pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2575 */
2576 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2577 
2578 #ifdef MALLOC_STATS
2579 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2580 #endif
2581 	}
2582 
2583 	/* (2^n)-spaced sub-page bins. */
2584 	for (; i < ntbins + nqbins + nsbins; i++) {
2585 		bin = &arena->bins[i];
2586 		bin->runcur = NULL;
2587 		rb_tree_init(&bin->runs, &arena_run_tree_ops);
2588 
2589 		bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2590 
2591 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2592 
2593 #ifdef MALLOC_STATS
2594 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2595 #endif
2596 	}
2597 
2598 #ifdef MALLOC_DEBUG
2599 	arena->magic = ARENA_MAGIC;
2600 #endif
2601 }
2602 
2603 /* Create and initialize a new arena; the caller installs it in arenas[]. */
2604 static arena_t *
2605 arenas_extend(void)
2606 {
2607 	arena_t *ret;
2608 
2609 	/* Allocate enough space for trailing bins. */
2610 	ret = (arena_t *)base_alloc(sizeof(arena_t)
2611 	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2612 	if (ret != NULL) {
2613 		arena_new(ret);
2614 		return (ret);
2615 	}
2616 	/* Only reached if there is an OOM error. */
2617 
2618 	/*
2619 	 * OOM here is quite inconvenient to propagate, since dealing with it
2620 	 * would require a check for failure in the fast path.  Instead, punt
2621 	 * by using arenas[0].  In practice, this is an extremely unlikely
2622 	 * failure.
2623 	 */
2624 	_malloc_message(getprogname(),
2625 	    ": (malloc) Error initializing arena\n", "", "");
2626 	if (OPT(abort))
2627 		abort();
2628 
2629 	return (arenas[0]);
2630 }
2631 
2632 /*
2633  * End arena.
2634  */
2635 /******************************************************************************/
2636 /*
2637  * Begin general internal functions.
2638  */
2639 
2640 static void *
2641 huge_malloc(size_t size)
2642 {
2643 	void *ret;
2644 	size_t csize;
2645 	chunk_node_t *node;
2646 
2647 	/* Allocate one or more contiguous chunks for this request. */
2648 
2649 	csize = CHUNK_CEILING(size);
2650 	if (csize == 0) {
2651 		/* size is large enough to cause size_t wrap-around. */
2652 		return (NULL);
2653 	}
2654 
2655 	/* Allocate a chunk node with which to track the chunk. */
2656 	node = base_chunk_node_alloc();
2657 	if (node == NULL)
2658 		return (NULL);
2659 
2660 	ret = chunk_alloc(csize);
2661 	if (ret == NULL) {
2662 		base_chunk_node_dealloc(node);
2663 		return (NULL);
2664 	}
2665 
2666 	/* Insert node into huge. */
2667 	node->chunk = ret;
2668 	node->size = csize;
2669 
2670 	malloc_mutex_lock(&chunks_mtx);
2671 	rb_tree_insert_node(&huge, node);
2672 #ifdef MALLOC_STATS
2673 	huge_nmalloc++;
2674 	huge_allocated += csize;
2675 #endif
2676 	malloc_mutex_unlock(&chunks_mtx);
2677 
2678 	if (OPT(junk))
2679 		memset(ret, 0xa5, csize);
2680 	else if (OPT(zero))
2681 		memset(ret, 0, csize);
2682 
2683 	return (ret);
2684 }
2685 
2686 /* Only handles large allocations that require more than chunk alignment. */
2687 static void *
2688 huge_palloc(size_t alignment, size_t size)
2689 {
2690 	void *ret;
2691 	size_t alloc_size, chunk_size, offset;
2692 	chunk_node_t *node;
2693 
2694 	/*
2695 	 * This allocation requires alignment that is even larger than chunk
2696 	 * alignment.  This means that huge_malloc() isn't good enough.
2697 	 *
2698 	 * Allocate almost twice as many chunks as are demanded by the size or
2699 	 * alignment, in order to ensure that the alignment can be achieved,
2700 	 * then unmap the leading and trailing chunks.
2701 	 */
2702 	assert(alignment >= chunksize);
2703 
2704 	chunk_size = CHUNK_CEILING(size);
2705 
2706 	if (size >= alignment)
2707 		alloc_size = chunk_size + alignment - chunksize;
2708 	else
2709 		alloc_size = (alignment << 1) - chunksize;
2710 
2711 	/* Allocate a chunk node with which to track the chunk. */
2712 	node = base_chunk_node_alloc();
2713 	if (node == NULL)
2714 		return (NULL);
2715 
2716 	ret = chunk_alloc(alloc_size);
2717 	if (ret == NULL) {
2718 		base_chunk_node_dealloc(node);
2719 		return (NULL);
2720 	}
2721 
2722 	offset = (uintptr_t)ret & (alignment - 1);
2723 	assert((offset & chunksize_mask) == 0);
2724 	assert(offset < alloc_size);
2725 	if (offset == 0) {
2726 		/* Trim trailing space. */
2727 		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
2728 		    - chunk_size);
2729 	} else {
2730 		size_t trailsize;
2731 
2732 		/* Trim leading space. */
2733 		chunk_dealloc(ret, alignment - offset);
2734 
2735 		ret = (void *)((uintptr_t)ret + (alignment - offset));
2736 
2737 		trailsize = alloc_size - (alignment - offset) - chunk_size;
2738 		if (trailsize != 0) {
2739 		    /* Trim trailing space. */
2740 		    assert(trailsize < alloc_size);
2741 		    chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
2742 			trailsize);
2743 		}
2744 	}
2745 
2746 	/* Insert node into huge. */
2747 	node->chunk = ret;
2748 	node->size = chunk_size;
2749 
2750 	malloc_mutex_lock(&chunks_mtx);
2751 	rb_tree_insert_node(&huge, node);
2752 #ifdef MALLOC_STATS
2753 	huge_nmalloc++;
2754 	huge_allocated += chunk_size;
2755 #endif
2756 	malloc_mutex_unlock(&chunks_mtx);
2757 
2758 	if (OPT(junk))
2759 		memset(ret, 0xa5, chunk_size);
2760 	else if (OPT(zero))
2761 		memset(ret, 0, chunk_size);
2762 
2763 	return (ret);
2764 }
2765 
2766 static void *
2767 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2768 {
2769 	void *ret;
2770 
2771 	/* Avoid moving the allocation if the size class would not change. */
2772 	if (oldsize > arena_maxclass &&
2773 	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
2774 		if (OPT(junk) && size < oldsize) {
2775 			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
2776 			    - size);
2777 		} else if (OPT(zero) && size > oldsize) {
2778 			memset((void *)((uintptr_t)ptr + oldsize), 0, size
2779 			    - oldsize);
2780 		}
2781 		return (ptr);
2782 	}
2783 
2784 	if (CHUNK_ADDR2BASE(ptr) == ptr
2785 #ifdef USE_BRK
2786 	    && ((uintptr_t)ptr < (uintptr_t)brk_base
2787 	    || (uintptr_t)ptr >= (uintptr_t)brk_max)
2788 #endif
2789 	    ) {
2790 		chunk_node_t *node, key;
2791 		void *newptr;
2792 		size_t oldcsize;
2793 		size_t newcsize;
2794 
2795 		newcsize = CHUNK_CEILING(size);
2796 		oldcsize = CHUNK_CEILING(oldsize);
2797 		assert(oldcsize != newcsize);
2798 		if (newcsize == 0) {
2799 			/* size_t wrap-around */
2800 			return (NULL);
2801 		}
2802 
2803 		/*
2804 		 * Remove the old region from the tree now.  If mremap()
2805 		 * returns the region to the system, another thread may
2806 		 * map it for the same huge allocation and insert it into
2807 		 * the tree before we acquire the mutex again.
2808 		 */
2809 		malloc_mutex_lock(&chunks_mtx);
2810 		key.chunk = __UNCONST(ptr);
2811 		node = rb_tree_find_node(&huge, &key);
2812 		assert(node != NULL);
2813 		assert(node->chunk == ptr);
2814 		assert(node->size == oldcsize);
2815 		rb_tree_remove_node(&huge, node);
2816 		malloc_mutex_unlock(&chunks_mtx);
2817 
2818 		newptr = mremap(ptr, oldcsize, NULL, newcsize,
2819 		    MAP_ALIGNED(chunksize_2pow));
2820 		if (newptr == MAP_FAILED) {
2821 			/* We still own the old region. */
2822 			malloc_mutex_lock(&chunks_mtx);
2823 			rb_tree_insert_node(&huge, node);
2824 			malloc_mutex_unlock(&chunks_mtx);
2825 		} else {
2826 			assert(CHUNK_ADDR2BASE(newptr) == newptr);
2827 
2828 			/* Insert new or resized old region. */
2829 			malloc_mutex_lock(&chunks_mtx);
2830 			node->size = newcsize;
2831 			node->chunk = newptr;
2832 			rb_tree_insert_node(&huge, node);
2833 #ifdef MALLOC_STATS
2834 			huge_nralloc++;
2835 			huge_allocated += newcsize - oldcsize;
2836 			if (newcsize > oldcsize) {
2837 				stats_chunks.curchunks +=
2838 				    (newcsize - oldcsize) / chunksize;
2839 				if (stats_chunks.curchunks >
2840 				    stats_chunks.highchunks)
2841 					stats_chunks.highchunks =
2842 					    stats_chunks.curchunks;
2843 			} else {
2844 				stats_chunks.curchunks -=
2845 				    (oldcsize - newcsize) / chunksize;
2846 			}
2847 #endif
2848 			malloc_mutex_unlock(&chunks_mtx);
2849 
2850 			if (OPT(junk) && size < oldsize) {
2851 				memset((void *)((uintptr_t)newptr + size), 0x5a,
2852 				    newcsize - size);
2853 			} else if (OPT(zero) && size > oldsize) {
2854 				memset((void *)((uintptr_t)newptr + oldsize), 0,
2855 				    size - oldsize);
2856 			}
2857 			return (newptr);
2858 		}
2859 	}
2860 
2861 	/*
2862 	 * If we get here, then size and oldsize are different enough that we
2863 	 * need to use a different size class.  In that case, fall back to
2864 	 * allocating new space and copying.
2865 	 */
2866 	ret = huge_malloc(size);
2867 	if (ret == NULL)
2868 		return (NULL);
2869 
2870 	if (CHUNK_ADDR2BASE(ptr) == ptr) {
2871 		/* The old allocation is a chunk. */
2872 		if (size < oldsize)
2873 			memcpy(ret, ptr, size);
2874 		else
2875 			memcpy(ret, ptr, oldsize);
2876 	} else {
2877 		/* The old allocation is a region. */
2878 		assert(oldsize < size);
2879 		memcpy(ret, ptr, oldsize);
2880 	}
2881 	idalloc(ptr);
2882 	return (ret);
2883 }
2884 
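/*
 * Free a huge allocation: remove its tracking node from the huge tree, unmap
 * the chunk(s), and release the tracking node.
 */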
2885 static void
2886 huge_dalloc(void *ptr)
2887 {
2888 	chunk_node_t key;
2889 	chunk_node_t *node;
2890 
2891 	malloc_mutex_lock(&chunks_mtx);
2892 
2893 	/* Extract from tree of huge allocations. */
2894 	key.chunk = ptr;
2895 	node = rb_tree_find_node(&huge, &key);
2896 	assert(node != NULL);
2897 	assert(node->chunk == ptr);
2898 	rb_tree_remove_node(&huge, node);
2899 
2900 #ifdef MALLOC_STATS
2901 	huge_ndalloc++;
2902 	huge_allocated -= node->size;
2903 #endif
2904 
2905 	malloc_mutex_unlock(&chunks_mtx);
2906 
2907 	/* Unmap chunk. */
2908 #ifdef USE_BRK
2909 	if (OPT(junk))
2910 		memset(node->chunk, 0x5a, node->size);
2911 #endif
2912 	chunk_dealloc(node->chunk, node->size);
2913 
2914 	base_chunk_node_dealloc(node);
2915 }
2916 
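/*
 * Internal allocation entry point: requests up to arena_maxclass are served
 * by an arena, larger requests by the huge allocator.
 */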
2917 static void *
2918 imalloc(size_t size)
2919 {
2920 	void *ret;
2921 
2922 	assert(size != 0);
2923 
2924 	if (size <= arena_maxclass)
2925 		ret = arena_malloc(choose_arena(), size);
2926 	else
2927 		ret = huge_malloc(size);
2928 
2929 	return (ret);
2930 }
2931 
2932 static void *
2933 ipalloc(size_t alignment, size_t size)
2934 {
2935 	void *ret;
2936 	size_t ceil_size;
2937 
2938 	/*
2939 	 * Round size up to the nearest multiple of alignment.
2940 	 *
2941 	 * This done, we can take advantage of the fact that for each small
2942 	 * size class, every object is aligned at the smallest power of two
2943 	 * that is non-zero in the base two representation of the size.  For
2944 	 * example:
2945 	 *
2946 	 *   Size |   Base 2 | Minimum alignment
2947 	 *   -----+----------+------------------
2948 	 *     96 |  1100000 |  32
2949 	 *    144 | 10100000 |  32
2950 	 *    192 | 11000000 |  64
2951 	 *
2952 	 * Depending on runtime settings, it is possible that arena_malloc()
2953 	 * will further round up to a power of two, but that never causes
2954 	 * correctness issues.
2955 	 */
2956 	ceil_size = (size + (alignment - 1)) & (-alignment);
2957 	/*
2958 	 * (ceil_size < size) protects against the combination of maximal
2959 	 * alignment and size greater than maximal alignment.
2960 	 */
2961 	if (ceil_size < size) {
2962 		/* size_t overflow. */
2963 		return (NULL);
2964 	}
2965 
2966 	if (ceil_size <= pagesize || (alignment <= pagesize
2967 	    && ceil_size <= arena_maxclass))
2968 		ret = arena_malloc(choose_arena(), ceil_size);
2969 	else {
2970 		size_t run_size;
2971 
2972 		/*
2973 		 * We can't achieve sub-page alignment, so round up alignment
2974 		 * permanently; it makes later calculations simpler.
2975 		 */
2976 		alignment = PAGE_CEILING(alignment);
2977 		ceil_size = PAGE_CEILING(size);
2978 		/*
2979 		 * (ceil_size < size) protects against very large sizes within
2980 		 * pagesize of SIZE_T_MAX.
2981 		 *
2982 		 * (ceil_size + alignment < ceil_size) protects against the
2983 		 * combination of maximal alignment and ceil_size large enough
2984 		 * to cause overflow.  This is similar to the first overflow
2985 		 * check above, but it needs to be repeated due to the new
2986 		 * ceil_size value, which may now be *equal* to maximal
2987 		 * alignment, whereas before we only detected overflow if the
2988 		 * original size was *greater* than maximal alignment.
2989 		 */
2990 		if (ceil_size < size || ceil_size + alignment < ceil_size) {
2991 			/* size_t overflow. */
2992 			return (NULL);
2993 		}
2994 
2995 		/*
2996 		 * Calculate the size of the over-size run that arena_palloc()
2997 		 * would need to allocate in order to guarantee the alignment.
2998 		 */
2999 		if (ceil_size >= alignment)
3000 			run_size = ceil_size + alignment - pagesize;
3001 		else {
3002 			/*
3003 			 * It is possible that (alignment << 1) will cause
3004 			 * overflow, but it doesn't matter because we also
3005 			 * subtract pagesize, which in the case of overflow
3006 			 * leaves us with a very large run_size.  That causes
3007 			 * the first conditional below to fail, which means
3008 			 * that the bogus run_size value never gets used for
3009 			 * anything important.
3010 			 */
3011 			run_size = (alignment << 1) - pagesize;
3012 		}
3013 
3014 		if (run_size <= arena_maxclass) {
3015 			ret = arena_palloc(choose_arena(), alignment, ceil_size,
3016 			    run_size);
3017 		} else if (alignment <= chunksize)
3018 			ret = huge_malloc(ceil_size);
3019 		else
3020 			ret = huge_palloc(alignment, ceil_size);
3021 	}
3022 
3023 	assert(((uintptr_t)ret & (alignment - 1)) == 0);
3024 	return (ret);
3025 }
3026 
3027 static void *
3028 icalloc(size_t size)
3029 {
3030 	void *ret;
3031 
3032 	if (size <= arena_maxclass) {
3033 		ret = arena_malloc(choose_arena(), size);
3034 		if (ret == NULL)
3035 			return (NULL);
3036 		memset(ret, 0, size);
3037 	} else {
3038 		/*
3039 		 * The virtual memory system provides zero-filled pages, so
3040 		 * there is no need to do so manually, unless opt_junk is
3041 		 * enabled, in which case huge_malloc() fills huge allocations
3042 		 * with junk.
3043 		 */
3044 		ret = huge_malloc(size);
3045 		if (ret == NULL)
3046 			return (NULL);
3047 
3048 		if (OPT(junk))
3049 			memset(ret, 0, size);
3050 #ifdef USE_BRK
3051 		else if ((uintptr_t)ret >= (uintptr_t)brk_base
3052 		    && (uintptr_t)ret < (uintptr_t)brk_max) {
3053 			/*
3054 			 * This may be a re-used brk chunk.  Therefore, zero
3055 			 * the memory.
3056 			 */
3057 			memset(ret, 0, size);
3058 		}
3059 #endif
3060 	}
3061 
3062 	return (ret);
3063 }
3064 
3065 static size_t
3066 isalloc(const void *ptr)
3067 {
3068 	size_t ret;
3069 	arena_chunk_t *chunk;
3070 
3071 	assert(ptr != NULL);
3072 
3073 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3074 	if (chunk != ptr) {
3075 		/* Region. */
3076 		assert(chunk->arena->magic == ARENA_MAGIC);
3077 
3078 		ret = arena_salloc(ptr);
3079 	} else {
3080 		chunk_node_t *node, key;
3081 
3082 		/* Chunk (huge allocation). */
3083 
3084 		malloc_mutex_lock(&chunks_mtx);
3085 
3086 		/* Extract from tree of huge allocations. */
3087 		key.chunk = __UNCONST(ptr);
3088 		node = rb_tree_find_node(&huge, &key);
3089 		assert(node != NULL);
3090 
3091 		ret = node->size;
3092 
3093 		malloc_mutex_unlock(&chunks_mtx);
3094 	}
3095 
3096 	return (ret);
3097 }
3098 
3099 static void *
3100 iralloc(void *ptr, size_t size)
3101 {
3102 	void *ret;
3103 	size_t oldsize;
3104 
3105 	assert(ptr != NULL);
3106 	assert(size != 0);
3107 
3108 	oldsize = isalloc(ptr);
3109 
3110 	if (size <= arena_maxclass)
3111 		ret = arena_ralloc(ptr, size, oldsize);
3112 	else
3113 		ret = huge_ralloc(ptr, size, oldsize);
3114 
3115 	return (ret);
3116 }
3117 
3118 static void
3119 idalloc(void *ptr)
3120 {
3121 	arena_chunk_t *chunk;
3122 
3123 	assert(ptr != NULL);
3124 
3125 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3126 	if (chunk != ptr) {
3127 		/* Region. */
3128 		arena_dalloc(chunk->arena, chunk, ptr);
3129 	} else
3130 		huge_dalloc(ptr);
3131 }
3132 
3133 static void
3134 malloc_print_stats(void)
3135 {
3136 
3137 	if (OPT(print_stats)) {
3138 		char s[UMAX2S_BUFSIZE];
3139 		_malloc_message("___ Begin malloc statistics ___\n", "", "",
3140 		    "");
3141 		_malloc_message("Assertions ",
3142 #ifdef NDEBUG
3143 		    "disabled",
3144 #else
3145 		    "enabled",
3146 #endif
3147 		    "\n", "");
3148 		_malloc_message("Boolean MALLOC_OPTIONS: ",
3149 		    opt_abort ? "A" : "a",
3150 		    opt_junk ? "J" : "j",
3151 		    opt_hint ? "H" : "h");
3152 		_malloc_message(OPT(utrace) ? "PU" : "Pu",
3153 		    opt_sysv ? "V" : "v",
3154 		    opt_xmalloc ? "X" : "x",
3155 		    opt_zero ? "Z\n" : "z\n");
3156 
3157 		_malloc_message("CPUs: ", size_t2s(ncpus, s), "\n", "");
3158 		_malloc_message("Pointer size: ", size_t2s(sizeof(void *), s),
3159 		    "\n", "");
3160 		_malloc_message("Quantum size: ", size_t2s(quantum, s), "\n", "");
3161 		_malloc_message("Max small size: ", size_t2s(small_max, s), "\n",
3162 		    "");
3163 
3164 		_malloc_message("Chunk size: ", size_t2s(chunksize, s), "", "");
3165 		_malloc_message(" (2^", size_t2s((size_t)opt_chunk_2pow, s),
3166 		    ")\n", "");
3167 
3168 #ifdef MALLOC_STATS
3169 		{
3170 			size_t allocated, mapped;
3171 			unsigned i;
3172 			arena_t *arena;
3173 
3174 			/* Calculate and print allocated/mapped stats. */
3175 
3176 			/* arenas. */
3177 			for (i = 0, allocated = 0; i < ncpus; i++) {
3178 				if (arenas[i] != NULL) {
3179 					malloc_mutex_lock(&arenas[i]->mtx);
3180 					allocated +=
3181 					    arenas[i]->stats.allocated_small;
3182 					allocated +=
3183 					    arenas[i]->stats.allocated_large;
3184 					malloc_mutex_unlock(&arenas[i]->mtx);
3185 				}
3186 			}
3187 
3188 			/* huge/base. */
3189 			malloc_mutex_lock(&chunks_mtx);
3190 			allocated += huge_allocated;
3191 			mapped = stats_chunks.curchunks * chunksize;
3192 			malloc_mutex_unlock(&chunks_mtx);
3193 
3194 			malloc_mutex_lock(&base_mtx);
3195 			mapped += base_mapped;
3196 			malloc_mutex_unlock(&base_mtx);
3197 
3198 			malloc_printf("Allocated: %zu, mapped: %zu\n",
3199 			    allocated, mapped);
3200 
3201 			/* Print chunk stats. */
3202 			{
3203 				chunk_stats_t chunks_stats;
3204 
3205 				malloc_mutex_lock(&chunks_mtx);
3206 				chunks_stats = stats_chunks;
3207 				malloc_mutex_unlock(&chunks_mtx);
3208 
3209 				malloc_printf("chunks: nchunks   "
3210 				    "highchunks    curchunks\n");
3211 				malloc_printf("  %13llu%13lu%13lu\n",
3212 				    chunks_stats.nchunks,
3213 				    chunks_stats.highchunks,
3214 				    chunks_stats.curchunks);
3215 			}
3216 
3217 			/* Print huge allocation stats. */
3218 			malloc_printf(
3219 			    "huge: nmalloc      ndalloc      "
3220 			    "nralloc    allocated\n");
3221 			malloc_printf(" %12llu %12llu %12llu %12zu\n",
3222 			    huge_nmalloc, huge_ndalloc, huge_nralloc,
3223 			    huge_allocated);
3224 
3225 			/* Print stats for each arena. */
3226 			for (i = 0; i < ncpus; i++) {
3227 				arena = arenas[i];
3228 				if (arena != NULL) {
3229 					malloc_printf(
3230 					    "\narenas[%u] @ %p\n", i, arena);
3231 					malloc_mutex_lock(&arena->mtx);
3232 					stats_print(arena);
3233 					malloc_mutex_unlock(&arena->mtx);
3234 				}
3235 			}
3236 		}
3237 #endif /* #ifdef MALLOC_STATS */
3238 		_malloc_message("--- End malloc statistics ---\n", "", "", "");
3239 	}
3240 }
3241 
3242 /*
3243  * libpthread might call malloc(3), so the malloc implementation has to take
3244  * pains to avoid infinite recursion during initialization.
3245  */
3246 static inline bool
3247 malloc_init(void)
3248 {
3249 
3250 	if (__predict_false(malloc_initialized == false))
3251 		return (malloc_init_hard());
3252 
3253 	return (false);
3254 }
3255 
3256 static bool
3257 malloc_init_hard(void)
3258 {
3259 	unsigned i, j;
3260 	ssize_t linklen;
3261 	char buf[PATH_MAX + 1];
3262 	const char *opts = "";
3263 	int serrno;
3264 
3265 	malloc_mutex_lock(&init_lock);
3266 	if (malloc_initialized) {
3267 		/*
3268 		 * Another thread initialized the allocator before this one
3269 		 * acquired init_lock.
3270 		 */
3271 		malloc_mutex_unlock(&init_lock);
3272 		return (false);
3273 	}
3274 
3275 	serrno = errno;
3276 	/* Get number of CPUs. */
3277 	{
3278 		int mib[2];
3279 		size_t len;
3280 
3281 		mib[0] = CTL_HW;
3282 		mib[1] = HW_NCPU;
3283 		len = sizeof(ncpus);
3284 		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3285 			/* Error. */
3286 			ncpus = 1;
3287 		}
3288 	}
3289 
3290 	/* Get page size. */
3291 	{
3292 		long result;
3293 
3294 		result = getpagesize();
3295 		assert(result != -1);
3296 		pagesize = (unsigned) result;
3297 
3298 		/*
3299 		 * We assume that pagesize is a power of 2 when calculating
3300 		 * pagesize_mask and pagesize_2pow.
3301 		 */
3302 		assert(((result - 1) & result) == 0);
3303 		pagesize_mask = result - 1;
3304 		pagesize_2pow = ffs((int)result) - 1;
3305 	}
3306 
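	/*
	 * Options come from three sources, parsed in order: the name of the
	 * /etc/malloc.conf symbolic link, the MALLOC_OPTIONS environment
	 * variable, and the compiled-in _malloc_options string.  Later
	 * sources therefore override earlier ones.
	 */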
3307 	for (i = 0; i < 3; i++) {
3308 		/* Get runtime configuration. */
3309 		switch (i) {
3310 		case 0:
3311 			if ((linklen = readlink("/etc/malloc.conf", buf,
3312 			    sizeof(buf) - 1)) != -1) {
3313 				/*
3314 				 * Use the contents of the "/etc/malloc.conf"
3315 				 * symbolic link's name.
3316 				 */
3317 				buf[linklen] = '\0';
3318 				opts = buf;
3319 			} else {
3320 				/* No configuration specified. */
3321 				buf[0] = '\0';
3322 				opts = buf;
3323 			}
3324 			break;
3325 		case 1:
3326 			if ((opts = getenv("MALLOC_OPTIONS")) != NULL &&
3327 			    issetugid() == 0) {
3328 				/*
3329 				 * Do nothing; opts is already initialized to
3330 				 * the value of the MALLOC_OPTIONS environment
3331 				 * variable.
3332 				 */
3333 			} else {
3334 				/* No configuration specified. */
3335 				buf[0] = '\0';
3336 				opts = buf;
3337 			}
3338 			break;
3339 		case 2:
3340 			if (_malloc_options != NULL) {
3341 				/*
3342 				 * Use options that were compiled into the program.
3343 				 */
3344 				opts = _malloc_options;
3345 			} else {
3346 				/* No configuration specified. */
3347 				buf[0] = '\0';
3348 				opts = buf;
3349 			}
3350 			break;
3351 		default:
3352 			/* NOTREACHED */
3353 			/* LINTED */
3354 			assert(false);
3355 		}
3356 
3357 		for (j = 0; opts[j] != '\0'; j++) {
3358 			switch (opts[j]) {
3359 			case 'a':
3360 				opt_abort = false;
3361 				break;
3362 			case 'A':
3363 				opt_abort = true;
3364 				break;
3365 			case 'h':
3366 				opt_hint = false;
3367 				break;
3368 			case 'H':
3369 				opt_hint = true;
3370 				break;
3371 			case 'j':
3372 				opt_junk = false;
3373 				break;
3374 			case 'J':
3375 				opt_junk = true;
3376 				break;
3377 			case 'k':
3378 				/*
3379 				 * Chunks always require at least one header
3380 				 * page, so chunks can never be smaller than
3381 				 * two pages.
3382 				 */
3383 				if (opt_chunk_2pow > pagesize_2pow + 1)
3384 					opt_chunk_2pow--;
3385 				break;
3386 			case 'K':
3387 				if (opt_chunk_2pow + 1 <
3388 				    (int)(sizeof(size_t) << 3))
3389 					opt_chunk_2pow++;
3390 				break;
3391 			case 'p':
3392 				opt_print_stats = false;
3393 				break;
3394 			case 'P':
3395 				opt_print_stats = true;
3396 				break;
3397 			case 'q':
3398 				if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3399 					opt_quantum_2pow--;
3400 				break;
3401 			case 'Q':
3402 				if (opt_quantum_2pow < pagesize_2pow - 1)
3403 					opt_quantum_2pow++;
3404 				break;
3405 			case 's':
3406 				if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3407 					opt_small_max_2pow--;
3408 				break;
3409 			case 'S':
3410 				if (opt_small_max_2pow < pagesize_2pow - 1)
3411 					opt_small_max_2pow++;
3412 				break;
3413 			case 'u':
3414 				opt_utrace = false;
3415 				break;
3416 			case 'U':
3417 				opt_utrace = true;
3418 				break;
3419 			case 'v':
3420 				opt_sysv = false;
3421 				break;
3422 			case 'V':
3423 				opt_sysv = true;
3424 				break;
3425 			case 'x':
3426 				opt_xmalloc = false;
3427 				break;
3428 			case 'X':
3429 				opt_xmalloc = true;
3430 				break;
3431 			case 'z':
3432 				opt_zero = false;
3433 				break;
3434 			case 'Z':
3435 				opt_zero = true;
3436 				break;
3437 			default: {
3438 				char cbuf[2];
3439 
3440 				cbuf[0] = opts[j];
3441 				cbuf[1] = '\0';
3442 				_malloc_message(getprogname(),
3443 				    ": (malloc) Unsupported character in "
3444 				    "malloc options: '", cbuf, "'\n");
3445 			}
3446 			}
3447 		}
3448 	}
3449 	errno = serrno;
3450 
3451 	/* Take care to call atexit() only once. */
3452 	if (OPT(print_stats)) {
3453 		/* Print statistics at exit. */
3454 		atexit(malloc_print_stats);
3455 	}
3456 
3457 	/* Set variables according to the value of opt_small_max_2pow. */
3458 	if (opt_small_max_2pow < opt_quantum_2pow)
3459 		opt_small_max_2pow = opt_quantum_2pow;
3460 	small_max = (1U << opt_small_max_2pow);
3461 
3462 	/* Set bin-related variables. */
3463 	bin_maxclass = (pagesize >> 1);
3464 	assert(opt_quantum_2pow >= TINY_MIN_2POW);
3465 	ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW);
3466 	assert(ntbins <= (unsigned)opt_quantum_2pow);
3467 	nqbins = (unsigned)(small_max >> opt_quantum_2pow);
3468 	nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1);
3469 
3470 	/* Set variables according to the value of opt_quantum_2pow. */
3471 	quantum = (1 << opt_quantum_2pow);
3472 	quantum_mask = quantum - 1;
3473 	if (ntbins > 0)
3474 		small_min = (quantum >> 1) + 1;
3475 	else
3476 		small_min = 1;
3477 	assert(small_min <= quantum);
3478 
3479 	/* Set variables according to the value of opt_chunk_2pow. */
3480 	chunksize = (1LU << opt_chunk_2pow);
3481 	chunksize_mask = chunksize - 1;
3482 	chunksize_2pow = (unsigned)opt_chunk_2pow;
3483 	chunk_npages = (unsigned)(chunksize >> pagesize_2pow);
3484 	{
3485 		unsigned header_size;
3486 
3487 		header_size = (unsigned)(sizeof(arena_chunk_t) +
3488 		    (sizeof(arena_chunk_map_t) * (chunk_npages - 1)));
3489 		arena_chunk_header_npages = (header_size >> pagesize_2pow);
3490 		if ((header_size & pagesize_mask) != 0)
3491 			arena_chunk_header_npages++;
3492 	}
3493 	arena_maxclass = chunksize - (arena_chunk_header_npages <<
3494 	    pagesize_2pow);
3495 
3496 	UTRACE(0, 0, 0);
3497 
3498 #ifdef MALLOC_STATS
3499 	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3500 #endif
3501 
3502 	/* Various sanity checks that regard configuration. */
3503 	assert(quantum >= sizeof(void *));
3504 	assert(quantum <= pagesize);
3505 	assert(chunksize >= pagesize);
3506 	assert(quantum * 4 <= chunksize);
3507 
3508 	/* Initialize chunks data. */
3509 	malloc_mutex_init(&chunks_mtx);
3510 	rb_tree_init(&huge, &chunk_tree_ops);
3511 #ifdef USE_BRK
3512 	malloc_mutex_init(&brk_mtx);
3513 	brk_base = sbrk(0);
3514 	brk_prev = brk_base;
3515 	brk_max = brk_base;
3516 #endif
3517 #ifdef MALLOC_STATS
3518 	huge_nmalloc = 0;
3519 	huge_ndalloc = 0;
3520 	huge_nralloc = 0;
3521 	huge_allocated = 0;
3522 #endif
3523 	rb_tree_init(&old_chunks, &chunk_tree_ops);
3524 
3525 	/* Initialize base allocation data structures. */
3526 #ifdef MALLOC_STATS
3527 	base_mapped = 0;
3528 #endif
3529 #ifdef USE_BRK
3530 	/*
3531 	 * Allocate a base chunk here, since it doesn't actually have to be
3532 	 * chunk-aligned.  Doing this before allocating any other chunks allows
3533 	 * the use of space that would otherwise be wasted.
3534 	 */
3535 	base_pages_alloc(0);
3536 #endif
3537 	base_chunk_nodes = NULL;
3538 	malloc_mutex_init(&base_mtx);
3539 
3540 	/* Allocate and initialize arenas. */
3541 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * ncpus);
3542 	if (arenas == NULL) {
3543 		malloc_mutex_unlock(&init_lock);
3544 		return (true);
3545 	}
3546 	/*
3547 	 * Zero the array.  In practice, this should always be pre-zeroed,
3548 	 * since it was just mmap()ed, but let's be sure.
3549 	 */
3550 	memset(arenas, 0, sizeof(arena_t *) * ncpus);
3551 
3552 	/*
3553 	 * Initialize one arena here.  The rest are lazily created in
3554 	 * arena_choose_hard().
3555 	 */
3556 	if ((arenas[0] = arenas_extend()) == NULL) {
3557 		malloc_mutex_unlock(&init_lock);
3558 		return (true);
3559 	}
3560 
3561 	malloc_mutex_init(&arenas_mtx);
3562 
3563 	malloc_initialized = true;
3564 	malloc_mutex_unlock(&init_lock);
3565 	return (false);
3566 }
3567 
3568 /*
3569  * End general internal functions.
3570  */
3571 /******************************************************************************/
3572 /*
3573  * Begin malloc(3)-compatible functions.
3574  */
3575 
3576 void *
3577 malloc(size_t size)
3578 {
3579 	void *ret;
3580 
3581 	if (__predict_false(malloc_init())) {
3582 		ret = NULL;
3583 		goto RETURN;
3584 	}
3585 
3586 	if (__predict_false(size == 0)) {
3587 		if (NOT_OPT(sysv))
3588 			size = 1;
3589 		else {
3590 			ret = NULL;
3591 			goto RETURN;
3592 		}
3593 	}
3594 
3595 	ret = imalloc(size);
3596 
3597 RETURN:
3598 	if (__predict_false(ret == NULL)) {
3599 		if (OPT(xmalloc)) {
3600 			_malloc_message(getprogname(),
3601 			    ": (malloc) Error in malloc(): out of memory\n", "",
3602 			    "");
3603 			abort();
3604 		}
3605 		errno = ENOMEM;
3606 	}
3607 
3608 	UTRACE(0, size, ret);
3609 	return (ret);
3610 }
3611 
3612 int
3613 posix_memalign(void **memptr, size_t alignment, size_t size)
3614 {
3615 	int ret;
3616 	void *result;
3617 
3618 	if (__predict_false(malloc_init()))
3619 		result = NULL;
3620 	else {
3621 		/* Make sure that alignment is a large enough power of 2. */
3622 		if (((alignment - 1) & alignment) != 0
3623 		    || alignment < sizeof(void *)) {
3624 			if (OPT(xmalloc)) {
3625 				_malloc_message(getprogname(),
3626 				    ": (malloc) Error in posix_memalign(): "
3627 				    "invalid alignment\n", "", "");
3628 				abort();
3629 			}
3630 			result = NULL;
3631 			ret = EINVAL;
3632 			goto RETURN;
3633 		}
3634 
3635 		if (size == 0) {
3636 			if (NOT_OPT(sysv))
3637 				size = 1;
3638 			else {
3639 				result = NULL;
3640 				ret = 0;
3641 				goto RETURN;
3642 			}
3643 		}
3644 		result = ipalloc(alignment, size);
3645 	}
3646 
3647 	if (__predict_false(result == NULL)) {
3648 		if (OPT(xmalloc)) {
3649 			_malloc_message(getprogname(),
3650 			": (malloc) Error in posix_memalign(): out of memory\n",
3651 			"", "");
3652 			abort();
3653 		}
3654 		ret = ENOMEM;
3655 		goto RETURN;
3656 	}
3657 
3658 	*memptr = result;
3659 	ret = 0;
3660 
3661 RETURN:
3662 	UTRACE(0, size, result);
3663 	return (ret);
3664 }
3665 
3666 void *
3667 calloc(size_t num, size_t size)
3668 {
3669 	void *ret;
3670 	size_t num_size;
3671 
3672 	if (__predict_false(malloc_init())) {
3673 		num_size = 0;
3674 		ret = NULL;
3675 		goto RETURN;
3676 	}
3677 
3678 	num_size = num * size;
3679 	if (__predict_false(num_size == 0)) {
3680 		if (NOT_OPT(sysv) && ((num == 0) || (size == 0)))
3681 			num_size = 1;
3682 		else {
3683 			ret = NULL;
3684 			goto RETURN;
3685 		}
3686 	/*
3687 	 * Try to avoid division here.  We know that it isn't possible to
3688 	 * overflow during multiplication if neither operand uses any of the
3689 	 * most significant half of the bits in a size_t.
3690 	 */
3691 	} else if ((unsigned long long)((num | size) &
3692 	   ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
3693 	   (num_size / size != num)) {
3694 		/* size_t overflow. */
3695 		ret = NULL;
3696 		goto RETURN;
3697 	}
3698 
3699 	ret = icalloc(num_size);
3700 
3701 RETURN:
3702 	if (__predict_false(ret == NULL)) {
3703 		if (OPT(xmalloc)) {
3704 			_malloc_message(getprogname(),
3705 			    ": (malloc) Error in calloc(): out of memory\n", "",
3706 			    "");
3707 			abort();
3708 		}
3709 		errno = ENOMEM;
3710 	}
3711 
3712 	UTRACE(0, num_size, ret);
3713 	return (ret);
3714 }
3715 
3716 void *
3717 realloc(void *ptr, size_t size)
3718 {
3719 	void *ret;
3720 
3721 	if (__predict_false(size == 0)) {
3722 		if (NOT_OPT(sysv))
3723 			size = 1;
3724 		else {
3725 			if (ptr != NULL)
3726 				idalloc(ptr);
3727 			ret = NULL;
3728 			goto RETURN;
3729 		}
3730 	}
3731 
3732 	if (__predict_true(ptr != NULL)) {
3733 		assert(malloc_initialized);
3734 
3735 		ret = iralloc(ptr, size);
3736 
3737 		if (__predict_false(ret == NULL)) {
3738 			if (OPT(xmalloc)) {
3739 				_malloc_message(getprogname(),
3740 				    ": (malloc) Error in realloc(): out of "
3741 				    "memory\n", "", "");
3742 				abort();
3743 			}
3744 			errno = ENOMEM;
3745 		}
3746 	} else {
3747 		if (__predict_false(malloc_init()))
3748 			ret = NULL;
3749 		else
3750 			ret = imalloc(size);
3751 
3752 		if (__predict_false(ret == NULL)) {
3753 			if (OPT(xmalloc)) {
3754 				_malloc_message(getprogname(),
3755 				    ": (malloc) Error in realloc(): out of "
3756 				    "memory\n", "", "");
3757 				abort();
3758 			}
3759 			errno = ENOMEM;
3760 		}
3761 	}
3762 
3763 RETURN:
3764 	UTRACE(ptr, size, ret);
3765 	return (ret);
3766 }
3767 
3768 void
3769 free(void *ptr)
3770 {
3771 
3772 	UTRACE(ptr, 0, 0);
3773 	if (__predict_true(ptr != NULL)) {
3774 		assert(malloc_initialized);
3775 
3776 		idalloc(ptr);
3777 	}
3778 }
3779 
3780 /*
3781  * End malloc(3)-compatible functions.
3782  */
3783 /******************************************************************************/
3784 /*
3785  * Begin non-standard functions.
3786  */
3787 #ifndef __NetBSD__
3788 size_t
3789 malloc_usable_size(const void *ptr)
3790 {
3791 
3792 	assert(ptr != NULL);
3793 
3794 	return (isalloc(ptr));
3795 }
3796 #endif
3797 
3798 /*
3799  * End non-standard functions.
3800  */
3801 /******************************************************************************/
3802 /*
3803  * Begin library-private functions, used by threading libraries for protection
3804  * of malloc during fork().  These functions are only called if the program is
3805  * running in threaded mode, so there is no need to check whether the program
3806  * is threaded here.
3807  */
3808 
3809 void
3810 _malloc_prefork(void)
3811 {
3812 	unsigned i;
3813 
3814 	/* Acquire all mutexes in a safe order. */
3815 	malloc_mutex_lock(&init_lock);
3816 	malloc_mutex_lock(&arenas_mtx);
3817 	for (i = 0; i < ncpus; i++) {
3818 		if (arenas[i] != NULL)
3819 			malloc_mutex_lock(&arenas[i]->mtx);
3820 	}
3821 	malloc_mutex_lock(&chunks_mtx);
3822 	malloc_mutex_lock(&base_mtx);
3823 #ifdef USE_BRK
3824 	malloc_mutex_lock(&brk_mtx);
3825 #endif
3826 }
3827 
3828 void
3829 _malloc_postfork(void)
3830 {
3831 	unsigned i;
3832 
3833 	/* Release all mutexes, now that fork() has completed. */
3834 #ifdef USE_BRK
3835 	malloc_mutex_unlock(&brk_mtx);
3836 #endif
3837 	malloc_mutex_unlock(&base_mtx);
3838 	malloc_mutex_unlock(&chunks_mtx);
3839 
3840 	for (i = ncpus; i-- > 0; ) {
3841 		if (arenas[i] != NULL)
3842 			malloc_mutex_unlock(&arenas[i]->mtx);
3843 	}
3844 	malloc_mutex_unlock(&arenas_mtx);
3845 	malloc_mutex_unlock(&init_lock);
3846 }
3847 
3848 void
3849 _malloc_postfork_child(void)
3850 {
3851 	unsigned i;
3852 
3853 	/* Reinitialize all mutexes, since the child starts single-threaded. */
3854 #ifdef USE_BRK
3855 	malloc_mutex_init(&brk_mtx);
3856 #endif
3857 	malloc_mutex_init(&base_mtx);
3858 	malloc_mutex_init(&chunks_mtx);
3859 
3860 	for (i = ncpus; i-- > 0; ) {
3861 		if (arenas[i] != NULL)
3862 			malloc_mutex_init(&arenas[i]->mtx);
3863 	}
3864 	malloc_mutex_init(&arenas_mtx);
3865 	malloc_mutex_init(&init_lock);
3866 }
3867 
3868 /*
3869  * End library-private functions.
3870  */
3871 /******************************************************************************/
3872