xref: /netbsd-src/lib/libc/stdlib/jemalloc.c (revision 8b0f9554ff8762542c4defc4f70e1eb76fb508fa)
1 /*	$NetBSD: jemalloc.c,v 1.16 2007/12/04 17:43:51 christos Exp $	*/
2 
3 /*-
4  * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice(s), this list of conditions and the following disclaimer as
12  *    the first lines of this file unmodified other than the possible
13  *    addition of one or more copyright notices.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice(s), this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
20  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  *******************************************************************************
32  *
33  * This allocator implementation is designed to provide scalable performance
34  * for multi-threaded programs on multi-processor systems.  The following
35  * features are included for this purpose:
36  *
37  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
38  *     contention and cache sloshing.
39  *
40  *   + Cache line sharing between arenas is avoided for internal data
41  *     structures.
42  *
43  *   + Memory is managed in chunks and runs (chunks can be split into runs),
44  *     rather than as individual pages.  This provides a constant-time
45  *     mechanism for associating allocations with particular arenas.
46  *
47  * Allocation requests are rounded up to the nearest size class, and no record
48  * of the original request size is maintained.  Allocations are broken into
49  * categories according to size class.  Assuming runtime defaults, 4 kB pages
50  * and a 16 byte quantum, the size classes in each category are as follows:
51  *
52  *   |=====================================|
53  *   | Category | Subcategory    |    Size |
54  *   |=====================================|
55  *   | Small    | Tiny           |       2 |
56  *   |          |                |       4 |
57  *   |          |                |       8 |
58  *   |          |----------------+---------|
59  *   |          | Quantum-spaced |      16 |
60  *   |          |                |      32 |
61  *   |          |                |      48 |
62  *   |          |                |     ... |
63  *   |          |                |     480 |
64  *   |          |                |     496 |
65  *   |          |                |     512 |
66  *   |          |----------------+---------|
67  *   |          | Sub-page       |    1 kB |
68  *   |          |                |    2 kB |
69  *   |=====================================|
70  *   | Large                     |    4 kB |
71  *   |                           |    8 kB |
72  *   |                           |   12 kB |
73  *   |                           |     ... |
74  *   |                           | 1012 kB |
75  *   |                           | 1016 kB |
76  *   |                           | 1020 kB |
77  *   |=====================================|
78  *   | Huge                      |    1 MB |
79  *   |                           |    2 MB |
80  *   |                           |    3 MB |
81  *   |                           |     ... |
82  *   |=====================================|
83  *
84  * A different mechanism is used for each category:
85  *
86  *   Small : Each size class is segregated into its own set of runs.  Each run
87  *           maintains a bitmap of which regions are free/allocated.
88  *
89  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
90  *           in the associated arena chunk header maps.
91  *
92  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
93  *          Metadata are stored in a separate red-black tree.
94  *
95  *******************************************************************************
96  */
97 
98 /* LINTLIBRARY */
99 
100 #ifdef __NetBSD__
101 #  define xutrace(a, b)		utrace("malloc", (a), (b))
102 #  define __DECONST(x, y)	((x)__UNCONST(y))
103 #  define NO_TLS
104 #else
105 #  define xutrace(a, b)		utrace((a), (b))
106 #endif	/* __NetBSD__ */
107 
108 /*
109  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
110  * defaults the A and J runtime options to off.  These settings are appropriate
111  * for production systems.
112  */
113 #define MALLOC_PRODUCTION
114 
115 #ifndef MALLOC_PRODUCTION
116 #  define MALLOC_DEBUG
117 #endif
118 
119 #include <sys/cdefs.h>
120 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
121 __RCSID("$NetBSD: jemalloc.c,v 1.16 2007/12/04 17:43:51 christos Exp $");
122 
123 #ifdef __FreeBSD__
124 #include "libc_private.h"
125 #ifdef MALLOC_DEBUG
126 #  define _LOCK_DEBUG
127 #endif
128 #include "spinlock.h"
129 #endif
130 #include "namespace.h"
131 #include <sys/mman.h>
132 #include <sys/param.h>
133 #ifdef __FreeBSD__
134 #include <sys/stddef.h>
135 #endif
136 #include <sys/time.h>
137 #include <sys/types.h>
138 #include <sys/sysctl.h>
139 #include <sys/tree.h>
140 #include <sys/uio.h>
141 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
142 
143 #ifdef __FreeBSD__
144 #include <machine/atomic.h>
145 #include <machine/cpufunc.h>
146 #endif
147 #include <machine/vmparam.h>
148 
149 #include <errno.h>
150 #include <limits.h>
151 #include <pthread.h>
152 #include <sched.h>
153 #include <stdarg.h>
154 #include <stdbool.h>
155 #include <stdio.h>
156 #include <stdint.h>
157 #include <stdlib.h>
158 #include <string.h>
159 #include <strings.h>
160 #include <unistd.h>
161 
162 #ifdef __NetBSD__
163 #  include <reentrant.h>
164 #  include "extern.h"
165 
166 #define STRERROR_R(a, b, c)	__strerror_r(a, b, c);
167 /*
168  * A non localized version of strerror, that avoids bringing in
169  * stdio and the locale code. All the malloc messages are in English
170  * so why bother?
171  */
172 static int
173 __strerror_r(int e, char *s, size_t l)
174 {
175 	int rval;
176 	size_t slen;
177 
178 	if (e >= 0 && e < sys_nerr) {
179 		slen = strlcpy(s, sys_errlist[e], l);
180 		rval = 0;
181 	} else {
182 		slen = snprintf_ss(s, l, "Unknown error %u", e);
183 		rval = EINVAL;
184 	}
185 	return slen >= l ? ERANGE : rval;
186 }
187 #endif
188 
189 #ifdef __FreeBSD__
190 #define STRERROR_R(a, b, c)	strerror_r(a, b, c);
191 #include "un-namespace.h"
192 #endif
193 
194 /* MALLOC_STATS enables statistics calculation. */
195 #ifndef MALLOC_PRODUCTION
196 #  define MALLOC_STATS
197 #endif
198 
199 #ifdef MALLOC_DEBUG
200 #  ifdef NDEBUG
201 #    undef NDEBUG
202 #  endif
203 #else
204 #  ifndef NDEBUG
205 #    define NDEBUG
206 #  endif
207 #endif
208 #include <assert.h>
209 
210 #ifdef MALLOC_DEBUG
211    /* Disable inlining to make debugging easier. */
212 #  define inline
213 #endif
214 
215 /* Size of stack-allocated buffer passed to strerror_r(). */
216 #define	STRERROR_BUF		64
217 
218 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
219 #ifdef __i386__
220 #  define QUANTUM_2POW_MIN	4
221 #  define SIZEOF_PTR_2POW	2
222 #  define USE_BRK
223 #endif
224 #ifdef __ia64__
225 #  define QUANTUM_2POW_MIN	4
226 #  define SIZEOF_PTR_2POW	3
227 #endif
228 #ifdef __alpha__
229 #  define QUANTUM_2POW_MIN	4
230 #  define SIZEOF_PTR_2POW	3
231 #  define NO_TLS
232 #endif
233 #ifdef __sparc64__
234 #  define QUANTUM_2POW_MIN	4
235 #  define SIZEOF_PTR_2POW	3
236 #  define NO_TLS
237 #endif
238 #ifdef __amd64__
239 #  define QUANTUM_2POW_MIN	4
240 #  define SIZEOF_PTR_2POW	3
241 #endif
242 #ifdef __arm__
243 #  define QUANTUM_2POW_MIN	3
244 #  define SIZEOF_PTR_2POW	2
245 #  define USE_BRK
246 #  define NO_TLS
247 #endif
248 #ifdef __powerpc__
249 #  define QUANTUM_2POW_MIN	4
250 #  define SIZEOF_PTR_2POW	2
251 #  define USE_BRK
252 #endif
253 #if defined(__sparc__) && !defined(__sparc64__)
254 #  define QUANTUM_2POW_MIN	4
255 #  define SIZEOF_PTR_2POW	2
256 #  define USE_BRK
257 #endif
258 #ifdef __vax__
259 #  define QUANTUM_2POW_MIN	4
260 #  define SIZEOF_PTR_2POW	2
261 #  define USE_BRK
262 #endif
263 #ifdef __sh__
264 #  define QUANTUM_2POW_MIN	4
265 #  define SIZEOF_PTR_2POW	2
266 #  define USE_BRK
267 #endif
268 #ifdef __m68k__
269 #  define QUANTUM_2POW_MIN	4
270 #  define SIZEOF_PTR_2POW	2
271 #  define USE_BRK
272 #endif
273 #ifdef __mips__
274 #  define QUANTUM_2POW_MIN	4
275 #  define SIZEOF_PTR_2POW	2
276 #  define USE_BRK
277 #endif
278 #ifdef __hppa__
279 #  define QUANTUM_2POW_MIN     4
280 #  define SIZEOF_PTR_2POW      2
281 #  define USE_BRK
282 #endif
283 
284 #define	SIZEOF_PTR		(1 << SIZEOF_PTR_2POW)
285 
286 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
287 #ifndef SIZEOF_INT_2POW
288 #  define SIZEOF_INT_2POW	2
289 #endif
290 
291 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
292 #if (!defined(PIC) && !defined(NO_TLS))
293 #  define NO_TLS
294 #endif
295 
296 /*
297  * Size and alignment of memory chunks that are allocated by the OS's virtual
298  * memory system.
299  */
300 #define	CHUNK_2POW_DEFAULT	20
301 
302 /*
303  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
304  * so over-estimates are okay (up to a point), but under-estimates will
305  * negatively affect performance.
306  */
307 #define	CACHELINE_2POW		6
308 #define	CACHELINE		((size_t)(1 << CACHELINE_2POW))
309 
310 /* Smallest size class to support. */
311 #define	TINY_MIN_2POW		1
312 
313 /*
314  * Maximum size class that is a multiple of the quantum, but not (necessarily)
315  * a power of 2.  Above this size, allocations are rounded up to the nearest
316  * power of 2.
317  */
318 #define	SMALL_MAX_2POW_DEFAULT	9
319 #define	SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)
320 
321 /*
322  * Maximum desired run header overhead.  Runs are sized as small as possible
323  * such that this setting is still honored, without violating other constraints.
324  * The goal is to make runs as small as possible without exceeding a per run
325  * external fragmentation threshold.
326  *
327  * Note that it is possible to set this low enough that it cannot be honored
328  * for some/all object sizes, since there is one bit of header overhead per
329  * object (plus a constant).  In such cases, this constraint is relaxed.
330  *
331  * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of
332  * overhead for which RUN_MAX_OVRHD is relaxed.
333  */
334 #define RUN_MAX_OVRHD		0.015
335 #define RUN_MAX_OVRHD_RELAX	1.5
336 
337 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
338 #define RUN_MAX_SMALL_2POW	15
339 #define RUN_MAX_SMALL		(1 << RUN_MAX_SMALL_2POW)
340 
341 /******************************************************************************/
342 
343 #ifdef __FreeBSD__
344 /*
345  * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
346  * they require malloc()ed memory.
347  */
348 typedef struct {
349 	spinlock_t	lock;
350 } malloc_mutex_t;
351 
352 /* Set to true once the allocator has been initialized. */
353 static bool malloc_initialized = false;
354 
355 /* Used to avoid initialization races. */
356 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
357 #else
358 #define	malloc_mutex_t	mutex_t
359 
360 /* Set to true once the allocator has been initialized. */
361 static bool malloc_initialized = false;
362 
363 /* Used to avoid initialization races. */
364 static mutex_t init_lock = MUTEX_INITIALIZER;
365 #endif
366 
367 /******************************************************************************/
368 /*
369  * Statistics data structures.
370  */
371 
372 #ifdef MALLOC_STATS
373 
374 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
375 struct malloc_bin_stats_s {
376 	/*
377 	 * Number of allocation requests that corresponded to the size of this
378 	 * bin.
379 	 */
380 	uint64_t	nrequests;
381 
382 	/* Total number of runs created for this bin's size class. */
383 	uint64_t	nruns;
384 
385 	/*
386 	 * Total number of runs reused by extracting them from the runs tree for
387 	 * this bin's size class.
388 	 */
389 	uint64_t	reruns;
390 
391 	/* High-water mark for this bin. */
392 	unsigned long	highruns;
393 
394 	/* Current number of runs in this bin. */
395 	unsigned long	curruns;
396 };
397 
398 typedef struct arena_stats_s arena_stats_t;
399 struct arena_stats_s {
400 	/* Number of bytes currently mapped. */
401 	size_t		mapped;
402 
403 	/* Per-size-category statistics. */
404 	size_t		allocated_small;
405 	uint64_t	nmalloc_small;
406 	uint64_t	ndalloc_small;
407 
408 	size_t		allocated_large;
409 	uint64_t	nmalloc_large;
410 	uint64_t	ndalloc_large;
411 };
412 
413 typedef struct chunk_stats_s chunk_stats_t;
414 struct chunk_stats_s {
415 	/* Number of chunks that were allocated. */
416 	uint64_t	nchunks;
417 
418 	/* High-water mark for number of chunks allocated. */
419 	unsigned long	highchunks;
420 
421 	/*
422 	 * Current number of chunks allocated.  This value isn't maintained for
423 	 * any other purpose, so keep track of it in order to be able to set
424 	 * highchunks.
425 	 */
426 	unsigned long	curchunks;
427 };
428 
429 #endif /* #ifdef MALLOC_STATS */
430 
431 /******************************************************************************/
432 /*
433  * Chunk data structures.
434  */
435 
436 /* Tree of chunks. */
437 typedef struct chunk_node_s chunk_node_t;
438 struct chunk_node_s {
439 	/* Linkage for the chunk tree. */
440 	RB_ENTRY(chunk_node_s) link;
441 
442 	/*
443 	 * Pointer to the chunk that this tree node is responsible for.  In some
444 	 * (but certainly not all) cases, this data structure is placed at the
445 	 * beginning of the corresponding chunk, so this field may point to this
446 	 * node.
447 	 */
448 	void	*chunk;
449 
450 	/* Total chunk size. */
451 	size_t	size;
452 };
453 typedef struct chunk_tree_s chunk_tree_t;
454 RB_HEAD(chunk_tree_s, chunk_node_s);
455 
456 /******************************************************************************/
457 /*
458  * Arena data structures.
459  */
460 
461 typedef struct arena_s arena_t;
462 typedef struct arena_bin_s arena_bin_t;
463 
464 typedef struct arena_chunk_map_s arena_chunk_map_t;
465 struct arena_chunk_map_s {
466 	/* Number of pages in run. */
467 	uint32_t	npages;
468 	/*
469 	 * Position within run.  For a free run, this is POS_FREE for the first
470 	 * and last pages.  The POS_FREE special value makes it possible to
471 	 * quickly coalesce free runs.
472 	 *
473 	 * This is the limiting factor for chunksize; there can be at most 2^31
474 	 * pages in a run.
475 	 */
476 #define POS_FREE ((uint32_t)0xffffffffU)
477 	uint32_t	pos;
478 };
479 
480 /* Arena chunk header. */
481 typedef struct arena_chunk_s arena_chunk_t;
482 struct arena_chunk_s {
483 	/* Arena that owns the chunk. */
484 	arena_t *arena;
485 
486 	/* Linkage for the arena's chunk tree. */
487 	RB_ENTRY(arena_chunk_s) link;
488 
489 	/*
490 	 * Number of pages in use.  This is maintained in order to make
491 	 * detection of empty chunks fast.
492 	 */
493 	uint32_t pages_used;
494 
495 	/*
496 	 * Every time a free run larger than this value is created/coalesced,
497 	 * this value is increased.  The only way that the value decreases is if
498 	 * arena_run_alloc() fails to find a free run as large as advertised by
499 	 * this value.
500 	 */
501 	uint32_t max_frun_npages;
502 
503 	/*
504 	 * Every time a free run that starts at an earlier page than this value
505 	 * is created/coalesced, this value is decreased.  It is reset in a
506 	 * similar fashion to max_frun_npages.
507 	 */
508 	uint32_t min_frun_ind;
509 
510 	/*
511 	 * Map of pages within chunk that keeps track of free/large/small.  For
512 	 * free runs, only the map entries for the first and last pages are
513 	 * kept up to date, so that free runs can be quickly coalesced.
514 	 */
515 	arena_chunk_map_t map[1]; /* Dynamically sized. */
516 };
517 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
518 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
519 
520 typedef struct arena_run_s arena_run_t;
521 struct arena_run_s {
522 	/* Linkage for run trees. */
523 	RB_ENTRY(arena_run_s) link;
524 
525 #ifdef MALLOC_DEBUG
526 	uint32_t	magic;
527 #  define ARENA_RUN_MAGIC 0x384adf93
528 #endif
529 
530 	/* Bin this run is associated with. */
531 	arena_bin_t	*bin;
532 
533 	/* Index of first element that might have a free region. */
534 	unsigned	regs_minelm;
535 
536 	/* Number of free regions in run. */
537 	unsigned	nfree;
538 
539 	/* Bitmask of in-use regions (0: in use, 1: free). */
540 	unsigned	regs_mask[1]; /* Dynamically sized. */
541 };
542 typedef struct arena_run_tree_s arena_run_tree_t;
543 RB_HEAD(arena_run_tree_s, arena_run_s);
544 
545 struct arena_bin_s {
546 	/*
547 	 * Current run being used to service allocations of this bin's size
548 	 * class.
549 	 */
550 	arena_run_t	*runcur;
551 
552 	/*
553 	 * Tree of non-full runs.  This tree is used when looking for an
554 	 * existing run when runcur is no longer usable.  We choose the
555 	 * non-full run that is lowest in memory; this policy tends to keep
556 	 * objects packed well, and it can also help reduce the number of
557 	 * almost-empty chunks.
558 	 */
559 	arena_run_tree_t runs;
560 
561 	/* Size of regions in a run for this bin's size class. */
562 	size_t		reg_size;
563 
564 	/* Total size of a run for this bin's size class. */
565 	size_t		run_size;
566 
567 	/* Total number of regions in a run for this bin's size class. */
568 	uint32_t	nregs;
569 
570 	/* Number of elements in a run's regs_mask for this bin's size class. */
571 	uint32_t	regs_mask_nelms;
572 
573 	/* Offset of first region in a run for this bin's size class. */
574 	uint32_t	reg0_offset;
575 
576 #ifdef MALLOC_STATS
577 	/* Bin statistics. */
578 	malloc_bin_stats_t stats;
579 #endif
580 };
581 
582 struct arena_s {
583 #ifdef MALLOC_DEBUG
584 	uint32_t		magic;
585 #  define ARENA_MAGIC 0x947d3d24
586 #endif
587 
588 	/* All operations on this arena require that mtx be locked. */
589 	malloc_mutex_t		mtx;
590 
591 #ifdef MALLOC_STATS
592 	arena_stats_t		stats;
593 #endif
594 
595 	/*
596 	 * Tree of chunks this arena manages.
597 	 */
598 	arena_chunk_tree_t	chunks;
599 
600 	/*
601 	 * In order to avoid rapid chunk allocation/deallocation when an arena
602 	 * oscillates right on the cusp of needing a new chunk, cache the most
603 	 * recently freed chunk.  This caching is disabled by opt_hint.
604 	 *
605 	 * There is one spare chunk per arena, rather than one spare total, in
606 	 * order to avoid interactions between multiple threads that could make
607 	 * a single spare inadequate.
608 	 */
609 	arena_chunk_t *spare;
610 
611 	/*
612 	 * bins is used to store rings of free regions of the following sizes,
613 	 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
614 	 *
615 	 *   bins[i] | size |
616 	 *   --------+------+
617 	 *        0  |    2 |
618 	 *        1  |    4 |
619 	 *        2  |    8 |
620 	 *   --------+------+
621 	 *        3  |   16 |
622 	 *        4  |   32 |
623 	 *        5  |   48 |
624 	 *        6  |   64 |
625 	 *           :      :
626 	 *           :      :
627 	 *       33  |  496 |
628 	 *       34  |  512 |
629 	 *   --------+------+
630 	 *       35  | 1024 |
631 	 *       36  | 2048 |
632 	 *   --------+------+
633 	 */
634 	arena_bin_t		bins[1]; /* Dynamically sized. */
635 };
636 
637 /******************************************************************************/
638 /*
639  * Data.
640  */
641 
642 /* Number of CPUs. */
643 static unsigned		ncpus;
644 
645 /* VM page size. */
646 static size_t		pagesize;
647 static size_t		pagesize_mask;
648 static int		pagesize_2pow;
649 
650 /* Various bin-related settings. */
651 static size_t		bin_maxclass; /* Max size class for bins. */
652 static unsigned		ntbins; /* Number of (2^n)-spaced tiny bins. */
653 static unsigned		nqbins; /* Number of quantum-spaced bins. */
654 static unsigned		nsbins; /* Number of (2^n)-spaced sub-page bins. */
655 static size_t		small_min;
656 static size_t		small_max;
657 
658 /* Various quantum-related settings. */
659 static size_t		quantum;
660 static size_t		quantum_mask; /* (quantum - 1). */
661 
662 /* Various chunk-related settings. */
663 static size_t		chunksize;
664 static size_t		chunksize_mask; /* (chunksize - 1). */
665 static int		chunksize_2pow;
666 static unsigned		chunk_npages;
667 static unsigned		arena_chunk_header_npages;
668 static size_t		arena_maxclass; /* Max size class for arenas. */
669 
670 /********/
671 /*
672  * Chunks.
673  */
674 
675 /* Protects chunk-related data structures. */
676 static malloc_mutex_t	chunks_mtx;
677 
678 /* Tree of chunks that are stand-alone huge allocations. */
679 static chunk_tree_t	huge;
680 
681 #ifdef USE_BRK
682 /*
683  * Try to use brk for chunk-size allocations, due to address space constraints.
684  */
685 /*
686  * Protects sbrk() calls.  This must be separate from chunks_mtx, since
687  * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
688  * could cause recursive lock acquisition).
689  */
690 static malloc_mutex_t	brk_mtx;
691 /* Result of first sbrk(0) call. */
692 static void		*brk_base;
693 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
694 static void		*brk_prev;
695 /* Current upper limit on brk addresses. */
696 static void		*brk_max;
697 #endif
698 
699 #ifdef MALLOC_STATS
700 /* Huge allocation statistics. */
701 static uint64_t		huge_nmalloc;
702 static uint64_t		huge_ndalloc;
703 static uint64_t		huge_nralloc;
704 static size_t		huge_allocated;
705 #endif
706 
707 /*
708  * Tree of chunks that were previously allocated.  This is used when allocating
709  * chunks, in an attempt to re-use address space.
710  */
711 static chunk_tree_t	old_chunks;
712 
713 /****************************/
714 /*
715  * base (internal allocation).
716  */
717 
718 /*
719  * Current pages that are being used for internal memory allocations.  These
720  * pages are carved up in cacheline-size quanta, so that there is no chance of
721  * false cache line sharing.
722  */
723 static void		*base_pages;
724 static void		*base_next_addr;
725 static void		*base_past_addr; /* Addr immediately past base_pages. */
726 static chunk_node_t	*base_chunk_nodes; /* LIFO cache of chunk nodes. */
727 static malloc_mutex_t	base_mtx;
728 #ifdef MALLOC_STATS
729 static size_t		base_mapped;
730 #endif
731 
732 /********/
733 /*
734  * Arenas.
735  */
736 
737 /*
738  * Arenas that are used to service external requests.  Not all elements of the
739  * arenas array are necessarily used; arenas are created lazily as needed.
740  */
741 static arena_t		**arenas;
742 static unsigned		narenas;
743 static unsigned		next_arena;
744 static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */
745 
746 #ifndef NO_TLS
747 /*
748  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
749  * for allocations.
750  */
751 static __thread arena_t	*arenas_map;
752 #define	get_arenas_map()	(arenas_map)
753 #define	set_arenas_map(x)	(arenas_map = x)
754 #else
755 static thread_key_t arenas_map_key;
756 #define	get_arenas_map()	thr_getspecific(arenas_map_key)
757 #define	set_arenas_map(x)	thr_setspecific(arenas_map_key, x)
758 #endif
759 
760 #ifdef MALLOC_STATS
761 /* Chunk statistics. */
762 static chunk_stats_t	stats_chunks;
763 #endif
764 
765 /*******************************/
766 /*
767  * Runtime configuration options.
768  */
769 const char	*_malloc_options;
770 
771 #ifndef MALLOC_PRODUCTION
772 static bool	opt_abort = true;
773 static bool	opt_junk = true;
774 #else
775 static bool	opt_abort = false;
776 static bool	opt_junk = false;
777 #endif
778 static bool	opt_hint = false;
779 static bool	opt_print_stats = false;
780 static int	opt_quantum_2pow = QUANTUM_2POW_MIN;
781 static int	opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
782 static int	opt_chunk_2pow = CHUNK_2POW_DEFAULT;
783 static bool	opt_utrace = false;
784 static bool	opt_sysv = false;
785 static bool	opt_xmalloc = false;
786 static bool	opt_zero = false;
787 static int32_t	opt_narenas_lshift = 0;
788 
789 typedef struct {
790 	void	*p;
791 	size_t	s;
792 	void	*r;
793 } malloc_utrace_t;
794 
795 #define	UTRACE(a, b, c)							\
796 	if (opt_utrace) {						\
797 		malloc_utrace_t ut;					\
798 		ut.p = a;						\
799 		ut.s = b;						\
800 		ut.r = c;						\
801 		xutrace(&ut, sizeof(ut));				\
802 	}
803 
804 /******************************************************************************/
805 /*
806  * Begin function prototypes for non-inline static functions.
807  */
808 
809 static void	wrtmessage(const char *p1, const char *p2, const char *p3,
810 		const char *p4);
811 #ifdef MALLOC_STATS
812 static void	malloc_printf(const char *format, ...);
813 #endif
814 static char	*umax2s(uintmax_t x, char *s);
815 static bool	base_pages_alloc(size_t minsize);
816 static void	*base_alloc(size_t size);
817 static chunk_node_t *base_chunk_node_alloc(void);
818 static void	base_chunk_node_dealloc(chunk_node_t *node);
819 #ifdef MALLOC_STATS
820 static void	stats_print(arena_t *arena);
821 #endif
822 static void	*pages_map(void *addr, size_t size);
823 static void	*pages_map_align(void *addr, size_t size, int align);
824 static void	pages_unmap(void *addr, size_t size);
825 static void	*chunk_alloc(size_t size);
826 static void	chunk_dealloc(void *chunk, size_t size);
827 static arena_t	*choose_arena_hard(void);
828 static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
829 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
830 static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
831 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
832 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
833 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
834 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
835 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
836 static void	*arena_malloc(arena_t *arena, size_t size);
837 static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
838     size_t alloc_size);
839 static size_t	arena_salloc(const void *ptr);
840 static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
841 static void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
842 static bool	arena_new(arena_t *arena);
843 static arena_t	*arenas_extend(unsigned ind);
844 static void	*huge_malloc(size_t size);
845 static void	*huge_palloc(size_t alignment, size_t size);
846 static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
847 static void	huge_dalloc(void *ptr);
848 static void	*imalloc(size_t size);
849 static void	*ipalloc(size_t alignment, size_t size);
850 static void	*icalloc(size_t size);
851 static size_t	isalloc(const void *ptr);
852 static void	*iralloc(void *ptr, size_t size);
853 static void	idalloc(void *ptr);
854 static void	malloc_print_stats(void);
855 static bool	malloc_init_hard(void);
856 
857 /*
858  * End function prototypes.
859  */
860 /******************************************************************************/
861 /*
862  * Begin mutex.
863  */
864 
865 #ifdef __NetBSD__
866 #define	malloc_mutex_init(m)	mutex_init(m, NULL)
867 #define	malloc_mutex_lock(m)	mutex_lock(m)
868 #define	malloc_mutex_unlock(m)	mutex_unlock(m)
869 #else	/* __NetBSD__ */
870 static inline void
871 malloc_mutex_init(malloc_mutex_t *a_mutex)
872 {
873 	static const spinlock_t lock = _SPINLOCK_INITIALIZER;
874 
875 	a_mutex->lock = lock;
876 }
877 
878 static inline void
879 malloc_mutex_lock(malloc_mutex_t *a_mutex)
880 {
881 
882 	if (__isthreaded)
883 		_SPINLOCK(&a_mutex->lock);
884 }
885 
886 static inline void
887 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
888 {
889 
890 	if (__isthreaded)
891 		_SPINUNLOCK(&a_mutex->lock);
892 }
893 #endif	/* __NetBSD__ */
894 
895 /*
896  * End mutex.
897  */
898 /******************************************************************************/
899 /*
900  * Begin Utility functions/macros.
901  */
902 
903 /* Return the chunk address for allocation address a. */
904 #define	CHUNK_ADDR2BASE(a)						\
905 	((void *)((uintptr_t)(a) & ~chunksize_mask))
906 
907 /* Return the chunk offset of address a. */
908 #define	CHUNK_ADDR2OFFSET(a)						\
909 	((size_t)((uintptr_t)(a) & chunksize_mask))
910 
911 /* Return the smallest chunk multiple that is >= s. */
912 #define	CHUNK_CEILING(s)						\
913 	(((s) + chunksize_mask) & ~chunksize_mask)
914 
915 /* Return the smallest cacheline multiple that is >= s. */
916 #define	CACHELINE_CEILING(s)						\
917 	(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
918 
919 /* Return the smallest quantum multiple that is >= a. */
920 #define	QUANTUM_CEILING(a)						\
921 	(((a) + quantum_mask) & ~quantum_mask)
922 
923 /* Return the smallest pagesize multiple that is >= s. */
924 #define	PAGE_CEILING(s)							\
925 	(((s) + pagesize_mask) & ~pagesize_mask)
926 
927 /* Compute the smallest power of 2 that is >= x. */
928 static inline size_t
929 pow2_ceil(size_t x)
930 {
931 
932 	x--;
933 	x |= x >> 1;
934 	x |= x >> 2;
935 	x |= x >> 4;
936 	x |= x >> 8;
937 	x |= x >> 16;
938 #if (SIZEOF_PTR == 8)
939 	x |= x >> 32;
940 #endif
941 	x++;
942 	return (x);
943 }
944 
945 static void
946 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
947 {
948 
949 	write(STDERR_FILENO, p1, strlen(p1));
950 	write(STDERR_FILENO, p2, strlen(p2));
951 	write(STDERR_FILENO, p3, strlen(p3));
952 	write(STDERR_FILENO, p4, strlen(p4));
953 }
954 
955 void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
956 	    const char *p4) = wrtmessage;
957 
958 #ifdef MALLOC_STATS
959 /*
960  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
961  */
962 static void
963 malloc_printf(const char *format, ...)
964 {
965 	char buf[4096];
966 	va_list ap;
967 
968 	va_start(ap, format);
969 	vsnprintf(buf, sizeof(buf), format, ap);
970 	va_end(ap);
971 	_malloc_message(buf, "", "", "");
972 }
973 #endif
974 
975 /*
976  * We don't want to depend on vsnprintf() for production builds, since that can
977  * cause unnecessary bloat for static binaries.  umax2s() provides minimal
978  * integer printing functionality, so that malloc_printf() use can be limited to
979  * MALLOC_STATS code.
980  */
981 #define UMAX2S_BUFSIZE	21
982 static char *
983 umax2s(uintmax_t x, char *s)
984 {
985 	unsigned i;
986 
987 	/* Make sure UMAX2S_BUFSIZE is large enough. */
988 	/* LINTED */
989 	assert(sizeof(uintmax_t) <= 8);
990 
991 	i = UMAX2S_BUFSIZE - 1;
992 	s[i] = '\0';
993 	do {
994 		i--;
995 		s[i] = "0123456789"[(int)x % 10];
996 		x /= (uintmax_t)10LL;
997 	} while (x > 0);
998 
999 	return (&s[i]);
1000 }
1001 
1002 /******************************************************************************/
1003 
1004 static bool
1005 base_pages_alloc(size_t minsize)
1006 {
1007 	size_t csize = 0;
1008 
1009 #ifdef USE_BRK
1010 	/*
1011 	 * Do special brk allocation here, since base allocations don't need to
1012 	 * be chunk-aligned.
1013 	 */
1014 	if (brk_prev != (void *)-1) {
1015 		void *brk_cur;
1016 		intptr_t incr;
1017 
1018 		if (minsize != 0)
1019 			csize = CHUNK_CEILING(minsize);
1020 
1021 		malloc_mutex_lock(&brk_mtx);
1022 		do {
1023 			/* Get the current end of brk. */
1024 			brk_cur = sbrk(0);
1025 
1026 			/*
1027 			 * Calculate how much padding is necessary to
1028 			 * chunk-align the end of brk.  Don't worry about
1029 			 * brk_cur not being chunk-aligned though.
1030 			 */
1031 			incr = (intptr_t)chunksize
1032 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1033 			if (incr < minsize)
1034 				incr += csize;
1035 
1036 			brk_prev = sbrk(incr);
1037 			if (brk_prev == brk_cur) {
1038 				/* Success. */
1039 				malloc_mutex_unlock(&brk_mtx);
1040 				base_pages = brk_cur;
1041 				base_next_addr = base_pages;
1042 				base_past_addr = (void *)((uintptr_t)base_pages
1043 				    + incr);
1044 #ifdef MALLOC_STATS
1045 				base_mapped += incr;
1046 #endif
1047 				return (false);
1048 			}
1049 		} while (brk_prev != (void *)-1);
1050 		malloc_mutex_unlock(&brk_mtx);
1051 	}
1052 	if (minsize == 0) {
1053 		/*
1054 		 * Failure during initialization doesn't matter, so avoid
1055 		 * falling through to the mmap-based page mapping code.
1056 		 */
1057 		return (true);
1058 	}
1059 #endif
1060 	assert(minsize != 0);
1061 	csize = PAGE_CEILING(minsize);
1062 	base_pages = pages_map(NULL, csize);
1063 	if (base_pages == NULL)
1064 		return (true);
1065 	base_next_addr = base_pages;
1066 	base_past_addr = (void *)((uintptr_t)base_pages + csize);
1067 #ifdef MALLOC_STATS
1068 	base_mapped += csize;
1069 #endif
1070 	return (false);
1071 }
1072 
1073 static void *
1074 base_alloc(size_t size)
1075 {
1076 	void *ret;
1077 	size_t csize;
1078 
1079 	/* Round size up to nearest multiple of the cacheline size. */
1080 	csize = CACHELINE_CEILING(size);
1081 
1082 	malloc_mutex_lock(&base_mtx);
1083 
1084 	/* Make sure there's enough space for the allocation. */
1085 	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1086 		if (base_pages_alloc(csize)) {
1087 			ret = NULL;
1088 			goto RETURN;
1089 		}
1090 	}
1091 
1092 	/* Allocate. */
1093 	ret = base_next_addr;
1094 	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1095 
1096 RETURN:
1097 	malloc_mutex_unlock(&base_mtx);
1098 	return (ret);
1099 }
1100 
1101 static chunk_node_t *
1102 base_chunk_node_alloc(void)
1103 {
1104 	chunk_node_t *ret;
1105 
1106 	malloc_mutex_lock(&base_mtx);
1107 	if (base_chunk_nodes != NULL) {
1108 		ret = base_chunk_nodes;
1109 		/* LINTED */
1110 		base_chunk_nodes = *(chunk_node_t **)ret;
1111 		malloc_mutex_unlock(&base_mtx);
1112 	} else {
1113 		malloc_mutex_unlock(&base_mtx);
1114 		ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1115 	}
1116 
1117 	return (ret);
1118 }
1119 
1120 static void
1121 base_chunk_node_dealloc(chunk_node_t *node)
1122 {
1123 
1124 	malloc_mutex_lock(&base_mtx);
1125 	/* LINTED */
1126 	*(chunk_node_t **)node = base_chunk_nodes;
1127 	base_chunk_nodes = node;
1128 	malloc_mutex_unlock(&base_mtx);
1129 }
1130 
1131 /******************************************************************************/
1132 
1133 #ifdef MALLOC_STATS
1134 static void
1135 stats_print(arena_t *arena)
1136 {
1137 	unsigned i;
1138 	int gap_start;
1139 
1140 	malloc_printf(
1141 	    "          allocated/mapped            nmalloc      ndalloc\n");
1142 
1143 	malloc_printf("small: %12zu %-12s %12llu %12llu\n",
1144 	    arena->stats.allocated_small, "", arena->stats.nmalloc_small,
1145 	    arena->stats.ndalloc_small);
1146 	malloc_printf("large: %12zu %-12s %12llu %12llu\n",
1147 	    arena->stats.allocated_large, "", arena->stats.nmalloc_large,
1148 	    arena->stats.ndalloc_large);
1149 	malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
1150 	    arena->stats.allocated_small + arena->stats.allocated_large,
1151 	    arena->stats.mapped,
1152 	    arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1153 	    arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1154 
1155 	malloc_printf("bins:     bin   size regs pgs  requests   newruns"
1156 	    "    reruns maxruns curruns\n");
1157 	for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1158 		if (arena->bins[i].stats.nrequests == 0) {
1159 			if (gap_start == -1)
1160 				gap_start = i;
1161 		} else {
1162 			if (gap_start != -1) {
1163 				if (i > gap_start + 1) {
1164 					/* Gap of more than one size class. */
1165 					malloc_printf("[%u..%u]\n",
1166 					    gap_start, i - 1);
1167 				} else {
1168 					/* Gap of one size class. */
1169 					malloc_printf("[%u]\n", gap_start);
1170 				}
1171 				gap_start = -1;
1172 			}
1173 			malloc_printf(
1174 			    "%13u %1s %4u %4u %3u %9llu %9llu"
1175 			    " %9llu %7lu %7lu\n",
1176 			    i,
1177 			    i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1178 			    arena->bins[i].reg_size,
1179 			    arena->bins[i].nregs,
1180 			    arena->bins[i].run_size >> pagesize_2pow,
1181 			    arena->bins[i].stats.nrequests,
1182 			    arena->bins[i].stats.nruns,
1183 			    arena->bins[i].stats.reruns,
1184 			    arena->bins[i].stats.highruns,
1185 			    arena->bins[i].stats.curruns);
1186 		}
1187 	}
1188 	if (gap_start != -1) {
1189 		if (i > gap_start + 1) {
1190 			/* Gap of more than one size class. */
1191 			malloc_printf("[%u..%u]\n", gap_start, i - 1);
1192 		} else {
1193 			/* Gap of one size class. */
1194 			malloc_printf("[%u]\n", gap_start);
1195 		}
1196 	}
1197 }
1198 #endif
1199 
1200 /*
1201  * End Utility functions/macros.
1202  */
1203 /******************************************************************************/
1204 /*
1205  * Begin chunk management functions.
1206  */
1207 
1208 #ifndef lint
1209 static inline int
1210 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1211 {
1212 
1213 	assert(a != NULL);
1214 	assert(b != NULL);
1215 
1216 	if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1217 		return (-1);
1218 	else if (a->chunk == b->chunk)
1219 		return (0);
1220 	else
1221 		return (1);
1222 }
1223 
1224 /* Generate red-black tree code for chunks. */
1225 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1226 #endif
1227 
1228 static void *
1229 pages_map_align(void *addr, size_t size, int align)
1230 {
1231 	void *ret;
1232 
1233 	/*
1234 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
1235 	 * of existing mappings, and we only want to create new mappings.
1236 	 */
1237 	ret = mmap(addr, size, PROT_READ | PROT_WRITE,
1238 	    MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(align), -1, 0);
1239 	assert(ret != NULL);
1240 
1241 	if (ret == MAP_FAILED)
1242 		ret = NULL;
1243 	else if (addr != NULL && ret != addr) {
1244 		/*
1245 		 * We succeeded in mapping memory, but not in the right place.
1246 		 */
1247 		if (munmap(ret, size) == -1) {
1248 			char buf[STRERROR_BUF];
1249 
1250 			STRERROR_R(errno, buf, sizeof(buf));
1251 			_malloc_message(getprogname(),
1252 			    ": (malloc) Error in munmap(): ", buf, "\n");
1253 			if (opt_abort)
1254 				abort();
1255 		}
1256 		ret = NULL;
1257 	}
1258 
1259 	assert(ret == NULL || (addr == NULL && ret != addr)
1260 	    || (addr != NULL && ret == addr));
1261 	return (ret);
1262 }
1263 
1264 static void *
1265 pages_map(void *addr, size_t size)
1266 {
1267 
1268 	return pages_map_align(addr, size, 0);
1269 }
1270 
1271 static void
1272 pages_unmap(void *addr, size_t size)
1273 {
1274 
1275 	if (munmap(addr, size) == -1) {
1276 		char buf[STRERROR_BUF];
1277 
1278 		STRERROR_R(errno, buf, sizeof(buf));
1279 		_malloc_message(getprogname(),
1280 		    ": (malloc) Error in munmap(): ", buf, "\n");
1281 		if (opt_abort)
1282 			abort();
1283 	}
1284 }
1285 
1286 static void *
1287 chunk_alloc(size_t size)
1288 {
1289 	void *ret, *chunk;
1290 	chunk_node_t *tchunk, *delchunk;
1291 
1292 	assert(size != 0);
1293 	assert((size & chunksize_mask) == 0);
1294 
1295 	malloc_mutex_lock(&chunks_mtx);
1296 
1297 	if (size == chunksize) {
1298 		/*
1299 		 * Check for address ranges that were previously chunks and try
1300 		 * to use them.
1301 		 */
1302 
1303 		/* LINTED */
1304 		tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1305 		while (tchunk != NULL) {
1306 			/* Found an address range.  Try to recycle it. */
1307 
1308 			chunk = tchunk->chunk;
1309 			delchunk = tchunk;
1310 			/* LINTED */
1311 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1312 
1313 			/* Remove delchunk from the tree. */
1314 			/* LINTED */
1315 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1316 			base_chunk_node_dealloc(delchunk);
1317 
1318 #ifdef USE_BRK
1319 			if ((uintptr_t)chunk >= (uintptr_t)brk_base
1320 			    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1321 				/* Re-use a previously freed brk chunk. */
1322 				ret = chunk;
1323 				goto RETURN;
1324 			}
1325 #endif
1326 			if ((ret = pages_map(chunk, size)) != NULL) {
1327 				/* Success. */
1328 				goto RETURN;
1329 			}
1330 		}
1331 	}
1332 
1333 	/*
1334 	 * Try to over-allocate, but allow the OS to place the allocation
1335 	 * anywhere.  Beware of size_t wrap-around.
1336 	 */
1337 	if (size + chunksize > size) {
1338 		if ((ret = pages_map_align(NULL, size, chunksize_2pow))
1339 		    != NULL) {
1340 			goto RETURN;
1341 		}
1342 	}
1343 
1344 #ifdef USE_BRK
1345 	/*
1346 	 * Try to create allocations in brk, in order to make full use of
1347 	 * limited address space.
1348 	 */
1349 	if (brk_prev != (void *)-1) {
1350 		void *brk_cur;
1351 		intptr_t incr;
1352 
1353 		/*
1354 		 * The loop is necessary to recover from races with other
1355 		 * threads that are using brk for something other than malloc.
1356 		 */
1357 		malloc_mutex_lock(&brk_mtx);
1358 		do {
1359 			/* Get the current end of brk. */
1360 			brk_cur = sbrk(0);
1361 
1362 			/*
1363 			 * Calculate how much padding is necessary to
1364 			 * chunk-align the end of brk.
1365 			 */
1366 			incr = (intptr_t)size
1367 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1368 			if (incr == size) {
1369 				ret = brk_cur;
1370 			} else {
1371 				ret = (void *)((intptr_t)brk_cur + incr);
1372 				incr += size;
1373 			}
1374 
1375 			brk_prev = sbrk(incr);
1376 			if (brk_prev == brk_cur) {
1377 				/* Success. */
1378 				malloc_mutex_unlock(&brk_mtx);
1379 				brk_max = (void *)((intptr_t)ret + size);
1380 				goto RETURN;
1381 			}
1382 		} while (brk_prev != (void *)-1);
1383 		malloc_mutex_unlock(&brk_mtx);
1384 	}
1385 #endif
1386 
1387 	/* All strategies for allocation failed. */
1388 	ret = NULL;
1389 RETURN:
1390 	if (ret != NULL) {
1391 		chunk_node_t key;
1392 		/*
1393 		 * Clean out any entries in old_chunks that overlap with the
1394 		 * memory we just allocated.
1395 		 */
1396 		key.chunk = ret;
1397 		/* LINTED */
1398 		tchunk = RB_NFIND(chunk_tree_s, &old_chunks, &key);
1399 		while (tchunk != NULL
1400 		    && (uintptr_t)tchunk->chunk >= (uintptr_t)ret
1401 		    && (uintptr_t)tchunk->chunk < (uintptr_t)ret + size) {
1402 			delchunk = tchunk;
1403 			/* LINTED */
1404 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1405 			/* LINTED */
1406 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1407 			base_chunk_node_dealloc(delchunk);
1408 		}
1409 
1410 	}
1411 #ifdef MALLOC_STATS
1412 	if (ret != NULL) {
1413 		stats_chunks.nchunks += (size / chunksize);
1414 		stats_chunks.curchunks += (size / chunksize);
1415 	}
1416 	if (stats_chunks.curchunks > stats_chunks.highchunks)
1417 		stats_chunks.highchunks = stats_chunks.curchunks;
1418 #endif
1419 	malloc_mutex_unlock(&chunks_mtx);
1420 
1421 	assert(CHUNK_ADDR2BASE(ret) == ret);
1422 	return (ret);
1423 }
1424 
1425 static void
1426 chunk_dealloc(void *chunk, size_t size)
1427 {
1428 	chunk_node_t *node;
1429 
1430 	assert(chunk != NULL);
1431 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
1432 	assert(size != 0);
1433 	assert((size & chunksize_mask) == 0);
1434 
1435 	malloc_mutex_lock(&chunks_mtx);
1436 
1437 #ifdef USE_BRK
1438 	if ((uintptr_t)chunk >= (uintptr_t)brk_base
1439 	    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1440 		void *brk_cur;
1441 
1442 		malloc_mutex_lock(&brk_mtx);
1443 		/* Get the current end of brk. */
1444 		brk_cur = sbrk(0);
1445 
1446 		/*
1447 		 * Try to shrink the data segment if this chunk is at the end
1448 		 * of the data segment.  The sbrk() call here is subject to a
1449 		 * race condition with threads that use brk(2) or sbrk(2)
1450 		 * directly, but the alternative would be to leak memory for
1451 		 * the sake of poorly designed multi-threaded programs.
1452 		 */
1453 		if (brk_cur == brk_max
1454 		    && (void *)((uintptr_t)chunk + size) == brk_max
1455 		    && sbrk(-(intptr_t)size) == brk_max) {
1456 			malloc_mutex_unlock(&brk_mtx);
1457 			if (brk_prev == brk_max) {
1458 				/* Success. */
1459 				brk_prev = (void *)((intptr_t)brk_max
1460 				    - (intptr_t)size);
1461 				brk_max = brk_prev;
1462 			}
1463 		} else {
1464 			size_t offset;
1465 
1466 			malloc_mutex_unlock(&brk_mtx);
1467 			madvise(chunk, size, MADV_FREE);
1468 
1469 			/*
1470 			 * Iteratively create records of each chunk-sized
1471 			 * memory region that 'chunk' is comprised of, so that
1472 			 * the address range can be recycled if memory usage
1473 			 * increases later on.
1474 			 */
1475 			for (offset = 0; offset < size; offset += chunksize) {
1476 				node = base_chunk_node_alloc();
1477 				if (node == NULL)
1478 					break;
1479 
1480 				node->chunk = (void *)((uintptr_t)chunk
1481 				    + (uintptr_t)offset);
1482 				node->size = chunksize;
1483 				/* LINTED */
1484 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1485 			}
1486 		}
1487 	} else {
1488 #endif
1489 		pages_unmap(chunk, size);
1490 
1491 		/*
1492 		 * Make a record of the chunk's address, so that the address
1493 		 * range can be recycled if memory usage increases later on.
1494 		 * Don't bother to create entries if (size > chunksize), since
1495 		 * doing so could cause scalability issues for truly gargantuan
1496 		 * objects (many gigabytes or larger).
1497 		 */
1498 		if (size == chunksize) {
1499 			node = base_chunk_node_alloc();
1500 			if (node != NULL) {
1501 				node->chunk = (void *)(uintptr_t)chunk;
1502 				node->size = chunksize;
1503 				/* LINTED */
1504 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1505 			}
1506 		}
1507 #ifdef USE_BRK
1508 	}
1509 #endif
1510 
1511 #ifdef MALLOC_STATS
1512 	stats_chunks.curchunks -= (size / chunksize);
1513 #endif
1514 	malloc_mutex_unlock(&chunks_mtx);
1515 }
1516 
1517 /*
1518  * End chunk management functions.
1519  */
1520 /******************************************************************************/
1521 /*
1522  * Begin arena.
1523  */
1524 
1525 /*
1526  * Choose an arena based on a per-thread value (fast-path code, calls slow-path
1527  * code if necessary).
1528  */
1529 static inline arena_t *
1530 choose_arena(void)
1531 {
1532 	arena_t *ret;
1533 
1534 	/*
1535 	 * We can only use TLS if this is a PIC library, since for the static
1536 	 * library version, libc's malloc is used by TLS allocation, which
1537 	 * introduces a bootstrapping issue.
1538 	 */
1539 	if (__isthreaded == false) {
1540 	    /*
1541 	     * Avoid the overhead of TLS for single-threaded operation.  If the
1542 	     * app switches to threaded mode, the initial thread may end up
1543 	     * being assigned to some other arena, but this one-time switch
1544 	     * shouldn't cause significant issues.
1545 	     */
1546 	    return (arenas[0]);
1547 	}
1548 
1549 	ret = get_arenas_map();
1550 	if (ret == NULL)
1551 		ret = choose_arena_hard();
1552 
1553 	assert(ret != NULL);
1554 	return (ret);
1555 }
1556 
1557 /*
1558  * Choose an arena based on a per-thread value (slow-path code only, called
1559  * only by choose_arena()).
1560  */
1561 static arena_t *
1562 choose_arena_hard(void)
1563 {
1564 	arena_t *ret;
1565 
1566 	assert(__isthreaded);
1567 
1568 	/* Assign one of the arenas to this thread, in a round-robin fashion. */
1569 	malloc_mutex_lock(&arenas_mtx);
1570 	ret = arenas[next_arena];
1571 	if (ret == NULL)
1572 		ret = arenas_extend(next_arena);
1573 	if (ret == NULL) {
1574 		/*
1575 		 * Make sure that this function never returns NULL, so that
1576 		 * choose_arena() doesn't have to check for a NULL return
1577 		 * value.
1578 		 */
1579 		ret = arenas[0];
1580 	}
1581 	next_arena = (next_arena + 1) % narenas;
1582 	malloc_mutex_unlock(&arenas_mtx);
1583 	set_arenas_map(ret);
1584 
1585 	return (ret);
1586 }
1587 
1588 #ifndef lint
1589 static inline int
1590 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1591 {
1592 
1593 	assert(a != NULL);
1594 	assert(b != NULL);
1595 
1596 	if ((uintptr_t)a < (uintptr_t)b)
1597 		return (-1);
1598 	else if (a == b)
1599 		return (0);
1600 	else
1601 		return (1);
1602 }
1603 
1604 /* Generate red-black tree code for arena chunks. */
1605 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1606 #endif
1607 
1608 #ifndef lint
1609 static inline int
1610 arena_run_comp(arena_run_t *a, arena_run_t *b)
1611 {
1612 
1613 	assert(a != NULL);
1614 	assert(b != NULL);
1615 
1616 	if ((uintptr_t)a < (uintptr_t)b)
1617 		return (-1);
1618 	else if (a == b)
1619 		return (0);
1620 	else
1621 		return (1);
1622 }
1623 
1624 /* Generate red-black tree code for arena runs. */
1625 RB_GENERATE_STATIC(arena_run_tree_s, arena_run_s, link, arena_run_comp);
1626 #endif
1627 
1628 static inline void *
1629 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1630 {
1631 	void *ret;
1632 	unsigned i, mask, bit, regind;
1633 
1634 	assert(run->magic == ARENA_RUN_MAGIC);
1635 	assert(run->regs_minelm < bin->regs_mask_nelms);
1636 
1637 	/*
1638 	 * Move the first check outside the loop, so that run->regs_minelm can
1639 	 * be updated unconditionally, without the possibility of updating it
1640 	 * multiple times.
1641 	 */
1642 	i = run->regs_minelm;
1643 	mask = run->regs_mask[i];
1644 	if (mask != 0) {
1645 		/* Usable allocation found. */
1646 		bit = ffs((int)mask) - 1;
1647 
1648 		regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1649 		ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1650 		    + (bin->reg_size * regind));
1651 
1652 		/* Clear bit. */
1653 		mask ^= (1 << bit);
1654 		run->regs_mask[i] = mask;
1655 
1656 		return (ret);
1657 	}
1658 
1659 	for (i++; i < bin->regs_mask_nelms; i++) {
1660 		mask = run->regs_mask[i];
1661 		if (mask != 0) {
1662 			/* Usable allocation found. */
1663 			bit = ffs((int)mask) - 1;
1664 
1665 			regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1666 			ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1667 			    + (bin->reg_size * regind));
1668 
1669 			/* Clear bit. */
1670 			mask ^= (1 << bit);
1671 			run->regs_mask[i] = mask;
1672 
1673 			/*
1674 			 * Make a note that nothing before this element
1675 			 * contains a free region.
1676 			 */
1677 			run->regs_minelm = i; /* Low payoff: + (mask == 0); */
1678 
1679 			return (ret);
1680 		}
1681 	}
1682 	/* Not reached. */
1683 	/* LINTED */
1684 	assert(0);
1685 	return (NULL);
1686 }
1687 
1688 static inline void
1689 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1690 {
1691 	/*
1692 	 * To divide by a number D that is not a power of two we multiply
1693 	 * by (2^21 / D) and then right shift by 21 positions.
1694 	 *
1695 	 *   X / D
1696 	 *
1697 	 * becomes
1698 	 *
1699 	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
1700 	 */
1701 #define SIZE_INV_SHIFT 21
1702 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
1703 	static const unsigned size_invs[] = {
1704 	    SIZE_INV(3),
1705 	    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1706 	    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1707 	    SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1708 	    SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1709 	    SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1710 	    SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1711 	    SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1712 #if (QUANTUM_2POW_MIN < 4)
1713 	    ,
1714 	    SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1715 	    SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1716 	    SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1717 	    SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1718 	    SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1719 	    SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1720 	    SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1721 	    SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1722 #endif
1723 	};
1724 	unsigned diff, regind, elm, bit;
1725 
1726 	/* LINTED */
1727 	assert(run->magic == ARENA_RUN_MAGIC);
1728 	assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1729 	    >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1730 
1731 	/*
1732 	 * Avoid doing division with a variable divisor if possible.  Using
1733 	 * actual division here can reduce allocator throughput by over 20%!
1734 	 */
1735 	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1736 	if ((size & (size - 1)) == 0) {
1737 		/*
1738 		 * log2_table allows fast division of a power of two in the
1739 		 * [1..128] range.
1740 		 *
1741 		 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1742 		 */
1743 		static const unsigned char log2_table[] = {
1744 		    0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1745 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1746 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1747 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1748 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1749 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1750 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1751 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1752 		};
1753 
1754 		if (size <= 128)
1755 			regind = (diff >> log2_table[size - 1]);
1756 		else if (size <= 32768)
1757 			regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1758 		else {
1759 			/*
1760 			 * The page size is too large for us to use the lookup
1761 			 * table.  Use real division.
1762 			 */
1763 			regind = (unsigned)(diff / size);
1764 		}
1765 	} else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
1766 	    << QUANTUM_2POW_MIN) + 2) {
1767 		regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1768 		regind >>= SIZE_INV_SHIFT;
1769 	} else {
1770 		/*
1771 		 * size_invs isn't large enough to handle this size class, so
1772 		 * calculate regind using actual division.  This only happens
1773 		 * if the user increases small_max via the 'S' runtime
1774 		 * configuration option.
1775 		 */
1776 		regind = (unsigned)(diff / size);
1777 	};
1778 	assert(diff == regind * size);
1779 	assert(regind < bin->nregs);
1780 
1781 	elm = regind >> (SIZEOF_INT_2POW + 3);
1782 	if (elm < run->regs_minelm)
1783 		run->regs_minelm = elm;
1784 	bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1785 	assert((run->regs_mask[elm] & (1 << bit)) == 0);
1786 	run->regs_mask[elm] |= (1 << bit);
1787 #undef SIZE_INV
1788 #undef SIZE_INV_SHIFT
1789 }
1790 
1791 static void
1792 arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
1793 {
1794 	arena_chunk_t *chunk;
1795 	unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
1796 	unsigned i;
1797 
1798 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1799 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1800 	    >> pagesize_2pow);
1801 	total_pages = chunk->map[run_ind].npages;
1802 	need_pages = (unsigned)(size >> pagesize_2pow);
1803 	assert(need_pages <= total_pages);
1804 	rem_pages = total_pages - need_pages;
1805 
1806 	/* Split enough pages from the front of run to fit allocation size. */
1807 	map_offset = run_ind;
1808 	for (i = 0; i < need_pages; i++) {
1809 		chunk->map[map_offset + i].npages = need_pages;
1810 		chunk->map[map_offset + i].pos = i;
1811 	}
1812 
1813 	/* Keep track of trailing unused pages for later use. */
1814 	if (rem_pages > 0) {
1815 		/* Update map for trailing pages. */
1816 		map_offset += need_pages;
1817 		chunk->map[map_offset].npages = rem_pages;
1818 		chunk->map[map_offset].pos = POS_FREE;
1819 		chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
1820 		chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
1821 	}
1822 
1823 	chunk->pages_used += need_pages;
1824 }
1825 
1826 static arena_chunk_t *
1827 arena_chunk_alloc(arena_t *arena)
1828 {
1829 	arena_chunk_t *chunk;
1830 
1831 	if (arena->spare != NULL) {
1832 		chunk = arena->spare;
1833 		arena->spare = NULL;
1834 
1835 		/* LINTED */
1836 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1837 	} else {
1838 		chunk = (arena_chunk_t *)chunk_alloc(chunksize);
1839 		if (chunk == NULL)
1840 			return (NULL);
1841 #ifdef MALLOC_STATS
1842 		arena->stats.mapped += chunksize;
1843 #endif
1844 
1845 		chunk->arena = arena;
1846 
1847 		/* LINTED */
1848 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1849 
1850 		/*
1851 		 * Claim that no pages are in use, since the header is merely
1852 		 * overhead.
1853 		 */
1854 		chunk->pages_used = 0;
1855 
1856 		chunk->max_frun_npages = chunk_npages -
1857 		    arena_chunk_header_npages;
1858 		chunk->min_frun_ind = arena_chunk_header_npages;
1859 
1860 		/*
1861 		 * Initialize enough of the map to support one maximal free run.
1862 		 */
1863 		chunk->map[arena_chunk_header_npages].npages = chunk_npages -
1864 		    arena_chunk_header_npages;
1865 		chunk->map[arena_chunk_header_npages].pos = POS_FREE;
1866 		chunk->map[chunk_npages - 1].npages = chunk_npages -
1867 		    arena_chunk_header_npages;
1868 		chunk->map[chunk_npages - 1].pos = POS_FREE;
1869 	}
1870 
1871 	return (chunk);
1872 }
1873 
1874 static void
1875 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
1876 {
1877 
1878 	/*
1879 	 * Remove chunk from the chunk tree, regardless of whether this chunk
1880 	 * will be cached, so that the arena does not use it.
1881 	 */
1882 	/* LINTED */
1883 	RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1884 
1885 	if (opt_hint == false) {
1886 		if (arena->spare != NULL) {
1887 			chunk_dealloc((void *)arena->spare, chunksize);
1888 #ifdef MALLOC_STATS
1889 			arena->stats.mapped -= chunksize;
1890 #endif
1891 		}
1892 		arena->spare = chunk;
1893 	} else {
1894 		assert(arena->spare == NULL);
1895 		chunk_dealloc((void *)chunk, chunksize);
1896 #ifdef MALLOC_STATS
1897 		arena->stats.mapped -= chunksize;
1898 #endif
1899 	}
1900 }
1901 
1902 static arena_run_t *
1903 arena_run_alloc(arena_t *arena, size_t size)
1904 {
1905 	arena_chunk_t *chunk;
1906 	arena_run_t *run;
1907 	unsigned need_npages, limit_pages, compl_need_npages;
1908 
1909 	assert(size <= (chunksize - (arena_chunk_header_npages <<
1910 	    pagesize_2pow)));
1911 	assert((size & pagesize_mask) == 0);
1912 
1913 	/*
1914 	 * Search through arena's chunks in address order for a free run that is
1915 	 * large enough.  Look for the first fit.
1916 	 */
1917 	need_npages = (unsigned)(size >> pagesize_2pow);
1918 	limit_pages = chunk_npages - arena_chunk_header_npages;
1919 	compl_need_npages = limit_pages - need_npages;
1920 	/* LINTED */
1921 	RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
1922 		/*
1923 		 * Avoid searching this chunk if there are not enough
1924 		 * contiguous free pages for there to possibly be a large
1925 		 * enough free run.
1926 		 */
1927 		if (chunk->pages_used <= compl_need_npages &&
1928 		    need_npages <= chunk->max_frun_npages) {
1929 			arena_chunk_map_t *mapelm;
1930 			unsigned i;
1931 			unsigned max_frun_npages = 0;
1932 			unsigned min_frun_ind = chunk_npages;
1933 
1934 			assert(chunk->min_frun_ind >=
1935 			    arena_chunk_header_npages);
1936 			for (i = chunk->min_frun_ind; i < chunk_npages;) {
1937 				mapelm = &chunk->map[i];
1938 				if (mapelm->pos == POS_FREE) {
1939 					if (mapelm->npages >= need_npages) {
1940 						run = (arena_run_t *)
1941 						    ((uintptr_t)chunk + (i <<
1942 						    pagesize_2pow));
1943 						/* Update page map. */
1944 						arena_run_split(arena, run,
1945 						    size);
1946 						return (run);
1947 					}
1948 					if (mapelm->npages >
1949 					    max_frun_npages) {
1950 						max_frun_npages =
1951 						    mapelm->npages;
1952 					}
1953 					if (i < min_frun_ind) {
1954 						min_frun_ind = i;
1955 						if (i < chunk->min_frun_ind)
1956 							chunk->min_frun_ind = i;
1957 					}
1958 				}
1959 				i += mapelm->npages;
1960 			}
1961 			/*
1962 			 * Search failure.  Reset cached chunk->max_frun_npages.
1963 			 * chunk->min_frun_ind was already reset above (if
1964 			 * necessary).
1965 			 */
1966 			chunk->max_frun_npages = max_frun_npages;
1967 		}
1968 	}
1969 
1970 	/*
1971 	 * No usable runs.  Create a new chunk from which to allocate the run.
1972 	 */
1973 	chunk = arena_chunk_alloc(arena);
1974 	if (chunk == NULL)
1975 		return (NULL);
1976 	run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
1977 	    pagesize_2pow));
1978 	/* Update page map. */
1979 	arena_run_split(arena, run, size);
1980 	return (run);
1981 }
1982 
1983 static void
1984 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
1985 {
1986 	arena_chunk_t *chunk;
1987 	unsigned run_ind, run_pages;
1988 
1989 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1990 
1991 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1992 	    >> pagesize_2pow);
1993 	assert(run_ind >= arena_chunk_header_npages);
1994 	assert(run_ind < (chunksize >> pagesize_2pow));
1995 	run_pages = (unsigned)(size >> pagesize_2pow);
1996 	assert(run_pages == chunk->map[run_ind].npages);
1997 
1998 	/* Subtract pages from count of pages used in chunk. */
1999 	chunk->pages_used -= run_pages;
2000 
2001 	/* Mark run as deallocated. */
2002 	assert(chunk->map[run_ind].npages == run_pages);
2003 	chunk->map[run_ind].pos = POS_FREE;
2004 	assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
2005 	chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
2006 
2007 	/*
2008 	 * Tell the kernel that we don't need the data in this run, but only if
2009 	 * requested via runtime configuration.
2010 	 */
2011 	if (opt_hint)
2012 		madvise(run, size, MADV_FREE);
2013 
2014 	/* Try to coalesce with neighboring runs. */
2015 	if (run_ind > arena_chunk_header_npages &&
2016 	    chunk->map[run_ind - 1].pos == POS_FREE) {
2017 		unsigned prev_npages;
2018 
2019 		/* Coalesce with previous run. */
2020 		prev_npages = chunk->map[run_ind - 1].npages;
2021 		run_ind -= prev_npages;
2022 		assert(chunk->map[run_ind].npages == prev_npages);
2023 		assert(chunk->map[run_ind].pos == POS_FREE);
2024 		run_pages += prev_npages;
2025 
2026 		chunk->map[run_ind].npages = run_pages;
2027 		assert(chunk->map[run_ind].pos == POS_FREE);
2028 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2029 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2030 	}
2031 
2032 	if (run_ind + run_pages < chunk_npages &&
2033 	    chunk->map[run_ind + run_pages].pos == POS_FREE) {
2034 		unsigned next_npages;
2035 
2036 		/* Coalesce with next run. */
2037 		next_npages = chunk->map[run_ind + run_pages].npages;
2038 		run_pages += next_npages;
2039 		assert(chunk->map[run_ind + run_pages - 1].npages ==
2040 		    next_npages);
2041 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2042 
2043 		chunk->map[run_ind].npages = run_pages;
2044 		chunk->map[run_ind].pos = POS_FREE;
2045 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2046 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2047 	}
2048 
2049 	if (chunk->map[run_ind].npages > chunk->max_frun_npages)
2050 		chunk->max_frun_npages = chunk->map[run_ind].npages;
2051 	if (run_ind < chunk->min_frun_ind)
2052 		chunk->min_frun_ind = run_ind;
2053 
2054 	/* Deallocate chunk if it is now completely unused. */
2055 	if (chunk->pages_used == 0)
2056 		arena_chunk_dealloc(arena, chunk);
2057 }
2058 
2059 static arena_run_t *
2060 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2061 {
2062 	arena_run_t *run;
2063 	unsigned i, remainder;
2064 
2065 	/* Look for a usable run. */
2066 	/* LINTED */
2067 	if ((run = RB_MIN(arena_run_tree_s, &bin->runs)) != NULL) {
2068 		/* run is guaranteed to have available space. */
2069 		/* LINTED */
2070 		RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2071 #ifdef MALLOC_STATS
2072 		bin->stats.reruns++;
2073 #endif
2074 		return (run);
2075 	}
2076 	/* No existing runs have any space available. */
2077 
2078 	/* Allocate a new run. */
2079 	run = arena_run_alloc(arena, bin->run_size);
2080 	if (run == NULL)
2081 		return (NULL);
2082 
2083 	/* Initialize run internals. */
2084 	run->bin = bin;
2085 
2086 	for (i = 0; i < bin->regs_mask_nelms; i++)
2087 		run->regs_mask[i] = UINT_MAX;
2088 	remainder = bin->nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1);
2089 	if (remainder != 0) {
2090 		/* The last element has spare bits that need to be unset. */
2091 		run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2092 		    - remainder));
2093 	}
2094 
2095 	run->regs_minelm = 0;
2096 
2097 	run->nfree = bin->nregs;
2098 #ifdef MALLOC_DEBUG
2099 	run->magic = ARENA_RUN_MAGIC;
2100 #endif
2101 
2102 #ifdef MALLOC_STATS
2103 	bin->stats.nruns++;
2104 	bin->stats.curruns++;
2105 	if (bin->stats.curruns > bin->stats.highruns)
2106 		bin->stats.highruns = bin->stats.curruns;
2107 #endif
2108 	return (run);
2109 }
2110 
2111 /* bin->runcur must have space available before this function is called. */
2112 static inline void *
2113 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2114 {
2115 	void *ret;
2116 
2117 	assert(run->magic == ARENA_RUN_MAGIC);
2118 	assert(run->nfree > 0);
2119 
2120 	ret = arena_run_reg_alloc(run, bin);
2121 	assert(ret != NULL);
2122 	run->nfree--;
2123 
2124 	return (ret);
2125 }
2126 
2127 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2128 static void *
2129 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2130 {
2131 
2132 	bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2133 	if (bin->runcur == NULL)
2134 		return (NULL);
2135 	assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2136 	assert(bin->runcur->nfree > 0);
2137 
2138 	return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2139 }
2140 
2141 /*
2142  * Calculate bin->run_size such that it meets the following constraints:
2143  *
2144  *   *) bin->run_size >= min_run_size
2145  *   *) bin->run_size <= arena_maxclass
2146  *   *) bin->run_size <= RUN_MAX_SMALL
2147  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2148  *
2149  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2150  * also calculated here, since these settings are all interdependent.
2151  */
2152 static size_t
2153 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
2154 {
2155 	size_t try_run_size, good_run_size;
2156 	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
2157 	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
2158 	float max_ovrhd = RUN_MAX_OVRHD;
2159 
2160 	assert(min_run_size >= pagesize);
2161 	assert(min_run_size <= arena_maxclass);
2162 	assert(min_run_size <= RUN_MAX_SMALL);
2163 
2164 	/*
2165 	 * Calculate known-valid settings before entering the run_size
2166 	 * expansion loop, so that the first part of the loop always copies
2167 	 * valid settings.
2168 	 *
2169 	 * The do..while loop iteratively reduces the number of regions until
2170 	 * the run header and the regions no longer overlap.  A closed formula
2171 	 * would be quite messy, since there is an interdependency between the
2172 	 * header's mask length and the number of regions.
2173 	 */
2174 	try_run_size = min_run_size;
2175 	try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2176 	    bin->reg_size) + 1); /* Counter-act the first line of the loop. */
2177 	do {
2178 		try_nregs--;
2179 		try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2180 		    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
2181 		try_reg0_offset = (unsigned)(try_run_size -
2182 		    (try_nregs * bin->reg_size));
2183 	} while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
2184 	    > try_reg0_offset);
2185 
2186 	/* run_size expansion loop. */
2187 	do {
2188 		/*
2189 		 * Copy valid settings before trying more aggressive settings.
2190 		 */
2191 		good_run_size = try_run_size;
2192 		good_nregs = try_nregs;
2193 		good_mask_nelms = try_mask_nelms;
2194 		good_reg0_offset = try_reg0_offset;
2195 
2196 		/* Try more aggressive settings. */
2197 		try_run_size += pagesize;
2198 		try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2199 		    bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
2200 		do {
2201 			try_nregs--;
2202 			try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2203 			    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ?
2204 			    1 : 0);
2205 			try_reg0_offset = (unsigned)(try_run_size - (try_nregs *
2206 			    bin->reg_size));
2207 		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
2208 		    (try_mask_nelms - 1)) > try_reg0_offset);
2209 	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
2210 	    && max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3))
2211 	    && ((float)(try_reg0_offset)) / ((float)(try_run_size)) >
2212 	    max_ovrhd);
2213 
2214 	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
2215 	    <= good_reg0_offset);
2216 	assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
2217 
2218 	/* Copy final settings. */
2219 	bin->run_size = good_run_size;
2220 	bin->nregs = good_nregs;
2221 	bin->regs_mask_nelms = good_mask_nelms;
2222 	bin->reg0_offset = good_reg0_offset;
2223 
2224 	return (good_run_size);
2225 }
2226 
2227 static void *
2228 arena_malloc(arena_t *arena, size_t size)
2229 {
2230 	void *ret;
2231 
2232 	assert(arena != NULL);
2233 	assert(arena->magic == ARENA_MAGIC);
2234 	assert(size != 0);
2235 	assert(QUANTUM_CEILING(size) <= arena_maxclass);
2236 
2237 	if (size <= bin_maxclass) {
2238 		arena_bin_t *bin;
2239 		arena_run_t *run;
2240 
2241 		/* Small allocation. */
2242 
2243 		if (size < small_min) {
2244 			/* Tiny. */
2245 			size = pow2_ceil(size);
2246 			bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
2247 			    1)))];
2248 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2249 			/*
2250 			 * Bin calculation is always correct, but we may need
2251 			 * to fix size for the purposes of assertions and/or
2252 			 * stats accuracy.
2253 			 */
2254 			if (size < (1 << TINY_MIN_2POW))
2255 				size = (1 << TINY_MIN_2POW);
2256 #endif
2257 		} else if (size <= small_max) {
2258 			/* Quantum-spaced. */
2259 			size = QUANTUM_CEILING(size);
2260 			bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2261 			    - 1];
2262 		} else {
2263 			/* Sub-page. */
2264 			size = pow2_ceil(size);
2265 			bin = &arena->bins[ntbins + nqbins
2266 			    + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
2267 		}
2268 		assert(size == bin->reg_size);
2269 
2270 		malloc_mutex_lock(&arena->mtx);
2271 		if ((run = bin->runcur) != NULL && run->nfree > 0)
2272 			ret = arena_bin_malloc_easy(arena, bin, run);
2273 		else
2274 			ret = arena_bin_malloc_hard(arena, bin);
2275 
2276 		if (ret == NULL) {
2277 			malloc_mutex_unlock(&arena->mtx);
2278 			return (NULL);
2279 		}
2280 
2281 #ifdef MALLOC_STATS
2282 		bin->stats.nrequests++;
2283 		arena->stats.nmalloc_small++;
2284 		arena->stats.allocated_small += size;
2285 #endif
2286 	} else {
2287 		/* Large allocation. */
2288 		size = PAGE_CEILING(size);
2289 		malloc_mutex_lock(&arena->mtx);
2290 		ret = (void *)arena_run_alloc(arena, size);
2291 		if (ret == NULL) {
2292 			malloc_mutex_unlock(&arena->mtx);
2293 			return (NULL);
2294 		}
2295 #ifdef MALLOC_STATS
2296 		arena->stats.nmalloc_large++;
2297 		arena->stats.allocated_large += size;
2298 #endif
2299 	}
2300 
2301 	malloc_mutex_unlock(&arena->mtx);
2302 
2303 	if (opt_junk)
2304 		memset(ret, 0xa5, size);
2305 	else if (opt_zero)
2306 		memset(ret, 0, size);
2307 	return (ret);
2308 }
2309 
2310 static inline void
2311 arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
2312     unsigned npages)
2313 {
2314 	unsigned i;
2315 
2316 	assert(npages > 0);
2317 
2318 	/*
2319 	 * Modifiy the map such that arena_run_dalloc() sees the run as
2320 	 * separately allocated.
2321 	 */
2322 	for (i = 0; i < npages; i++) {
2323 		chunk->map[pageind + i].npages = npages;
2324 		chunk->map[pageind + i].pos = i;
2325 	}
2326 	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
2327 	    pagesize_2pow)), npages << pagesize_2pow);
2328 }
2329 
2330 /* Only handles large allocations that require more than page alignment. */
2331 static void *
2332 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
2333 {
2334 	void *ret;
2335 	size_t offset;
2336 	arena_chunk_t *chunk;
2337 	unsigned pageind, i, npages;
2338 
2339 	assert((size & pagesize_mask) == 0);
2340 	assert((alignment & pagesize_mask) == 0);
2341 
2342 	npages = (unsigned)(size >> pagesize_2pow);
2343 
2344 	malloc_mutex_lock(&arena->mtx);
2345 	ret = (void *)arena_run_alloc(arena, alloc_size);
2346 	if (ret == NULL) {
2347 		malloc_mutex_unlock(&arena->mtx);
2348 		return (NULL);
2349 	}
2350 
2351 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
2352 
2353 	offset = (uintptr_t)ret & (alignment - 1);
2354 	assert((offset & pagesize_mask) == 0);
2355 	assert(offset < alloc_size);
2356 	if (offset == 0) {
2357 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2358 		    pagesize_2pow);
2359 
2360 		/* Update the map for the run to be kept. */
2361 		for (i = 0; i < npages; i++) {
2362 			chunk->map[pageind + i].npages = npages;
2363 			assert(chunk->map[pageind + i].pos == i);
2364 		}
2365 
2366 		/* Trim trailing space. */
2367 		arena_palloc_trim(arena, chunk, pageind + npages,
2368 		    (unsigned)((alloc_size - size) >> pagesize_2pow));
2369 	} else {
2370 		size_t leadsize, trailsize;
2371 
2372 		leadsize = alignment - offset;
2373 		ret = (void *)((uintptr_t)ret + leadsize);
2374 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2375 		    pagesize_2pow);
2376 
2377 		/* Update the map for the run to be kept. */
2378 		for (i = 0; i < npages; i++) {
2379 			chunk->map[pageind + i].npages = npages;
2380 			chunk->map[pageind + i].pos = i;
2381 		}
2382 
2383 		/* Trim leading space. */
2384 		arena_palloc_trim(arena, chunk,
2385 		    (unsigned)(pageind - (leadsize >> pagesize_2pow)),
2386 		    (unsigned)(leadsize >> pagesize_2pow));
2387 
2388 		trailsize = alloc_size - leadsize - size;
2389 		if (trailsize != 0) {
2390 			/* Trim trailing space. */
2391 			assert(trailsize < alloc_size);
2392 			arena_palloc_trim(arena, chunk, pageind + npages,
2393 			    (unsigned)(trailsize >> pagesize_2pow));
2394 		}
2395 	}
2396 
2397 #ifdef MALLOC_STATS
2398 	arena->stats.nmalloc_large++;
2399 	arena->stats.allocated_large += size;
2400 #endif
2401 	malloc_mutex_unlock(&arena->mtx);
2402 
2403 	if (opt_junk)
2404 		memset(ret, 0xa5, size);
2405 	else if (opt_zero)
2406 		memset(ret, 0, size);
2407 	return (ret);
2408 }
2409 
2410 /* Return the size of the allocation pointed to by ptr. */
2411 static size_t
2412 arena_salloc(const void *ptr)
2413 {
2414 	size_t ret;
2415 	arena_chunk_t *chunk;
2416 	arena_chunk_map_t *mapelm;
2417 	unsigned pageind;
2418 
2419 	assert(ptr != NULL);
2420 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2421 
2422 	/*
2423 	 * No arena data structures that we query here can change in a way that
2424 	 * affects this function, so we don't need to lock.
2425 	 */
2426 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2427 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2428 	    pagesize_2pow);
2429 	mapelm = &chunk->map[pageind];
2430 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2431 	    pagesize_2pow)) {
2432 		arena_run_t *run;
2433 
2434 		pageind -= mapelm->pos;
2435 
2436 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2437 		    pagesize_2pow));
2438 		assert(run->magic == ARENA_RUN_MAGIC);
2439 		ret = run->bin->reg_size;
2440 	} else
2441 		ret = mapelm->npages << pagesize_2pow;
2442 
2443 	return (ret);
2444 }
2445 
2446 static void *
2447 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2448 {
2449 	void *ret;
2450 
2451 	/* Avoid moving the allocation if the size class would not change. */
2452 	if (size < small_min) {
2453 		if (oldsize < small_min &&
2454 		    ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
2455 		    == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
2456 			goto IN_PLACE;
2457 	} else if (size <= small_max) {
2458 		if (oldsize >= small_min && oldsize <= small_max &&
2459 		    (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2460 		    == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2461 			goto IN_PLACE;
2462 	} else {
2463 		/*
2464 		 * We make no attempt to resize runs here, though it would be
2465 		 * possible to do so.
2466 		 */
2467 		if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
2468 			goto IN_PLACE;
2469 	}
2470 
2471 	/*
2472 	 * If we get here, then size and oldsize are different enough that we
2473 	 * need to use a different size class.  In that case, fall back to
2474 	 * allocating new space and copying.
2475 	 */
2476 	ret = arena_malloc(choose_arena(), size);
2477 	if (ret == NULL)
2478 		return (NULL);
2479 
2480 	/* Junk/zero-filling were already done by arena_malloc(). */
2481 	if (size < oldsize)
2482 		memcpy(ret, ptr, size);
2483 	else
2484 		memcpy(ret, ptr, oldsize);
2485 	idalloc(ptr);
2486 	return (ret);
2487 IN_PLACE:
2488 	if (opt_junk && size < oldsize)
2489 		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
2490 	else if (opt_zero && size > oldsize)
2491 		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
2492 	return (ptr);
2493 }
2494 
2495 static void
2496 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2497 {
2498 	unsigned pageind;
2499 	arena_chunk_map_t *mapelm;
2500 	size_t size;
2501 
2502 	assert(arena != NULL);
2503 	assert(arena->magic == ARENA_MAGIC);
2504 	assert(chunk->arena == arena);
2505 	assert(ptr != NULL);
2506 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2507 
2508 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2509 	    pagesize_2pow);
2510 	mapelm = &chunk->map[pageind];
2511 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2512 	    pagesize_2pow)) {
2513 		arena_run_t *run;
2514 		arena_bin_t *bin;
2515 
2516 		/* Small allocation. */
2517 
2518 		pageind -= mapelm->pos;
2519 
2520 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2521 		    pagesize_2pow));
2522 		assert(run->magic == ARENA_RUN_MAGIC);
2523 		bin = run->bin;
2524 		size = bin->reg_size;
2525 
2526 		if (opt_junk)
2527 			memset(ptr, 0x5a, size);
2528 
2529 		malloc_mutex_lock(&arena->mtx);
2530 		arena_run_reg_dalloc(run, bin, ptr, size);
2531 		run->nfree++;
2532 
2533 		if (run->nfree == bin->nregs) {
2534 			/* Deallocate run. */
2535 			if (run == bin->runcur)
2536 				bin->runcur = NULL;
2537 			else if (bin->nregs != 1) {
2538 				/*
2539 				 * This block's conditional is necessary because
2540 				 * if the run only contains one region, then it
2541 				 * never gets inserted into the non-full runs
2542 				 * tree.
2543 				 */
2544 				/* LINTED */
2545 				RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2546 			}
2547 #ifdef MALLOC_DEBUG
2548 			run->magic = 0;
2549 #endif
2550 			arena_run_dalloc(arena, run, bin->run_size);
2551 #ifdef MALLOC_STATS
2552 			bin->stats.curruns--;
2553 #endif
2554 		} else if (run->nfree == 1 && run != bin->runcur) {
2555 			/*
2556 			 * Make sure that bin->runcur always refers to the
2557 			 * lowest non-full run, if one exists.
2558 			 */
2559 			if (bin->runcur == NULL)
2560 				bin->runcur = run;
2561 			else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
2562 				/* Switch runcur. */
2563 				if (bin->runcur->nfree > 0) {
2564 					/* Insert runcur. */
2565 					/* LINTED */
2566 					RB_INSERT(arena_run_tree_s, &bin->runs,
2567 					    bin->runcur);
2568 				}
2569 				bin->runcur = run;
2570 			} else {
2571 				/* LINTED */
2572 				RB_INSERT(arena_run_tree_s, &bin->runs, run);
2573 			}
2574 		}
2575 #ifdef MALLOC_STATS
2576 		arena->stats.allocated_small -= size;
2577 		arena->stats.ndalloc_small++;
2578 #endif
2579 	} else {
2580 		/* Large allocation. */
2581 
2582 		size = mapelm->npages << pagesize_2pow;
2583 		assert((((uintptr_t)ptr) & pagesize_mask) == 0);
2584 
2585 		if (opt_junk)
2586 			memset(ptr, 0x5a, size);
2587 
2588 		malloc_mutex_lock(&arena->mtx);
2589 		arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2590 #ifdef MALLOC_STATS
2591 		arena->stats.allocated_large -= size;
2592 		arena->stats.ndalloc_large++;
2593 #endif
2594 	}
2595 
2596 	malloc_mutex_unlock(&arena->mtx);
2597 }
2598 
2599 static bool
2600 arena_new(arena_t *arena)
2601 {
2602 	unsigned i;
2603 	arena_bin_t *bin;
2604 	size_t prev_run_size;
2605 
2606 	malloc_mutex_init(&arena->mtx);
2607 
2608 #ifdef MALLOC_STATS
2609 	memset(&arena->stats, 0, sizeof(arena_stats_t));
2610 #endif
2611 
2612 	/* Initialize chunks. */
2613 	RB_INIT(&arena->chunks);
2614 	arena->spare = NULL;
2615 
2616 	/* Initialize bins. */
2617 	prev_run_size = pagesize;
2618 
2619 	/* (2^n)-spaced tiny bins. */
2620 	for (i = 0; i < ntbins; i++) {
2621 		bin = &arena->bins[i];
2622 		bin->runcur = NULL;
2623 		RB_INIT(&bin->runs);
2624 
2625 		bin->reg_size = (1 << (TINY_MIN_2POW + i));
2626 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2627 
2628 #ifdef MALLOC_STATS
2629 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2630 #endif
2631 	}
2632 
2633 	/* Quantum-spaced bins. */
2634 	for (; i < ntbins + nqbins; i++) {
2635 		bin = &arena->bins[i];
2636 		bin->runcur = NULL;
2637 		RB_INIT(&bin->runs);
2638 
2639 		bin->reg_size = quantum * (i - ntbins + 1);
2640 /*
2641 		pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2642 */
2643 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2644 
2645 #ifdef MALLOC_STATS
2646 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2647 #endif
2648 	}
2649 
2650 	/* (2^n)-spaced sub-page bins. */
2651 	for (; i < ntbins + nqbins + nsbins; i++) {
2652 		bin = &arena->bins[i];
2653 		bin->runcur = NULL;
2654 		RB_INIT(&bin->runs);
2655 
2656 		bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2657 
2658 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2659 
2660 #ifdef MALLOC_STATS
2661 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2662 #endif
2663 	}
2664 
2665 #ifdef MALLOC_DEBUG
2666 	arena->magic = ARENA_MAGIC;
2667 #endif
2668 
2669 	return (false);
2670 }
2671 
2672 /* Create a new arena and insert it into the arenas array at index ind. */
2673 static arena_t *
2674 arenas_extend(unsigned ind)
2675 {
2676 	arena_t *ret;
2677 
2678 	/* Allocate enough space for trailing bins. */
2679 	ret = (arena_t *)base_alloc(sizeof(arena_t)
2680 	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2681 	if (ret != NULL && arena_new(ret) == false) {
2682 		arenas[ind] = ret;
2683 		return (ret);
2684 	}
2685 	/* Only reached if there is an OOM error. */
2686 
2687 	/*
2688 	 * OOM here is quite inconvenient to propagate, since dealing with it
2689 	 * would require a check for failure in the fast path.  Instead, punt
2690 	 * by using arenas[0].  In practice, this is an extremely unlikely
2691 	 * failure.
2692 	 */
2693 	_malloc_message(getprogname(),
2694 	    ": (malloc) Error initializing arena\n", "", "");
2695 	if (opt_abort)
2696 		abort();
2697 
2698 	return (arenas[0]);
2699 }
2700 
2701 /*
2702  * End arena.
2703  */
2704 /******************************************************************************/
2705 /*
2706  * Begin general internal functions.
2707  */
2708 
2709 static void *
2710 huge_malloc(size_t size)
2711 {
2712 	void *ret;
2713 	size_t csize;
2714 	chunk_node_t *node;
2715 
2716 	/* Allocate one or more contiguous chunks for this request. */
2717 
2718 	csize = CHUNK_CEILING(size);
2719 	if (csize == 0) {
2720 		/* size is large enough to cause size_t wrap-around. */
2721 		return (NULL);
2722 	}
2723 
2724 	/* Allocate a chunk node with which to track the chunk. */
2725 	node = base_chunk_node_alloc();
2726 	if (node == NULL)
2727 		return (NULL);
2728 
2729 	ret = chunk_alloc(csize);
2730 	if (ret == NULL) {
2731 		base_chunk_node_dealloc(node);
2732 		return (NULL);
2733 	}
2734 
2735 	/* Insert node into huge. */
2736 	node->chunk = ret;
2737 	node->size = csize;
2738 
2739 	malloc_mutex_lock(&chunks_mtx);
2740 	RB_INSERT(chunk_tree_s, &huge, node);
2741 #ifdef MALLOC_STATS
2742 	huge_nmalloc++;
2743 	huge_allocated += csize;
2744 #endif
2745 	malloc_mutex_unlock(&chunks_mtx);
2746 
2747 	if (opt_junk)
2748 		memset(ret, 0xa5, csize);
2749 	else if (opt_zero)
2750 		memset(ret, 0, csize);
2751 
2752 	return (ret);
2753 }
2754 
2755 /* Only handles large allocations that require more than chunk alignment. */
2756 static void *
2757 huge_palloc(size_t alignment, size_t size)
2758 {
2759 	void *ret;
2760 	size_t alloc_size, chunk_size, offset;
2761 	chunk_node_t *node;
2762 
2763 	/*
2764 	 * This allocation requires alignment that is even larger than chunk
2765 	 * alignment.  This means that huge_malloc() isn't good enough.
2766 	 *
2767 	 * Allocate almost twice as many chunks as are demanded by the size or
2768 	 * alignment, in order to assure the alignment can be achieved, then
2769 	 * unmap leading and trailing chunks.
2770 	 */
2771 	assert(alignment >= chunksize);
2772 
2773 	chunk_size = CHUNK_CEILING(size);
2774 
2775 	if (size >= alignment)
2776 		alloc_size = chunk_size + alignment - chunksize;
2777 	else
2778 		alloc_size = (alignment << 1) - chunksize;
2779 
2780 	/* Allocate a chunk node with which to track the chunk. */
2781 	node = base_chunk_node_alloc();
2782 	if (node == NULL)
2783 		return (NULL);
2784 
2785 	ret = chunk_alloc(alloc_size);
2786 	if (ret == NULL) {
2787 		base_chunk_node_dealloc(node);
2788 		return (NULL);
2789 	}
2790 
2791 	offset = (uintptr_t)ret & (alignment - 1);
2792 	assert((offset & chunksize_mask) == 0);
2793 	assert(offset < alloc_size);
2794 	if (offset == 0) {
2795 		/* Trim trailing space. */
2796 		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
2797 		    - chunk_size);
2798 	} else {
2799 		size_t trailsize;
2800 
2801 		/* Trim leading space. */
2802 		chunk_dealloc(ret, alignment - offset);
2803 
2804 		ret = (void *)((uintptr_t)ret + (alignment - offset));
2805 
2806 		trailsize = alloc_size - (alignment - offset) - chunk_size;
2807 		if (trailsize != 0) {
2808 		    /* Trim trailing space. */
2809 		    assert(trailsize < alloc_size);
2810 		    chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
2811 			trailsize);
2812 		}
2813 	}
2814 
2815 	/* Insert node into huge. */
2816 	node->chunk = ret;
2817 	node->size = chunk_size;
2818 
2819 	malloc_mutex_lock(&chunks_mtx);
2820 	RB_INSERT(chunk_tree_s, &huge, node);
2821 #ifdef MALLOC_STATS
2822 	huge_nmalloc++;
2823 	huge_allocated += chunk_size;
2824 #endif
2825 	malloc_mutex_unlock(&chunks_mtx);
2826 
2827 	if (opt_junk)
2828 		memset(ret, 0xa5, chunk_size);
2829 	else if (opt_zero)
2830 		memset(ret, 0, chunk_size);
2831 
2832 	return (ret);
2833 }
2834 
2835 static void *
2836 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2837 {
2838 	void *ret;
2839 
2840 	/* Avoid moving the allocation if the size class would not change. */
2841 	if (oldsize > arena_maxclass &&
2842 	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
2843 		if (opt_junk && size < oldsize) {
2844 			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
2845 			    - size);
2846 		} else if (opt_zero && size > oldsize) {
2847 			memset((void *)((uintptr_t)ptr + oldsize), 0, size
2848 			    - oldsize);
2849 		}
2850 		return (ptr);
2851 	}
2852 
2853 	if (CHUNK_ADDR2BASE(ptr) == ptr
2854 #ifdef USE_BRK
2855 	    && ((uintptr_t)ptr < (uintptr_t)brk_base
2856 	    || (uintptr_t)ptr >= (uintptr_t)brk_max)
2857 #endif
2858 	    ) {
2859 		chunk_node_t *node, key;
2860 		void *newptr;
2861 		size_t oldcsize;
2862 		size_t newcsize;
2863 
2864 		newcsize = CHUNK_CEILING(size);
2865 		oldcsize = CHUNK_CEILING(oldsize);
2866 		assert(oldcsize != newcsize);
2867 		if (newcsize == 0) {
2868 			/* size_t wrap-around */
2869 			return (NULL);
2870 		}
2871 		newptr = mremap(ptr, oldcsize, NULL, newcsize,
2872 		    MAP_ALIGNED(chunksize_2pow));
2873 		if (newptr != MAP_FAILED) {
2874 			assert(CHUNK_ADDR2BASE(newptr) == newptr);
2875 
2876 			/* update tree */
2877 			malloc_mutex_lock(&chunks_mtx);
2878 			key.chunk = __DECONST(void *, ptr);
2879 			/* LINTED */
2880 			node = RB_FIND(chunk_tree_s, &huge, &key);
2881 			assert(node != NULL);
2882 			assert(node->chunk == ptr);
2883 			assert(node->size == oldcsize);
2884 			node->size = newcsize;
2885 			if (ptr != newptr) {
2886 				RB_REMOVE(chunk_tree_s, &huge, node);
2887 				node->chunk = newptr;
2888 				RB_INSERT(chunk_tree_s, &huge, node);
2889 			}
2890 #ifdef MALLOC_STATS
2891 			huge_nralloc++;
2892 			huge_allocated += newcsize - oldcsize;
2893 			if (newcsize > oldcsize) {
2894 				stats_chunks.curchunks +=
2895 				    (newcsize - oldcsize) / chunksize;
2896 				if (stats_chunks.curchunks >
2897 				    stats_chunks.highchunks)
2898 					stats_chunks.highchunks =
2899 					    stats_chunks.curchunks;
2900 			} else {
2901 				stats_chunks.curchunks -=
2902 				    (oldcsize - newcsize) / chunksize;
2903 			}
2904 #endif
2905 			malloc_mutex_unlock(&chunks_mtx);
2906 
2907 			if (opt_junk && size < oldsize) {
2908 				memset((void *)((uintptr_t)newptr + size), 0x5a,
2909 				    newcsize - size);
2910 			} else if (opt_zero && size > oldsize) {
2911 				memset((void *)((uintptr_t)newptr + oldsize), 0,
2912 				    size - oldsize);
2913 			}
2914 			return (newptr);
2915 		}
2916 	}
2917 
2918 	/*
2919 	 * If we get here, then size and oldsize are different enough that we
2920 	 * need to use a different size class.  In that case, fall back to
2921 	 * allocating new space and copying.
2922 	 */
2923 	ret = huge_malloc(size);
2924 	if (ret == NULL)
2925 		return (NULL);
2926 
2927 	if (CHUNK_ADDR2BASE(ptr) == ptr) {
2928 		/* The old allocation is a chunk. */
2929 		if (size < oldsize)
2930 			memcpy(ret, ptr, size);
2931 		else
2932 			memcpy(ret, ptr, oldsize);
2933 	} else {
2934 		/* The old allocation is a region. */
2935 		assert(oldsize < size);
2936 		memcpy(ret, ptr, oldsize);
2937 	}
2938 	idalloc(ptr);
2939 	return (ret);
2940 }
2941 
2942 static void
2943 huge_dalloc(void *ptr)
2944 {
2945 	chunk_node_t key;
2946 	chunk_node_t *node;
2947 
2948 	malloc_mutex_lock(&chunks_mtx);
2949 
2950 	/* Extract from tree of huge allocations. */
2951 	key.chunk = ptr;
2952 	/* LINTED */
2953 	node = RB_FIND(chunk_tree_s, &huge, &key);
2954 	assert(node != NULL);
2955 	assert(node->chunk == ptr);
2956 	/* LINTED */
2957 	RB_REMOVE(chunk_tree_s, &huge, node);
2958 
2959 #ifdef MALLOC_STATS
2960 	huge_ndalloc++;
2961 	huge_allocated -= node->size;
2962 #endif
2963 
2964 	malloc_mutex_unlock(&chunks_mtx);
2965 
2966 	/* Unmap chunk. */
2967 #ifdef USE_BRK
2968 	if (opt_junk)
2969 		memset(node->chunk, 0x5a, node->size);
2970 #endif
2971 	chunk_dealloc(node->chunk, node->size);
2972 
2973 	base_chunk_node_dealloc(node);
2974 }
2975 
2976 static void *
2977 imalloc(size_t size)
2978 {
2979 	void *ret;
2980 
2981 	assert(size != 0);
2982 
2983 	if (size <= arena_maxclass)
2984 		ret = arena_malloc(choose_arena(), size);
2985 	else
2986 		ret = huge_malloc(size);
2987 
2988 	return (ret);
2989 }
2990 
2991 static void *
2992 ipalloc(size_t alignment, size_t size)
2993 {
2994 	void *ret;
2995 	size_t ceil_size;
2996 
2997 	/*
2998 	 * Round size up to the nearest multiple of alignment.
2999 	 *
3000 	 * This done, we can take advantage of the fact that for each small
3001 	 * size class, every object is aligned at the smallest power of two
3002 	 * that is non-zero in the base two representation of the size.  For
3003 	 * example:
3004 	 *
3005 	 *   Size |   Base 2 | Minimum alignment
3006 	 *   -----+----------+------------------
3007 	 *     96 |  1100000 |  32
3008 	 *    144 | 10100000 |  32
3009 	 *    192 | 11000000 |  64
3010 	 *
3011 	 * Depending on runtime settings, it is possible that arena_malloc()
3012 	 * will further round up to a power of two, but that never causes
3013 	 * correctness issues.
3014 	 */
3015 	ceil_size = (size + (alignment - 1)) & (-alignment);
3016 	/*
3017 	 * (ceil_size < size) protects against the combination of maximal
3018 	 * alignment and size greater than maximal alignment.
3019 	 */
3020 	if (ceil_size < size) {
3021 		/* size_t overflow. */
3022 		return (NULL);
3023 	}
3024 
3025 	if (ceil_size <= pagesize || (alignment <= pagesize
3026 	    && ceil_size <= arena_maxclass))
3027 		ret = arena_malloc(choose_arena(), ceil_size);
3028 	else {
3029 		size_t run_size;
3030 
3031 		/*
3032 		 * We can't achieve sub-page alignment, so round up alignment
3033 		 * permanently; it makes later calculations simpler.
3034 		 */
3035 		alignment = PAGE_CEILING(alignment);
3036 		ceil_size = PAGE_CEILING(size);
3037 		/*
3038 		 * (ceil_size < size) protects against very large sizes within
3039 		 * pagesize of SIZE_T_MAX.
3040 		 *
3041 		 * (ceil_size + alignment < ceil_size) protects against the
3042 		 * combination of maximal alignment and ceil_size large enough
3043 		 * to cause overflow.  This is similar to the first overflow
3044 		 * check above, but it needs to be repeated due to the new
3045 		 * ceil_size value, which may now be *equal* to maximal
3046 		 * alignment, whereas before we only detected overflow if the
3047 		 * original size was *greater* than maximal alignment.
3048 		 */
3049 		if (ceil_size < size || ceil_size + alignment < ceil_size) {
3050 			/* size_t overflow. */
3051 			return (NULL);
3052 		}
3053 
3054 		/*
3055 		 * Calculate the size of the over-size run that arena_palloc()
3056 		 * would need to allocate in order to guarantee the alignment.
3057 		 */
3058 		if (ceil_size >= alignment)
3059 			run_size = ceil_size + alignment - pagesize;
3060 		else {
3061 			/*
3062 			 * It is possible that (alignment << 1) will cause
3063 			 * overflow, but it doesn't matter because we also
3064 			 * subtract pagesize, which in the case of overflow
3065 			 * leaves us with a very large run_size.  That causes
3066 			 * the first conditional below to fail, which means
3067 			 * that the bogus run_size value never gets used for
3068 			 * anything important.
3069 			 */
3070 			run_size = (alignment << 1) - pagesize;
3071 		}
3072 
3073 		if (run_size <= arena_maxclass) {
3074 			ret = arena_palloc(choose_arena(), alignment, ceil_size,
3075 			    run_size);
3076 		} else if (alignment <= chunksize)
3077 			ret = huge_malloc(ceil_size);
3078 		else
3079 			ret = huge_palloc(alignment, ceil_size);
3080 	}
3081 
3082 	assert(((uintptr_t)ret & (alignment - 1)) == 0);
3083 	return (ret);
3084 }
3085 
3086 static void *
3087 icalloc(size_t size)
3088 {
3089 	void *ret;
3090 
3091 	if (size <= arena_maxclass) {
3092 		ret = arena_malloc(choose_arena(), size);
3093 		if (ret == NULL)
3094 			return (NULL);
3095 		memset(ret, 0, size);
3096 	} else {
3097 		/*
3098 		 * The virtual memory system provides zero-filled pages, so
3099 		 * there is no need to do so manually, unless opt_junk is
3100 		 * enabled, in which case huge_malloc() fills huge allocations
3101 		 * with junk.
3102 		 */
3103 		ret = huge_malloc(size);
3104 		if (ret == NULL)
3105 			return (NULL);
3106 
3107 		if (opt_junk)
3108 			memset(ret, 0, size);
3109 #ifdef USE_BRK
3110 		else if ((uintptr_t)ret >= (uintptr_t)brk_base
3111 		    && (uintptr_t)ret < (uintptr_t)brk_max) {
3112 			/*
3113 			 * This may be a re-used brk chunk.  Therefore, zero
3114 			 * the memory.
3115 			 */
3116 			memset(ret, 0, size);
3117 		}
3118 #endif
3119 	}
3120 
3121 	return (ret);
3122 }
3123 
3124 static size_t
3125 isalloc(const void *ptr)
3126 {
3127 	size_t ret;
3128 	arena_chunk_t *chunk;
3129 
3130 	assert(ptr != NULL);
3131 
3132 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3133 	if (chunk != ptr) {
3134 		/* Region. */
3135 		assert(chunk->arena->magic == ARENA_MAGIC);
3136 
3137 		ret = arena_salloc(ptr);
3138 	} else {
3139 		chunk_node_t *node, key;
3140 
3141 		/* Chunk (huge allocation). */
3142 
3143 		malloc_mutex_lock(&chunks_mtx);
3144 
3145 		/* Extract from tree of huge allocations. */
3146 		key.chunk = __DECONST(void *, ptr);
3147 		/* LINTED */
3148 		node = RB_FIND(chunk_tree_s, &huge, &key);
3149 		assert(node != NULL);
3150 
3151 		ret = node->size;
3152 
3153 		malloc_mutex_unlock(&chunks_mtx);
3154 	}
3155 
3156 	return (ret);
3157 }
3158 
3159 static void *
3160 iralloc(void *ptr, size_t size)
3161 {
3162 	void *ret;
3163 	size_t oldsize;
3164 
3165 	assert(ptr != NULL);
3166 	assert(size != 0);
3167 
3168 	oldsize = isalloc(ptr);
3169 
3170 	if (size <= arena_maxclass)
3171 		ret = arena_ralloc(ptr, size, oldsize);
3172 	else
3173 		ret = huge_ralloc(ptr, size, oldsize);
3174 
3175 	return (ret);
3176 }
3177 
3178 static void
3179 idalloc(void *ptr)
3180 {
3181 	arena_chunk_t *chunk;
3182 
3183 	assert(ptr != NULL);
3184 
3185 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3186 	if (chunk != ptr) {
3187 		/* Region. */
3188 		arena_dalloc(chunk->arena, chunk, ptr);
3189 	} else
3190 		huge_dalloc(ptr);
3191 }
3192 
3193 static void
3194 malloc_print_stats(void)
3195 {
3196 
3197 	if (opt_print_stats) {
3198 		char s[UMAX2S_BUFSIZE];
3199 		_malloc_message("___ Begin malloc statistics ___\n", "", "",
3200 		    "");
3201 		_malloc_message("Assertions ",
3202 #ifdef NDEBUG
3203 		    "disabled",
3204 #else
3205 		    "enabled",
3206 #endif
3207 		    "\n", "");
3208 		_malloc_message("Boolean MALLOC_OPTIONS: ",
3209 		    opt_abort ? "A" : "a",
3210 		    opt_junk ? "J" : "j",
3211 		    opt_hint ? "H" : "h");
3212 		_malloc_message(opt_utrace ? "PU" : "Pu",
3213 		    opt_sysv ? "V" : "v",
3214 		    opt_xmalloc ? "X" : "x",
3215 		    opt_zero ? "Z\n" : "z\n");
3216 
3217 		_malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
3218 		_malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
3219 		_malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
3220 		    "\n", "");
3221 		_malloc_message("Quantum size: ", umax2s(quantum, s), "\n", "");
3222 		_malloc_message("Max small size: ", umax2s(small_max, s), "\n",
3223 		    "");
3224 
3225 		_malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
3226 		_malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
3227 
3228 #ifdef MALLOC_STATS
3229 		{
3230 			size_t allocated, mapped;
3231 			unsigned i;
3232 			arena_t *arena;
3233 
3234 			/* Calculate and print allocated/mapped stats. */
3235 
3236 			/* arenas. */
3237 			for (i = 0, allocated = 0; i < narenas; i++) {
3238 				if (arenas[i] != NULL) {
3239 					malloc_mutex_lock(&arenas[i]->mtx);
3240 					allocated +=
3241 					    arenas[i]->stats.allocated_small;
3242 					allocated +=
3243 					    arenas[i]->stats.allocated_large;
3244 					malloc_mutex_unlock(&arenas[i]->mtx);
3245 				}
3246 			}
3247 
3248 			/* huge/base. */
3249 			malloc_mutex_lock(&chunks_mtx);
3250 			allocated += huge_allocated;
3251 			mapped = stats_chunks.curchunks * chunksize;
3252 			malloc_mutex_unlock(&chunks_mtx);
3253 
3254 			malloc_mutex_lock(&base_mtx);
3255 			mapped += base_mapped;
3256 			malloc_mutex_unlock(&base_mtx);
3257 
3258 			malloc_printf("Allocated: %zu, mapped: %zu\n",
3259 			    allocated, mapped);
3260 
3261 			/* Print chunk stats. */
3262 			{
3263 				chunk_stats_t chunks_stats;
3264 
3265 				malloc_mutex_lock(&chunks_mtx);
3266 				chunks_stats = stats_chunks;
3267 				malloc_mutex_unlock(&chunks_mtx);
3268 
3269 				malloc_printf("chunks: nchunks   "
3270 				    "highchunks    curchunks\n");
3271 				malloc_printf("  %13llu%13lu%13lu\n",
3272 				    chunks_stats.nchunks,
3273 				    chunks_stats.highchunks,
3274 				    chunks_stats.curchunks);
3275 			}
3276 
3277 			/* Print chunk stats. */
3278 			malloc_printf(
3279 			    "huge: nmalloc      ndalloc      "
3280 			    "nralloc    allocated\n");
3281 			malloc_printf(" %12llu %12llu %12llu %12zu\n",
3282 			    huge_nmalloc, huge_ndalloc, huge_nralloc,
3283 			    huge_allocated);
3284 
3285 			/* Print stats for each arena. */
3286 			for (i = 0; i < narenas; i++) {
3287 				arena = arenas[i];
3288 				if (arena != NULL) {
3289 					malloc_printf(
3290 					    "\narenas[%u] @ %p\n", i, arena);
3291 					malloc_mutex_lock(&arena->mtx);
3292 					stats_print(arena);
3293 					malloc_mutex_unlock(&arena->mtx);
3294 				}
3295 			}
3296 		}
3297 #endif /* #ifdef MALLOC_STATS */
3298 		_malloc_message("--- End malloc statistics ---\n", "", "", "");
3299 	}
3300 }
3301 
3302 /*
3303  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
3304  * implementation has to take pains to avoid infinite recursion during
3305  * initialization.
3306  */
3307 static inline bool
3308 malloc_init(void)
3309 {
3310 
3311 	if (malloc_initialized == false)
3312 		return (malloc_init_hard());
3313 
3314 	return (false);
3315 }
3316 
3317 static bool
3318 malloc_init_hard(void)
3319 {
3320 	unsigned i, j;
3321 	ssize_t linklen;
3322 	char buf[PATH_MAX + 1];
3323 	const char *opts = "";
3324 
3325 	malloc_mutex_lock(&init_lock);
3326 	if (malloc_initialized) {
3327 		/*
3328 		 * Another thread initialized the allocator before this one
3329 		 * acquired init_lock.
3330 		 */
3331 		malloc_mutex_unlock(&init_lock);
3332 		return (false);
3333 	}
3334 
3335 	/* Get number of CPUs. */
3336 	{
3337 		int mib[2];
3338 		size_t len;
3339 
3340 		mib[0] = CTL_HW;
3341 		mib[1] = HW_NCPU;
3342 		len = sizeof(ncpus);
3343 		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3344 			/* Error. */
3345 			ncpus = 1;
3346 		}
3347 	}
3348 
3349 	/* Get page size. */
3350 	{
3351 		long result;
3352 
3353 		result = sysconf(_SC_PAGESIZE);
3354 		assert(result != -1);
3355 		pagesize = (unsigned) result;
3356 
3357 		/*
3358 		 * We assume that pagesize is a power of 2 when calculating
3359 		 * pagesize_mask and pagesize_2pow.
3360 		 */
3361 		assert(((result - 1) & result) == 0);
3362 		pagesize_mask = result - 1;
3363 		pagesize_2pow = ffs((int)result) - 1;
3364 	}
3365 
3366 	for (i = 0; i < 3; i++) {
3367 		/* Get runtime configuration. */
3368 		switch (i) {
3369 		case 0:
3370 			if ((linklen = readlink("/etc/malloc.conf", buf,
3371 						sizeof(buf) - 1)) != -1) {
3372 				/*
3373 				 * Use the contents of the "/etc/malloc.conf"
3374 				 * symbolic link's name.
3375 				 */
3376 				buf[linklen] = '\0';
3377 				opts = buf;
3378 			} else {
3379 				/* No configuration specified. */
3380 				buf[0] = '\0';
3381 				opts = buf;
3382 			}
3383 			break;
3384 		case 1:
3385 			if (issetugid() == 0 && (opts =
3386 			    getenv("MALLOC_OPTIONS")) != NULL) {
3387 				/*
3388 				 * Do nothing; opts is already initialized to
3389 				 * the value of the MALLOC_OPTIONS environment
3390 				 * variable.
3391 				 */
3392 			} else {
3393 				/* No configuration specified. */
3394 				buf[0] = '\0';
3395 				opts = buf;
3396 			}
3397 			break;
3398 		case 2:
3399 			if (_malloc_options != NULL) {
3400 			    /*
3401 			     * Use options that were compiled into the program.
3402 			     */
3403 			    opts = _malloc_options;
3404 			} else {
3405 				/* No configuration specified. */
3406 				buf[0] = '\0';
3407 				opts = buf;
3408 			}
3409 			break;
3410 		default:
3411 			/* NOTREACHED */
3412 			/* LINTED */
3413 			assert(false);
3414 		}
3415 
3416 		for (j = 0; opts[j] != '\0'; j++) {
3417 			switch (opts[j]) {
3418 			case 'a':
3419 				opt_abort = false;
3420 				break;
3421 			case 'A':
3422 				opt_abort = true;
3423 				break;
3424 			case 'h':
3425 				opt_hint = false;
3426 				break;
3427 			case 'H':
3428 				opt_hint = true;
3429 				break;
3430 			case 'j':
3431 				opt_junk = false;
3432 				break;
3433 			case 'J':
3434 				opt_junk = true;
3435 				break;
3436 			case 'k':
3437 				/*
3438 				 * Chunks always require at least one header
3439 				 * page, so chunks can never be smaller than
3440 				 * two pages.
3441 				 */
3442 				if (opt_chunk_2pow > pagesize_2pow + 1)
3443 					opt_chunk_2pow--;
3444 				break;
3445 			case 'K':
3446 				/*
3447 				 * There must be fewer pages in a chunk than
3448 				 * can be recorded by the pos field of
3449 				 * arena_chunk_map_t, in order to make POS_FREE
3450 				 * special.
3451 				 */
3452 				if (opt_chunk_2pow - pagesize_2pow
3453 				    < (sizeof(uint32_t) << 3) - 1)
3454 					opt_chunk_2pow++;
3455 				break;
3456 			case 'n':
3457 				opt_narenas_lshift--;
3458 				break;
3459 			case 'N':
3460 				opt_narenas_lshift++;
3461 				break;
3462 			case 'p':
3463 				opt_print_stats = false;
3464 				break;
3465 			case 'P':
3466 				opt_print_stats = true;
3467 				break;
3468 			case 'q':
3469 				if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3470 					opt_quantum_2pow--;
3471 				break;
3472 			case 'Q':
3473 				if (opt_quantum_2pow < pagesize_2pow - 1)
3474 					opt_quantum_2pow++;
3475 				break;
3476 			case 's':
3477 				if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3478 					opt_small_max_2pow--;
3479 				break;
3480 			case 'S':
3481 				if (opt_small_max_2pow < pagesize_2pow - 1)
3482 					opt_small_max_2pow++;
3483 				break;
3484 			case 'u':
3485 				opt_utrace = false;
3486 				break;
3487 			case 'U':
3488 				opt_utrace = true;
3489 				break;
3490 			case 'v':
3491 				opt_sysv = false;
3492 				break;
3493 			case 'V':
3494 				opt_sysv = true;
3495 				break;
3496 			case 'x':
3497 				opt_xmalloc = false;
3498 				break;
3499 			case 'X':
3500 				opt_xmalloc = true;
3501 				break;
3502 			case 'z':
3503 				opt_zero = false;
3504 				break;
3505 			case 'Z':
3506 				opt_zero = true;
3507 				break;
3508 			default: {
3509 				char cbuf[2];
3510 
3511 				cbuf[0] = opts[j];
3512 				cbuf[1] = '\0';
3513 				_malloc_message(getprogname(),
3514 				    ": (malloc) Unsupported character in "
3515 				    "malloc options: '", cbuf, "'\n");
3516 			}
3517 			}
3518 		}
3519 	}
3520 
3521 	/* Take care to call atexit() only once. */
3522 	if (opt_print_stats) {
3523 		/* Print statistics at exit. */
3524 		atexit(malloc_print_stats);
3525 	}
3526 
3527 	/* Set variables according to the value of opt_small_max_2pow. */
3528 	if (opt_small_max_2pow < opt_quantum_2pow)
3529 		opt_small_max_2pow = opt_quantum_2pow;
3530 	small_max = (1 << opt_small_max_2pow);
3531 
3532 	/* Set bin-related variables. */
3533 	bin_maxclass = (pagesize >> 1);
3534 	assert(opt_quantum_2pow >= TINY_MIN_2POW);
3535 	ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW);
3536 	assert(ntbins <= opt_quantum_2pow);
3537 	nqbins = (unsigned)(small_max >> opt_quantum_2pow);
3538 	nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1);
3539 
3540 	/* Set variables according to the value of opt_quantum_2pow. */
3541 	quantum = (1 << opt_quantum_2pow);
3542 	quantum_mask = quantum - 1;
3543 	if (ntbins > 0)
3544 		small_min = (quantum >> 1) + 1;
3545 	else
3546 		small_min = 1;
3547 	assert(small_min <= quantum);
3548 
3549 	/* Set variables according to the value of opt_chunk_2pow. */
3550 	chunksize = (1LU << opt_chunk_2pow);
3551 	chunksize_mask = chunksize - 1;
3552 	chunksize_2pow = (unsigned)opt_chunk_2pow;
3553 	chunk_npages = (unsigned)(chunksize >> pagesize_2pow);
3554 	{
3555 		unsigned header_size;
3556 
3557 		header_size = (unsigned)(sizeof(arena_chunk_t) +
3558 		    (sizeof(arena_chunk_map_t) * (chunk_npages - 1)));
3559 		arena_chunk_header_npages = (header_size >> pagesize_2pow);
3560 		if ((header_size & pagesize_mask) != 0)
3561 			arena_chunk_header_npages++;
3562 	}
3563 	arena_maxclass = chunksize - (arena_chunk_header_npages <<
3564 	    pagesize_2pow);
3565 
3566 	UTRACE(0, 0, 0);
3567 
3568 #ifdef MALLOC_STATS
3569 	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3570 #endif
3571 
3572 	/* Various sanity checks that regard configuration. */
3573 	assert(quantum >= sizeof(void *));
3574 	assert(quantum <= pagesize);
3575 	assert(chunksize >= pagesize);
3576 	assert(quantum * 4 <= chunksize);
3577 
3578 	/* Initialize chunks data. */
3579 	malloc_mutex_init(&chunks_mtx);
3580 	RB_INIT(&huge);
3581 #ifdef USE_BRK
3582 	malloc_mutex_init(&brk_mtx);
3583 	brk_base = sbrk(0);
3584 	brk_prev = brk_base;
3585 	brk_max = brk_base;
3586 #endif
3587 #ifdef MALLOC_STATS
3588 	huge_nmalloc = 0;
3589 	huge_ndalloc = 0;
3590 	huge_nralloc = 0;
3591 	huge_allocated = 0;
3592 #endif
3593 	RB_INIT(&old_chunks);
3594 
3595 	/* Initialize base allocation data structures. */
3596 #ifdef MALLOC_STATS
3597 	base_mapped = 0;
3598 #endif
3599 #ifdef USE_BRK
3600 	/*
3601 	 * Allocate a base chunk here, since it doesn't actually have to be
3602 	 * chunk-aligned.  Doing this before allocating any other chunks allows
3603 	 * the use of space that would otherwise be wasted.
3604 	 */
3605 	base_pages_alloc(0);
3606 #endif
3607 	base_chunk_nodes = NULL;
3608 	malloc_mutex_init(&base_mtx);
3609 
3610 	if (ncpus > 1) {
3611 		/*
3612 		 * For SMP systems, create four times as many arenas as there
3613 		 * are CPUs by default.
3614 		 */
3615 		opt_narenas_lshift += 2;
3616 	}
3617 
3618 #ifdef NO_TLS
3619 	/* Initialize arena key. */
3620 	(void)thr_keycreate(&arenas_map_key, NULL);
3621 #endif
3622 
3623 	/* Determine how many arenas to use. */
3624 	narenas = ncpus;
3625 	if (opt_narenas_lshift > 0) {
3626 		if ((narenas << opt_narenas_lshift) > narenas)
3627 			narenas <<= opt_narenas_lshift;
3628 		/*
3629 		 * Make sure not to exceed the limits of what base_malloc()
3630 		 * can handle.
3631 		 */
3632 		if (narenas * sizeof(arena_t *) > chunksize)
3633 			narenas = (unsigned)(chunksize / sizeof(arena_t *));
3634 	} else if (opt_narenas_lshift < 0) {
3635 		if ((narenas << opt_narenas_lshift) < narenas)
3636 			narenas <<= opt_narenas_lshift;
3637 		/* Make sure there is at least one arena. */
3638 		if (narenas == 0)
3639 			narenas = 1;
3640 	}
3641 
3642 	next_arena = 0;
3643 
3644 	/* Allocate and initialize arenas. */
3645 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3646 	if (arenas == NULL) {
3647 		malloc_mutex_unlock(&init_lock);
3648 		return (true);
3649 	}
3650 	/*
3651 	 * Zero the array.  In practice, this should always be pre-zeroed,
3652 	 * since it was just mmap()ed, but let's be sure.
3653 	 */
3654 	memset(arenas, 0, sizeof(arena_t *) * narenas);
3655 
3656 	/*
3657 	 * Initialize one arena here.  The rest are lazily created in
3658 	 * arena_choose_hard().
3659 	 */
3660 	arenas_extend(0);
3661 	if (arenas[0] == NULL) {
3662 		malloc_mutex_unlock(&init_lock);
3663 		return (true);
3664 	}
3665 
3666 	malloc_mutex_init(&arenas_mtx);
3667 
3668 	malloc_initialized = true;
3669 	malloc_mutex_unlock(&init_lock);
3670 	return (false);
3671 }
3672 
3673 /*
3674  * End general internal functions.
3675  */
3676 /******************************************************************************/
3677 /*
3678  * Begin malloc(3)-compatible functions.
3679  */
3680 
3681 void *
3682 malloc(size_t size)
3683 {
3684 	void *ret;
3685 
3686 	if (malloc_init()) {
3687 		ret = NULL;
3688 		goto RETURN;
3689 	}
3690 
3691 	if (size == 0) {
3692 		if (opt_sysv == false)
3693 			size = 1;
3694 		else {
3695 			ret = NULL;
3696 			goto RETURN;
3697 		}
3698 	}
3699 
3700 	ret = imalloc(size);
3701 
3702 RETURN:
3703 	if (ret == NULL) {
3704 		if (opt_xmalloc) {
3705 			_malloc_message(getprogname(),
3706 			    ": (malloc) Error in malloc(): out of memory\n", "",
3707 			    "");
3708 			abort();
3709 		}
3710 		errno = ENOMEM;
3711 	}
3712 
3713 	UTRACE(0, size, ret);
3714 	return (ret);
3715 }
3716 
3717 int
3718 posix_memalign(void **memptr, size_t alignment, size_t size)
3719 {
3720 	int ret;
3721 	void *result;
3722 
3723 	if (malloc_init())
3724 		result = NULL;
3725 	else {
3726 		/* Make sure that alignment is a large enough power of 2. */
3727 		if (((alignment - 1) & alignment) != 0
3728 		    || alignment < sizeof(void *)) {
3729 			if (opt_xmalloc) {
3730 				_malloc_message(getprogname(),
3731 				    ": (malloc) Error in posix_memalign(): "
3732 				    "invalid alignment\n", "", "");
3733 				abort();
3734 			}
3735 			result = NULL;
3736 			ret = EINVAL;
3737 			goto RETURN;
3738 		}
3739 
3740 		result = ipalloc(alignment, size);
3741 	}
3742 
3743 	if (result == NULL) {
3744 		if (opt_xmalloc) {
3745 			_malloc_message(getprogname(),
3746 			": (malloc) Error in posix_memalign(): out of memory\n",
3747 			"", "");
3748 			abort();
3749 		}
3750 		ret = ENOMEM;
3751 		goto RETURN;
3752 	}
3753 
3754 	*memptr = result;
3755 	ret = 0;
3756 
3757 RETURN:
3758 	UTRACE(0, size, result);
3759 	return (ret);
3760 }
3761 
3762 void *
3763 calloc(size_t num, size_t size)
3764 {
3765 	void *ret;
3766 	size_t num_size;
3767 
3768 	if (malloc_init()) {
3769 		num_size = 0;
3770 		ret = NULL;
3771 		goto RETURN;
3772 	}
3773 
3774 	num_size = num * size;
3775 	if (num_size == 0) {
3776 		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
3777 			num_size = 1;
3778 		else {
3779 			ret = NULL;
3780 			goto RETURN;
3781 		}
3782 	/*
3783 	 * Try to avoid division here.  We know that it isn't possible to
3784 	 * overflow during multiplication if neither operand uses any of the
3785 	 * most significant half of the bits in a size_t.
3786 	 */
3787 	} else if ((unsigned long long)((num | size) &
3788 	   ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
3789 	   (num_size / size != num)) {
3790 		/* size_t overflow. */
3791 		ret = NULL;
3792 		goto RETURN;
3793 	}
3794 
3795 	ret = icalloc(num_size);
3796 
3797 RETURN:
3798 	if (ret == NULL) {
3799 		if (opt_xmalloc) {
3800 			_malloc_message(getprogname(),
3801 			    ": (malloc) Error in calloc(): out of memory\n", "",
3802 			    "");
3803 			abort();
3804 		}
3805 		errno = ENOMEM;
3806 	}
3807 
3808 	UTRACE(0, num_size, ret);
3809 	return (ret);
3810 }
3811 
3812 void *
3813 realloc(void *ptr, size_t size)
3814 {
3815 	void *ret;
3816 
3817 	if (size == 0) {
3818 		if (opt_sysv == false)
3819 			size = 1;
3820 		else {
3821 			if (ptr != NULL)
3822 				idalloc(ptr);
3823 			ret = NULL;
3824 			goto RETURN;
3825 		}
3826 	}
3827 
3828 	if (ptr != NULL) {
3829 		assert(malloc_initialized);
3830 
3831 		ret = iralloc(ptr, size);
3832 
3833 		if (ret == NULL) {
3834 			if (opt_xmalloc) {
3835 				_malloc_message(getprogname(),
3836 				    ": (malloc) Error in realloc(): out of "
3837 				    "memory\n", "", "");
3838 				abort();
3839 			}
3840 			errno = ENOMEM;
3841 		}
3842 	} else {
3843 		if (malloc_init())
3844 			ret = NULL;
3845 		else
3846 			ret = imalloc(size);
3847 
3848 		if (ret == NULL) {
3849 			if (opt_xmalloc) {
3850 				_malloc_message(getprogname(),
3851 				    ": (malloc) Error in realloc(): out of "
3852 				    "memory\n", "", "");
3853 				abort();
3854 			}
3855 			errno = ENOMEM;
3856 		}
3857 	}
3858 
3859 RETURN:
3860 	UTRACE(ptr, size, ret);
3861 	return (ret);
3862 }
3863 
3864 void
3865 free(void *ptr)
3866 {
3867 
3868 	UTRACE(ptr, 0, 0);
3869 	if (ptr != NULL) {
3870 		assert(malloc_initialized);
3871 
3872 		idalloc(ptr);
3873 	}
3874 }
3875 
3876 /*
3877  * End malloc(3)-compatible functions.
3878  */
3879 /******************************************************************************/
3880 /*
3881  * Begin non-standard functions.
3882  */
3883 #ifndef __NetBSD__
3884 size_t
3885 malloc_usable_size(const void *ptr)
3886 {
3887 
3888 	assert(ptr != NULL);
3889 
3890 	return (isalloc(ptr));
3891 }
3892 #endif
3893 
3894 /*
3895  * End non-standard functions.
3896  */
3897 /******************************************************************************/
3898 /*
3899  * Begin library-private functions, used by threading libraries for protection
3900  * of malloc during fork().  These functions are only called if the program is
3901  * running in threaded mode, so there is no need to check whether the program
3902  * is threaded here.
3903  */
3904 
3905 void
3906 _malloc_prefork(void)
3907 {
3908 	unsigned i;
3909 
3910 	/* Acquire all mutexes in a safe order. */
3911 
3912 	malloc_mutex_lock(&arenas_mtx);
3913 	for (i = 0; i < narenas; i++) {
3914 		if (arenas[i] != NULL)
3915 			malloc_mutex_lock(&arenas[i]->mtx);
3916 	}
3917 	malloc_mutex_unlock(&arenas_mtx);
3918 
3919 	malloc_mutex_lock(&base_mtx);
3920 
3921 	malloc_mutex_lock(&chunks_mtx);
3922 }
3923 
3924 void
3925 _malloc_postfork(void)
3926 {
3927 	unsigned i;
3928 
3929 	/* Release all mutexes, now that fork() has completed. */
3930 
3931 	malloc_mutex_unlock(&chunks_mtx);
3932 
3933 	malloc_mutex_unlock(&base_mtx);
3934 
3935 	malloc_mutex_lock(&arenas_mtx);
3936 	for (i = 0; i < narenas; i++) {
3937 		if (arenas[i] != NULL)
3938 			malloc_mutex_unlock(&arenas[i]->mtx);
3939 	}
3940 	malloc_mutex_unlock(&arenas_mtx);
3941 }
3942 
3943 /*
3944  * End library-private functions.
3945  */
3946 /******************************************************************************/
3947