xref: /netbsd-src/lib/libc/stdlib/jemalloc.c (revision da39824b722dbd84beb9a1ab7e8de6710cc44d4b)
1 /*	$NetBSD: jemalloc.c,v 1.32 2014/02/25 12:13:19 martin Exp $	*/
2 
3 /*-
4  * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice(s), this list of conditions and the following disclaimer as
12  *    the first lines of this file unmodified other than the possible
13  *    addition of one or more copyright notices.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice(s), this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
20  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  *******************************************************************************
32  *
33  * This allocator implementation is designed to provide scalable performance
34  * for multi-threaded programs on multi-processor systems.  The following
35  * features are included for this purpose:
36  *
37  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
38  *     contention and cache sloshing.
39  *
40  *   + Cache line sharing between arenas is avoided for internal data
41  *     structures.
42  *
43  *   + Memory is managed in chunks and runs (chunks can be split into runs),
44  *     rather than as individual pages.  This provides a constant-time
45  *     mechanism for associating allocations with particular arenas.
46  *
47  * Allocation requests are rounded up to the nearest size class, and no record
48  * of the original request size is maintained.  Allocations are broken into
49  * categories according to size class.  Assuming runtime defaults, 4 kB pages
50  * and a 16 byte quantum, the size classes in each category are as follows:
51  *
52  *   |=====================================|
53  *   | Category | Subcategory    |    Size |
54  *   |=====================================|
55  *   | Small    | Tiny           |       2 |
56  *   |          |                |       4 |
57  *   |          |                |       8 |
58  *   |          |----------------+---------|
59  *   |          | Quantum-spaced |      16 |
60  *   |          |                |      32 |
61  *   |          |                |      48 |
62  *   |          |                |     ... |
63  *   |          |                |     480 |
64  *   |          |                |     496 |
65  *   |          |                |     512 |
66  *   |          |----------------+---------|
67  *   |          | Sub-page       |    1 kB |
68  *   |          |                |    2 kB |
69  *   |=====================================|
70  *   | Large                     |    4 kB |
71  *   |                           |    8 kB |
72  *   |                           |   12 kB |
73  *   |                           |     ... |
74  *   |                           | 1012 kB |
75  *   |                           | 1016 kB |
76  *   |                           | 1020 kB |
77  *   |=====================================|
78  *   | Huge                      |    1 MB |
79  *   |                           |    2 MB |
80  *   |                           |    3 MB |
81  *   |                           |     ... |
82  *   |=====================================|
83  *
84  * A different mechanism is used for each category:
85  *
86  *   Small : Each size class is segregated into its own set of runs.  Each run
87  *           maintains a bitmap of which regions are free/allocated.
88  *
89  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
90  *           in the associated arena chunk header maps.
91  *
92  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
93  *          Metadata are stored in a separate red-black tree.
94  *
95  *******************************************************************************
96  */
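
/*
 * Worked examples of the rounding described above, assuming the runtime
 * defaults (16 byte quantum, 4 kB pages, 1 MB chunks); the request sizes are
 * arbitrary and chosen only for illustration:
 *
 *   malloc(7)       --> tiny class of size 8        (next power of two)
 *   malloc(100)     --> quantum-spaced class 112    (next multiple of 16)
 *   malloc(1500)    --> sub-page class 2048         (next power of two)
 *   malloc(5000)    --> large class 8192            (next multiple of the page)
 *   malloc(2500000) --> huge, 3 chunks == 3 MB      (next multiple of the chunk)
 */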
97 
98 /* LINTLIBRARY */
99 
100 #ifdef __NetBSD__
101 #  define xutrace(a, b)		utrace("malloc", (a), (b))
102 #  define __DECONST(x, y)	((x)__UNCONST(y))
103 #  define NO_TLS
104 #else
105 #  define xutrace(a, b)		utrace((a), (b))
106 #endif	/* __NetBSD__ */
107 
108 /*
109  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
110  * defaults the A and J runtime options to off.  These settings are appropriate
111  * for production systems.
112  */
113 #define MALLOC_PRODUCTION
114 
115 #ifndef MALLOC_PRODUCTION
116 #  define MALLOC_DEBUG
117 #endif
118 
119 #include <sys/cdefs.h>
120 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
121 __RCSID("$NetBSD: jemalloc.c,v 1.32 2014/02/25 12:13:19 martin Exp $");
122 
123 #ifdef __FreeBSD__
124 #include "libc_private.h"
125 #ifdef MALLOC_DEBUG
126 #  define _LOCK_DEBUG
127 #endif
128 #include "spinlock.h"
129 #endif
130 #include "namespace.h"
131 #include <sys/mman.h>
132 #include <sys/param.h>
133 #ifdef __FreeBSD__
134 #include <sys/stddef.h>
135 #endif
136 #include <sys/time.h>
137 #include <sys/types.h>
138 #include <sys/sysctl.h>
139 #include <sys/tree.h>
140 #include <sys/uio.h>
141 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
142 
143 #ifdef __FreeBSD__
144 #include <machine/atomic.h>
145 #include <machine/cpufunc.h>
146 #endif
147 #include <machine/vmparam.h>
148 
149 #include <errno.h>
150 #include <limits.h>
151 #include <pthread.h>
152 #include <sched.h>
153 #include <stdarg.h>
154 #include <stdbool.h>
155 #include <stdio.h>
156 #include <stdint.h>
157 #include <stdlib.h>
158 #include <string.h>
159 #include <strings.h>
160 #include <unistd.h>
161 
162 #ifdef __NetBSD__
163 #  include <reentrant.h>
164 #  include "extern.h"
165 
166 #define STRERROR_R(a, b, c)	__strerror_r(a, b, c);
167 /*
168  * A non-localized version of strerror() that avoids bringing in stdio and
169  * the locale code.  All the malloc messages are in English, so why bother
170  * with localization?
171  */
172 static int
173 __strerror_r(int e, char *s, size_t l)
174 {
175 	int rval;
176 	size_t slen;
177 
178 	if (e >= 0 && e < sys_nerr) {
179 		slen = strlcpy(s, sys_errlist[e], l);
180 		rval = 0;
181 	} else {
182 		slen = snprintf_ss(s, l, "Unknown error %d", e);
183 		rval = EINVAL;
184 	}
185 	return slen >= l ? ERANGE : rval;
186 }
187 #endif
188 
189 #ifdef __FreeBSD__
190 #define STRERROR_R(a, b, c)	strerror_r(a, b, c);
191 #include "un-namespace.h"
192 #endif
193 
194 /* MALLOC_STATS enables statistics calculation. */
195 #ifndef MALLOC_PRODUCTION
196 #  define MALLOC_STATS
197 #endif
198 
199 #ifdef MALLOC_DEBUG
200 #  ifdef NDEBUG
201 #    undef NDEBUG
202 #  endif
203 #else
204 #  ifndef NDEBUG
205 #    define NDEBUG
206 #  endif
207 #endif
208 #include <assert.h>
209 
210 #ifdef MALLOC_DEBUG
211    /* Disable inlining to make debugging easier. */
212 #  define inline
213 #endif
214 
215 /* Size of stack-allocated buffer passed to strerror_r(). */
216 #define	STRERROR_BUF		64
217 
218 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
219 
220 /*
221  * If you touch the TINY_MIN_2POW definition for any architecture, please
222  * make sure to adjust the corresponding definition for JEMALLOC_TINY_MIN_2POW
223  * in the gcc 4.8 tree in dist/gcc/tree-ssa-ccp.c and verify that a native
224  * gcc is still buildable!
225  */
226 
227 #ifdef __i386__
228 #  define QUANTUM_2POW_MIN	4
229 #  define SIZEOF_PTR_2POW	2
230 #  define USE_BRK
231 #endif
232 #ifdef __ia64__
233 #  define QUANTUM_2POW_MIN	4
234 #  define SIZEOF_PTR_2POW	3
235 #endif
236 #ifdef __alpha__
237 #  define QUANTUM_2POW_MIN	4
238 #  define SIZEOF_PTR_2POW	3
239 #  define TINY_MIN_2POW		3
240 #  define NO_TLS
241 #endif
242 #ifdef __sparc64__
243 #  define QUANTUM_2POW_MIN	4
244 #  define SIZEOF_PTR_2POW	3
245 #  define TINY_MIN_2POW		3
246 #  define NO_TLS
247 #endif
248 #ifdef __amd64__
249 #  define QUANTUM_2POW_MIN	4
250 #  define SIZEOF_PTR_2POW	3
251 #  define TINY_MIN_2POW		3
252 #endif
253 #ifdef __arm__
254 #  define QUANTUM_2POW_MIN	3
255 #  define SIZEOF_PTR_2POW	2
256 #  define USE_BRK
257 #  ifdef __ARM_EABI__
258 #    define TINY_MIN_2POW	3
259 #  endif
260 #  define NO_TLS
261 #endif
262 #ifdef __powerpc__
263 #  define QUANTUM_2POW_MIN	4
264 #  define SIZEOF_PTR_2POW	2
265 #  define USE_BRK
266 #  define TINY_MIN_2POW		3
267 #endif
268 #if defined(__sparc__) && !defined(__sparc64__)
269 #  define QUANTUM_2POW_MIN	4
270 #  define SIZEOF_PTR_2POW	2
271 #  define USE_BRK
272 #endif
273 #ifdef __vax__
274 #  define QUANTUM_2POW_MIN	4
275 #  define SIZEOF_PTR_2POW	2
276 #  define USE_BRK
277 #endif
278 #ifdef __sh__
279 #  define QUANTUM_2POW_MIN	4
280 #  define SIZEOF_PTR_2POW	2
281 #  define USE_BRK
282 #endif
283 #ifdef __m68k__
284 #  define QUANTUM_2POW_MIN	4
285 #  define SIZEOF_PTR_2POW	2
286 #  define USE_BRK
287 #endif
288 #ifdef __mips__
289 #  define QUANTUM_2POW_MIN	4
290 #  define SIZEOF_PTR_2POW	2
291 #  define USE_BRK
292 #endif
293 #ifdef __hppa__
294 #  define QUANTUM_2POW_MIN	4
295 #  define SIZEOF_PTR_2POW	2
296 #  define USE_BRK
297 #endif
298 
299 #define	SIZEOF_PTR		(1 << SIZEOF_PTR_2POW)
300 
301 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
302 #ifndef SIZEOF_INT_2POW
303 #  define SIZEOF_INT_2POW	2
304 #endif
305 
306 /*
307  * Size and alignment of memory chunks that are allocated by the OS's virtual
308  * memory system.
309  */
310 #define	CHUNK_2POW_DEFAULT	20
311 
312 /*
313  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
314  * so over-estimates are okay (up to a point), but under-estimates will
315  * negatively affect performance.
316  */
317 #define	CACHELINE_2POW		6
318 #define	CACHELINE		((size_t)(1 << CACHELINE_2POW))
319 
320 /* Smallest size class to support. */
321 #ifndef TINY_MIN_2POW
322 #define	TINY_MIN_2POW		2
323 #endif
324 
325 /*
326  * Maximum size class that is a multiple of the quantum, but not (necessarily)
327  * a power of 2.  Above this size, allocations are rounded up to the nearest
328  * power of 2.
329  */
330 #define	SMALL_MAX_2POW_DEFAULT	9
331 #define	SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)
332 
333 /*
334  * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
335  * as small as possible such that this setting is still honored, without
336  * violating other constraints.  The goal is to make runs as small as possible
337  * without exceeding a per run external fragmentation threshold.
338  *
339  * We use binary fixed point math for overhead computations, where the binary
340  * point is implicitly RUN_BFP bits to the left.
341  *
342  * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
343  * honored for some/all object sizes, since there is one bit of header overhead
344  * per object (plus a constant).  This constraint is relaxed (ignored) for runs
345  * that are so small that the per-region overhead is greater than:
346  *
347  *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
348  */
349 #define RUN_BFP			12
350 /*                              \/   Implicit binary fixed point. */
351 #define RUN_MAX_OVRHD		0x0000003dU
352 #define RUN_MAX_OVRHD_RELAX	0x00001800U
353 
354 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
355 #define RUN_MAX_SMALL_2POW	15
356 #define RUN_MAX_SMALL		(1 << RUN_MAX_SMALL_2POW)
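
/*
 * Worked example of the fixed point arithmetic above: RUN_MAX_OVRHD is
 * 0x3d == 61, and with the binary point RUN_BFP == 12 bits to the left this
 * represents 61/4096, i.e. roughly 1.5% maximum desired header overhead.
 * A hypothetical run of 8192 bytes whose header occupies 96 bytes has an
 * overhead of (96 << 12) / 8192 == 48, which is below 61 and therefore
 * acceptable.
 */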
357 
358 /******************************************************************************/
359 
360 #ifdef __FreeBSD__
361 /*
362  * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
363  * they require malloc()ed memory.
364  */
365 typedef struct {
366 	spinlock_t	lock;
367 } malloc_mutex_t;
368 
369 /* Set to true once the allocator has been initialized. */
370 static bool malloc_initialized = false;
371 
372 /* Used to avoid initialization races. */
373 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
374 #else
375 #define	malloc_mutex_t	mutex_t
376 
377 /* Set to true once the allocator has been initialized. */
378 static bool malloc_initialized = false;
379 
380 /* Used to avoid initialization races. */
381 static mutex_t init_lock = MUTEX_INITIALIZER;
382 #endif
383 
384 /******************************************************************************/
385 /*
386  * Statistics data structures.
387  */
388 
389 #ifdef MALLOC_STATS
390 
391 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
392 struct malloc_bin_stats_s {
393 	/*
394 	 * Number of allocation requests that corresponded to the size of this
395 	 * bin.
396 	 */
397 	uint64_t	nrequests;
398 
399 	/* Total number of runs created for this bin's size class. */
400 	uint64_t	nruns;
401 
402 	/*
403 	 * Total number of runs reused by extracting them from the runs tree for
404 	 * this bin's size class.
405 	 */
406 	uint64_t	reruns;
407 
408 	/* High-water mark for this bin. */
409 	unsigned long	highruns;
410 
411 	/* Current number of runs in this bin. */
412 	unsigned long	curruns;
413 };
414 
415 typedef struct arena_stats_s arena_stats_t;
416 struct arena_stats_s {
417 	/* Number of bytes currently mapped. */
418 	size_t		mapped;
419 
420 	/* Per-size-category statistics. */
421 	size_t		allocated_small;
422 	uint64_t	nmalloc_small;
423 	uint64_t	ndalloc_small;
424 
425 	size_t		allocated_large;
426 	uint64_t	nmalloc_large;
427 	uint64_t	ndalloc_large;
428 };
429 
430 typedef struct chunk_stats_s chunk_stats_t;
431 struct chunk_stats_s {
432 	/* Number of chunks that were allocated. */
433 	uint64_t	nchunks;
434 
435 	/* High-water mark for number of chunks allocated. */
436 	unsigned long	highchunks;
437 
438 	/*
439 	 * Current number of chunks allocated.  This value isn't maintained for
440 	 * any other purpose, so keep track of it in order to be able to set
441 	 * highchunks.
442 	 */
443 	unsigned long	curchunks;
444 };
445 
446 #endif /* #ifdef MALLOC_STATS */
447 
448 /******************************************************************************/
449 /*
450  * Chunk data structures.
451  */
452 
453 /* Tree of chunks. */
454 typedef struct chunk_node_s chunk_node_t;
455 struct chunk_node_s {
456 	/* Linkage for the chunk tree. */
457 	RB_ENTRY(chunk_node_s) link;
458 
459 	/*
460 	 * Pointer to the chunk that this tree node is responsible for.  In some
461 	 * (but certainly not all) cases, this data structure is placed at the
462 	 * beginning of the corresponding chunk, so this field may point to this
463 	 * node.
464 	 */
465 	void	*chunk;
466 
467 	/* Total chunk size. */
468 	size_t	size;
469 };
470 typedef struct chunk_tree_s chunk_tree_t;
471 RB_HEAD(chunk_tree_s, chunk_node_s);
472 
473 /******************************************************************************/
474 /*
475  * Arena data structures.
476  */
477 
478 typedef struct arena_s arena_t;
479 typedef struct arena_bin_s arena_bin_t;
480 
481 typedef struct arena_chunk_map_s arena_chunk_map_t;
482 struct arena_chunk_map_s {
483 	/* Number of pages in run. */
484 	uint32_t	npages;
485 	/*
486 	 * Position within run.  For a free run, this is POS_FREE for the first
487 	 * and last pages.  The POS_FREE special value makes it possible to
488 	 * quickly coalesce free runs.
489 	 *
490 	 * This is the limiting factor for chunksize; there can be at most 2^31
491 	 * pages in a run.
492 	 */
493 #define POS_FREE ((uint32_t)0xffffffffU)
494 	uint32_t	pos;
495 };
496 
497 /* Arena chunk header. */
498 typedef struct arena_chunk_s arena_chunk_t;
499 struct arena_chunk_s {
500 	/* Arena that owns the chunk. */
501 	arena_t *arena;
502 
503 	/* Linkage for the arena's chunk tree. */
504 	RB_ENTRY(arena_chunk_s) link;
505 
506 	/*
507 	 * Number of pages in use.  This is maintained in order to make
508 	 * detection of empty chunks fast.
509 	 */
510 	uint32_t pages_used;
511 
512 	/*
513 	 * Every time a free run larger than this value is created/coalesced,
514 	 * this value is increased.  The only way that the value decreases is if
515 	 * arena_run_alloc() fails to find a free run as large as advertised by
516 	 * this value.
517 	 */
518 	uint32_t max_frun_npages;
519 
520 	/*
521 	 * Every time a free run that starts at an earlier page than this value
522 	 * is created/coalesced, this value is decreased.  It is reset in a
523 	 * similar fashion to max_frun_npages.
524 	 */
525 	uint32_t min_frun_ind;
526 
527 	/*
528 	 * Map of pages within chunk that keeps track of free/large/small.  For
529 	 * free runs, only the map entries for the first and last pages are
530 	 * kept up to date, so that free runs can be quickly coalesced.
531 	 */
532 	arena_chunk_map_t map[1]; /* Dynamically sized. */
533 };
534 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
535 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
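
/*
 * Illustration (hypothetical map contents, not produced by any code here) of
 * how the first/last-page convention supports constant-time coalescing: a
 * free run of 3 pages followed by an allocated run of 2 pages is recorded as
 *
 *   map[i]   : npages == 3, pos == POS_FREE   <- first page of free run
 *   map[i+1] : stale, not kept up to date     <- interior page
 *   map[i+2] : npages == 3, pos == POS_FREE   <- last page of free run
 *   map[i+3] : npages == 2, pos == 0
 *   map[i+4] : npages == 2, pos == 1
 *
 * When a run is freed, inspecting the map entry just before it and the entry
 * just past its end is enough to find adjacent free runs to merge with.
 */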
536 
537 typedef struct arena_run_s arena_run_t;
538 struct arena_run_s {
539 	/* Linkage for run trees. */
540 	RB_ENTRY(arena_run_s) link;
541 
542 #ifdef MALLOC_DEBUG
543 	uint32_t	magic;
544 #  define ARENA_RUN_MAGIC 0x384adf93
545 #endif
546 
547 	/* Bin this run is associated with. */
548 	arena_bin_t	*bin;
549 
550 	/* Index of first element that might have a free region. */
551 	unsigned	regs_minelm;
552 
553 	/* Number of free regions in run. */
554 	unsigned	nfree;
555 
556 	/* Bitmask of in-use regions (0: in use, 1: free). */
557 	unsigned	regs_mask[1]; /* Dynamically sized. */
558 };
559 typedef struct arena_run_tree_s arena_run_tree_t;
560 RB_HEAD(arena_run_tree_s, arena_run_s);
561 
562 struct arena_bin_s {
563 	/*
564 	 * Current run being used to service allocations of this bin's size
565 	 * class.
566 	 */
567 	arena_run_t	*runcur;
568 
569 	/*
570 	 * Tree of non-full runs.  This tree is used when looking for an
571 	 * existing run when runcur is no longer usable.  We choose the
572 	 * non-full run that is lowest in memory; this policy tends to keep
573 	 * objects packed well, and it can also help reduce the number of
574 	 * almost-empty chunks.
575 	 */
576 	arena_run_tree_t runs;
577 
578 	/* Size of regions in a run for this bin's size class. */
579 	size_t		reg_size;
580 
581 	/* Total size of a run for this bin's size class. */
582 	size_t		run_size;
583 
584 	/* Total number of regions in a run for this bin's size class. */
585 	uint32_t	nregs;
586 
587 	/* Number of elements in a run's regs_mask for this bin's size class. */
588 	uint32_t	regs_mask_nelms;
589 
590 	/* Offset of first region in a run for this bin's size class. */
591 	uint32_t	reg0_offset;
592 
593 #ifdef MALLOC_STATS
594 	/* Bin statistics. */
595 	malloc_bin_stats_t stats;
596 #endif
597 };
598 
599 struct arena_s {
600 #ifdef MALLOC_DEBUG
601 	uint32_t		magic;
602 #  define ARENA_MAGIC 0x947d3d24
603 #endif
604 
605 	/* All operations on this arena require that mtx be locked. */
606 	malloc_mutex_t		mtx;
607 
608 #ifdef MALLOC_STATS
609 	arena_stats_t		stats;
610 #endif
611 
612 	/*
613 	 * Tree of chunks this arena manages.
614 	 */
615 	arena_chunk_tree_t	chunks;
616 
617 	/*
618 	 * In order to avoid rapid chunk allocation/deallocation when an arena
619 	 * oscillates right on the cusp of needing a new chunk, cache the most
620 	 * recently freed chunk.  This caching is disabled by opt_hint.
621 	 *
622 	 * There is one spare chunk per arena, rather than one spare total, in
623 	 * order to avoid interactions between multiple threads that could make
624 	 * a single spare inadequate.
625 	 */
626 	arena_chunk_t *spare;
627 
628 	/*
629 	 * bins is used to store runs of free regions of the following sizes,
630 	 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
631 	 *
632 	 *   bins[i] | size |
633 	 *   --------+------+
634 	 *        0  |    2 |
635 	 *        1  |    4 |
636 	 *        2  |    8 |
637 	 *   --------+------+
638 	 *        3  |   16 |
639 	 *        4  |   32 |
640 	 *        5  |   48 |
641 	 *        6  |   64 |
642 	 *           :      :
643 	 *           :      :
644 	 *       33  |  496 |
645 	 *       34  |  512 |
646 	 *   --------+------+
647 	 *       35  | 1024 |
648 	 *       36  | 2048 |
649 	 *   --------+------+
650 	 */
651 	arena_bin_t		bins[1]; /* Dynamically sized. */
652 };
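
/*
 * Example of the bin indexing implied by the table above: a 100 byte request
 * is rounded up to 112 bytes and served from bins[9], since the tiny bins
 * occupy indices 0-2 and the quantum-spaced bins start at index 3 with the
 * 16 byte class (index == 3 + 112/16 - 1 == 9).
 */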
653 
654 /******************************************************************************/
655 /*
656  * Data.
657  */
658 
659 /* Number of CPUs. */
660 static unsigned		ncpus;
661 
662 /* VM page size. */
663 static size_t		pagesize;
664 static size_t		pagesize_mask;
665 static int		pagesize_2pow;
666 
667 /* Various bin-related settings. */
668 static size_t		bin_maxclass; /* Max size class for bins. */
669 static unsigned		ntbins; /* Number of (2^n)-spaced tiny bins. */
670 static unsigned		nqbins; /* Number of quantum-spaced bins. */
671 static unsigned		nsbins; /* Number of (2^n)-spaced sub-page bins. */
672 static size_t		small_min;
673 static size_t		small_max;
674 
675 /* Various quantum-related settings. */
676 static size_t		quantum;
677 static size_t		quantum_mask; /* (quantum - 1). */
678 
679 /* Various chunk-related settings. */
680 static size_t		chunksize;
681 static size_t		chunksize_mask; /* (chunksize - 1). */
682 static int		chunksize_2pow;
683 static unsigned		chunk_npages;
684 static unsigned		arena_chunk_header_npages;
685 static size_t		arena_maxclass; /* Max size class for arenas. */
686 
687 /********/
688 /*
689  * Chunks.
690  */
691 
692 /* Protects chunk-related data structures. */
693 static malloc_mutex_t	chunks_mtx;
694 
695 /* Tree of chunks that are stand-alone huge allocations. */
696 static chunk_tree_t	huge;
697 
698 #ifdef USE_BRK
699 /*
700  * Try to use brk for chunk-size allocations, due to address space constraints.
701  */
702 /*
703  * Protects sbrk() calls.  This must be separate from chunks_mtx, since
704  * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
705  * could cause recursive lock acquisition).
706  */
707 static malloc_mutex_t	brk_mtx;
708 /* Result of first sbrk(0) call. */
709 static void		*brk_base;
710 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
711 static void		*brk_prev;
712 /* Current upper limit on brk addresses. */
713 static void		*brk_max;
714 #endif
715 
716 #ifdef MALLOC_STATS
717 /* Huge allocation statistics. */
718 static uint64_t		huge_nmalloc;
719 static uint64_t		huge_ndalloc;
720 static uint64_t		huge_nralloc;
721 static size_t		huge_allocated;
722 #endif
723 
724 /*
725  * Tree of chunks that were previously allocated.  This is used when allocating
726  * chunks, in an attempt to re-use address space.
727  */
728 static chunk_tree_t	old_chunks;
729 
730 /****************************/
731 /*
732  * base (internal allocation).
733  */
734 
735 /*
736  * Current pages that are being used for internal memory allocations.  These
737  * pages are carved up in cacheline-size quanta, so that there is no chance of
738  * false cache line sharing.
739  */
740 static void		*base_pages;
741 static void		*base_next_addr;
742 static void		*base_past_addr; /* Addr immediately past base_pages. */
743 static chunk_node_t	*base_chunk_nodes; /* LIFO cache of chunk nodes. */
744 static malloc_mutex_t	base_mtx;
745 #ifdef MALLOC_STATS
746 static size_t		base_mapped;
747 #endif
748 
749 /********/
750 /*
751  * Arenas.
752  */
753 
754 /*
755  * Arenas that are used to service external requests.  Not all elements of the
756  * arenas array are necessarily used; arenas are created lazily as needed.
757  */
758 static arena_t		**arenas;
759 static unsigned		narenas;
760 static unsigned		next_arena;
761 static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */
762 
763 #ifndef NO_TLS
764 /*
765  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
766  * for allocations.
767  */
768 static __thread arena_t	*arenas_map;
769 #define	get_arenas_map()	(arenas_map)
770 #define	set_arenas_map(x)	(arenas_map = x)
771 #else
772 static thread_key_t arenas_map_key;
773 #define	get_arenas_map()	thr_getspecific(arenas_map_key)
774 #define	set_arenas_map(x)	thr_setspecific(arenas_map_key, x)
775 #endif
776 
777 #ifdef MALLOC_STATS
778 /* Chunk statistics. */
779 static chunk_stats_t	stats_chunks;
780 #endif
781 
782 /*******************************/
783 /*
784  * Runtime configuration options.
785  */
786 const char	*_malloc_options;
787 
788 #ifndef MALLOC_PRODUCTION
789 static bool	opt_abort = true;
790 static bool	opt_junk = true;
791 #else
792 static bool	opt_abort = false;
793 static bool	opt_junk = false;
794 #endif
795 static bool	opt_hint = false;
796 static bool	opt_print_stats = false;
797 static int	opt_quantum_2pow = QUANTUM_2POW_MIN;
798 static int	opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
799 static int	opt_chunk_2pow = CHUNK_2POW_DEFAULT;
800 static bool	opt_utrace = false;
801 static bool	opt_sysv = false;
802 static bool	opt_xmalloc = false;
803 static bool	opt_zero = false;
804 static int32_t	opt_narenas_lshift = 0;
805 
806 typedef struct {
807 	void	*p;
808 	size_t	s;
809 	void	*r;
810 } malloc_utrace_t;
811 
812 #define	UTRACE(a, b, c)							\
813 	if (opt_utrace) {						\
814 		malloc_utrace_t ut;					\
815 		ut.p = a;						\
816 		ut.s = b;						\
817 		ut.r = c;						\
818 		xutrace(&ut, sizeof(ut));				\
819 	}
820 
821 /******************************************************************************/
822 /*
823  * Begin function prototypes for non-inline static functions.
824  */
825 
826 static void	wrtmessage(const char *p1, const char *p2, const char *p3,
827 		const char *p4);
828 #ifdef MALLOC_STATS
829 static void	malloc_printf(const char *format, ...);
830 #endif
831 static char	*size_t2s(size_t x, char *s);
832 static bool	base_pages_alloc(size_t minsize);
833 static void	*base_alloc(size_t size);
834 static chunk_node_t *base_chunk_node_alloc(void);
835 static void	base_chunk_node_dealloc(chunk_node_t *node);
836 #ifdef MALLOC_STATS
837 static void	stats_print(arena_t *arena);
838 #endif
839 static void	*pages_map(void *addr, size_t size);
840 static void	*pages_map_align(void *addr, size_t size, int align);
841 static void	pages_unmap(void *addr, size_t size);
842 static void	*chunk_alloc(size_t size);
843 static void	chunk_dealloc(void *chunk, size_t size);
844 static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
845 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
846 static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
847 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
848 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
849 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
850 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
851 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
852 static void	*arena_malloc(arena_t *arena, size_t size);
853 static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
854     size_t alloc_size);
855 static size_t	arena_salloc(const void *ptr);
856 static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
857 static void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
858 static bool	arena_new(arena_t *arena);
859 static arena_t	*arenas_extend(unsigned ind);
860 static void	*huge_malloc(size_t size);
861 static void	*huge_palloc(size_t alignment, size_t size);
862 static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
863 static void	huge_dalloc(void *ptr);
864 static void	*imalloc(size_t size);
865 static void	*ipalloc(size_t alignment, size_t size);
866 static void	*icalloc(size_t size);
867 static size_t	isalloc(const void *ptr);
868 static void	*iralloc(void *ptr, size_t size);
869 static void	idalloc(void *ptr);
870 static void	malloc_print_stats(void);
871 static bool	malloc_init_hard(void);
872 
873 /*
874  * End function prototypes.
875  */
876 /******************************************************************************/
877 /*
878  * Begin mutex.
879  */
880 
881 #ifdef __NetBSD__
882 #define	malloc_mutex_init(m)	mutex_init(m, NULL)
883 #define	malloc_mutex_lock(m)	mutex_lock(m)
884 #define	malloc_mutex_unlock(m)	mutex_unlock(m)
885 #else	/* __NetBSD__ */
886 static inline void
887 malloc_mutex_init(malloc_mutex_t *a_mutex)
888 {
889 	static const spinlock_t lock = _SPINLOCK_INITIALIZER;
890 
891 	a_mutex->lock = lock;
892 }
893 
894 static inline void
895 malloc_mutex_lock(malloc_mutex_t *a_mutex)
896 {
897 
898 	if (__isthreaded)
899 		_SPINLOCK(&a_mutex->lock);
900 }
901 
902 static inline void
903 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
904 {
905 
906 	if (__isthreaded)
907 		_SPINUNLOCK(&a_mutex->lock);
908 }
909 #endif	/* __NetBSD__ */
910 
911 /*
912  * End mutex.
913  */
914 /******************************************************************************/
915 /*
916  * Begin Utility functions/macros.
917  */
918 
919 /* Return the chunk address for allocation address a. */
920 #define	CHUNK_ADDR2BASE(a)						\
921 	((void *)((uintptr_t)(a) & ~chunksize_mask))
922 
923 /* Return the chunk offset of address a. */
924 #define	CHUNK_ADDR2OFFSET(a)						\
925 	((size_t)((uintptr_t)(a) & chunksize_mask))
926 
927 /* Return the smallest chunk multiple that is >= s. */
928 #define	CHUNK_CEILING(s)						\
929 	(((s) + chunksize_mask) & ~chunksize_mask)
930 
931 /* Return the smallest cacheline multiple that is >= s. */
932 #define	CACHELINE_CEILING(s)						\
933 	(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
934 
935 /* Return the smallest quantum multiple that is >= a. */
936 #define	QUANTUM_CEILING(a)						\
937 	(((a) + quantum_mask) & ~quantum_mask)
938 
939 /* Return the smallest pagesize multiple that is >= s. */
940 #define	PAGE_CEILING(s)							\
941 	(((s) + pagesize_mask) & ~pagesize_mask)
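
/*
 * Example of the mask arithmetic used by the *_CEILING() macros above,
 * assuming a 4096 byte page (pagesize_mask == 0xfff):
 *
 *   PAGE_CEILING(1)    == (1    + 0xfff) & ~0xfff == 0x1000 (4096)
 *   PAGE_CEILING(4096) == (4096 + 0xfff) & ~0xfff == 0x1000 (4096)
 *   PAGE_CEILING(4097) == (4097 + 0xfff) & ~0xfff == 0x2000 (8192)
 *
 * The same pattern is valid for any power-of-two alignment, which is why the
 * quantum, the page size, and the chunk size are all powers of two.
 */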
942 
943 /* Compute the smallest power of 2 that is >= x. */
944 static inline size_t
945 pow2_ceil(size_t x)
946 {
947 
948 	x--;
949 	x |= x >> 1;
950 	x |= x >> 2;
951 	x |= x >> 4;
952 	x |= x >> 8;
953 	x |= x >> 16;
954 #if (SIZEOF_PTR == 8)
955 	x |= x >> 32;
956 #endif
957 	x++;
958 	return (x);
959 }
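
/*
 * Example: pow2_ceil(0x1234) decrements to 0x1233, the shifts smear the
 * highest set bit into every lower position (giving 0x1fff), and the final
 * increment yields 0x2000.  A value that is already a power of two is
 * returned unchanged.
 */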
960 
961 static void
962 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
963 {
964 
965 	write(STDERR_FILENO, p1, strlen(p1));
966 	write(STDERR_FILENO, p2, strlen(p2));
967 	write(STDERR_FILENO, p3, strlen(p3));
968 	write(STDERR_FILENO, p4, strlen(p4));
969 }
970 
971 void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
972 	    const char *p4) = wrtmessage;
973 
974 #ifdef MALLOC_STATS
975 /*
976  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
977  */
978 static void
979 malloc_printf(const char *format, ...)
980 {
981 	char buf[4096];
982 	va_list ap;
983 
984 	va_start(ap, format);
985 	vsnprintf(buf, sizeof(buf), format, ap);
986 	va_end(ap);
987 	_malloc_message(buf, "", "", "");
988 }
989 #endif
990 
991 /*
992  * We don't want to depend on vsnprintf() for production builds, since that can
993  * cause unnecessary bloat for static binaries.  size_t2s() provides minimal
994  * integer printing functionality, so that malloc_printf() use can be limited to
995  * MALLOC_STATS code.
996  */
997 #define UMAX2S_BUFSIZE	21
998 static char *
999 size_t2s(size_t x, char *s)
1000 {
1001 	unsigned i;
1002 
1003 	/* Make sure UMAX2S_BUFSIZE is large enough. */
1004 	/* LINTED */
1005 	assert(sizeof(size_t) <= 8);
1006 
1007 	i = UMAX2S_BUFSIZE - 1;
1008 	s[i] = '\0';
1009 	do {
1010 		i--;
1011 		s[i] = "0123456789"[(int)(x % 10)];
1012 		x /= (uintmax_t)10LL;
1013 	} while (x > 0);
1014 
1015 	return (&s[i]);
1016 }
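
/*
 * Usage sketch (illustrative only): size_t2s() fills the buffer from the end
 * and returns a pointer into it, so callers must use the returned pointer
 * rather than the start of the buffer:
 *
 *	char buf[UMAX2S_BUFSIZE];
 *
 *	_malloc_message("allocated ", size_t2s(1234, buf), " bytes\n", "");
 *
 * The call above writes "allocated 1234 bytes" to stderr.
 */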
1017 
1018 /******************************************************************************/
1019 
1020 static bool
1021 base_pages_alloc(size_t minsize)
1022 {
1023 	size_t csize = 0;
1024 
1025 #ifdef USE_BRK
1026 	/*
1027 	 * Do special brk allocation here, since base allocations don't need to
1028 	 * be chunk-aligned.
1029 	 */
1030 	if (brk_prev != (void *)-1) {
1031 		void *brk_cur;
1032 		intptr_t incr;
1033 
1034 		if (minsize != 0)
1035 			csize = CHUNK_CEILING(minsize);
1036 
1037 		malloc_mutex_lock(&brk_mtx);
1038 		do {
1039 			/* Get the current end of brk. */
1040 			brk_cur = sbrk(0);
1041 
1042 			/*
1043 			 * Calculate how much padding is necessary to
1044 			 * chunk-align the end of brk.  Don't worry about
1045 			 * brk_cur not being chunk-aligned though.
1046 			 */
1047 			incr = (intptr_t)chunksize
1048 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1049 			assert(incr >= 0);
1050 			if ((size_t)incr < minsize)
1051 				incr += csize;
1052 
1053 			brk_prev = sbrk(incr);
1054 			if (brk_prev == brk_cur) {
1055 				/* Success. */
1056 				malloc_mutex_unlock(&brk_mtx);
1057 				base_pages = brk_cur;
1058 				base_next_addr = base_pages;
1059 				base_past_addr = (void *)((uintptr_t)base_pages
1060 				    + incr);
1061 #ifdef MALLOC_STATS
1062 				base_mapped += incr;
1063 #endif
1064 				return (false);
1065 			}
1066 		} while (brk_prev != (void *)-1);
1067 		malloc_mutex_unlock(&brk_mtx);
1068 	}
1069 	if (minsize == 0) {
1070 		/*
1071 		 * Failure during initialization doesn't matter, so avoid
1072 		 * falling through to the mmap-based page mapping code.
1073 		 */
1074 		return (true);
1075 	}
1076 #endif
1077 	assert(minsize != 0);
1078 	csize = PAGE_CEILING(minsize);
1079 	base_pages = pages_map(NULL, csize);
1080 	if (base_pages == NULL)
1081 		return (true);
1082 	base_next_addr = base_pages;
1083 	base_past_addr = (void *)((uintptr_t)base_pages + csize);
1084 #ifdef MALLOC_STATS
1085 	base_mapped += csize;
1086 #endif
1087 	return (false);
1088 }
1089 
1090 static void *
1091 base_alloc(size_t size)
1092 {
1093 	void *ret;
1094 	size_t csize;
1095 
1096 	/* Round size up to nearest multiple of the cacheline size. */
1097 	csize = CACHELINE_CEILING(size);
1098 
1099 	malloc_mutex_lock(&base_mtx);
1100 
1101 	/* Make sure there's enough space for the allocation. */
1102 	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1103 		if (base_pages_alloc(csize)) {
1104 			ret = NULL;
1105 			goto RETURN;
1106 		}
1107 	}
1108 
1109 	/* Allocate. */
1110 	ret = base_next_addr;
1111 	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1112 
1113 RETURN:
1114 	malloc_mutex_unlock(&base_mtx);
1115 	return (ret);
1116 }
1117 
1118 static chunk_node_t *
1119 base_chunk_node_alloc(void)
1120 {
1121 	chunk_node_t *ret;
1122 
1123 	malloc_mutex_lock(&base_mtx);
1124 	if (base_chunk_nodes != NULL) {
1125 		ret = base_chunk_nodes;
1126 		/* LINTED */
1127 		base_chunk_nodes = *(chunk_node_t **)ret;
1128 		malloc_mutex_unlock(&base_mtx);
1129 	} else {
1130 		malloc_mutex_unlock(&base_mtx);
1131 		ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1132 	}
1133 
1134 	return (ret);
1135 }
1136 
1137 static void
1138 base_chunk_node_dealloc(chunk_node_t *node)
1139 {
1140 
1141 	malloc_mutex_lock(&base_mtx);
1142 	/* LINTED */
1143 	*(chunk_node_t **)node = base_chunk_nodes;
1144 	base_chunk_nodes = node;
1145 	malloc_mutex_unlock(&base_mtx);
1146 }
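
/*
 * The free list above stores its link in the first word of each unused
 * chunk_node_t, so no separate list node is required.  A minimal sketch of
 * the same idiom in isolation (names are hypothetical):
 *
 *	static chunk_node_t *free_nodes;
 *
 *	static void
 *	node_push(chunk_node_t *node)
 *	{
 *
 *		*(chunk_node_t **)node = free_nodes;
 *		free_nodes = node;
 *	}
 *
 *	static chunk_node_t *
 *	node_pop(void)
 *	{
 *		chunk_node_t *node = free_nodes;
 *
 *		if (node != NULL)
 *			free_nodes = *(chunk_node_t **)node;
 *		return (node);
 *	}
 *
 * This works because a chunk_node_t is at least pointer-sized and its
 * contents are dead while it sits on the free list.
 */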
1147 
1148 /******************************************************************************/
1149 
1150 #ifdef MALLOC_STATS
1151 static void
1152 stats_print(arena_t *arena)
1153 {
1154 	unsigned i;
1155 	int gap_start;
1156 
1157 	malloc_printf(
1158 	    "          allocated/mapped            nmalloc      ndalloc\n");
1159 
1160 	malloc_printf("small: %12zu %-12s %12llu %12llu\n",
1161 	    arena->stats.allocated_small, "", arena->stats.nmalloc_small,
1162 	    arena->stats.ndalloc_small);
1163 	malloc_printf("large: %12zu %-12s %12llu %12llu\n",
1164 	    arena->stats.allocated_large, "", arena->stats.nmalloc_large,
1165 	    arena->stats.ndalloc_large);
1166 	malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
1167 	    arena->stats.allocated_small + arena->stats.allocated_large,
1168 	    arena->stats.mapped,
1169 	    arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1170 	    arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1171 
1172 	malloc_printf("bins:     bin   size regs pgs  requests   newruns"
1173 	    "    reruns maxruns curruns\n");
1174 	for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1175 		if (arena->bins[i].stats.nrequests == 0) {
1176 			if (gap_start == -1)
1177 				gap_start = i;
1178 		} else {
1179 			if (gap_start != -1) {
1180 				if (i > gap_start + 1) {
1181 					/* Gap of more than one size class. */
1182 					malloc_printf("[%u..%u]\n",
1183 					    gap_start, i - 1);
1184 				} else {
1185 					/* Gap of one size class. */
1186 					malloc_printf("[%u]\n", gap_start);
1187 				}
1188 				gap_start = -1;
1189 			}
1190 			malloc_printf(
1191 			    "%13u %1s %4zu %4u %3zu %9llu %9llu"
1192 			    " %9llu %7lu %7lu\n",
1193 			    i,
1194 			    i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1195 			    arena->bins[i].reg_size,
1196 			    arena->bins[i].nregs,
1197 			    arena->bins[i].run_size >> pagesize_2pow,
1198 			    arena->bins[i].stats.nrequests,
1199 			    arena->bins[i].stats.nruns,
1200 			    arena->bins[i].stats.reruns,
1201 			    arena->bins[i].stats.highruns,
1202 			    arena->bins[i].stats.curruns);
1203 		}
1204 	}
1205 	if (gap_start != -1) {
1206 		if (i > gap_start + 1) {
1207 			/* Gap of more than one size class. */
1208 			malloc_printf("[%u..%u]\n", gap_start, i - 1);
1209 		} else {
1210 			/* Gap of one size class. */
1211 			malloc_printf("[%u]\n", gap_start);
1212 		}
1213 	}
1214 }
1215 #endif
1216 
1217 /*
1218  * End Utility functions/macros.
1219  */
1220 /******************************************************************************/
1221 /*
1222  * Begin chunk management functions.
1223  */
1224 
1225 #ifndef lint
1226 static inline int
1227 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1228 {
1229 
1230 	assert(a != NULL);
1231 	assert(b != NULL);
1232 
1233 	if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1234 		return (-1);
1235 	else if (a->chunk == b->chunk)
1236 		return (0);
1237 	else
1238 		return (1);
1239 }
1240 
1241 /* Generate red-black tree code for chunks. */
1242 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1243 #endif
1244 
1245 static void *
1246 pages_map_align(void *addr, size_t size, int align)
1247 {
1248 	void *ret;
1249 
1250 	/*
1251 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
1252 	 * of existing mappings, and we only want to create new mappings.
1253 	 */
1254 	ret = mmap(addr, size, PROT_READ | PROT_WRITE,
1255 	    MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(align), -1, 0);
1256 	assert(ret != NULL);
1257 
1258 	if (ret == MAP_FAILED)
1259 		ret = NULL;
1260 	else if (addr != NULL && ret != addr) {
1261 		/*
1262 		 * We succeeded in mapping memory, but not in the right place.
1263 		 */
1264 		if (munmap(ret, size) == -1) {
1265 			char buf[STRERROR_BUF];
1266 
1267 			STRERROR_R(errno, buf, sizeof(buf));
1268 			_malloc_message(getprogname(),
1269 			    ": (malloc) Error in munmap(): ", buf, "\n");
1270 			if (opt_abort)
1271 				abort();
1272 		}
1273 		ret = NULL;
1274 	}
1275 
1276 	assert(ret == NULL || (addr == NULL && ret != addr)
1277 	    || (addr != NULL && ret == addr));
1278 	return (ret);
1279 }
1280 
1281 static void *
1282 pages_map(void *addr, size_t size)
1283 {
1284 
1285 	return pages_map_align(addr, size, 0);
1286 }
1287 
1288 static void
1289 pages_unmap(void *addr, size_t size)
1290 {
1291 
1292 	if (munmap(addr, size) == -1) {
1293 		char buf[STRERROR_BUF];
1294 
1295 		STRERROR_R(errno, buf, sizeof(buf));
1296 		_malloc_message(getprogname(),
1297 		    ": (malloc) Error in munmap(): ", buf, "\n");
1298 		if (opt_abort)
1299 			abort();
1300 	}
1301 }
1302 
1303 static void *
1304 chunk_alloc(size_t size)
1305 {
1306 	void *ret, *chunk;
1307 	chunk_node_t *tchunk, *delchunk;
1308 
1309 	assert(size != 0);
1310 	assert((size & chunksize_mask) == 0);
1311 
1312 	malloc_mutex_lock(&chunks_mtx);
1313 
1314 	if (size == chunksize) {
1315 		/*
1316 		 * Check for address ranges that were previously chunks and try
1317 		 * to use them.
1318 		 */
1319 
1320 		/* LINTED */
1321 		tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1322 		while (tchunk != NULL) {
1323 			/* Found an address range.  Try to recycle it. */
1324 
1325 			chunk = tchunk->chunk;
1326 			delchunk = tchunk;
1327 			/* LINTED */
1328 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1329 
1330 			/* Remove delchunk from the tree. */
1331 			/* LINTED */
1332 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1333 			base_chunk_node_dealloc(delchunk);
1334 
1335 #ifdef USE_BRK
1336 			if ((uintptr_t)chunk >= (uintptr_t)brk_base
1337 			    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1338 				/* Re-use a previously freed brk chunk. */
1339 				ret = chunk;
1340 				goto RETURN;
1341 			}
1342 #endif
1343 			if ((ret = pages_map(chunk, size)) != NULL) {
1344 				/* Success. */
1345 				goto RETURN;
1346 			}
1347 		}
1348 	}
1349 
1350 	/*
1351 	 * Try to map a chunk-aligned region directly (via MAP_ALIGNED), allowing
1352 	 * the OS to place the allocation anywhere.  Beware of size_t wrap-around.
1353 	 */
1354 	if (size + chunksize > size) {
1355 		if ((ret = pages_map_align(NULL, size, chunksize_2pow))
1356 		    != NULL) {
1357 			goto RETURN;
1358 		}
1359 	}
1360 
1361 #ifdef USE_BRK
1362 	/*
1363 	 * Try to create allocations in brk, in order to make full use of
1364 	 * limited address space.
1365 	 */
1366 	if (brk_prev != (void *)-1) {
1367 		void *brk_cur;
1368 		intptr_t incr;
1369 
1370 		/*
1371 		 * The loop is necessary to recover from races with other
1372 		 * threads that are using brk for something other than malloc.
1373 		 */
1374 		malloc_mutex_lock(&brk_mtx);
1375 		do {
1376 			/* Get the current end of brk. */
1377 			brk_cur = sbrk(0);
1378 
1379 			/*
1380 			 * Calculate how much padding is necessary to
1381 			 * chunk-align the end of brk.
1382 			 */
1383 			incr = (intptr_t)size
1384 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1385 			if (incr == (intptr_t)size) {
1386 				ret = brk_cur;
1387 			} else {
1388 				ret = (void *)((intptr_t)brk_cur + incr);
1389 				incr += size;
1390 			}
1391 
1392 			brk_prev = sbrk(incr);
1393 			if (brk_prev == brk_cur) {
1394 				/* Success. */
1395 				malloc_mutex_unlock(&brk_mtx);
1396 				brk_max = (void *)((intptr_t)ret + size);
1397 				goto RETURN;
1398 			}
1399 		} while (brk_prev != (void *)-1);
1400 		malloc_mutex_unlock(&brk_mtx);
1401 	}
1402 #endif
1403 
1404 	/* All strategies for allocation failed. */
1405 	ret = NULL;
1406 RETURN:
1407 	if (ret != NULL) {
1408 		chunk_node_t key;
1409 		/*
1410 		 * Clean out any entries in old_chunks that overlap with the
1411 		 * memory we just allocated.
1412 		 */
1413 		key.chunk = ret;
1414 		/* LINTED */
1415 		tchunk = RB_NFIND(chunk_tree_s, &old_chunks, &key);
1416 		while (tchunk != NULL
1417 		    && (uintptr_t)tchunk->chunk >= (uintptr_t)ret
1418 		    && (uintptr_t)tchunk->chunk < (uintptr_t)ret + size) {
1419 			delchunk = tchunk;
1420 			/* LINTED */
1421 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1422 			/* LINTED */
1423 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1424 			base_chunk_node_dealloc(delchunk);
1425 		}
1426 
1427 	}
1428 #ifdef MALLOC_STATS
1429 	if (ret != NULL) {
1430 		stats_chunks.nchunks += (size / chunksize);
1431 		stats_chunks.curchunks += (size / chunksize);
1432 	}
1433 	if (stats_chunks.curchunks > stats_chunks.highchunks)
1434 		stats_chunks.highchunks = stats_chunks.curchunks;
1435 #endif
1436 	malloc_mutex_unlock(&chunks_mtx);
1437 
1438 	assert(CHUNK_ADDR2BASE(ret) == ret);
1439 	return (ret);
1440 }
1441 
1442 static void
1443 chunk_dealloc(void *chunk, size_t size)
1444 {
1445 	chunk_node_t *node;
1446 
1447 	assert(chunk != NULL);
1448 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
1449 	assert(size != 0);
1450 	assert((size & chunksize_mask) == 0);
1451 
1452 	malloc_mutex_lock(&chunks_mtx);
1453 
1454 #ifdef USE_BRK
1455 	if ((uintptr_t)chunk >= (uintptr_t)brk_base
1456 	    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1457 		void *brk_cur;
1458 
1459 		malloc_mutex_lock(&brk_mtx);
1460 		/* Get the current end of brk. */
1461 		brk_cur = sbrk(0);
1462 
1463 		/*
1464 		 * Try to shrink the data segment if this chunk is at the end
1465 		 * of the data segment.  The sbrk() call here is subject to a
1466 		 * race condition with threads that use brk(2) or sbrk(2)
1467 		 * directly, but the alternative would be to leak memory for
1468 		 * the sake of poorly designed multi-threaded programs.
1469 		 */
1470 		if (brk_cur == brk_max
1471 		    && (void *)((uintptr_t)chunk + size) == brk_max
1472 		    && sbrk(-(intptr_t)size) == brk_max) {
1473 			malloc_mutex_unlock(&brk_mtx);
1474 			if (brk_prev == brk_max) {
1475 				/* Success. */
1476 				brk_prev = (void *)((intptr_t)brk_max
1477 				    - (intptr_t)size);
1478 				brk_max = brk_prev;
1479 			}
1480 		} else {
1481 			size_t offset;
1482 
1483 			malloc_mutex_unlock(&brk_mtx);
1484 			madvise(chunk, size, MADV_FREE);
1485 
1486 			/*
1487 			 * Iteratively create records of each chunk-sized
1488 			 * memory region that 'chunk' is comprised of, so that
1489 			 * the address range can be recycled if memory usage
1490 			 * increases later on.
1491 			 */
1492 			for (offset = 0; offset < size; offset += chunksize) {
1493 				node = base_chunk_node_alloc();
1494 				if (node == NULL)
1495 					break;
1496 
1497 				node->chunk = (void *)((uintptr_t)chunk
1498 				    + (uintptr_t)offset);
1499 				node->size = chunksize;
1500 				/* LINTED */
1501 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1502 			}
1503 		}
1504 	} else {
1505 #endif
1506 		pages_unmap(chunk, size);
1507 
1508 		/*
1509 		 * Make a record of the chunk's address, so that the address
1510 		 * range can be recycled if memory usage increases later on.
1511 		 * Don't bother to create entries if (size > chunksize), since
1512 		 * doing so could cause scalability issues for truly gargantuan
1513 		 * objects (many gigabytes or larger).
1514 		 */
1515 		if (size == chunksize) {
1516 			node = base_chunk_node_alloc();
1517 			if (node != NULL) {
1518 				node->chunk = (void *)(uintptr_t)chunk;
1519 				node->size = chunksize;
1520 				/* LINTED */
1521 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1522 			}
1523 		}
1524 #ifdef USE_BRK
1525 	}
1526 #endif
1527 
1528 #ifdef MALLOC_STATS
1529 	stats_chunks.curchunks -= (size / chunksize);
1530 #endif
1531 	malloc_mutex_unlock(&chunks_mtx);
1532 }
1533 
1534 /*
1535  * End chunk management functions.
1536  */
1537 /******************************************************************************/
1538 /*
1539  * Begin arena.
1540  */
1541 
1542 /*
1543  * Choose an arena based on a per-thread and (optimistically) per-CPU value.
1544  *
1545  * We maintain at least one block of arenas.  Usually there are more.
1546  * The blocks are $ncpu arenas in size.  Whole blocks are 'hashed'
1547  * amongst threads.  To accomplish this, next_arena advances only in
1548  * ncpu steps.
1549  */
1550 static __noinline arena_t *
1551 choose_arena_hard(void)
1552 {
1553 	unsigned i, curcpu;
1554 	arena_t **map;
1555 
1556 	/* Initialize the current block of arenas and advance to next. */
1557 	malloc_mutex_lock(&arenas_mtx);
1558 	assert(next_arena % ncpus == 0);
1559 	assert(narenas % ncpus == 0);
1560 	map = &arenas[next_arena];
1561 	set_arenas_map(map);
1562 	for (i = 0; i < ncpus; i++) {
1563 		if (arenas[next_arena] == NULL)
1564 			arenas_extend(next_arena);
1565 		next_arena = (next_arena + 1) % narenas;
1566 	}
1567 	malloc_mutex_unlock(&arenas_mtx);
1568 
1569 	/*
1570 	 * If we were unable to allocate an arena above, then default to
1571 	 * the first arena, which is always present.
1572 	 */
1573 	curcpu = thr_curcpu();
1574 	if (map[curcpu] != NULL)
1575 		return map[curcpu];
1576 	return arenas[0];
1577 }
1578 
1579 static inline arena_t *
1580 choose_arena(void)
1581 {
1582 	unsigned curcpu;
1583 	arena_t **map;
1584 
1585 	map = get_arenas_map();
1586 	curcpu = thr_curcpu();
1587 	if (__predict_true(map != NULL && map[curcpu] != NULL))
1588 		return map[curcpu];
1589 
1590 	return (choose_arena_hard());
1591 }
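
/*
 * Example of the block hashing above with ncpus == 4 and narenas == 8: the
 * first thread to call choose_arena_hard() is given the block arenas[0..3]
 * and next_arena advances to 4; the second thread is given arenas[4..7] and
 * next_arena wraps back to 0; the third thread shares arenas[0..3] with the
 * first.  Within its block each thread indexes by thr_curcpu(), so threads
 * running on different CPUs use different arenas even when they share a
 * block.
 */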
1592 
1593 #ifndef lint
1594 static inline int
1595 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1596 {
1597 
1598 	assert(a != NULL);
1599 	assert(b != NULL);
1600 
1601 	if ((uintptr_t)a < (uintptr_t)b)
1602 		return (-1);
1603 	else if (a == b)
1604 		return (0);
1605 	else
1606 		return (1);
1607 }
1608 
1609 /* Generate red-black tree code for arena chunks. */
1610 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1611 #endif
1612 
1613 #ifndef lint
1614 static inline int
1615 arena_run_comp(arena_run_t *a, arena_run_t *b)
1616 {
1617 
1618 	assert(a != NULL);
1619 	assert(b != NULL);
1620 
1621 	if ((uintptr_t)a < (uintptr_t)b)
1622 		return (-1);
1623 	else if (a == b)
1624 		return (0);
1625 	else
1626 		return (1);
1627 }
1628 
1629 /* Generate red-black tree code for arena runs. */
1630 RB_GENERATE_STATIC(arena_run_tree_s, arena_run_s, link, arena_run_comp);
1631 #endif
1632 
1633 static inline void *
1634 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1635 {
1636 	void *ret;
1637 	unsigned i, mask, bit, regind;
1638 
1639 	assert(run->magic == ARENA_RUN_MAGIC);
1640 	assert(run->regs_minelm < bin->regs_mask_nelms);
1641 
1642 	/*
1643 	 * Move the first check outside the loop, so that run->regs_minelm can
1644 	 * be updated unconditionally, without the possibility of updating it
1645 	 * multiple times.
1646 	 */
1647 	i = run->regs_minelm;
1648 	mask = run->regs_mask[i];
1649 	if (mask != 0) {
1650 		/* Usable allocation found. */
1651 		bit = ffs((int)mask) - 1;
1652 
1653 		regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1654 		ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1655 		    + (bin->reg_size * regind));
1656 
1657 		/* Clear bit. */
1658 		mask ^= (1 << bit);
1659 		run->regs_mask[i] = mask;
1660 
1661 		return (ret);
1662 	}
1663 
1664 	for (i++; i < bin->regs_mask_nelms; i++) {
1665 		mask = run->regs_mask[i];
1666 		if (mask != 0) {
1667 			/* Usable allocation found. */
1668 			bit = ffs((int)mask) - 1;
1669 
1670 			regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1671 			ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1672 			    + (bin->reg_size * regind));
1673 
1674 			/* Clear bit. */
1675 			mask ^= (1 << bit);
1676 			run->regs_mask[i] = mask;
1677 
1678 			/*
1679 			 * Make a note that nothing before this element
1680 			 * contains a free region.
1681 			 */
1682 			run->regs_minelm = i; /* Low payoff: + (mask == 0); */
1683 
1684 			return (ret);
1685 		}
1686 	}
1687 	/* Not reached. */
1688 	/* LINTED */
1689 	assert(0);
1690 	return (NULL);
1691 }
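
/*
 * Example of the index arithmetic above, assuming 32 bit ints
 * (SIZEOF_INT_2POW == 2): if regs_mask[1] == 0x00000008, ffs() finds bit 3,
 * so regind == (1 << 5) + 3 == 35 and the returned region starts at
 * (uintptr_t)run + bin->reg0_offset + 35 * bin->reg_size.  Clearing the bit
 * marks region 35 as allocated, since set bits denote free regions.
 */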
1692 
1693 static inline void
1694 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1695 {
1696 	/*
1697 	 * To divide by a number D that is not a power of two we multiply
1698 	 * by (2^21 / D) and then right shift by 21 positions.
1699 	 *
1700 	 *   X / D
1701 	 *
1702 	 * becomes
1703 	 *
1704 	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
1705 	 */
1706 #define SIZE_INV_SHIFT 21
1707 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
1708 	static const unsigned size_invs[] = {
1709 	    SIZE_INV(3),
1710 	    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1711 	    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1712 	    SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1713 	    SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1714 	    SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1715 	    SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1716 	    SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1717 #if (QUANTUM_2POW_MIN < 4)
1718 	    ,
1719 	    SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1720 	    SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1721 	    SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1722 	    SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1723 	    SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1724 	    SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1725 	    SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1726 	    SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1727 #endif
1728 	};
1729 	unsigned diff, regind, elm, bit;
1730 
1731 	/* LINTED */
1732 	assert(run->magic == ARENA_RUN_MAGIC);
1733 	assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1734 	    >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1735 
1736 	/*
1737 	 * Avoid doing division with a variable divisor if possible.  Using
1738 	 * actual division here can reduce allocator throughput by over 20%!
1739 	 */
1740 	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1741 	if ((size & (size - 1)) == 0) {
1742 		/*
1743 		 * log2_table allows fast division of a power of two in the
1744 		 * [1..128] range.
1745 		 *
1746 		 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1747 		 */
1748 		static const unsigned char log2_table[] = {
1749 		    0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1750 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1751 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1752 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1753 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1754 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1755 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1756 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1757 		};
1758 
1759 		if (size <= 128)
1760 			regind = (diff >> log2_table[size - 1]);
1761 		else if (size <= 32768)
1762 			regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1763 		else {
1764 			/*
1765 			 * The page size is too large for us to use the lookup
1766 			 * table.  Use real division.
1767 			 */
1768 			regind = (unsigned)(diff / size);
1769 		}
1770 	} else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
1771 	    << QUANTUM_2POW_MIN) + 2) {
1772 		regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1773 		regind >>= SIZE_INV_SHIFT;
1774 	} else {
1775 		/*
1776 		 * size_invs isn't large enough to handle this size class, so
1777 		 * calculate regind using actual division.  This only happens
1778 		 * if the user increases small_max via the 'S' runtime
1779 		 * configuration option.
1780 		 */
1781 		regind = (unsigned)(diff / size);
1782 	}
1783 	assert(diff == regind * size);
1784 	assert(regind < bin->nregs);
1785 
1786 	elm = regind >> (SIZEOF_INT_2POW + 3);
1787 	if (elm < run->regs_minelm)
1788 		run->regs_minelm = elm;
1789 	bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1790 	assert((run->regs_mask[elm] & (1 << bit)) == 0);
1791 	run->regs_mask[elm] |= (1 << bit);
1792 #undef SIZE_INV
1793 #undef SIZE_INV_SHIFT
1794 }
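
/*
 * Worked example of the reciprocal trick above, with QUANTUM_2POW_MIN == 4:
 * for a region size of 48, size_invs[(48 >> 4) - 3] == SIZE_INV(3)
 * == 2097152/48 + 1 == 43691.  For the third region in the run (diff == 96):
 *
 *   regind == (96 * 43691) >> 21 == 4194336 >> 21 == 2
 *
 * which matches 96/48 without performing a division at run time.
 */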
1795 
1796 static void
1797 arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
1798 {
1799 	arena_chunk_t *chunk;
1800 	unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
1801 	unsigned i;
1802 
1803 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1804 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1805 	    >> pagesize_2pow);
1806 	total_pages = chunk->map[run_ind].npages;
1807 	need_pages = (unsigned)(size >> pagesize_2pow);
1808 	assert(need_pages <= total_pages);
1809 	rem_pages = total_pages - need_pages;
1810 
1811 	/* Split enough pages from the front of run to fit allocation size. */
1812 	map_offset = run_ind;
1813 	for (i = 0; i < need_pages; i++) {
1814 		chunk->map[map_offset + i].npages = need_pages;
1815 		chunk->map[map_offset + i].pos = i;
1816 	}
1817 
1818 	/* Keep track of trailing unused pages for later use. */
1819 	if (rem_pages > 0) {
1820 		/* Update map for trailing pages. */
1821 		map_offset += need_pages;
1822 		chunk->map[map_offset].npages = rem_pages;
1823 		chunk->map[map_offset].pos = POS_FREE;
1824 		chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
1825 		chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
1826 	}
1827 
1828 	chunk->pages_used += need_pages;
1829 }
1830 
1831 static arena_chunk_t *
1832 arena_chunk_alloc(arena_t *arena)
1833 {
1834 	arena_chunk_t *chunk;
1835 
1836 	if (arena->spare != NULL) {
1837 		chunk = arena->spare;
1838 		arena->spare = NULL;
1839 
1840 		/* LINTED */
1841 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1842 	} else {
1843 		chunk = (arena_chunk_t *)chunk_alloc(chunksize);
1844 		if (chunk == NULL)
1845 			return (NULL);
1846 #ifdef MALLOC_STATS
1847 		arena->stats.mapped += chunksize;
1848 #endif
1849 
1850 		chunk->arena = arena;
1851 
1852 		/* LINTED */
1853 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1854 
1855 		/*
1856 		 * Claim that no pages are in use, since the header is merely
1857 		 * overhead.
1858 		 */
1859 		chunk->pages_used = 0;
1860 
1861 		chunk->max_frun_npages = chunk_npages -
1862 		    arena_chunk_header_npages;
1863 		chunk->min_frun_ind = arena_chunk_header_npages;
1864 
1865 		/*
1866 		 * Initialize enough of the map to support one maximal free run.
1867 		 */
1868 		chunk->map[arena_chunk_header_npages].npages = chunk_npages -
1869 		    arena_chunk_header_npages;
1870 		chunk->map[arena_chunk_header_npages].pos = POS_FREE;
1871 		chunk->map[chunk_npages - 1].npages = chunk_npages -
1872 		    arena_chunk_header_npages;
1873 		chunk->map[chunk_npages - 1].pos = POS_FREE;
1874 	}
1875 
1876 	return (chunk);
1877 }
1878 
1879 static void
1880 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
1881 {
1882 
1883 	/*
1884 	 * Remove chunk from the chunk tree, regardless of whether this chunk
1885 	 * will be cached, so that the arena does not use it.
1886 	 */
1887 	/* LINTED */
1888 	RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1889 
1890 	if (opt_hint == false) {
1891 		if (arena->spare != NULL) {
1892 			chunk_dealloc((void *)arena->spare, chunksize);
1893 #ifdef MALLOC_STATS
1894 			arena->stats.mapped -= chunksize;
1895 #endif
1896 		}
1897 		arena->spare = chunk;
1898 	} else {
1899 		assert(arena->spare == NULL);
1900 		chunk_dealloc((void *)chunk, chunksize);
1901 #ifdef MALLOC_STATS
1902 		arena->stats.mapped -= chunksize;
1903 #endif
1904 	}
1905 }
1906 
1907 static arena_run_t *
1908 arena_run_alloc(arena_t *arena, size_t size)
1909 {
1910 	arena_chunk_t *chunk;
1911 	arena_run_t *run;
1912 	unsigned need_npages, limit_pages, compl_need_npages;
1913 
1914 	assert(size <= (chunksize - (arena_chunk_header_npages <<
1915 	    pagesize_2pow)));
1916 	assert((size & pagesize_mask) == 0);
1917 
1918 	/*
1919 	 * Search through the arena's chunks in address order for a free run
1920 	 * that is large enough.  Look for the first fit.
1921 	 */
1922 	need_npages = (unsigned)(size >> pagesize_2pow);
1923 	limit_pages = chunk_npages - arena_chunk_header_npages;
1924 	compl_need_npages = limit_pages - need_npages;
1925 	/* LINTED */
1926 	RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
1927 		/*
1928 		 * Avoid searching this chunk if there are not enough
1929 		 * contiguous free pages for there to possibly be a large
1930 		 * enough free run.
1931 		 */
1932 		if (chunk->pages_used <= compl_need_npages &&
1933 		    need_npages <= chunk->max_frun_npages) {
1934 			arena_chunk_map_t *mapelm;
1935 			unsigned i;
1936 			unsigned max_frun_npages = 0;
1937 			unsigned min_frun_ind = chunk_npages;
1938 
1939 			assert(chunk->min_frun_ind >=
1940 			    arena_chunk_header_npages);
1941 			for (i = chunk->min_frun_ind; i < chunk_npages;) {
1942 				mapelm = &chunk->map[i];
1943 				if (mapelm->pos == POS_FREE) {
1944 					if (mapelm->npages >= need_npages) {
1945 						run = (arena_run_t *)
1946 						    ((uintptr_t)chunk + (i <<
1947 						    pagesize_2pow));
1948 						/* Update page map. */
1949 						arena_run_split(arena, run,
1950 						    size);
1951 						return (run);
1952 					}
1953 					if (mapelm->npages >
1954 					    max_frun_npages) {
1955 						max_frun_npages =
1956 						    mapelm->npages;
1957 					}
1958 					if (i < min_frun_ind) {
1959 						min_frun_ind = i;
1960 						if (i < chunk->min_frun_ind)
1961 							chunk->min_frun_ind = i;
1962 					}
1963 				}
1964 				i += mapelm->npages;
1965 			}
1966 			/*
1967 			 * Search failure.  Reset cached chunk->max_frun_npages.
1968 			 * chunk->min_frun_ind was already reset above (if
1969 			 * necessary).
1970 			 */
1971 			chunk->max_frun_npages = max_frun_npages;
1972 		}
1973 	}
1974 
1975 	/*
1976 	 * No usable runs.  Create a new chunk from which to allocate the run.
1977 	 */
1978 	chunk = arena_chunk_alloc(arena);
1979 	if (chunk == NULL)
1980 		return (NULL);
1981 	run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
1982 	    pagesize_2pow));
1983 	/* Update page map. */
1984 	arena_run_split(arena, run, size);
1985 	return (run);
1986 }
1987 
1988 static void
1989 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
1990 {
1991 	arena_chunk_t *chunk;
1992 	unsigned run_ind, run_pages;
1993 
1994 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1995 
1996 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1997 	    >> pagesize_2pow);
1998 	assert(run_ind >= arena_chunk_header_npages);
1999 	assert(run_ind < (chunksize >> pagesize_2pow));
2000 	run_pages = (unsigned)(size >> pagesize_2pow);
2001 	assert(run_pages == chunk->map[run_ind].npages);
2002 
2003 	/* Subtract pages from count of pages used in chunk. */
2004 	chunk->pages_used -= run_pages;
2005 
2006 	/* Mark run as deallocated. */
2007 	assert(chunk->map[run_ind].npages == run_pages);
2008 	chunk->map[run_ind].pos = POS_FREE;
2009 	assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
2010 	chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
2011 
2012 	/*
2013 	 * Tell the kernel that we don't need the data in this run, but only if
2014 	 * requested via runtime configuration.
2015 	 */
2016 	if (opt_hint)
2017 		madvise(run, size, MADV_FREE);
2018 
2019 	/* Try to coalesce with neighboring runs. */
2020 	if (run_ind > arena_chunk_header_npages &&
2021 	    chunk->map[run_ind - 1].pos == POS_FREE) {
2022 		unsigned prev_npages;
2023 
2024 		/* Coalesce with previous run. */
2025 		prev_npages = chunk->map[run_ind - 1].npages;
2026 		run_ind -= prev_npages;
2027 		assert(chunk->map[run_ind].npages == prev_npages);
2028 		assert(chunk->map[run_ind].pos == POS_FREE);
2029 		run_pages += prev_npages;
2030 
2031 		chunk->map[run_ind].npages = run_pages;
2032 		assert(chunk->map[run_ind].pos == POS_FREE);
2033 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2034 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2035 	}
2036 
2037 	if (run_ind + run_pages < chunk_npages &&
2038 	    chunk->map[run_ind + run_pages].pos == POS_FREE) {
2039 		unsigned next_npages;
2040 
2041 		/* Coalesce with next run. */
2042 		next_npages = chunk->map[run_ind + run_pages].npages;
2043 		run_pages += next_npages;
2044 		assert(chunk->map[run_ind + run_pages - 1].npages ==
2045 		    next_npages);
2046 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2047 
2048 		chunk->map[run_ind].npages = run_pages;
2049 		chunk->map[run_ind].pos = POS_FREE;
2050 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2051 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2052 	}
2053 
2054 	if (chunk->map[run_ind].npages > chunk->max_frun_npages)
2055 		chunk->max_frun_npages = chunk->map[run_ind].npages;
2056 	if (run_ind < chunk->min_frun_ind)
2057 		chunk->min_frun_ind = run_ind;
2058 
2059 	/* Deallocate chunk if it is now completely unused. */
2060 	if (chunk->pages_used == 0)
2061 		arena_chunk_dealloc(arena, chunk);
2062 }
2063 
2064 static arena_run_t *
2065 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2066 {
2067 	arena_run_t *run;
2068 	unsigned i, remainder;
2069 
2070 	/* Look for a usable run. */
2071 	/* LINTED */
2072 	if ((run = RB_MIN(arena_run_tree_s, &bin->runs)) != NULL) {
2073 		/* run is guaranteed to have available space. */
2074 		/* LINTED */
2075 		RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2076 #ifdef MALLOC_STATS
2077 		bin->stats.reruns++;
2078 #endif
2079 		return (run);
2080 	}
2081 	/* No existing runs have any space available. */
2082 
2083 	/* Allocate a new run. */
2084 	run = arena_run_alloc(arena, bin->run_size);
2085 	if (run == NULL)
2086 		return (NULL);
2087 
2088 	/* Initialize run internals. */
2089 	run->bin = bin;
2090 
2091 	for (i = 0; i < bin->regs_mask_nelms; i++)
2092 		run->regs_mask[i] = UINT_MAX;
2093 	remainder = bin->nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1);
2094 	if (remainder != 0) {
2095 		/* The last element has spare bits that need to be unset. */
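		/*
		 * For example (assuming 32-bit mask words, i.e.
		 * SIZEOF_INT_2POW == 2), nregs == 50 gives remainder == 18,
		 * so the last word becomes UINT_MAX >> (32 - 18), leaving
		 * only the low 18 bits set.
		 */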
2096 		run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2097 		    - remainder));
2098 	}
2099 
2100 	run->regs_minelm = 0;
2101 
2102 	run->nfree = bin->nregs;
2103 #ifdef MALLOC_DEBUG
2104 	run->magic = ARENA_RUN_MAGIC;
2105 #endif
2106 
2107 #ifdef MALLOC_STATS
2108 	bin->stats.nruns++;
2109 	bin->stats.curruns++;
2110 	if (bin->stats.curruns > bin->stats.highruns)
2111 		bin->stats.highruns = bin->stats.curruns;
2112 #endif
2113 	return (run);
2114 }
2115 
2116 /* bin->runcur must have space available before this function is called. */
2117 static inline void *
2118 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2119 {
2120 	void *ret;
2121 
2122 	assert(run->magic == ARENA_RUN_MAGIC);
2123 	assert(run->nfree > 0);
2124 
2125 	ret = arena_run_reg_alloc(run, bin);
2126 	assert(ret != NULL);
2127 	run->nfree--;
2128 
2129 	return (ret);
2130 }
2131 
2132 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2133 static void *
2134 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2135 {
2136 
2137 	bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2138 	if (bin->runcur == NULL)
2139 		return (NULL);
2140 	assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2141 	assert(bin->runcur->nfree > 0);
2142 
2143 	return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2144 }
2145 
2146 /*
2147  * Calculate bin->run_size such that it meets the following constraints:
2148  *
2149  *   *) bin->run_size >= min_run_size
2150  *   *) bin->run_size <= arena_maxclass
2151  *   *) bin->run_size <= RUN_MAX_SMALL
2152  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2153  *
2154  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2155  * also calculated here, since these settings are all interdependent.
2156  */
2157 static size_t
2158 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
2159 {
2160 	size_t try_run_size, good_run_size;
2161 	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
2162 	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
2163 
2164 	assert(min_run_size >= pagesize);
2165 	assert(min_run_size <= arena_maxclass);
2166 	assert(min_run_size <= RUN_MAX_SMALL);
2167 
2168 	/*
2169 	 * Calculate known-valid settings before entering the run_size
2170 	 * expansion loop, so that the first part of the loop always copies
2171 	 * valid settings.
2172 	 *
2173 	 * The do..while loop iteratively reduces the number of regions until
2174 	 * the run header and the regions no longer overlap.  A closed formula
2175 	 * would be quite messy, since there is an interdependency between the
2176 	 * header's mask length and the number of regions.
2177 	 */
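	/*
	 * Hypothetical illustration (sizeof(arena_run_t) varies by build;
	 * 40 bytes and 4 byte mask words are assumed here): with a 4096 byte
	 * run and 16 byte regions, the loop starts at try_nregs == 253
	 * (8 mask words, header needs 40 + 4*7 == 68 bytes, reg0_offset only
	 * 48), shrinks to 252 (reg0_offset 64, still too small), and settles
	 * at 251 regions with reg0_offset == 80.
	 */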
2178 	try_run_size = min_run_size;
2179 	try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2180 	    bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
2181 	do {
2182 		try_nregs--;
2183 		try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2184 		    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
2185 		try_reg0_offset = (unsigned)(try_run_size -
2186 		    (try_nregs * bin->reg_size));
2187 	} while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
2188 	    > try_reg0_offset);
2189 
2190 	/* run_size expansion loop. */
2191 	do {
2192 		/*
2193 		 * Copy valid settings before trying more aggressive settings.
2194 		 */
2195 		good_run_size = try_run_size;
2196 		good_nregs = try_nregs;
2197 		good_mask_nelms = try_mask_nelms;
2198 		good_reg0_offset = try_reg0_offset;
2199 
2200 		/* Try more aggressive settings. */
2201 		try_run_size += pagesize;
2202 		try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2203 		    bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
2204 		do {
2205 			try_nregs--;
2206 			try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2207 			    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ?
2208 			    1 : 0);
2209 			try_reg0_offset = (unsigned)(try_run_size - (try_nregs *
2210 			    bin->reg_size));
2211 		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
2212 		    (try_mask_nelms - 1)) > try_reg0_offset);
2213 	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
2214 	    && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
2215 	    && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
2216 
2217 	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
2218 	    <= good_reg0_offset);
2219 	assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
2220 
2221 	/* Copy final settings. */
2222 	bin->run_size = good_run_size;
2223 	bin->nregs = good_nregs;
2224 	bin->regs_mask_nelms = good_mask_nelms;
2225 	bin->reg0_offset = good_reg0_offset;
2226 
2227 	return (good_run_size);
2228 }
2229 
2230 static void *
2231 arena_malloc(arena_t *arena, size_t size)
2232 {
2233 	void *ret;
2234 
2235 	assert(arena != NULL);
2236 	assert(arena->magic == ARENA_MAGIC);
2237 	assert(size != 0);
2238 	assert(QUANTUM_CEILING(size) <= arena_maxclass);
2239 
2240 	if (size <= bin_maxclass) {
2241 		arena_bin_t *bin;
2242 		arena_run_t *run;
2243 
2244 		/* Small allocation. */
2245 
2246 		if (size < small_min) {
2247 			/* Tiny. */
2248 			size = pow2_ceil(size);
2249 			bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
2250 			    1)))];
2251 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2252 			/*
2253 			 * Bin calculation is always correct, but we may need
2254 			 * to fix size for the purposes of assertions and/or
2255 			 * stats accuracy.
2256 			 */
2257 			if (size < (1 << TINY_MIN_2POW))
2258 				size = (1 << TINY_MIN_2POW);
2259 #endif
2260 		} else if (size <= small_max) {
2261 			/* Quantum-spaced. */
2262 			size = QUANTUM_CEILING(size);
2263 			bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2264 			    - 1];
2265 		} else {
2266 			/* Sub-page. */
2267 			size = pow2_ceil(size);
2268 			bin = &arena->bins[ntbins + nqbins
2269 			    + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
2270 		}
2271 		assert(size == bin->reg_size);
2272 
2273 		malloc_mutex_lock(&arena->mtx);
2274 		if ((run = bin->runcur) != NULL && run->nfree > 0)
2275 			ret = arena_bin_malloc_easy(arena, bin, run);
2276 		else
2277 			ret = arena_bin_malloc_hard(arena, bin);
2278 
2279 		if (ret == NULL) {
2280 			malloc_mutex_unlock(&arena->mtx);
2281 			return (NULL);
2282 		}
2283 
2284 #ifdef MALLOC_STATS
2285 		bin->stats.nrequests++;
2286 		arena->stats.nmalloc_small++;
2287 		arena->stats.allocated_small += size;
2288 #endif
2289 	} else {
2290 		/* Large allocation. */
2291 		size = PAGE_CEILING(size);
2292 		malloc_mutex_lock(&arena->mtx);
2293 		ret = (void *)arena_run_alloc(arena, size);
2294 		if (ret == NULL) {
2295 			malloc_mutex_unlock(&arena->mtx);
2296 			return (NULL);
2297 		}
2298 #ifdef MALLOC_STATS
2299 		arena->stats.nmalloc_large++;
2300 		arena->stats.allocated_large += size;
2301 #endif
2302 	}
2303 
2304 	malloc_mutex_unlock(&arena->mtx);
2305 
2306 	if (opt_junk)
2307 		memset(ret, 0xa5, size);
2308 	else if (opt_zero)
2309 		memset(ret, 0, size);
2310 	return (ret);
2311 }
2312 
2313 static inline void
2314 arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
2315     unsigned npages)
2316 {
2317 	unsigned i;
2318 
2319 	assert(npages > 0);
2320 
2321 	/*
2322 	 * Modify the map such that arena_run_dalloc() sees the run as
2323 	 * separately allocated.
2324 	 */
2325 	for (i = 0; i < npages; i++) {
2326 		chunk->map[pageind + i].npages = npages;
2327 		chunk->map[pageind + i].pos = i;
2328 	}
2329 	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
2330 	    pagesize_2pow)), npages << pagesize_2pow);
2331 }
2332 
2333 /* Only handles large allocations that require more than page alignment. */
2334 static void *
2335 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
2336 {
2337 	void *ret;
2338 	size_t offset;
2339 	arena_chunk_t *chunk;
2340 	unsigned pageind, i, npages;
2341 
2342 	assert((size & pagesize_mask) == 0);
2343 	assert((alignment & pagesize_mask) == 0);
2344 
2345 	npages = (unsigned)(size >> pagesize_2pow);
2346 
2347 	malloc_mutex_lock(&arena->mtx);
2348 	ret = (void *)arena_run_alloc(arena, alloc_size);
2349 	if (ret == NULL) {
2350 		malloc_mutex_unlock(&arena->mtx);
2351 		return (NULL);
2352 	}
2353 
2354 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
2355 
2356 	offset = (uintptr_t)ret & (alignment - 1);
2357 	assert((offset & pagesize_mask) == 0);
2358 	assert(offset < alloc_size);
2359 	if (offset == 0) {
2360 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2361 		    pagesize_2pow);
2362 
2363 		/* Update the map for the run to be kept. */
2364 		for (i = 0; i < npages; i++) {
2365 			chunk->map[pageind + i].npages = npages;
2366 			assert(chunk->map[pageind + i].pos == i);
2367 		}
2368 
2369 		/* Trim trailing space. */
2370 		arena_palloc_trim(arena, chunk, pageind + npages,
2371 		    (unsigned)((alloc_size - size) >> pagesize_2pow));
2372 	} else {
2373 		size_t leadsize, trailsize;
2374 
2375 		leadsize = alignment - offset;
2376 		ret = (void *)((uintptr_t)ret + leadsize);
2377 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2378 		    pagesize_2pow);
2379 
2380 		/* Update the map for the run to be kept. */
2381 		for (i = 0; i < npages; i++) {
2382 			chunk->map[pageind + i].npages = npages;
2383 			chunk->map[pageind + i].pos = i;
2384 		}
2385 
2386 		/* Trim leading space. */
2387 		arena_palloc_trim(arena, chunk,
2388 		    (unsigned)(pageind - (leadsize >> pagesize_2pow)),
2389 		    (unsigned)(leadsize >> pagesize_2pow));
2390 
2391 		trailsize = alloc_size - leadsize - size;
2392 		if (trailsize != 0) {
2393 			/* Trim trailing space. */
2394 			assert(trailsize < alloc_size);
2395 			arena_palloc_trim(arena, chunk, pageind + npages,
2396 			    (unsigned)(trailsize >> pagesize_2pow));
2397 		}
2398 	}
2399 
2400 #ifdef MALLOC_STATS
2401 	arena->stats.nmalloc_large++;
2402 	arena->stats.allocated_large += size;
2403 #endif
2404 	malloc_mutex_unlock(&arena->mtx);
2405 
2406 	if (opt_junk)
2407 		memset(ret, 0xa5, size);
2408 	else if (opt_zero)
2409 		memset(ret, 0, size);
2410 	return (ret);
2411 }
2412 
2413 /* Return the size of the allocation pointed to by ptr. */
2414 static size_t
2415 arena_salloc(const void *ptr)
2416 {
2417 	size_t ret;
2418 	arena_chunk_t *chunk;
2419 	arena_chunk_map_t *mapelm;
2420 	unsigned pageind;
2421 
2422 	assert(ptr != NULL);
2423 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2424 
2425 	/*
2426 	 * No arena data structures that we query here can change in a way that
2427 	 * affects this function, so we don't need to lock.
2428 	 */
2429 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2430 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2431 	    pagesize_2pow);
2432 	mapelm = &chunk->map[pageind];
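	/*
	 * A large run is always page-aligned and its first map element has
	 * pos == 0; a small region either lies past its run's first page
	 * (pos != 0) or shares that first page with the run header and is
	 * therefore not page-aligned.
	 */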
2433 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2434 	    pagesize_2pow)) {
2435 		arena_run_t *run;
2436 
2437 		pageind -= mapelm->pos;
2438 
2439 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2440 		    pagesize_2pow));
2441 		assert(run->magic == ARENA_RUN_MAGIC);
2442 		ret = run->bin->reg_size;
2443 	} else
2444 		ret = mapelm->npages << pagesize_2pow;
2445 
2446 	return (ret);
2447 }
2448 
2449 static void *
2450 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2451 {
2452 	void *ret;
2453 
2454 	/* Avoid moving the allocation if the size class would not change. */
2455 	if (size < small_min) {
2456 		if (oldsize < small_min &&
2457 		    ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
2458 		    == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
2459 			goto IN_PLACE;
2460 	} else if (size <= small_max) {
2461 		if (oldsize >= small_min && oldsize <= small_max &&
2462 		    (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2463 		    == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2464 			goto IN_PLACE;
2465 	} else {
2466 		/*
2467 		 * We make no attempt to resize runs here, though it would be
2468 		 * possible to do so.
2469 		 */
2470 		if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
2471 			goto IN_PLACE;
2472 	}
2473 
2474 	/*
2475 	 * If we get here, then size and oldsize are different enough that we
2476 	 * need to use a different size class.  In that case, fall back to
2477 	 * allocating new space and copying.
2478 	 */
2479 	ret = arena_malloc(choose_arena(), size);
2480 	if (ret == NULL)
2481 		return (NULL);
2482 
2483 	/* Junk/zero-filling were already done by arena_malloc(). */
2484 	if (size < oldsize)
2485 		memcpy(ret, ptr, size);
2486 	else
2487 		memcpy(ret, ptr, oldsize);
2488 	idalloc(ptr);
2489 	return (ret);
2490 IN_PLACE:
2491 	if (opt_junk && size < oldsize)
2492 		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
2493 	else if (opt_zero && size > oldsize)
2494 		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
2495 	return (ptr);
2496 }
2497 
2498 static void
2499 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2500 {
2501 	unsigned pageind;
2502 	arena_chunk_map_t *mapelm;
2503 	size_t size;
2504 
2505 	assert(arena != NULL);
2506 	assert(arena->magic == ARENA_MAGIC);
2507 	assert(chunk->arena == arena);
2508 	assert(ptr != NULL);
2509 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2510 
2511 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2512 	    pagesize_2pow);
2513 	mapelm = &chunk->map[pageind];
2514 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2515 	    pagesize_2pow)) {
2516 		arena_run_t *run;
2517 		arena_bin_t *bin;
2518 
2519 		/* Small allocation. */
2520 
2521 		pageind -= mapelm->pos;
2522 
2523 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2524 		    pagesize_2pow));
2525 		assert(run->magic == ARENA_RUN_MAGIC);
2526 		bin = run->bin;
2527 		size = bin->reg_size;
2528 
2529 		if (opt_junk)
2530 			memset(ptr, 0x5a, size);
2531 
2532 		malloc_mutex_lock(&arena->mtx);
2533 		arena_run_reg_dalloc(run, bin, ptr, size);
2534 		run->nfree++;
2535 
2536 		if (run->nfree == bin->nregs) {
2537 			/* Deallocate run. */
2538 			if (run == bin->runcur)
2539 				bin->runcur = NULL;
2540 			else if (bin->nregs != 1) {
2541 				/*
2542 				 * This block's conditional is necessary because
2543 				 * if the run only contains one region, then it
2544 				 * never gets inserted into the non-full runs
2545 				 * tree.
2546 				 */
2547 				/* LINTED */
2548 				RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2549 			}
2550 #ifdef MALLOC_DEBUG
2551 			run->magic = 0;
2552 #endif
2553 			arena_run_dalloc(arena, run, bin->run_size);
2554 #ifdef MALLOC_STATS
2555 			bin->stats.curruns--;
2556 #endif
2557 		} else if (run->nfree == 1 && run != bin->runcur) {
2558 			/*
2559 			 * Make sure that bin->runcur always refers to the
2560 			 * lowest non-full run, if one exists.
2561 			 */
2562 			if (bin->runcur == NULL)
2563 				bin->runcur = run;
2564 			else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
2565 				/* Switch runcur. */
2566 				if (bin->runcur->nfree > 0) {
2567 					/* Insert runcur. */
2568 					/* LINTED */
2569 					RB_INSERT(arena_run_tree_s, &bin->runs,
2570 					    bin->runcur);
2571 				}
2572 				bin->runcur = run;
2573 			} else {
2574 				/* LINTED */
2575 				RB_INSERT(arena_run_tree_s, &bin->runs, run);
2576 			}
2577 		}
2578 #ifdef MALLOC_STATS
2579 		arena->stats.allocated_small -= size;
2580 		arena->stats.ndalloc_small++;
2581 #endif
2582 	} else {
2583 		/* Large allocation. */
2584 
2585 		size = mapelm->npages << pagesize_2pow;
2586 		assert((((uintptr_t)ptr) & pagesize_mask) == 0);
2587 
2588 		if (opt_junk)
2589 			memset(ptr, 0x5a, size);
2590 
2591 		malloc_mutex_lock(&arena->mtx);
2592 		arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2593 #ifdef MALLOC_STATS
2594 		arena->stats.allocated_large -= size;
2595 		arena->stats.ndalloc_large++;
2596 #endif
2597 	}
2598 
2599 	malloc_mutex_unlock(&arena->mtx);
2600 }
2601 
2602 static bool
2603 arena_new(arena_t *arena)
2604 {
2605 	unsigned i;
2606 	arena_bin_t *bin;
2607 	size_t prev_run_size;
2608 
2609 	malloc_mutex_init(&arena->mtx);
2610 
2611 #ifdef MALLOC_STATS
2612 	memset(&arena->stats, 0, sizeof(arena_stats_t));
2613 #endif
2614 
2615 	/* Initialize chunks. */
2616 	RB_INIT(&arena->chunks);
2617 	arena->spare = NULL;
2618 
2619 	/* Initialize bins. */
2620 	prev_run_size = pagesize;
2621 
2622 	/* (2^n)-spaced tiny bins. */
2623 	for (i = 0; i < ntbins; i++) {
2624 		bin = &arena->bins[i];
2625 		bin->runcur = NULL;
2626 		RB_INIT(&bin->runs);
2627 
2628 		bin->reg_size = (1 << (TINY_MIN_2POW + i));
2629 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2630 
2631 #ifdef MALLOC_STATS
2632 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2633 #endif
2634 	}
2635 
2636 	/* Quantum-spaced bins. */
2637 	for (; i < ntbins + nqbins; i++) {
2638 		bin = &arena->bins[i];
2639 		bin->runcur = NULL;
2640 		RB_INIT(&bin->runs);
2641 
2642 		bin->reg_size = quantum * (i - ntbins + 1);
2643 /*
2644 		pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2645 */
2646 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2647 
2648 #ifdef MALLOC_STATS
2649 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2650 #endif
2651 	}
2652 
2653 	/* (2^n)-spaced sub-page bins. */
2654 	for (; i < ntbins + nqbins + nsbins; i++) {
2655 		bin = &arena->bins[i];
2656 		bin->runcur = NULL;
2657 		RB_INIT(&bin->runs);
2658 
2659 		bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2660 
2661 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2662 
2663 #ifdef MALLOC_STATS
2664 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2665 #endif
2666 	}
2667 
2668 #ifdef MALLOC_DEBUG
2669 	arena->magic = ARENA_MAGIC;
2670 #endif
2671 
2672 	return (false);
2673 }
2674 
2675 /* Create a new arena and insert it into the arenas array at index ind. */
2676 static arena_t *
2677 arenas_extend(unsigned ind)
2678 {
2679 	arena_t *ret;
2680 
2681 	/* Allocate enough space for trailing bins. */
2682 	ret = (arena_t *)base_alloc(sizeof(arena_t)
2683 	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2684 	if (ret != NULL && arena_new(ret) == false) {
2685 		arenas[ind] = ret;
2686 		return (ret);
2687 	}
2688 	/* Only reached if there is an OOM error. */
2689 
2690 	/*
2691 	 * OOM here is quite inconvenient to propagate, since dealing with it
2692 	 * would require a check for failure in the fast path.  Instead, punt
2693 	 * by using arenas[0].  In practice, this is an extremely unlikely
2694 	 * failure.
2695 	 */
2696 	_malloc_message(getprogname(),
2697 	    ": (malloc) Error initializing arena\n", "", "");
2698 	if (opt_abort)
2699 		abort();
2700 
2701 	return (arenas[0]);
2702 }
2703 
2704 /*
2705  * End arena.
2706  */
2707 /******************************************************************************/
2708 /*
2709  * Begin general internal functions.
2710  */
2711 
2712 static void *
2713 huge_malloc(size_t size)
2714 {
2715 	void *ret;
2716 	size_t csize;
2717 	chunk_node_t *node;
2718 
2719 	/* Allocate one or more contiguous chunks for this request. */
2720 
2721 	csize = CHUNK_CEILING(size);
2722 	if (csize == 0) {
2723 		/* size is large enough to cause size_t wrap-around. */
2724 		return (NULL);
2725 	}
2726 
2727 	/* Allocate a chunk node with which to track the chunk. */
2728 	node = base_chunk_node_alloc();
2729 	if (node == NULL)
2730 		return (NULL);
2731 
2732 	ret = chunk_alloc(csize);
2733 	if (ret == NULL) {
2734 		base_chunk_node_dealloc(node);
2735 		return (NULL);
2736 	}
2737 
2738 	/* Insert node into huge. */
2739 	node->chunk = ret;
2740 	node->size = csize;
2741 
2742 	malloc_mutex_lock(&chunks_mtx);
2743 	RB_INSERT(chunk_tree_s, &huge, node);
2744 #ifdef MALLOC_STATS
2745 	huge_nmalloc++;
2746 	huge_allocated += csize;
2747 #endif
2748 	malloc_mutex_unlock(&chunks_mtx);
2749 
2750 	if (opt_junk)
2751 		memset(ret, 0xa5, csize);
2752 	else if (opt_zero)
2753 		memset(ret, 0, csize);
2754 
2755 	return (ret);
2756 }
2757 
2758 /* Only handles large allocations that require more than chunk alignment. */
2759 static void *
2760 huge_palloc(size_t alignment, size_t size)
2761 {
2762 	void *ret;
2763 	size_t alloc_size, chunk_size, offset;
2764 	chunk_node_t *node;
2765 
2766 	/*
2767 	 * This allocation requires alignment that is even larger than chunk
2768 	 * alignment.  This means that huge_malloc() isn't good enough.
2769 	 *
2770 	 * Allocate almost twice as many chunks as are demanded by the size or
2771 	 * alignment, in order to ensure the alignment can be achieved, then
2772 	 * unmap leading and trailing chunks.
2773 	 */
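	/*
	 * Worked example (a sketch; a 1 MB chunksize is assumed): a 1 MB
	 * request with 2 MB alignment gives alloc_size == (2 MB << 1) - 1 MB
	 * == 3 MB.  Any chunk-aligned 3 MB region contains a 2 MB-aligned
	 * 1 MB sub-region; the leading and trailing chunks are unmapped
	 * below.
	 */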
2774 	assert(alignment >= chunksize);
2775 
2776 	chunk_size = CHUNK_CEILING(size);
2777 
2778 	if (size >= alignment)
2779 		alloc_size = chunk_size + alignment - chunksize;
2780 	else
2781 		alloc_size = (alignment << 1) - chunksize;
2782 
2783 	/* Allocate a chunk node with which to track the chunk. */
2784 	node = base_chunk_node_alloc();
2785 	if (node == NULL)
2786 		return (NULL);
2787 
2788 	ret = chunk_alloc(alloc_size);
2789 	if (ret == NULL) {
2790 		base_chunk_node_dealloc(node);
2791 		return (NULL);
2792 	}
2793 
2794 	offset = (uintptr_t)ret & (alignment - 1);
2795 	assert((offset & chunksize_mask) == 0);
2796 	assert(offset < alloc_size);
2797 	if (offset == 0) {
2798 		/* Trim trailing space. */
2799 		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
2800 		    - chunk_size);
2801 	} else {
2802 		size_t trailsize;
2803 
2804 		/* Trim leading space. */
2805 		chunk_dealloc(ret, alignment - offset);
2806 
2807 		ret = (void *)((uintptr_t)ret + (alignment - offset));
2808 
2809 		trailsize = alloc_size - (alignment - offset) - chunk_size;
2810 		if (trailsize != 0) {
2811 		    /* Trim trailing space. */
2812 		    assert(trailsize < alloc_size);
2813 		    chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
2814 			trailsize);
2815 		}
2816 	}
2817 
2818 	/* Insert node into huge. */
2819 	node->chunk = ret;
2820 	node->size = chunk_size;
2821 
2822 	malloc_mutex_lock(&chunks_mtx);
2823 	RB_INSERT(chunk_tree_s, &huge, node);
2824 #ifdef MALLOC_STATS
2825 	huge_nmalloc++;
2826 	huge_allocated += chunk_size;
2827 #endif
2828 	malloc_mutex_unlock(&chunks_mtx);
2829 
2830 	if (opt_junk)
2831 		memset(ret, 0xa5, chunk_size);
2832 	else if (opt_zero)
2833 		memset(ret, 0, chunk_size);
2834 
2835 	return (ret);
2836 }
2837 
2838 static void *
2839 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2840 {
2841 	void *ret;
2842 
2843 	/* Avoid moving the allocation if the size class would not change. */
2844 	if (oldsize > arena_maxclass &&
2845 	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
2846 		if (opt_junk && size < oldsize) {
2847 			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
2848 			    - size);
2849 		} else if (opt_zero && size > oldsize) {
2850 			memset((void *)((uintptr_t)ptr + oldsize), 0, size
2851 			    - oldsize);
2852 		}
2853 		return (ptr);
2854 	}
2855 
2856 	if (CHUNK_ADDR2BASE(ptr) == ptr
2857 #ifdef USE_BRK
2858 	    && ((uintptr_t)ptr < (uintptr_t)brk_base
2859 	    || (uintptr_t)ptr >= (uintptr_t)brk_max)
2860 #endif
2861 	    ) {
2862 		chunk_node_t *node, key;
2863 		void *newptr;
2864 		size_t oldcsize;
2865 		size_t newcsize;
2866 
2867 		newcsize = CHUNK_CEILING(size);
2868 		oldcsize = CHUNK_CEILING(oldsize);
2869 		assert(oldcsize != newcsize);
2870 		if (newcsize == 0) {
2871 			/* size_t wrap-around */
2872 			return (NULL);
2873 		}
2874 
2875 		/*
2876 		 * Remove the old region from the tree now.  If mremap()
2877 		 * returns the region to the system, another thread may
2878 		 * map it for its own huge allocation and insert it into
2879 		 * the tree before we acquire the mutex lock again.
2880 		 */
2881 		malloc_mutex_lock(&chunks_mtx);
2882 		key.chunk = __DECONST(void *, ptr);
2883 		/* LINTED */
2884 		node = RB_FIND(chunk_tree_s, &huge, &key);
2885 		assert(node != NULL);
2886 		assert(node->chunk == ptr);
2887 		assert(node->size == oldcsize);
2888 		RB_REMOVE(chunk_tree_s, &huge, node);
2889 		malloc_mutex_unlock(&chunks_mtx);
2890 
2891 		newptr = mremap(ptr, oldcsize, NULL, newcsize,
2892 		    MAP_ALIGNED(chunksize_2pow));
2893 		if (newptr == MAP_FAILED) {
2894 			/* We still own the old region. */
2895 			malloc_mutex_lock(&chunks_mtx);
2896 			RB_INSERT(chunk_tree_s, &huge, node);
2897 			malloc_mutex_unlock(&chunks_mtx);
2898 		} else {
2899 			assert(CHUNK_ADDR2BASE(newptr) == newptr);
2900 
2901 			/* Insert new or resized old region. */
2902 			malloc_mutex_lock(&chunks_mtx);
2903 			node->size = newcsize;
2904 			node->chunk = newptr;
2905 			RB_INSERT(chunk_tree_s, &huge, node);
2906 #ifdef MALLOC_STATS
2907 			huge_nralloc++;
2908 			huge_allocated += newcsize - oldcsize;
2909 			if (newcsize > oldcsize) {
2910 				stats_chunks.curchunks +=
2911 				    (newcsize - oldcsize) / chunksize;
2912 				if (stats_chunks.curchunks >
2913 				    stats_chunks.highchunks)
2914 					stats_chunks.highchunks =
2915 					    stats_chunks.curchunks;
2916 			} else {
2917 				stats_chunks.curchunks -=
2918 				    (oldcsize - newcsize) / chunksize;
2919 			}
2920 #endif
2921 			malloc_mutex_unlock(&chunks_mtx);
2922 
2923 			if (opt_junk && size < oldsize) {
2924 				memset((void *)((uintptr_t)newptr + size), 0x5a,
2925 				    newcsize - size);
2926 			} else if (opt_zero && size > oldsize) {
2927 				memset((void *)((uintptr_t)newptr + oldsize), 0,
2928 				    size - oldsize);
2929 			}
2930 			return (newptr);
2931 		}
2932 	}
2933 
2934 	/*
2935 	 * If we get here, then size and oldsize are different enough that we
2936 	 * need to use a different size class.  In that case, fall back to
2937 	 * allocating new space and copying.
2938 	 */
2939 	ret = huge_malloc(size);
2940 	if (ret == NULL)
2941 		return (NULL);
2942 
2943 	if (CHUNK_ADDR2BASE(ptr) == ptr) {
2944 		/* The old allocation is a chunk. */
2945 		if (size < oldsize)
2946 			memcpy(ret, ptr, size);
2947 		else
2948 			memcpy(ret, ptr, oldsize);
2949 	} else {
2950 		/* The old allocation is a region. */
2951 		assert(oldsize < size);
2952 		memcpy(ret, ptr, oldsize);
2953 	}
2954 	idalloc(ptr);
2955 	return (ret);
2956 }
2957 
2958 static void
2959 huge_dalloc(void *ptr)
2960 {
2961 	chunk_node_t key;
2962 	chunk_node_t *node;
2963 
2964 	malloc_mutex_lock(&chunks_mtx);
2965 
2966 	/* Extract from tree of huge allocations. */
2967 	key.chunk = ptr;
2968 	/* LINTED */
2969 	node = RB_FIND(chunk_tree_s, &huge, &key);
2970 	assert(node != NULL);
2971 	assert(node->chunk == ptr);
2972 	/* LINTED */
2973 	RB_REMOVE(chunk_tree_s, &huge, node);
2974 
2975 #ifdef MALLOC_STATS
2976 	huge_ndalloc++;
2977 	huge_allocated -= node->size;
2978 #endif
2979 
2980 	malloc_mutex_unlock(&chunks_mtx);
2981 
2982 	/* Unmap chunk. */
2983 #ifdef USE_BRK
2984 	if (opt_junk)
2985 		memset(node->chunk, 0x5a, node->size);
2986 #endif
2987 	chunk_dealloc(node->chunk, node->size);
2988 
2989 	base_chunk_node_dealloc(node);
2990 }
2991 
2992 static void *
2993 imalloc(size_t size)
2994 {
2995 	void *ret;
2996 
2997 	assert(size != 0);
2998 
2999 	if (size <= arena_maxclass)
3000 		ret = arena_malloc(choose_arena(), size);
3001 	else
3002 		ret = huge_malloc(size);
3003 
3004 	return (ret);
3005 }
3006 
3007 static void *
3008 ipalloc(size_t alignment, size_t size)
3009 {
3010 	void *ret;
3011 	size_t ceil_size;
3012 
3013 	/*
3014 	 * Round size up to the nearest multiple of alignment.
3015 	 *
3016 	 * This done, we can take advantage of the fact that for each small
3017 	 * size class, every object is aligned at the smallest power of two
3018 	 * that is non-zero in the base two representation of the size.  For
3019 	 * example:
3020 	 *
3021 	 *   Size |   Base 2 | Minimum alignment
3022 	 *   -----+----------+------------------
3023 	 *     96 |  1100000 |  32
3024 	 *    144 | 10010000 |  16
3025 	 *    192 | 11000000 |  64
3026 	 *
3027 	 * Depending on runtime settings, it is possible that arena_malloc()
3028 	 * will further round up to a power of two, but that never causes
3029 	 * correctness issues.
3030 	 */
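	/*
	 * For example, size == 100 with alignment == 32 rounds up to
	 * ceil_size == 128; the lowest set bit of 128 is 128, so any object
	 * in that size class is already at least 32 byte aligned.
	 */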
3031 	ceil_size = (size + (alignment - 1)) & (-alignment);
3032 	/*
3033 	 * (ceil_size < size) protects against the combination of maximal
3034 	 * alignment and size greater than maximal alignment.
3035 	 */
3036 	if (ceil_size < size) {
3037 		/* size_t overflow. */
3038 		return (NULL);
3039 	}
3040 
3041 	if (ceil_size <= pagesize || (alignment <= pagesize
3042 	    && ceil_size <= arena_maxclass))
3043 		ret = arena_malloc(choose_arena(), ceil_size);
3044 	else {
3045 		size_t run_size;
3046 
3047 		/*
3048 		 * We can't achieve sub-page alignment, so round up alignment
3049 		 * permanently; it makes later calculations simpler.
3050 		 */
3051 		alignment = PAGE_CEILING(alignment);
3052 		ceil_size = PAGE_CEILING(size);
3053 		/*
3054 		 * (ceil_size < size) protects against very large sizes within
3055 		 * pagesize of SIZE_T_MAX.
3056 		 *
3057 		 * (ceil_size + alignment < ceil_size) protects against the
3058 		 * combination of maximal alignment and ceil_size large enough
3059 		 * to cause overflow.  This is similar to the first overflow
3060 		 * check above, but it needs to be repeated due to the new
3061 		 * ceil_size value, which may now be *equal* to maximal
3062 		 * alignment, whereas before we only detected overflow if the
3063 		 * original size was *greater* than maximal alignment.
3064 		 */
3065 		if (ceil_size < size || ceil_size + alignment < ceil_size) {
3066 			/* size_t overflow. */
3067 			return (NULL);
3068 		}
3069 
3070 		/*
3071 		 * Calculate the size of the over-size run that arena_palloc()
3072 		 * would need to allocate in order to guarantee the alignment.
3073 		 */
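		/*
		 * For example, with 4 kB pages, ceil_size == 4096 and
		 * alignment == 8192 give run_size == (8192 << 1) - 4096
		 * == 12288; any page-aligned 12 kB run contains an 8 kB
		 * aligned 4 kB region, and arena_palloc() trims the rest.
		 */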
3074 		if (ceil_size >= alignment)
3075 			run_size = ceil_size + alignment - pagesize;
3076 		else {
3077 			/*
3078 			 * It is possible that (alignment << 1) will cause
3079 			 * overflow, but it doesn't matter because we also
3080 			 * subtract pagesize, which in the case of overflow
3081 			 * leaves us with a very large run_size.  That causes
3082 			 * the first conditional below to fail, which means
3083 			 * that the bogus run_size value never gets used for
3084 			 * anything important.
3085 			 */
3086 			run_size = (alignment << 1) - pagesize;
3087 		}
3088 
3089 		if (run_size <= arena_maxclass) {
3090 			ret = arena_palloc(choose_arena(), alignment, ceil_size,
3091 			    run_size);
3092 		} else if (alignment <= chunksize)
3093 			ret = huge_malloc(ceil_size);
3094 		else
3095 			ret = huge_palloc(alignment, ceil_size);
3096 	}
3097 
3098 	assert(((uintptr_t)ret & (alignment - 1)) == 0);
3099 	return (ret);
3100 }
3101 
3102 static void *
3103 icalloc(size_t size)
3104 {
3105 	void *ret;
3106 
3107 	if (size <= arena_maxclass) {
3108 		ret = arena_malloc(choose_arena(), size);
3109 		if (ret == NULL)
3110 			return (NULL);
3111 		memset(ret, 0, size);
3112 	} else {
3113 		/*
3114 		 * The virtual memory system provides zero-filled pages, so
3115 		 * there is no need to do so manually, unless opt_junk is
3116 		 * enabled, in which case huge_malloc() fills huge allocations
3117 		 * with junk.
3118 		 */
3119 		ret = huge_malloc(size);
3120 		if (ret == NULL)
3121 			return (NULL);
3122 
3123 		if (opt_junk)
3124 			memset(ret, 0, size);
3125 #ifdef USE_BRK
3126 		else if ((uintptr_t)ret >= (uintptr_t)brk_base
3127 		    && (uintptr_t)ret < (uintptr_t)brk_max) {
3128 			/*
3129 			 * This may be a re-used brk chunk.  Therefore, zero
3130 			 * the memory.
3131 			 */
3132 			memset(ret, 0, size);
3133 		}
3134 #endif
3135 	}
3136 
3137 	return (ret);
3138 }
3139 
3140 static size_t
3141 isalloc(const void *ptr)
3142 {
3143 	size_t ret;
3144 	arena_chunk_t *chunk;
3145 
3146 	assert(ptr != NULL);
3147 
3148 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3149 	if (chunk != ptr) {
3150 		/* Region. */
3151 		assert(chunk->arena->magic == ARENA_MAGIC);
3152 
3153 		ret = arena_salloc(ptr);
3154 	} else {
3155 		chunk_node_t *node, key;
3156 
3157 		/* Chunk (huge allocation). */
3158 
3159 		malloc_mutex_lock(&chunks_mtx);
3160 
3161 		/* Extract from tree of huge allocations. */
3162 		key.chunk = __DECONST(void *, ptr);
3163 		/* LINTED */
3164 		node = RB_FIND(chunk_tree_s, &huge, &key);
3165 		assert(node != NULL);
3166 
3167 		ret = node->size;
3168 
3169 		malloc_mutex_unlock(&chunks_mtx);
3170 	}
3171 
3172 	return (ret);
3173 }
3174 
3175 static void *
3176 iralloc(void *ptr, size_t size)
3177 {
3178 	void *ret;
3179 	size_t oldsize;
3180 
3181 	assert(ptr != NULL);
3182 	assert(size != 0);
3183 
3184 	oldsize = isalloc(ptr);
3185 
3186 	if (size <= arena_maxclass)
3187 		ret = arena_ralloc(ptr, size, oldsize);
3188 	else
3189 		ret = huge_ralloc(ptr, size, oldsize);
3190 
3191 	return (ret);
3192 }
3193 
3194 static void
3195 idalloc(void *ptr)
3196 {
3197 	arena_chunk_t *chunk;
3198 
3199 	assert(ptr != NULL);
3200 
3201 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3202 	if (chunk != ptr) {
3203 		/* Region. */
3204 		arena_dalloc(chunk->arena, chunk, ptr);
3205 	} else
3206 		huge_dalloc(ptr);
3207 }
3208 
3209 static void
3210 malloc_print_stats(void)
3211 {
3212 
3213 	if (opt_print_stats) {
3214 		char s[UMAX2S_BUFSIZE];
3215 		_malloc_message("___ Begin malloc statistics ___\n", "", "",
3216 		    "");
3217 		_malloc_message("Assertions ",
3218 #ifdef NDEBUG
3219 		    "disabled",
3220 #else
3221 		    "enabled",
3222 #endif
3223 		    "\n", "");
3224 		_malloc_message("Boolean MALLOC_OPTIONS: ",
3225 		    opt_abort ? "A" : "a",
3226 		    opt_junk ? "J" : "j",
3227 		    opt_hint ? "H" : "h");
3228 		_malloc_message(opt_utrace ? "PU" : "Pu",
3229 		    opt_sysv ? "V" : "v",
3230 		    opt_xmalloc ? "X" : "x",
3231 		    opt_zero ? "Z\n" : "z\n");
3232 
3233 		_malloc_message("CPUs: ", size_t2s(ncpus, s), "\n", "");
3234 		_malloc_message("Max arenas: ", size_t2s(narenas, s), "\n", "");
3235 		_malloc_message("Pointer size: ", size_t2s(sizeof(void *), s),
3236 		    "\n", "");
3237 		_malloc_message("Quantum size: ", size_t2s(quantum, s), "\n", "");
3238 		_malloc_message("Max small size: ", size_t2s(small_max, s), "\n",
3239 		    "");
3240 
3241 		_malloc_message("Chunk size: ", size_t2s(chunksize, s), "", "");
3242 		_malloc_message(" (2^", size_t2s((size_t)opt_chunk_2pow, s),
3243 		    ")\n", "");
3244 
3245 #ifdef MALLOC_STATS
3246 		{
3247 			size_t allocated, mapped;
3248 			unsigned i;
3249 			arena_t *arena;
3250 
3251 			/* Calculate and print allocated/mapped stats. */
3252 
3253 			/* arenas. */
3254 			for (i = 0, allocated = 0; i < narenas; i++) {
3255 				if (arenas[i] != NULL) {
3256 					malloc_mutex_lock(&arenas[i]->mtx);
3257 					allocated +=
3258 					    arenas[i]->stats.allocated_small;
3259 					allocated +=
3260 					    arenas[i]->stats.allocated_large;
3261 					malloc_mutex_unlock(&arenas[i]->mtx);
3262 				}
3263 			}
3264 
3265 			/* huge/base. */
3266 			malloc_mutex_lock(&chunks_mtx);
3267 			allocated += huge_allocated;
3268 			mapped = stats_chunks.curchunks * chunksize;
3269 			malloc_mutex_unlock(&chunks_mtx);
3270 
3271 			malloc_mutex_lock(&base_mtx);
3272 			mapped += base_mapped;
3273 			malloc_mutex_unlock(&base_mtx);
3274 
3275 			malloc_printf("Allocated: %zu, mapped: %zu\n",
3276 			    allocated, mapped);
3277 
3278 			/* Print chunk stats. */
3279 			{
3280 				chunk_stats_t chunks_stats;
3281 
3282 				malloc_mutex_lock(&chunks_mtx);
3283 				chunks_stats = stats_chunks;
3284 				malloc_mutex_unlock(&chunks_mtx);
3285 
3286 				malloc_printf("chunks: nchunks   "
3287 				    "highchunks    curchunks\n");
3288 				malloc_printf("  %13llu%13lu%13lu\n",
3289 				    chunks_stats.nchunks,
3290 				    chunks_stats.highchunks,
3291 				    chunks_stats.curchunks);
3292 			}
3293 
3294 			/* Print huge allocation stats. */
3295 			malloc_printf(
3296 			    "huge: nmalloc      ndalloc      "
3297 			    "nralloc    allocated\n");
3298 			malloc_printf(" %12llu %12llu %12llu %12zu\n",
3299 			    huge_nmalloc, huge_ndalloc, huge_nralloc,
3300 			    huge_allocated);
3301 
3302 			/* Print stats for each arena. */
3303 			for (i = 0; i < narenas; i++) {
3304 				arena = arenas[i];
3305 				if (arena != NULL) {
3306 					malloc_printf(
3307 					    "\narenas[%u] @ %p\n", i, arena);
3308 					malloc_mutex_lock(&arena->mtx);
3309 					stats_print(arena);
3310 					malloc_mutex_unlock(&arena->mtx);
3311 				}
3312 			}
3313 		}
3314 #endif /* #ifdef MALLOC_STATS */
3315 		_malloc_message("--- End malloc statistics ---\n", "", "", "");
3316 	}
3317 }
3318 
3319 /*
3320  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
3321  * implementation has to take pains to avoid infinite recursion during
3322  * initialization.
3323  */
3324 static inline bool
3325 malloc_init(void)
3326 {
3327 
3328 	if (malloc_initialized == false)
3329 		return (malloc_init_hard());
3330 
3331 	return (false);
3332 }
3333 
3334 static bool
3335 malloc_init_hard(void)
3336 {
3337 	unsigned i, j;
3338 	ssize_t linklen;
3339 	char buf[PATH_MAX + 1];
3340 	const char *opts = "";
3341 	int serrno;
3342 
3343 	malloc_mutex_lock(&init_lock);
3344 	if (malloc_initialized) {
3345 		/*
3346 		 * Another thread initialized the allocator before this one
3347 		 * acquired init_lock.
3348 		 */
3349 		malloc_mutex_unlock(&init_lock);
3350 		return (false);
3351 	}
3352 
3353 	serrno = errno;
3354 	/* Get number of CPUs. */
3355 	{
3356 		int mib[2];
3357 		size_t len;
3358 
3359 		mib[0] = CTL_HW;
3360 		mib[1] = HW_NCPU;
3361 		len = sizeof(ncpus);
3362 		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3363 			/* Error. */
3364 			ncpus = 1;
3365 		}
3366 	}
3367 
3368 	/* Get page size. */
3369 	{
3370 		long result;
3371 
3372 		result = sysconf(_SC_PAGESIZE);
3373 		assert(result != -1);
3374 		pagesize = (unsigned) result;
3375 
3376 		/*
3377 		 * We assume that pagesize is a power of 2 when calculating
3378 		 * pagesize_mask and pagesize_2pow.
3379 		 */
3380 		assert(((result - 1) & result) == 0);
3381 		pagesize_mask = result - 1;
3382 		pagesize_2pow = ffs((int)result) - 1;
3383 	}
3384 
3385 	for (i = 0; i < 3; i++) {
3386 		/* Get runtime configuration. */
3387 		switch (i) {
3388 		case 0:
3389 			if ((linklen = readlink("/etc/malloc.conf", buf,
3390 						sizeof(buf) - 1)) != -1) {
3391 				/*
3392 				 * Use the contents of the "/etc/malloc.conf"
3393 				 * symbolic link's name.
3394 				 */
3395 				buf[linklen] = '\0';
3396 				opts = buf;
3397 			} else {
3398 				/* No configuration specified. */
3399 				buf[0] = '\0';
3400 				opts = buf;
3401 			}
3402 			break;
3403 		case 1:
3404 			if ((opts = getenv("MALLOC_OPTIONS")) != NULL &&
3405 			    issetugid() == 0) {
3406 				/*
3407 				 * Do nothing; opts is already initialized to
3408 				 * the value of the MALLOC_OPTIONS environment
3409 				 * variable.
3410 				 */
3411 			} else {
3412 				/* No configuration specified. */
3413 				buf[0] = '\0';
3414 				opts = buf;
3415 			}
3416 			break;
3417 		case 2:
3418 			if (_malloc_options != NULL) {
3419 			    /*
3420 			     * Use options that were compiled into the program.
3421 			     */
3422 			    opts = _malloc_options;
3423 			} else {
3424 				/* No configuration specified. */
3425 				buf[0] = '\0';
3426 				opts = buf;
3427 			}
3428 			break;
3429 		default:
3430 			/* NOTREACHED */
3431 			/* LINTED */
3432 			assert(false);
3433 		}
3434 
3435 		for (j = 0; opts[j] != '\0'; j++) {
3436 			switch (opts[j]) {
3437 			case 'a':
3438 				opt_abort = false;
3439 				break;
3440 			case 'A':
3441 				opt_abort = true;
3442 				break;
3443 			case 'h':
3444 				opt_hint = false;
3445 				break;
3446 			case 'H':
3447 				opt_hint = true;
3448 				break;
3449 			case 'j':
3450 				opt_junk = false;
3451 				break;
3452 			case 'J':
3453 				opt_junk = true;
3454 				break;
3455 			case 'k':
3456 				/*
3457 				 * Chunks always require at least one header
3458 				 * page, so chunks can never be smaller than
3459 				 * two pages.
3460 				 */
3461 				if (opt_chunk_2pow > pagesize_2pow + 1)
3462 					opt_chunk_2pow--;
3463 				break;
3464 			case 'K':
3465 				if (opt_chunk_2pow + 1 <
3466 				    (int)(sizeof(size_t) << 3))
3467 					opt_chunk_2pow++;
3468 				break;
3469 			case 'n':
3470 				opt_narenas_lshift--;
3471 				break;
3472 			case 'N':
3473 				opt_narenas_lshift++;
3474 				break;
3475 			case 'p':
3476 				opt_print_stats = false;
3477 				break;
3478 			case 'P':
3479 				opt_print_stats = true;
3480 				break;
3481 			case 'q':
3482 				if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3483 					opt_quantum_2pow--;
3484 				break;
3485 			case 'Q':
3486 				if (opt_quantum_2pow < pagesize_2pow - 1)
3487 					opt_quantum_2pow++;
3488 				break;
3489 			case 's':
3490 				if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3491 					opt_small_max_2pow--;
3492 				break;
3493 			case 'S':
3494 				if (opt_small_max_2pow < pagesize_2pow - 1)
3495 					opt_small_max_2pow++;
3496 				break;
3497 			case 'u':
3498 				opt_utrace = false;
3499 				break;
3500 			case 'U':
3501 				opt_utrace = true;
3502 				break;
3503 			case 'v':
3504 				opt_sysv = false;
3505 				break;
3506 			case 'V':
3507 				opt_sysv = true;
3508 				break;
3509 			case 'x':
3510 				opt_xmalloc = false;
3511 				break;
3512 			case 'X':
3513 				opt_xmalloc = true;
3514 				break;
3515 			case 'z':
3516 				opt_zero = false;
3517 				break;
3518 			case 'Z':
3519 				opt_zero = true;
3520 				break;
3521 			default: {
3522 				char cbuf[2];
3523 
3524 				cbuf[0] = opts[j];
3525 				cbuf[1] = '\0';
3526 				_malloc_message(getprogname(),
3527 				    ": (malloc) Unsupported character in "
3528 				    "malloc options: '", cbuf, "'\n");
3529 			}
3530 			}
3531 		}
3532 	}
3533 	errno = serrno;
3534 
3535 	/* Take care to call atexit() only once. */
3536 	if (opt_print_stats) {
3537 		/* Print statistics at exit. */
3538 		atexit(malloc_print_stats);
3539 	}
3540 
3541 	/* Set variables according to the value of opt_small_max_2pow. */
3542 	if (opt_small_max_2pow < opt_quantum_2pow)
3543 		opt_small_max_2pow = opt_quantum_2pow;
3544 	small_max = (1 << opt_small_max_2pow);
3545 
3546 	/* Set bin-related variables. */
3547 	bin_maxclass = (pagesize >> 1);
3548 	assert(opt_quantum_2pow >= TINY_MIN_2POW);
3549 	ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW);
3550 	assert(ntbins <= opt_quantum_2pow);
3551 	nqbins = (unsigned)(small_max >> opt_quantum_2pow);
3552 	nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1);
3553 
3554 	/* Set variables according to the value of opt_quantum_2pow. */
3555 	quantum = (1 << opt_quantum_2pow);
3556 	quantum_mask = quantum - 1;
3557 	if (ntbins > 0)
3558 		small_min = (quantum >> 1) + 1;
3559 	else
3560 		small_min = 1;
3561 	assert(small_min <= quantum);
3562 
3563 	/* Set variables according to the value of opt_chunk_2pow. */
3564 	chunksize = (1LU << opt_chunk_2pow);
3565 	chunksize_mask = chunksize - 1;
3566 	chunksize_2pow = (unsigned)opt_chunk_2pow;
3567 	chunk_npages = (unsigned)(chunksize >> pagesize_2pow);
3568 	{
3569 		unsigned header_size;
3570 
3571 		header_size = (unsigned)(sizeof(arena_chunk_t) +
3572 		    (sizeof(arena_chunk_map_t) * (chunk_npages - 1)));
3573 		arena_chunk_header_npages = (header_size >> pagesize_2pow);
3574 		if ((header_size & pagesize_mask) != 0)
3575 			arena_chunk_header_npages++;
3576 	}
3577 	arena_maxclass = chunksize - (arena_chunk_header_npages <<
3578 	    pagesize_2pow);
3579 
3580 	UTRACE(0, 0, 0);
3581 
3582 #ifdef MALLOC_STATS
3583 	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3584 #endif
3585 
3586 	/* Various sanity checks that regard configuration. */
3587 	assert(quantum >= sizeof(void *));
3588 	assert(quantum <= pagesize);
3589 	assert(chunksize >= pagesize);
3590 	assert(quantum * 4 <= chunksize);
3591 
3592 	/* Initialize chunks data. */
3593 	malloc_mutex_init(&chunks_mtx);
3594 	RB_INIT(&huge);
3595 #ifdef USE_BRK
3596 	malloc_mutex_init(&brk_mtx);
3597 	brk_base = sbrk(0);
3598 	brk_prev = brk_base;
3599 	brk_max = brk_base;
3600 #endif
3601 #ifdef MALLOC_STATS
3602 	huge_nmalloc = 0;
3603 	huge_ndalloc = 0;
3604 	huge_nralloc = 0;
3605 	huge_allocated = 0;
3606 #endif
3607 	RB_INIT(&old_chunks);
3608 
3609 	/* Initialize base allocation data structures. */
3610 #ifdef MALLOC_STATS
3611 	base_mapped = 0;
3612 #endif
3613 #ifdef USE_BRK
3614 	/*
3615 	 * Allocate a base chunk here, since it doesn't actually have to be
3616 	 * chunk-aligned.  Doing this before allocating any other chunks allows
3617 	 * the use of space that would otherwise be wasted.
3618 	 */
3619 	base_pages_alloc(0);
3620 #endif
3621 	base_chunk_nodes = NULL;
3622 	malloc_mutex_init(&base_mtx);
3623 
3624 	if (ncpus > 1) {
3625 		/*
3626 		 * For SMP systems, create four times as many arenas as there
3627 		 * are CPUs by default.
3628 		 */
3629 		opt_narenas_lshift += 2;
3630 	}
3631 
3632 #ifdef NO_TLS
3633 	/* Initialize arena key. */
3634 	(void)thr_keycreate(&arenas_map_key, NULL);
3635 #endif
3636 
3637 	/* Determine how many arenas to use. */
3638 	narenas = ncpus;
3639 	if (opt_narenas_lshift > 0) {
3640 		if ((narenas << opt_narenas_lshift) > narenas)
3641 			narenas <<= opt_narenas_lshift;
3642 		/*
3643 		 * Make sure not to exceed the limits of what base_alloc()
3644 		 * can handle.
3645 		 */
3646 		if (narenas * sizeof(arena_t *) > chunksize)
3647 			narenas = (unsigned)(chunksize / sizeof(arena_t *));
3648 	} else if (opt_narenas_lshift < 0) {
3649 		if ((narenas << opt_narenas_lshift) < narenas)
3650 			narenas <<= opt_narenas_lshift;
3651 		/* Make sure there is at least one arena. */
3652 		if (narenas == 0)
3653 			narenas = 1;
3654 	}
3655 
3656 	next_arena = 0;
3657 
3658 	/* Allocate and initialize arenas. */
3659 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3660 	if (arenas == NULL) {
3661 		malloc_mutex_unlock(&init_lock);
3662 		return (true);
3663 	}
3664 	/*
3665 	 * Zero the array.  In practice, this should always be pre-zeroed,
3666 	 * since it was just mmap()ed, but let's be sure.
3667 	 */
3668 	memset(arenas, 0, sizeof(arena_t *) * narenas);
3669 
3670 	/*
3671 	 * Initialize one arena here.  The rest are lazily created in
3672 	 * choose_arena_hard().
3673 	 */
3674 	arenas_extend(0);
3675 	if (arenas[0] == NULL) {
3676 		malloc_mutex_unlock(&init_lock);
3677 		return (true);
3678 	}
3679 
3680 	malloc_mutex_init(&arenas_mtx);
3681 
3682 	malloc_initialized = true;
3683 	malloc_mutex_unlock(&init_lock);
3684 	return (false);
3685 }
3686 
3687 /*
3688  * End general internal functions.
3689  */
3690 /******************************************************************************/
3691 /*
3692  * Begin malloc(3)-compatible functions.
3693  */
3694 
3695 void *
3696 malloc(size_t size)
3697 {
3698 	void *ret;
3699 
3700 	if (malloc_init()) {
3701 		ret = NULL;
3702 		goto RETURN;
3703 	}
3704 
3705 	if (size == 0) {
3706 		if (opt_sysv == false)
3707 			size = 1;
3708 		else {
3709 			ret = NULL;
3710 			goto RETURN;
3711 		}
3712 	}
3713 
3714 	ret = imalloc(size);
3715 
3716 RETURN:
3717 	if (ret == NULL) {
3718 		if (opt_xmalloc) {
3719 			_malloc_message(getprogname(),
3720 			    ": (malloc) Error in malloc(): out of memory\n", "",
3721 			    "");
3722 			abort();
3723 		}
3724 		errno = ENOMEM;
3725 	}
3726 
3727 	UTRACE(0, size, ret);
3728 	return (ret);
3729 }
3730 
3731 int
3732 posix_memalign(void **memptr, size_t alignment, size_t size)
3733 {
3734 	int ret;
3735 	void *result;
3736 
3737 	if (malloc_init())
3738 		result = NULL;
3739 	else {
3740 		/* Make sure that alignment is a large enough power of 2. */
3741 		if (((alignment - 1) & alignment) != 0
3742 		    || alignment < sizeof(void *)) {
3743 			if (opt_xmalloc) {
3744 				_malloc_message(getprogname(),
3745 				    ": (malloc) Error in posix_memalign(): "
3746 				    "invalid alignment\n", "", "");
3747 				abort();
3748 			}
3749 			result = NULL;
3750 			ret = EINVAL;
3751 			goto RETURN;
3752 		}
3753 
3754 		result = ipalloc(alignment, size);
3755 	}
3756 
3757 	if (result == NULL) {
3758 		if (opt_xmalloc) {
3759 			_malloc_message(getprogname(),
3760 			": (malloc) Error in posix_memalign(): out of memory\n",
3761 			"", "");
3762 			abort();
3763 		}
3764 		ret = ENOMEM;
3765 		goto RETURN;
3766 	}
3767 
3768 	*memptr = result;
3769 	ret = 0;
3770 
3771 RETURN:
3772 	UTRACE(0, size, result);
3773 	return (ret);
3774 }
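
/*
 * Illustrative sketch, not compiled: the alignment test used above.  A
 * nonzero x is a power of two exactly when (x & (x - 1)) == 0, because
 * subtracting one flips the lowest set bit and every bit below it, so any
 * second set bit survives the AND.  posix_memalign() additionally requires
 * alignment >= sizeof(void *).  The helper name is hypothetical.
 */
#if 0
static int
example_valid_alignment(size_t alignment)
{

	/* 16 -> valid; 24 -> rejected (24 & 23 == 16); 4 -> too small on LP64. */
	return (((alignment - 1) & alignment) == 0 &&
	    alignment >= sizeof(void *));
}
#endif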
3775 
3776 void *
3777 calloc(size_t num, size_t size)
3778 {
3779 	void *ret;
3780 	size_t num_size;
3781 
3782 	if (malloc_init()) {
3783 		num_size = 0;
3784 		ret = NULL;
3785 		goto RETURN;
3786 	}
3787 
3788 	num_size = num * size;
3789 	if (num_size == 0) {
3790 		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
3791 			num_size = 1;
3792 		else {
3793 			ret = NULL;
3794 			goto RETURN;
3795 		}
3796 	/*
3797 	 * Try to avoid division here.  We know that it isn't possible to
3798 	 * overflow during multiplication if neither operand uses any of the
3799 	 * most significant half of the bits in a size_t.
3800 	 */
3801 	} else if ((unsigned long long)((num | size) &
3802 	   ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
3803 	   (num_size / size != num)) {
3804 		/* size_t overflow. */
3805 		ret = NULL;
3806 		goto RETURN;
3807 	}
3808 
3809 	ret = icalloc(num_size);
3810 
3811 RETURN:
3812 	if (ret == NULL) {
3813 		if (opt_xmalloc) {
3814 			_malloc_message(getprogname(),
3815 			    ": (malloc) Error in calloc(): out of memory\n", "",
3816 			    "");
3817 			abort();
3818 		}
3819 		errno = ENOMEM;
3820 	}
3821 
3822 	UTRACE(0, num_size, ret);
3823 	return (ret);
3824 }
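
/*
 * Illustrative sketch, not compiled: the overflow test used above.  If
 * neither num nor size has a bit set in the upper half of a size_t, the
 * product cannot overflow and the division is skipped entirely; only when
 * (num | size) reaches into the upper half is num_size / size != num
 * checked.  For example, with a 32-bit size_t, 0x10000 * 0x10000 wraps to 0,
 * and since both operands have bit 16 set the division check catches it.
 * The helper name is hypothetical; SIZE_MAX is from <stdint.h>.
 */
#if 0
static int
example_mul_overflows(size_t num, size_t size)
{
	const size_t half_bits = sizeof(size_t) << 2;	/* half the width, in bits */
	const size_t high_mask = SIZE_MAX << half_bits;
	size_t prod = num * size;

	/* Cheap path: both operands fit in the lower half; no overflow possible. */
	if (((num | size) & high_mask) == 0)
		return (0);
	/* Slow path: multiply, then divide back out to verify. */
	return (size != 0 && prod / size != num);
}
#endif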
3825 
3826 void *
3827 realloc(void *ptr, size_t size)
3828 {
3829 	void *ret;
3830 
3831 	if (size == 0) {
3832 		if (opt_sysv == false)
3833 			size = 1;
3834 		else {
3835 			if (ptr != NULL)
3836 				idalloc(ptr);
3837 			ret = NULL;
3838 			goto RETURN;
3839 		}
3840 	}
3841 
3842 	if (ptr != NULL) {
3843 		assert(malloc_initialized);
3844 
3845 		ret = iralloc(ptr, size);
3846 
3847 		if (ret == NULL) {
3848 			if (opt_xmalloc) {
3849 				_malloc_message(getprogname(),
3850 				    ": (malloc) Error in realloc(): out of "
3851 				    "memory\n", "", "");
3852 				abort();
3853 			}
3854 			errno = ENOMEM;
3855 		}
3856 	} else {
3857 		if (malloc_init())
3858 			ret = NULL;
3859 		else
3860 			ret = imalloc(size);
3861 
3862 		if (ret == NULL) {
3863 			if (opt_xmalloc) {
3864 				_malloc_message(getprogname(),
3865 				    ": (malloc) Error in realloc(): out of "
3866 				    "memory\n", "", "");
3867 				abort();
3868 			}
3869 			errno = ENOMEM;
3870 		}
3871 	}
3872 
3873 RETURN:
3874 	UTRACE(ptr, size, ret);
3875 	return (ret);
3876 }
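
/*
 * Illustrative sketch, not compiled: the two special cases handled above.
 * realloc(NULL, size) behaves like malloc(size), and with SysV semantics
 * (opt_sysv) realloc(ptr, 0) frees ptr and returns NULL; by default a zero
 * size is treated as one byte.  The helper name is hypothetical.
 */
#if 0
static void *
example_grow(void *buf, size_t newlen)
{
	void *p;

	/* A NULL buf simply takes the malloc() path above. */
	p = realloc(buf, newlen);
	if (p == NULL) {
		/* On failure the original buf is untouched and still owned. */
		free(buf);
		return (NULL);
	}
	return (p);
}
#endif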
3877 
3878 void
3879 free(void *ptr)
3880 {
3881 
3882 	UTRACE(ptr, 0, 0);
3883 	if (ptr != NULL) {
3884 		assert(malloc_initialized);
3885 
3886 		idalloc(ptr);
3887 	}
3888 }
3889 
3890 /*
3891  * End malloc(3)-compatible functions.
3892  */
3893 /******************************************************************************/
3894 /*
3895  * Begin non-standard functions.
3896  */
3897 #ifndef __NetBSD__
3898 size_t
3899 malloc_usable_size(const void *ptr)
3900 {
3901 
3902 	assert(ptr != NULL);
3903 
3904 	return (isalloc(ptr));
3905 }
3906 #endif
3907 
3908 /*
3909  * End non-standard functions.
3910  */
3911 /******************************************************************************/
3912 /*
3913  * Begin library-private functions, used by threading libraries for protection
3914  * of malloc during fork().  These functions are only called if the program is
3915  * running in threaded mode, so there is no need to check whether the program
3916  * is threaded here.
3917  */
3918 
3919 void
3920 _malloc_prefork(void)
3921 {
3922 	unsigned i;
3923 
3924 	/* Acquire all mutexes in a safe order. */
3925 
3926 	malloc_mutex_lock(&arenas_mtx);
3927 	for (i = 0; i < narenas; i++) {
3928 		if (arenas[i] != NULL)
3929 			malloc_mutex_lock(&arenas[i]->mtx);
3930 	}
3931 	malloc_mutex_unlock(&arenas_mtx);
3932 
3933 	malloc_mutex_lock(&base_mtx);
3934 
3935 	malloc_mutex_lock(&chunks_mtx);
3936 }
3937 
3938 void
3939 _malloc_postfork(void)
3940 {
3941 	unsigned i;
3942 
3943 	/* Release all mutexes, now that fork() has completed. */
3944 
3945 	malloc_mutex_unlock(&chunks_mtx);
3946 
3947 	malloc_mutex_unlock(&base_mtx);
3948 
3949 	malloc_mutex_lock(&arenas_mtx);
3950 	for (i = 0; i < narenas; i++) {
3951 		if (arenas[i] != NULL)
3952 			malloc_mutex_unlock(&arenas[i]->mtx);
3953 	}
3954 	malloc_mutex_unlock(&arenas_mtx);
3955 }
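
/*
 * Illustrative sketch, not compiled: how a threading library might register
 * these hooks, assuming the standard pthread_atfork(3) interface.  The
 * prepare handler takes every allocator lock before fork(), and the same
 * unlock routine runs in both the parent and the child afterwards, so no
 * lock is left held by a thread that does not exist in the child.
 */
#if 0
static void
example_register_fork_hooks(void)
{

	(void)pthread_atfork(_malloc_prefork, _malloc_postfork,
	    _malloc_postfork);
}
#endif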
3956 
3957 /*
3958  * End library-private functions.
3959  */
3960 /******************************************************************************/
3961