xref: /netbsd-src/lib/libc/stdlib/jemalloc.c (revision 0df165c04d0a9ca1adde9ed2b890344c937954a6)
1 /*	$NetBSD: jemalloc.c,v 1.11 2007/11/19 14:48:42 ad Exp $	*/
2 
3 /*-
4  * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice(s), this list of conditions and the following disclaimer as
12  *    the first lines of this file unmodified other than the possible
13  *    addition of one or more copyright notices.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice(s), this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
20  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  *******************************************************************************
32  *
33  * This allocator implementation is designed to provide scalable performance
34  * for multi-threaded programs on multi-processor systems.  The following
35  * features are included for this purpose:
36  *
37  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
38  *     contention and cache sloshing.
39  *
40  *   + Cache line sharing between arenas is avoided for internal data
41  *     structures.
42  *
43  *   + Memory is managed in chunks and runs (chunks can be split into runs),
44  *     rather than as individual pages.  This provides a constant-time
45  *     mechanism for associating allocations with particular arenas.
46  *
47  * Allocation requests are rounded up to the nearest size class, and no record
48  * of the original request size is maintained.  Allocations are broken into
49  * categories according to size class.  Assuming runtime defaults, 4 kB pages
50  * and a 16 byte quantum, the size classes in each category are as follows:
51  *
52  *   |=====================================|
53  *   | Category | Subcategory    |    Size |
54  *   |=====================================|
55  *   | Small    | Tiny           |       2 |
56  *   |          |                |       4 |
57  *   |          |                |       8 |
58  *   |          |----------------+---------|
59  *   |          | Quantum-spaced |      16 |
60  *   |          |                |      32 |
61  *   |          |                |      48 |
62  *   |          |                |     ... |
63  *   |          |                |     480 |
64  *   |          |                |     496 |
65  *   |          |                |     512 |
66  *   |          |----------------+---------|
67  *   |          | Sub-page       |    1 kB |
68  *   |          |                |    2 kB |
69  *   |=====================================|
70  *   | Large                     |    4 kB |
71  *   |                           |    8 kB |
72  *   |                           |   12 kB |
73  *   |                           |     ... |
74  *   |                           | 1012 kB |
75  *   |                           | 1016 kB |
76  *   |                           | 1020 kB |
77  *   |=====================================|
78  *   | Huge                      |    1 MB |
79  *   |                           |    2 MB |
80  *   |                           |    3 MB |
81  *   |                           |     ... |
82  *   |=====================================|
83  *
84  * A different mechanism is used for each category:
85  *
86  *   Small : Each size class is segregated into its own set of runs.  Each run
87  *           maintains a bitmap of which regions are free/allocated.
88  *
89  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
90  *           in the associated arena chunk header maps.
91  *
92  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
93  *          Metadata are stored in a separate red-black tree.
94  *
95  *******************************************************************************
96  */
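
/*
 * Worked example of the size-class rounding described above, assuming the
 * runtime defaults (16 byte quantum, 4 kB pages, 1 MB chunks):
 *
 *   malloc(7)       -> tiny           ->       8 bytes
 *   malloc(100)     -> quantum-spaced ->     112 bytes
 *   malloc(700)     -> sub-page       ->    1024 bytes
 *   malloc(20000)   -> large          ->   20480 bytes (5 pages)
 *   malloc(1500000) -> huge           -> 2097152 bytes (2 chunks)
 */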
97 
98 /* LINTLIBRARY */
99 
100 #ifdef __NetBSD__
101 #  define xutrace(a, b)		utrace("malloc", (a), (b))
102 #  define __DECONST(x, y)	((x)__UNCONST(y))
103 #  define NO_TLS
104 #else
105 #  define xutrace(a, b)		utrace((a), (b))
106 #endif	/* __NetBSD__ */
107 
108 /*
109  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
110  * defaults the A and J runtime options to off.  These settings are appropriate
111  * for production systems.
112  */
113 #define MALLOC_PRODUCTION
114 
115 #ifndef MALLOC_PRODUCTION
116 #  define MALLOC_DEBUG
117 #endif
118 
119 #include <sys/cdefs.h>
120 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
121 __RCSID("$NetBSD: jemalloc.c,v 1.11 2007/11/19 14:48:42 ad Exp $");
122 
123 #ifdef __FreeBSD__
124 #include "libc_private.h"
125 #ifdef MALLOC_DEBUG
126 #  define _LOCK_DEBUG
127 #endif
128 #include "spinlock.h"
129 #endif
130 #include "namespace.h"
131 #include <sys/mman.h>
132 #include <sys/param.h>
133 #ifdef __FreeBSD__
134 #include <sys/stddef.h>
135 #endif
136 #include <sys/time.h>
137 #include <sys/types.h>
138 #include <sys/sysctl.h>
139 #include <sys/tree.h>
140 #include <sys/uio.h>
141 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
142 
143 #ifdef __FreeBSD__
144 #include <machine/atomic.h>
145 #include <machine/cpufunc.h>
146 #endif
147 #include <machine/vmparam.h>
148 
149 #include <errno.h>
150 #include <limits.h>
151 #include <pthread.h>
152 #include <sched.h>
153 #include <stdarg.h>
154 #include <stdbool.h>
155 #include <stdio.h>
156 #include <stdint.h>
157 #include <stdlib.h>
158 #include <string.h>
159 #include <strings.h>
160 #include <unistd.h>
161 
162 #ifdef __NetBSD__
163 #  include <reentrant.h>
164 void	_malloc_prefork(void);
165 void	_malloc_postfork(void);
166 ssize_t	_write(int, const void *, size_t);
167 const char	*_getprogname(void);
168 #endif
169 
170 #ifdef __FreeBSD__
171 #include "un-namespace.h"
172 #endif
173 
174 /* MALLOC_STATS enables statistics calculation. */
175 #ifndef MALLOC_PRODUCTION
176 #  define MALLOC_STATS
177 #endif
178 
179 #ifdef MALLOC_DEBUG
180 #  ifdef NDEBUG
181 #    undef NDEBUG
182 #  endif
183 #else
184 #  ifndef NDEBUG
185 #    define NDEBUG
186 #  endif
187 #endif
188 #include <assert.h>
189 
190 #ifdef MALLOC_DEBUG
191    /* Disable inlining to make debugging easier. */
192 #  define inline
193 #endif
194 
195 /* Size of stack-allocated buffer passed to strerror_r(). */
196 #define	STRERROR_BUF		64
197 
198 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
199 #ifdef __i386__
200 #  define QUANTUM_2POW_MIN	4
201 #  define SIZEOF_PTR_2POW	2
202 #  define USE_BRK
203 #endif
204 #ifdef __ia64__
205 #  define QUANTUM_2POW_MIN	4
206 #  define SIZEOF_PTR_2POW	3
207 #endif
208 #ifdef __alpha__
209 #  define QUANTUM_2POW_MIN	4
210 #  define SIZEOF_PTR_2POW	3
211 #  define NO_TLS
212 #endif
213 #ifdef __sparc64__
214 #  define QUANTUM_2POW_MIN	4
215 #  define SIZEOF_PTR_2POW	3
216 #  define NO_TLS
217 #endif
218 #ifdef __amd64__
219 #  define QUANTUM_2POW_MIN	4
220 #  define SIZEOF_PTR_2POW	3
221 #endif
222 #ifdef __arm__
223 #  define QUANTUM_2POW_MIN	3
224 #  define SIZEOF_PTR_2POW	2
225 #  define USE_BRK
226 #  define NO_TLS
227 #endif
228 #ifdef __powerpc__
229 #  define QUANTUM_2POW_MIN	4
230 #  define SIZEOF_PTR_2POW	2
231 #  define USE_BRK
232 #endif
233 #if defined(__sparc__) && !defined(__sparc64__)
234 #  define QUANTUM_2POW_MIN	4
235 #  define SIZEOF_PTR_2POW	2
236 #  define USE_BRK
237 #endif
238 #ifdef __vax__
239 #  define QUANTUM_2POW_MIN	4
240 #  define SIZEOF_PTR_2POW	2
241 #  define USE_BRK
242 #endif
243 #ifdef __sh__
244 #  define QUANTUM_2POW_MIN	4
245 #  define SIZEOF_PTR_2POW	2
246 #  define USE_BRK
247 #endif
248 #ifdef __m68k__
249 #  define QUANTUM_2POW_MIN	4
250 #  define SIZEOF_PTR_2POW	2
251 #  define USE_BRK
252 #endif
253 #ifdef __mips__
254 #  define QUANTUM_2POW_MIN	4
255 #  define SIZEOF_PTR_2POW	2
256 #  define USE_BRK
257 #endif
258 #ifdef __hppa__
259 #  define QUANTUM_2POW_MIN	4
260 #  define SIZEOF_PTR_2POW	2
261 #  define USE_BRK
262 #endif
263 
264 #define	SIZEOF_PTR		(1 << SIZEOF_PTR_2POW)
265 
266 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
267 #ifndef SIZEOF_INT_2POW
268 #  define SIZEOF_INT_2POW	2
269 #endif
270 
271 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
272 #if (!defined(PIC) && !defined(NO_TLS))
273 #  define NO_TLS
274 #endif
275 
276 /*
277  * Size and alignment of memory chunks that are allocated by the OS's virtual
278  * memory system.
279  */
280 #define	CHUNK_2POW_DEFAULT	20
281 
282 /*
283  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
284  * so over-estimates are okay (up to a point), but under-estimates will
285  * negatively affect performance.
286  */
287 #define	CACHELINE_2POW		6
288 #define	CACHELINE		((size_t)(1 << CACHELINE_2POW))
289 
290 /* Smallest size class to support. */
291 #define	TINY_MIN_2POW		1
292 
293 /*
294  * Maximum size class that is a multiple of the quantum, but not (necessarily)
295  * a power of 2.  Above this size, allocations are rounded up to the nearest
296  * power of 2.
297  */
298 #define	SMALL_MAX_2POW_DEFAULT	9
299 #define	SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)
300 
301 /*
302  * Maximum desired run header overhead.  Runs are sized as small as possible
303  * such that this setting is still honored, without violating other constraints.
304  * The goal is to make runs as small as possible without exceeding a per run
305  * external fragmentation threshold.
306  *
307  * Note that it is possible to set this low enough that it cannot be honored
308  * for some/all object sizes, since there is one bit of header overhead per
309  * object (plus a constant).  In such cases, this constraint is relaxed.
310  *
311  * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of
312  * overhead for which RUN_MAX_OVRHD is relaxed.
313  */
314 #define RUN_MAX_OVRHD		0.015
315 #define RUN_MAX_OVRHD_RELAX	1.5
316 
317 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
318 #define RUN_MAX_SMALL_2POW	15
319 #define RUN_MAX_SMALL		(1 << RUN_MAX_SMALL_2POW)
320 
321 /******************************************************************************/
322 
323 #ifdef __FreeBSD__
324 /*
325  * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
326  * they require malloc()ed memory.
327  */
328 typedef struct {
329 	spinlock_t	lock;
330 } malloc_mutex_t;
331 
332 /* Set to true once the allocator has been initialized. */
333 static bool malloc_initialized = false;
334 
335 /* Used to avoid initialization races. */
336 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
337 #else
338 #define	malloc_mutex_t	mutex_t
339 
340 /* Set to true once the allocator has been initialized. */
341 static bool malloc_initialized = false;
342 
343 /* Used to avoid initialization races. */
344 static mutex_t init_lock = MUTEX_INITIALIZER;
345 #endif
346 
347 /******************************************************************************/
348 /*
349  * Statistics data structures.
350  */
351 
352 #ifdef MALLOC_STATS
353 
354 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
355 struct malloc_bin_stats_s {
356 	/*
357 	 * Number of allocation requests that corresponded to the size of this
358 	 * bin.
359 	 */
360 	uint64_t	nrequests;
361 
362 	/* Total number of runs created for this bin's size class. */
363 	uint64_t	nruns;
364 
365 	/*
366 	 * Total number of runs reused by extracting them from the runs tree for
367 	 * this bin's size class.
368 	 */
369 	uint64_t	reruns;
370 
371 	/* High-water mark for this bin. */
372 	unsigned long	highruns;
373 
374 	/* Current number of runs in this bin. */
375 	unsigned long	curruns;
376 };
377 
378 typedef struct arena_stats_s arena_stats_t;
379 struct arena_stats_s {
380 	/* Number of bytes currently mapped. */
381 	size_t		mapped;
382 
383 	/* Per-size-category statistics. */
384 	size_t		allocated_small;
385 	uint64_t	nmalloc_small;
386 	uint64_t	ndalloc_small;
387 
388 	size_t		allocated_large;
389 	uint64_t	nmalloc_large;
390 	uint64_t	ndalloc_large;
391 };
392 
393 typedef struct chunk_stats_s chunk_stats_t;
394 struct chunk_stats_s {
395 	/* Number of chunks that were allocated. */
396 	uint64_t	nchunks;
397 
398 	/* High-water mark for number of chunks allocated. */
399 	unsigned long	highchunks;
400 
401 	/*
402 	 * Current number of chunks allocated.  This value is not used for any
403 	 * other purpose; it is maintained solely so that highchunks can be
404 	 * kept up to date.
405 	 */
406 	unsigned long	curchunks;
407 };
408 
409 #endif /* #ifdef MALLOC_STATS */
410 
411 /******************************************************************************/
412 /*
413  * Chunk data structures.
414  */
415 
416 /* Tree of chunks. */
417 typedef struct chunk_node_s chunk_node_t;
418 struct chunk_node_s {
419 	/* Linkage for the chunk tree. */
420 	RB_ENTRY(chunk_node_s) link;
421 
422 	/*
423 	 * Pointer to the chunk that this tree node is responsible for.  In some
424 	 * (but certainly not all) cases, this data structure is placed at the
425 	 * beginning of the corresponding chunk, so this field may point to this
426 	 * node.
427 	 */
428 	void	*chunk;
429 
430 	/* Total chunk size. */
431 	size_t	size;
432 };
433 typedef struct chunk_tree_s chunk_tree_t;
434 RB_HEAD(chunk_tree_s, chunk_node_s);
435 
436 /******************************************************************************/
437 /*
438  * Arena data structures.
439  */
440 
441 typedef struct arena_s arena_t;
442 typedef struct arena_bin_s arena_bin_t;
443 
444 typedef struct arena_chunk_map_s arena_chunk_map_t;
445 struct arena_chunk_map_s {
446 	/* Number of pages in run. */
447 	uint32_t	npages;
448 	/*
449 	 * Position within run.  For a free run, this is POS_FREE for the first
450 	 * and last pages.  The POS_FREE special value makes it possible to
451 	 * quickly coalesce free runs.
452 	 *
453 	 * This is the limiting factor for chunksize; there can be at most 2^31
454 	 * pages in a run.
455 	 */
456 #define POS_FREE ((uint32_t)0xffffffffU)
457 	uint32_t	pos;
458 };
459 
460 /* Arena chunk header. */
461 typedef struct arena_chunk_s arena_chunk_t;
462 struct arena_chunk_s {
463 	/* Arena that owns the chunk. */
464 	arena_t *arena;
465 
466 	/* Linkage for the arena's chunk tree. */
467 	RB_ENTRY(arena_chunk_s) link;
468 
469 	/*
470 	 * Number of pages in use.  This is maintained in order to make
471 	 * detection of empty chunks fast.
472 	 */
473 	uint32_t pages_used;
474 
475 	/*
476 	 * Every time a free run larger than this value is created/coalesced,
477 	 * this value is increased.  The only way that the value decreases is if
478 	 * arena_run_alloc() fails to find a free run as large as advertised by
479 	 * this value.
480 	 */
481 	uint32_t max_frun_npages;
482 
483 	/*
484 	 * Every time a free run that starts at an earlier page than this value
485 	 * is created/coalesced, this value is decreased.  It is reset in a
486 	 * similar fashion to max_frun_npages.
487 	 */
488 	uint32_t min_frun_ind;
489 
490 	/*
491 	 * Map of pages within chunk that keeps track of free/large/small.  For
492 	 * free runs, only the map entries for the first and last pages are
493 	 * kept up to date, so that free runs can be quickly coalesced.
494 	 */
495 	arena_chunk_map_t map[1]; /* Dynamically sized. */
496 };
497 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
498 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
499 
500 typedef struct arena_run_s arena_run_t;
501 struct arena_run_s {
502 	/* Linkage for run trees. */
503 	RB_ENTRY(arena_run_s) link;
504 
505 #ifdef MALLOC_DEBUG
506 	uint32_t	magic;
507 #  define ARENA_RUN_MAGIC 0x384adf93
508 #endif
509 
510 	/* Bin this run is associated with. */
511 	arena_bin_t	*bin;
512 
513 	/* Index of first element that might have a free region. */
514 	unsigned	regs_minelm;
515 
516 	/* Number of free regions in run. */
517 	unsigned	nfree;
518 
519 	/* Bitmask of in-use regions (0: in use, 1: free). */
520 	unsigned	regs_mask[1]; /* Dynamically sized. */
521 };
522 typedef struct arena_run_tree_s arena_run_tree_t;
523 RB_HEAD(arena_run_tree_s, arena_run_s);
524 
525 struct arena_bin_s {
526 	/*
527 	 * Current run being used to service allocations of this bin's size
528 	 * class.
529 	 */
530 	arena_run_t	*runcur;
531 
532 	/*
533 	 * Tree of non-full runs.  This tree is used when looking for an
534 	 * existing run when runcur is no longer usable.  We choose the
535 	 * non-full run that is lowest in memory; this policy tends to keep
536 	 * objects packed well, and it can also help reduce the number of
537 	 * almost-empty chunks.
538 	 */
539 	arena_run_tree_t runs;
540 
541 	/* Size of regions in a run for this bin's size class. */
542 	size_t		reg_size;
543 
544 	/* Total size of a run for this bin's size class. */
545 	size_t		run_size;
546 
547 	/* Total number of regions in a run for this bin's size class. */
548 	uint32_t	nregs;
549 
550 	/* Number of elements in a run's regs_mask for this bin's size class. */
551 	uint32_t	regs_mask_nelms;
552 
553 	/* Offset of first region in a run for this bin's size class. */
554 	uint32_t	reg0_offset;
555 
556 #ifdef MALLOC_STATS
557 	/* Bin statistics. */
558 	malloc_bin_stats_t stats;
559 #endif
560 };
561 
562 struct arena_s {
563 #ifdef MALLOC_DEBUG
564 	uint32_t		magic;
565 #  define ARENA_MAGIC 0x947d3d24
566 #endif
567 
568 	/* All operations on this arena require that mtx be locked. */
569 	malloc_mutex_t		mtx;
570 
571 #ifdef MALLOC_STATS
572 	arena_stats_t		stats;
573 #endif
574 
575 	/*
576 	 * Tree of chunks this arena manages.
577 	 */
578 	arena_chunk_tree_t	chunks;
579 
580 	/*
581 	 * In order to avoid rapid chunk allocation/deallocation when an arena
582 	 * oscillates right on the cusp of needing a new chunk, cache the most
583 	 * recently freed chunk.  This caching is disabled by opt_hint.
584 	 *
585 	 * There is one spare chunk per arena, rather than one spare total, in
586 	 * order to avoid interactions between multiple threads that could make
587 	 * a single spare inadequate.
588 	 */
589 	arena_chunk_t *spare;
590 
591 	/*
592 	 * bins stores the runs backing allocations of the size classes below,
593 	 * assuming a 16-byte quantum, 4 kB page size, and default MALLOC_OPTIONS.
594 	 *
595 	 *   bins[i] | size |
596 	 *   --------+------+
597 	 *        0  |    2 |
598 	 *        1  |    4 |
599 	 *        2  |    8 |
600 	 *   --------+------+
601 	 *        3  |   16 |
602 	 *        4  |   32 |
603 	 *        5  |   48 |
604 	 *        6  |   64 |
605 	 *           :      :
606 	 *           :      :
607 	 *       33  |  496 |
608 	 *       34  |  512 |
609 	 *   --------+------+
610 	 *       35  | 1024 |
611 	 *       36  | 2048 |
612 	 *   --------+------+
613 	 */
614 	arena_bin_t		bins[1]; /* Dynamically sized. */
615 };
616 
617 /******************************************************************************/
618 /*
619  * Data.
620  */
621 
622 /* Number of CPUs. */
623 static unsigned		ncpus;
624 
625 /* VM page size. */
626 static size_t		pagesize;
627 static size_t		pagesize_mask;
628 static int		pagesize_2pow;
629 
630 /* Various bin-related settings. */
631 static size_t		bin_maxclass; /* Max size class for bins. */
632 static unsigned		ntbins; /* Number of (2^n)-spaced tiny bins. */
633 static unsigned		nqbins; /* Number of quantum-spaced bins. */
634 static unsigned		nsbins; /* Number of (2^n)-spaced sub-page bins. */
635 static size_t		small_min;
636 static size_t		small_max;
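
/*
 * For example, with the defaults described in the header comment (16 byte
 * quantum, 4 kB pages, 512 byte small_max): ntbins == 3 (2, 4, 8),
 * nqbins == 32 (16, 32, ..., 512), nsbins == 2 (1 kB, 2 kB), and
 * bin_maxclass == 2 kB, for 37 bins per arena in total.
 */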
637 
638 /* Various quantum-related settings. */
639 static size_t		quantum;
640 static size_t		quantum_mask; /* (quantum - 1). */
641 
642 /* Various chunk-related settings. */
643 static size_t		chunksize;
644 static size_t		chunksize_mask; /* (chunksize - 1). */
645 static int		chunksize_2pow;
646 static unsigned		chunk_npages;
647 static unsigned		arena_chunk_header_npages;
648 static size_t		arena_maxclass; /* Max size class for arenas. */
649 
650 /********/
651 /*
652  * Chunks.
653  */
654 
655 /* Protects chunk-related data structures. */
656 static malloc_mutex_t	chunks_mtx;
657 
658 /* Tree of chunks that are stand-alone huge allocations. */
659 static chunk_tree_t	huge;
660 
661 #ifdef USE_BRK
662 /*
663  * Try to use brk for chunk-size allocations, due to address space constraints.
664  */
665 /*
666  * Protects sbrk() calls.  This must be separate from chunks_mtx, since
667  * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
668  * could cause recursive lock acquisition).
669  */
670 static malloc_mutex_t	brk_mtx;
671 /* Result of first sbrk(0) call. */
672 static void		*brk_base;
673 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
674 static void		*brk_prev;
675 /* Current upper limit on brk addresses. */
676 static void		*brk_max;
677 #endif
678 
679 #ifdef MALLOC_STATS
680 /* Huge allocation statistics. */
681 static uint64_t		huge_nmalloc;
682 static uint64_t		huge_ndalloc;
683 static uint64_t		huge_nralloc;
684 static size_t		huge_allocated;
685 #endif
686 
687 /*
688  * Tree of chunks that were previously allocated.  This is used when allocating
689  * chunks, in an attempt to re-use address space.
690  */
691 static chunk_tree_t	old_chunks;
692 
693 /****************************/
694 /*
695  * base (internal allocation).
696  */
697 
698 /*
699  * Current pages that are being used for internal memory allocations.  These
700  * pages are carved up in cacheline-size quanta, so that there is no chance of
701  * false cache line sharing.
702  */
703 static void		*base_pages;
704 static void		*base_next_addr;
705 static void		*base_past_addr; /* Addr immediately past base_pages. */
706 static chunk_node_t	*base_chunk_nodes; /* LIFO cache of chunk nodes. */
707 static malloc_mutex_t	base_mtx;
708 #ifdef MALLOC_STATS
709 static size_t		base_mapped;
710 #endif
711 
712 /********/
713 /*
714  * Arenas.
715  */
716 
717 /*
718  * Arenas that are used to service external requests.  Not all elements of the
719  * arenas array are necessarily used; arenas are created lazily as needed.
720  */
721 static arena_t		**arenas;
722 static unsigned		narenas;
723 static unsigned		next_arena;
724 static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */
725 
726 #ifndef NO_TLS
727 /*
728  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
729  * for allocations.
730  */
731 static __thread arena_t	*arenas_map;
732 #define	get_arenas_map()	(arenas_map)
733 #define	set_arenas_map(x)	(arenas_map = x)
734 #else
735 static thread_key_t arenas_map_key;
736 #define	get_arenas_map()	thr_getspecific(arenas_map_key)
737 #define	set_arenas_map(x)	thr_setspecific(arenas_map_key, x)
738 #endif
739 
740 #ifdef MALLOC_STATS
741 /* Chunk statistics. */
742 static chunk_stats_t	stats_chunks;
743 #endif
744 
745 /*******************************/
746 /*
747  * Runtime configuration options.
748  */
749 const char	*_malloc_options;
750 
751 #ifndef MALLOC_PRODUCTION
752 static bool	opt_abort = true;
753 static bool	opt_junk = true;
754 #else
755 static bool	opt_abort = false;
756 static bool	opt_junk = false;
757 #endif
758 static bool	opt_hint = false;
759 static bool	opt_print_stats = false;
760 static int	opt_quantum_2pow = QUANTUM_2POW_MIN;
761 static int	opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
762 static int	opt_chunk_2pow = CHUNK_2POW_DEFAULT;
763 static bool	opt_utrace = false;
764 static bool	opt_sysv = false;
765 static bool	opt_xmalloc = false;
766 static bool	opt_zero = false;
767 static int32_t	opt_narenas_lshift = 0;
768 
769 typedef struct {
770 	void	*p;
771 	size_t	s;
772 	void	*r;
773 } malloc_utrace_t;
774 
775 #define	UTRACE(a, b, c)							\
776 	if (opt_utrace) {						\
777 		malloc_utrace_t ut;					\
778 		ut.p = a;						\
779 		ut.s = b;						\
780 		ut.r = c;						\
781 		xutrace(&ut, sizeof(ut));				\
782 	}
783 
784 /******************************************************************************/
785 /*
786  * Begin function prototypes for non-inline static functions.
787  */
788 
789 static void	wrtmessage(const char *p1, const char *p2, const char *p3,
790 		const char *p4);
791 #ifdef MALLOC_STATS
792 static void	malloc_printf(const char *format, ...);
793 #endif
794 static char	*umax2s(uintmax_t x, char *s);
795 static bool	base_pages_alloc(size_t minsize);
796 static void	*base_alloc(size_t size);
797 static chunk_node_t *base_chunk_node_alloc(void);
798 static void	base_chunk_node_dealloc(chunk_node_t *node);
799 #ifdef MALLOC_STATS
800 static void	stats_print(arena_t *arena);
801 #endif
802 static void	*pages_map(void *addr, size_t size);
803 static void	*pages_map_align(void *addr, size_t size, int align);
804 static void	pages_unmap(void *addr, size_t size);
805 static void	*chunk_alloc(size_t size);
806 static void	chunk_dealloc(void *chunk, size_t size);
807 static arena_t	*choose_arena_hard(void);
808 static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
809 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
810 static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
811 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
812 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
813 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
814 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
815 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
816 static void	*arena_malloc(arena_t *arena, size_t size);
817 static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
818     size_t alloc_size);
819 static size_t	arena_salloc(const void *ptr);
820 static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
821 static void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
822 static bool	arena_new(arena_t *arena);
823 static arena_t	*arenas_extend(unsigned ind);
824 static void	*huge_malloc(size_t size);
825 static void	*huge_palloc(size_t alignment, size_t size);
826 static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
827 static void	huge_dalloc(void *ptr);
828 static void	*imalloc(size_t size);
829 static void	*ipalloc(size_t alignment, size_t size);
830 static void	*icalloc(size_t size);
831 static size_t	isalloc(const void *ptr);
832 static void	*iralloc(void *ptr, size_t size);
833 static void	idalloc(void *ptr);
834 static void	malloc_print_stats(void);
835 static bool	malloc_init_hard(void);
836 
837 /*
838  * End function prototypes.
839  */
840 /******************************************************************************/
841 /*
842  * Begin mutex.
843  */
844 
845 #ifdef __NetBSD__
846 #define	malloc_mutex_init(m)	mutex_init(m, NULL)
847 #define	malloc_mutex_lock(m)	mutex_lock(m)
848 #define	malloc_mutex_unlock(m)	mutex_unlock(m)
849 #else	/* __NetBSD__ */
850 static inline void
851 malloc_mutex_init(malloc_mutex_t *a_mutex)
852 {
853 	static const spinlock_t lock = _SPINLOCK_INITIALIZER;
854 
855 	a_mutex->lock = lock;
856 }
857 
858 static inline void
859 malloc_mutex_lock(malloc_mutex_t *a_mutex)
860 {
861 
862 	if (__isthreaded)
863 		_SPINLOCK(&a_mutex->lock);
864 }
865 
866 static inline void
867 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
868 {
869 
870 	if (__isthreaded)
871 		_SPINUNLOCK(&a_mutex->lock);
872 }
873 #endif	/* __NetBSD__ */
874 
875 /*
876  * End mutex.
877  */
878 /******************************************************************************/
879 /*
880  * Begin Utility functions/macros.
881  */
882 
883 /* Return the chunk address for allocation address a. */
884 #define	CHUNK_ADDR2BASE(a)						\
885 	((void *)((uintptr_t)(a) & ~chunksize_mask))
886 
887 /* Return the chunk offset of address a. */
888 #define	CHUNK_ADDR2OFFSET(a)						\
889 	((size_t)((uintptr_t)(a) & chunksize_mask))
890 
891 /* Return the smallest chunk multiple that is >= s. */
892 #define	CHUNK_CEILING(s)						\
893 	(((s) + chunksize_mask) & ~chunksize_mask)
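
/*
 * For illustration, with the default 1 MB (2^20) chunks, chunksize_mask is
 * 0xfffff, so:
 *
 *   CHUNK_ADDR2BASE(0x2834567)   == 0x2800000
 *   CHUNK_ADDR2OFFSET(0x2834567) ==   0x34567
 *   CHUNK_CEILING(0x180000)      ==  0x200000
 */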
894 
895 /* Return the smallest cacheline multiple that is >= s. */
896 #define	CACHELINE_CEILING(s)						\
897 	(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
898 
899 /* Return the smallest quantum multiple that is >= a. */
900 #define	QUANTUM_CEILING(a)						\
901 	(((a) + quantum_mask) & ~quantum_mask)
902 
903 /* Return the smallest pagesize multiple that is >= s. */
904 #define	PAGE_CEILING(s)							\
905 	(((s) + pagesize_mask) & ~pagesize_mask)
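
/*
 * These ceiling macros rely on the masks having the form 2^k - 1.  For
 * example, with a 16 byte quantum (quantum_mask == 0xf) and 4 kB pages
 * (pagesize_mask == 0xfff):
 *
 *   QUANTUM_CEILING(100) == (100 + 15) & ~15      ==  112
 *   PAGE_CEILING(5000)   == (5000 + 4095) & ~4095 == 8192
 */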
906 
907 /* Compute the smallest power of 2 that is >= x. */
908 static inline size_t
909 pow2_ceil(size_t x)
910 {
911 
912 	x--;
913 	x |= x >> 1;
914 	x |= x >> 2;
915 	x |= x >> 4;
916 	x |= x >> 8;
917 	x |= x >> 16;
918 #if (SIZEOF_PTR == 8)
919 	x |= x >> 32;
920 #endif
921 	x++;
922 	return (x);
923 }
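
/*
 * The bit-smearing above copies the highest set bit of (x - 1) into every
 * lower position, so the final increment lands exactly on the next power of
 * two.  For illustration:
 *
 *   pow2_ceil(1)    == 1
 *   pow2_ceil(5)    == 8
 *   pow2_ceil(700)  == 1024
 *   pow2_ceil(4096) == 4096
 */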
924 
925 static void
926 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
927 {
928 
929 	_write(STDERR_FILENO, p1, strlen(p1));
930 	_write(STDERR_FILENO, p2, strlen(p2));
931 	_write(STDERR_FILENO, p3, strlen(p3));
932 	_write(STDERR_FILENO, p4, strlen(p4));
933 }
934 
935 void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
936 	    const char *p4) = wrtmessage;
937 
938 #ifdef MALLOC_STATS
939 /*
940  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
941  */
942 static void
943 malloc_printf(const char *format, ...)
944 {
945 	char buf[4096];
946 	va_list ap;
947 
948 	va_start(ap, format);
949 	vsnprintf(buf, sizeof(buf), format, ap);
950 	va_end(ap);
951 	_malloc_message(buf, "", "", "");
952 }
953 #endif
954 
955 /*
956  * We don't want to depend on vsnprintf() for production builds, since that can
957  * cause unnecessary bloat for static binaries.  umax2s() provides minimal
958  * integer printing functionality, so that malloc_printf() use can be limited to
959  * MALLOC_STATS code.
960  */
961 #define UMAX2S_BUFSIZE	21
962 static char *
963 umax2s(uintmax_t x, char *s)
964 {
965 	unsigned i;
966 
967 	/* Make sure UMAX2S_BUFSIZE is large enough. */
968 	/* LINTED */
969 	assert(sizeof(uintmax_t) <= 8);
970 
971 	i = UMAX2S_BUFSIZE - 1;
972 	s[i] = '\0';
973 	do {
974 		i--;
975 		s[i] = "0123456789"[(int)x % 10];
976 		x /= (uintmax_t)10LL;
977 	} while (x > 0);
978 
979 	return (&s[i]);
980 }
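
/*
 * For illustration: given char buf[UMAX2S_BUFSIZE], umax2s(1234, buf) writes
 * "1234\0" into the tail of the buffer and returns &buf[16]; callers must use
 * the returned pointer rather than buf itself.
 */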
981 
982 /******************************************************************************/
983 
984 static bool
985 base_pages_alloc(size_t minsize)
986 {
987 	size_t csize = 0;
988 
989 #ifdef USE_BRK
990 	/*
991 	 * Do special brk allocation here, since base allocations don't need to
992 	 * be chunk-aligned.
993 	 */
994 	if (brk_prev != (void *)-1) {
995 		void *brk_cur;
996 		intptr_t incr;
997 
998 		if (minsize != 0)
999 			csize = CHUNK_CEILING(minsize);
1000 
1001 		malloc_mutex_lock(&brk_mtx);
1002 		do {
1003 			/* Get the current end of brk. */
1004 			brk_cur = sbrk(0);
1005 
1006 			/*
1007 			 * Calculate how much padding is necessary to
1008 			 * chunk-align the end of brk.  Don't worry about
1009 			 * brk_cur not being chunk-aligned though.
1010 			 */
1011 			incr = (intptr_t)chunksize
1012 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1013 			if (incr < minsize)
1014 				incr += csize;
1015 
1016 			brk_prev = sbrk(incr);
1017 			if (brk_prev == brk_cur) {
1018 				/* Success. */
1019 				malloc_mutex_unlock(&brk_mtx);
1020 				base_pages = brk_cur;
1021 				base_next_addr = base_pages;
1022 				base_past_addr = (void *)((uintptr_t)base_pages
1023 				    + incr);
1024 #ifdef MALLOC_STATS
1025 				base_mapped += incr;
1026 #endif
1027 				return (false);
1028 			}
1029 		} while (brk_prev != (void *)-1);
1030 		malloc_mutex_unlock(&brk_mtx);
1031 	}
1032 	if (minsize == 0) {
1033 		/*
1034 		 * Failure during initialization doesn't matter, so avoid
1035 		 * falling through to the mmap-based page mapping code.
1036 		 */
1037 		return (true);
1038 	}
1039 #endif
1040 	assert(minsize != 0);
1041 	csize = PAGE_CEILING(minsize);
1042 	base_pages = pages_map(NULL, csize);
1043 	if (base_pages == NULL)
1044 		return (true);
1045 	base_next_addr = base_pages;
1046 	base_past_addr = (void *)((uintptr_t)base_pages + csize);
1047 #ifdef MALLOC_STATS
1048 	base_mapped += csize;
1049 #endif
1050 	return (false);
1051 }
1052 
1053 static void *
1054 base_alloc(size_t size)
1055 {
1056 	void *ret;
1057 	size_t csize;
1058 
1059 	/* Round size up to nearest multiple of the cacheline size. */
1060 	csize = CACHELINE_CEILING(size);
1061 
1062 	malloc_mutex_lock(&base_mtx);
1063 
1064 	/* Make sure there's enough space for the allocation. */
1065 	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1066 		if (base_pages_alloc(csize)) {
1067 			ret = NULL;
1068 			goto RETURN;
1069 		}
1070 	}
1071 
1072 	/* Allocate. */
1073 	ret = base_next_addr;
1074 	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1075 
1076 RETURN:
1077 	malloc_mutex_unlock(&base_mtx);
1078 	return (ret);
1079 }
1080 
1081 static chunk_node_t *
1082 base_chunk_node_alloc(void)
1083 {
1084 	chunk_node_t *ret;
1085 
1086 	malloc_mutex_lock(&base_mtx);
1087 	if (base_chunk_nodes != NULL) {
1088 		ret = base_chunk_nodes;
1089 		/* LINTED */
1090 		base_chunk_nodes = *(chunk_node_t **)ret;
1091 		malloc_mutex_unlock(&base_mtx);
1092 	} else {
1093 		malloc_mutex_unlock(&base_mtx);
1094 		ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1095 	}
1096 
1097 	return (ret);
1098 }
1099 
1100 static void
1101 base_chunk_node_dealloc(chunk_node_t *node)
1102 {
1103 
1104 	malloc_mutex_lock(&base_mtx);
1105 	/* LINTED */
1106 	*(chunk_node_t **)node = base_chunk_nodes;
1107 	base_chunk_nodes = node;
1108 	malloc_mutex_unlock(&base_mtx);
1109 }
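
/*
 * base_chunk_node_alloc() and base_chunk_node_dealloc() implement a simple
 * intrusive free list: the first word of each freed chunk_node_t is
 * overwritten with a pointer to the previous head of base_chunk_nodes, so
 * recycling nodes requires no extra memory:
 *
 *   base_chunk_nodes -> node_a -> node_b -> NULL
 */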
1110 
1111 /******************************************************************************/
1112 
1113 #ifdef MALLOC_STATS
1114 static void
1115 stats_print(arena_t *arena)
1116 {
1117 	unsigned i;
1118 	int gap_start;
1119 
1120 	malloc_printf(
1121 	    "          allocated/mapped            nmalloc      ndalloc\n");
1122 
1123 	malloc_printf("small: %12zu %-12s %12llu %12llu\n",
1124 	    arena->stats.allocated_small, "", arena->stats.nmalloc_small,
1125 	    arena->stats.ndalloc_small);
1126 	malloc_printf("large: %12zu %-12s %12llu %12llu\n",
1127 	    arena->stats.allocated_large, "", arena->stats.nmalloc_large,
1128 	    arena->stats.ndalloc_large);
1129 	malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
1130 	    arena->stats.allocated_small + arena->stats.allocated_large,
1131 	    arena->stats.mapped,
1132 	    arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1133 	    arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1134 
1135 	malloc_printf("bins:     bin   size regs pgs  requests   newruns"
1136 	    "    reruns maxruns curruns\n");
1137 	for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1138 		if (arena->bins[i].stats.nrequests == 0) {
1139 			if (gap_start == -1)
1140 				gap_start = i;
1141 		} else {
1142 			if (gap_start != -1) {
1143 				if (i > gap_start + 1) {
1144 					/* Gap of more than one size class. */
1145 					malloc_printf("[%u..%u]\n",
1146 					    gap_start, i - 1);
1147 				} else {
1148 					/* Gap of one size class. */
1149 					malloc_printf("[%u]\n", gap_start);
1150 				}
1151 				gap_start = -1;
1152 			}
1153 			malloc_printf(
1154 			    "%13u %1s %4u %4u %3u %9llu %9llu"
1155 			    " %9llu %7lu %7lu\n",
1156 			    i,
1157 			    i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1158 			    arena->bins[i].reg_size,
1159 			    arena->bins[i].nregs,
1160 			    arena->bins[i].run_size >> pagesize_2pow,
1161 			    arena->bins[i].stats.nrequests,
1162 			    arena->bins[i].stats.nruns,
1163 			    arena->bins[i].stats.reruns,
1164 			    arena->bins[i].stats.highruns,
1165 			    arena->bins[i].stats.curruns);
1166 		}
1167 	}
1168 	if (gap_start != -1) {
1169 		if (i > gap_start + 1) {
1170 			/* Gap of more than one size class. */
1171 			malloc_printf("[%u..%u]\n", gap_start, i - 1);
1172 		} else {
1173 			/* Gap of one size class. */
1174 			malloc_printf("[%u]\n", gap_start);
1175 		}
1176 	}
1177 }
1178 #endif
1179 
1180 /*
1181  * End Utility functions/macros.
1182  */
1183 /******************************************************************************/
1184 /*
1185  * Begin chunk management functions.
1186  */
1187 
1188 #ifndef lint
1189 static inline int
1190 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1191 {
1192 
1193 	assert(a != NULL);
1194 	assert(b != NULL);
1195 
1196 	if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1197 		return (-1);
1198 	else if (a->chunk == b->chunk)
1199 		return (0);
1200 	else
1201 		return (1);
1202 }
1203 
1204 /* Generate red-black tree code for chunks. */
1205 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1206 #endif
1207 
1208 static void *
1209 pages_map_align(void *addr, size_t size, int align)
1210 {
1211 	void *ret;
1212 
1213 	/*
1214 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
1215 	 * of existing mappings, and we only want to create new mappings.
1216 	 */
1217 	ret = mmap(addr, size, PROT_READ | PROT_WRITE,
1218 	    MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(align), -1, 0);
1219 	assert(ret != NULL);
1220 
1221 	if (ret == MAP_FAILED)
1222 		ret = NULL;
1223 	else if (addr != NULL && ret != addr) {
1224 		/*
1225 		 * We succeeded in mapping memory, but not in the right place.
1226 		 */
1227 		if (munmap(ret, size) == -1) {
1228 			char buf[STRERROR_BUF];
1229 
1230 			strerror_r(errno, buf, sizeof(buf));
1231 			_malloc_message(_getprogname(),
1232 			    ": (malloc) Error in munmap(): ", buf, "\n");
1233 			if (opt_abort)
1234 				abort();
1235 		}
1236 		ret = NULL;
1237 	}
1238 
1239 	assert(ret == NULL || (addr == NULL && ret != addr)
1240 	    || (addr != NULL && ret == addr));
1241 	return (ret);
1242 }
1243 
1244 static void *
1245 pages_map(void *addr, size_t size)
1246 {
1247 
1248 	return pages_map_align(addr, size, 0);
1249 }
1250 
1251 static void
1252 pages_unmap(void *addr, size_t size)
1253 {
1254 
1255 	if (munmap(addr, size) == -1) {
1256 		char buf[STRERROR_BUF];
1257 
1258 		strerror_r(errno, buf, sizeof(buf));
1259 		_malloc_message(_getprogname(),
1260 		    ": (malloc) Error in munmap(): ", buf, "\n");
1261 		if (opt_abort)
1262 			abort();
1263 	}
1264 }
1265 
1266 static void *
1267 chunk_alloc(size_t size)
1268 {
1269 	void *ret, *chunk;
1270 	chunk_node_t *tchunk, *delchunk;
1271 
1272 	assert(size != 0);
1273 	assert((size & chunksize_mask) == 0);
1274 
1275 	malloc_mutex_lock(&chunks_mtx);
1276 
1277 	if (size == chunksize) {
1278 		/*
1279 		 * Check for address ranges that were previously chunks and try
1280 		 * to use them.
1281 		 */
1282 
1283 		/* LINTED */
1284 		tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1285 		while (tchunk != NULL) {
1286 			/* Found an address range.  Try to recycle it. */
1287 
1288 			chunk = tchunk->chunk;
1289 			delchunk = tchunk;
1290 			/* LINTED */
1291 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1292 
1293 			/* Remove delchunk from the tree. */
1294 			/* LINTED */
1295 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1296 			base_chunk_node_dealloc(delchunk);
1297 
1298 #ifdef USE_BRK
1299 			if ((uintptr_t)chunk >= (uintptr_t)brk_base
1300 			    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1301 				/* Re-use a previously freed brk chunk. */
1302 				ret = chunk;
1303 				goto RETURN;
1304 			}
1305 #endif
1306 			if ((ret = pages_map(chunk, size)) != NULL) {
1307 				/* Success. */
1308 				goto RETURN;
1309 			}
1310 		}
1311 	}
1312 
1313 	/*
1314 	 * Ask for a chunk-aligned mapping, but allow the OS to place the
1315 	 * allocation anywhere.  Beware of size_t wrap-around.
1316 	 */
1317 	if (size + chunksize > size) {
1318 		if ((ret = pages_map_align(NULL, size, chunksize_2pow))
1319 		    != NULL) {
1320 			goto RETURN;
1321 		}
1322 	}
1323 
1324 #ifdef USE_BRK
1325 	/*
1326 	 * Try to create allocations in brk, in order to make full use of
1327 	 * limited address space.
1328 	 */
1329 	if (brk_prev != (void *)-1) {
1330 		void *brk_cur;
1331 		intptr_t incr;
1332 
1333 		/*
1334 		 * The loop is necessary to recover from races with other
1335 		 * threads that are using brk for something other than malloc.
1336 		 */
1337 		malloc_mutex_lock(&brk_mtx);
1338 		do {
1339 			/* Get the current end of brk. */
1340 			brk_cur = sbrk(0);
1341 
1342 			/*
1343 			 * Calculate how much padding is necessary to
1344 			 * chunk-align the end of brk.
1345 			 */
1346 			incr = (intptr_t)size
1347 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1348 			if (incr == size) {
1349 				ret = brk_cur;
1350 			} else {
1351 				ret = (void *)((intptr_t)brk_cur + incr);
1352 				incr += size;
1353 			}
1354 
1355 			brk_prev = sbrk(incr);
1356 			if (brk_prev == brk_cur) {
1357 				/* Success. */
1358 				malloc_mutex_unlock(&brk_mtx);
1359 				brk_max = (void *)((intptr_t)ret + size);
1360 				goto RETURN;
1361 			}
1362 		} while (brk_prev != (void *)-1);
1363 		malloc_mutex_unlock(&brk_mtx);
1364 	}
1365 #endif
1366 
1367 	/* All strategies for allocation failed. */
1368 	ret = NULL;
1369 RETURN:
1370 	if (ret != NULL) {
1371 		chunk_node_t key;
1372 		/*
1373 		 * Clean out any entries in old_chunks that overlap with the
1374 		 * memory we just allocated.
1375 		 */
1376 		key.chunk = ret;
1377 		/* LINTED */
1378 		tchunk = RB_NFIND(chunk_tree_s, &old_chunks, &key);
1379 		while (tchunk != NULL
1380 		    && (uintptr_t)tchunk->chunk >= (uintptr_t)ret
1381 		    && (uintptr_t)tchunk->chunk < (uintptr_t)ret + size) {
1382 			delchunk = tchunk;
1383 			/* LINTED */
1384 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1385 			/* LINTED */
1386 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1387 			base_chunk_node_dealloc(delchunk);
1388 		}
1389 
1390 	}
1391 #ifdef MALLOC_STATS
1392 	if (ret != NULL) {
1393 		stats_chunks.nchunks += (size / chunksize);
1394 		stats_chunks.curchunks += (size / chunksize);
1395 	}
1396 	if (stats_chunks.curchunks > stats_chunks.highchunks)
1397 		stats_chunks.highchunks = stats_chunks.curchunks;
1398 #endif
1399 	malloc_mutex_unlock(&chunks_mtx);
1400 
1401 	assert(CHUNK_ADDR2BASE(ret) == ret);
1402 	return (ret);
1403 }
1404 
1405 static void
1406 chunk_dealloc(void *chunk, size_t size)
1407 {
1408 	chunk_node_t *node;
1409 
1410 	assert(chunk != NULL);
1411 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
1412 	assert(size != 0);
1413 	assert((size & chunksize_mask) == 0);
1414 
1415 	malloc_mutex_lock(&chunks_mtx);
1416 
1417 #ifdef USE_BRK
1418 	if ((uintptr_t)chunk >= (uintptr_t)brk_base
1419 	    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1420 		void *brk_cur;
1421 
1422 		malloc_mutex_lock(&brk_mtx);
1423 		/* Get the current end of brk. */
1424 		brk_cur = sbrk(0);
1425 
1426 		/*
1427 		 * Try to shrink the data segment if this chunk is at the end
1428 		 * of the data segment.  The sbrk() call here is subject to a
1429 		 * race condition with threads that use brk(2) or sbrk(2)
1430 		 * directly, but the alternative would be to leak memory for
1431 		 * the sake of poorly designed multi-threaded programs.
1432 		 */
1433 		if (brk_cur == brk_max
1434 		    && (void *)((uintptr_t)chunk + size) == brk_max
1435 		    && sbrk(-(intptr_t)size) == brk_max) {
1436 			malloc_mutex_unlock(&brk_mtx);
1437 			if (brk_prev == brk_max) {
1438 				/* Success. */
1439 				brk_prev = (void *)((intptr_t)brk_max
1440 				    - (intptr_t)size);
1441 				brk_max = brk_prev;
1442 			}
1443 		} else {
1444 			size_t offset;
1445 
1446 			malloc_mutex_unlock(&brk_mtx);
1447 			madvise(chunk, size, MADV_FREE);
1448 
1449 			/*
1450 			 * Iteratively create records of each chunk-sized
1451 			 * memory region that 'chunk' is composed of, so that
1452 			 * the address range can be recycled if memory usage
1453 			 * increases later on.
1454 			 */
1455 			for (offset = 0; offset < size; offset += chunksize) {
1456 				node = base_chunk_node_alloc();
1457 				if (node == NULL)
1458 					break;
1459 
1460 				node->chunk = (void *)((uintptr_t)chunk
1461 				    + (uintptr_t)offset);
1462 				node->size = chunksize;
1463 				/* LINTED */
1464 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1465 			}
1466 		}
1467 	} else {
1468 #endif
1469 		pages_unmap(chunk, size);
1470 
1471 		/*
1472 		 * Make a record of the chunk's address, so that the address
1473 		 * range can be recycled if memory usage increases later on.
1474 		 * Don't bother to create entries if (size > chunksize), since
1475 		 * doing so could cause scalability issues for truly gargantuan
1476 		 * objects (many gigabytes or larger).
1477 		 */
1478 		if (size == chunksize) {
1479 			node = base_chunk_node_alloc();
1480 			if (node != NULL) {
1481 				node->chunk = (void *)(uintptr_t)chunk;
1482 				node->size = chunksize;
1483 				/* LINTED */
1484 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1485 			}
1486 		}
1487 #ifdef USE_BRK
1488 	}
1489 #endif
1490 
1491 #ifdef MALLOC_STATS
1492 	stats_chunks.curchunks -= (size / chunksize);
1493 #endif
1494 	malloc_mutex_unlock(&chunks_mtx);
1495 }
1496 
1497 /*
1498  * End chunk management functions.
1499  */
1500 /******************************************************************************/
1501 /*
1502  * Begin arena.
1503  */
1504 
1505 /*
1506  * Choose an arena based on a per-thread value (fast-path code, calls slow-path
1507  * code if necessary).
1508  */
1509 static inline arena_t *
1510 choose_arena(void)
1511 {
1512 	arena_t *ret;
1513 
1514 	/*
1515 	 * We can only use TLS if this is a PIC library, since for the static
1516 	 * library version, libc's malloc is used by TLS allocation, which
1517 	 * introduces a bootstrapping issue.
1518 	 */
1519 	if (__isthreaded == false) {
1520 	    /*
1521 	     * Avoid the overhead of TLS for single-threaded operation.  If the
1522 	     * app switches to threaded mode, the initial thread may end up
1523 	     * being assigned to some other arena, but this one-time switch
1524 	     * shouldn't cause significant issues.
1525 	     */
1526 	    return (arenas[0]);
1527 	}
1528 
1529 	ret = get_arenas_map();
1530 	if (ret == NULL)
1531 		ret = choose_arena_hard();
1532 
1533 	assert(ret != NULL);
1534 	return (ret);
1535 }
1536 
1537 /*
1538  * Choose an arena based on a per-thread value (slow-path code only, called
1539  * only by choose_arena()).
1540  */
1541 static arena_t *
1542 choose_arena_hard(void)
1543 {
1544 	arena_t *ret;
1545 
1546 	assert(__isthreaded);
1547 
1548 	/* Assign one of the arenas to this thread, in a round-robin fashion. */
1549 	malloc_mutex_lock(&arenas_mtx);
1550 	ret = arenas[next_arena];
1551 	if (ret == NULL)
1552 		ret = arenas_extend(next_arena);
1553 	if (ret == NULL) {
1554 		/*
1555 		 * Make sure that this function never returns NULL, so that
1556 		 * choose_arena() doesn't have to check for a NULL return
1557 		 * value.
1558 		 */
1559 		ret = arenas[0];
1560 	}
1561 	next_arena = (next_arena + 1) % narenas;
1562 	malloc_mutex_unlock(&arenas_mtx);
1563 	set_arenas_map(ret);
1564 
1565 	return (ret);
1566 }
1567 
1568 #ifndef lint
1569 static inline int
1570 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1571 {
1572 
1573 	assert(a != NULL);
1574 	assert(b != NULL);
1575 
1576 	if ((uintptr_t)a < (uintptr_t)b)
1577 		return (-1);
1578 	else if (a == b)
1579 		return (0);
1580 	else
1581 		return (1);
1582 }
1583 
1584 /* Generate red-black tree code for arena chunks. */
1585 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1586 #endif
1587 
1588 #ifndef lint
1589 static inline int
1590 arena_run_comp(arena_run_t *a, arena_run_t *b)
1591 {
1592 
1593 	assert(a != NULL);
1594 	assert(b != NULL);
1595 
1596 	if ((uintptr_t)a < (uintptr_t)b)
1597 		return (-1);
1598 	else if (a == b)
1599 		return (0);
1600 	else
1601 		return (1);
1602 }
1603 
1604 /* Generate red-black tree code for arena runs. */
1605 RB_GENERATE_STATIC(arena_run_tree_s, arena_run_s, link, arena_run_comp);
1606 #endif
1607 
1608 static inline void *
1609 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1610 {
1611 	void *ret;
1612 	unsigned i, mask, bit, regind;
1613 
1614 	assert(run->magic == ARENA_RUN_MAGIC);
1615 	assert(run->regs_minelm < bin->regs_mask_nelms);
1616 
1617 	/*
1618 	 * Move the first check outside the loop, so that run->regs_minelm can
1619 	 * be updated unconditionally, without the possibility of updating it
1620 	 * multiple times.
1621 	 */
1622 	i = run->regs_minelm;
1623 	mask = run->regs_mask[i];
1624 	if (mask != 0) {
1625 		/* Usable allocation found. */
1626 		bit = ffs((int)mask) - 1;
1627 
1628 		regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1629 		ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1630 		    + (bin->reg_size * regind));
1631 
1632 		/* Clear bit. */
1633 		mask ^= (1 << bit);
1634 		run->regs_mask[i] = mask;
1635 
1636 		return (ret);
1637 	}
1638 
1639 	for (i++; i < bin->regs_mask_nelms; i++) {
1640 		mask = run->regs_mask[i];
1641 		if (mask != 0) {
1642 			/* Usable allocation found. */
1643 			bit = ffs((int)mask) - 1;
1644 
1645 			regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1646 			ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1647 			    + (bin->reg_size * regind));
1648 
1649 			/* Clear bit. */
1650 			mask ^= (1 << bit);
1651 			run->regs_mask[i] = mask;
1652 
1653 			/*
1654 			 * Make a note that nothing before this element
1655 			 * contains a free region.
1656 			 */
1657 			run->regs_minelm = i; /* Low payoff: + (mask == 0); */
1658 
1659 			return (ret);
1660 		}
1661 	}
1662 	/* Not reached. */
1663 	/* LINTED */
1664 	assert(0);
1665 	return (NULL);
1666 }
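
/*
 * Worked example for the index arithmetic above, with the default
 * SIZEOF_INT_2POW == 2 (32-bit ints): each regs_mask element covers 32
 * regions, so if the lowest set (free) bit is bit 3 of regs_mask[1], then
 * regind == (1 << 5) + 3 == 35 and the returned region is
 * run + reg0_offset + 35 * reg_size.
 */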
1667 
1668 static inline void
1669 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1670 {
1671 	/*
1672 	 * To divide by a number D that is not a power of two we multiply
1673 	 * by (2^21 / D) and then right shift by 21 positions.
1674 	 *
1675 	 *   X / D
1676 	 *
1677 	 * becomes
1678 	 *
1679 	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
1680 	 */
1681 #define SIZE_INV_SHIFT 21
1682 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
1683 	static const unsigned size_invs[] = {
1684 	    SIZE_INV(3),
1685 	    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1686 	    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1687 	    SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1688 	    SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1689 	    SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1690 	    SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1691 	    SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1692 #if (QUANTUM_2POW_MIN < 4)
1693 	    ,
1694 	    SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1695 	    SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1696 	    SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1697 	    SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1698 	    SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1699 	    SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1700 	    SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1701 	    SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1702 #endif
1703 	};
1704 	unsigned diff, regind, elm, bit;
1705 
1706 	/* LINTED */
1707 	assert(run->magic == ARENA_RUN_MAGIC);
1708 	assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1709 	    >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1710 
1711 	/*
1712 	 * Avoid doing division with a variable divisor if possible.  Using
1713 	 * actual division here can reduce allocator throughput by over 20%!
1714 	 */
1715 	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1716 	if ((size & (size - 1)) == 0) {
1717 		/*
1718 		 * log2_table allows fast division of a power of two in the
1719 		 * [1..128] range.
1720 		 *
1721 		 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1722 		 */
1723 		static const unsigned char log2_table[] = {
1724 		    0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1725 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1726 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1727 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1728 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1729 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1730 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1731 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1732 		};
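
		/*
		 * For example, for the 64 byte size class,
		 * log2_table[64 - 1] == 6, so regind == diff >> 6, which is
		 * equivalent to diff / 64.
		 */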
1733 
1734 		if (size <= 128)
1735 			regind = (diff >> log2_table[size - 1]);
1736 		else if (size <= 32768)
1737 			regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1738 		else {
1739 			/*
1740 			 * The page size is too large for us to use the lookup
1741 			 * table.  Use real division.
1742 			 */
1743 			regind = (unsigned)(diff / size);
1744 		}
1745 	} else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
1746 	    << QUANTUM_2POW_MIN) + 2) {
1747 		regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1748 		regind >>= SIZE_INV_SHIFT;
1749 	} else {
1750 		/*
1751 		 * size_invs isn't large enough to handle this size class, so
1752 		 * calculate regind using actual division.  This only happens
1753 		 * if the user increases small_max via the 'S' runtime
1754 		 * configuration option.
1755 		 */
1756 		regind = (unsigned)(diff / size);
1757 	}
1758 	assert(diff == regind * size);
1759 	assert(regind < bin->nregs);
1760 
1761 	elm = regind >> (SIZEOF_INT_2POW + 3);
1762 	if (elm < run->regs_minelm)
1763 		run->regs_minelm = elm;
1764 	bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1765 	assert((run->regs_mask[elm] & (1 << bit)) == 0);
1766 	run->regs_mask[elm] |= (1 << bit);
1767 #undef SIZE_INV
1768 #undef SIZE_INV_SHIFT
1769 }
1770 
1771 static void
1772 arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
1773 {
1774 	arena_chunk_t *chunk;
1775 	unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
1776 	unsigned i;
1777 
1778 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1779 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1780 	    >> pagesize_2pow);
1781 	total_pages = chunk->map[run_ind].npages;
1782 	need_pages = (unsigned)(size >> pagesize_2pow);
1783 	assert(need_pages <= total_pages);
1784 	rem_pages = total_pages - need_pages;
1785 
1786 	/* Split enough pages from the front of run to fit allocation size. */
1787 	map_offset = run_ind;
1788 	for (i = 0; i < need_pages; i++) {
1789 		chunk->map[map_offset + i].npages = need_pages;
1790 		chunk->map[map_offset + i].pos = i;
1791 	}
1792 
1793 	/* Keep track of trailing unused pages for later use. */
1794 	if (rem_pages > 0) {
1795 		/* Update map for trailing pages. */
1796 		map_offset += need_pages;
1797 		chunk->map[map_offset].npages = rem_pages;
1798 		chunk->map[map_offset].pos = POS_FREE;
1799 		chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
1800 		chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
1801 	}
1802 
1803 	chunk->pages_used += need_pages;
1804 }
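
/*
 * For illustration: splitting a 3 page run out of a 10 page free run that
 * starts at page index 2 leaves the chunk map as
 *
 *   map[2..4]: npages == 3, pos == 0, 1, 2   (allocated run)
 *   map[5]:    npages == 7, pos == POS_FREE  (trailing free run)
 *   map[11]:   npages == 7, pos == POS_FREE
 *
 * with the interior entries of the free run left stale, as described in the
 * arena_chunk_s map comment.
 */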
1805 
1806 static arena_chunk_t *
1807 arena_chunk_alloc(arena_t *arena)
1808 {
1809 	arena_chunk_t *chunk;
1810 
1811 	if (arena->spare != NULL) {
1812 		chunk = arena->spare;
1813 		arena->spare = NULL;
1814 
1815 		/* LINTED */
1816 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1817 	} else {
1818 		chunk = (arena_chunk_t *)chunk_alloc(chunksize);
1819 		if (chunk == NULL)
1820 			return (NULL);
1821 #ifdef MALLOC_STATS
1822 		arena->stats.mapped += chunksize;
1823 #endif
1824 
1825 		chunk->arena = arena;
1826 
1827 		/* LINTED */
1828 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1829 
1830 		/*
1831 		 * Claim that no pages are in use, since the header is merely
1832 		 * overhead.
1833 		 */
1834 		chunk->pages_used = 0;
1835 
1836 		chunk->max_frun_npages = chunk_npages -
1837 		    arena_chunk_header_npages;
1838 		chunk->min_frun_ind = arena_chunk_header_npages;
1839 
1840 		/*
1841 		 * Initialize enough of the map to support one maximal free run.
1842 		 */
1843 		chunk->map[arena_chunk_header_npages].npages = chunk_npages -
1844 		    arena_chunk_header_npages;
1845 		chunk->map[arena_chunk_header_npages].pos = POS_FREE;
1846 		chunk->map[chunk_npages - 1].npages = chunk_npages -
1847 		    arena_chunk_header_npages;
1848 		chunk->map[chunk_npages - 1].pos = POS_FREE;
1849 	}
1850 
1851 	return (chunk);
1852 }
1853 
1854 static void
1855 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
1856 {
1857 
1858 	/*
1859 	 * Remove chunk from the chunk tree, regardless of whether this chunk
1860 	 * will be cached, so that the arena does not use it.
1861 	 */
1862 	/* LINTED */
1863 	RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1864 
1865 	if (opt_hint == false) {
1866 		if (arena->spare != NULL) {
1867 			chunk_dealloc((void *)arena->spare, chunksize);
1868 #ifdef MALLOC_STATS
1869 			arena->stats.mapped -= chunksize;
1870 #endif
1871 		}
1872 		arena->spare = chunk;
1873 	} else {
1874 		assert(arena->spare == NULL);
1875 		chunk_dealloc((void *)chunk, chunksize);
1876 #ifdef MALLOC_STATS
1877 		arena->stats.mapped -= chunksize;
1878 #endif
1879 	}
1880 }
1881 
1882 static arena_run_t *
1883 arena_run_alloc(arena_t *arena, size_t size)
1884 {
1885 	arena_chunk_t *chunk;
1886 	arena_run_t *run;
1887 	unsigned need_npages, limit_pages, compl_need_npages;
1888 
1889 	assert(size <= (chunksize - (arena_chunk_header_npages <<
1890 	    pagesize_2pow)));
1891 	assert((size & pagesize_mask) == 0);
1892 
1893 	/*
1894 	 * Search through arena's chunks in address order for a free run that is
1895 	 * large enough.  Look for the first fit.
1896 	 */
1897 	need_npages = (unsigned)(size >> pagesize_2pow);
1898 	limit_pages = chunk_npages - arena_chunk_header_npages;
1899 	compl_need_npages = limit_pages - need_npages;
1900 	/* LINTED */
1901 	RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
1902 		/*
1903 		 * Avoid searching this chunk if there are not enough
1904 		 * contiguous free pages for there to possibly be a large
1905 		 * enough free run.
1906 		 */
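		/*
		 * (pages_used <= compl_need_npages is just a cheap way of
		 * checking that at least need_npages pages are free somewhere
		 * in the chunk; max_frun_npages then bounds the largest
		 * contiguous free run.)
		 */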
1907 		if (chunk->pages_used <= compl_need_npages &&
1908 		    need_npages <= chunk->max_frun_npages) {
1909 			arena_chunk_map_t *mapelm;
1910 			unsigned i;
1911 			unsigned max_frun_npages = 0;
1912 			unsigned min_frun_ind = chunk_npages;
1913 
1914 			assert(chunk->min_frun_ind >=
1915 			    arena_chunk_header_npages);
1916 			for (i = chunk->min_frun_ind; i < chunk_npages;) {
1917 				mapelm = &chunk->map[i];
1918 				if (mapelm->pos == POS_FREE) {
1919 					if (mapelm->npages >= need_npages) {
1920 						run = (arena_run_t *)
1921 						    ((uintptr_t)chunk + (i <<
1922 						    pagesize_2pow));
1923 						/* Update page map. */
1924 						arena_run_split(arena, run,
1925 						    size);
1926 						return (run);
1927 					}
1928 					if (mapelm->npages >
1929 					    max_frun_npages) {
1930 						max_frun_npages =
1931 						    mapelm->npages;
1932 					}
1933 					if (i < min_frun_ind) {
1934 						min_frun_ind = i;
1935 						if (i < chunk->min_frun_ind)
1936 							chunk->min_frun_ind = i;
1937 					}
1938 				}
1939 				i += mapelm->npages;
1940 			}
1941 			/*
1942 			 * Search failure.  Reset cached chunk->max_frun_npages.
1943 			 * chunk->min_frun_ind was already reset above (if
1944 			 * necessary).
1945 			 */
1946 			chunk->max_frun_npages = max_frun_npages;
1947 		}
1948 	}
1949 
1950 	/*
1951 	 * No usable runs.  Create a new chunk from which to allocate the run.
1952 	 */
1953 	chunk = arena_chunk_alloc(arena);
1954 	if (chunk == NULL)
1955 		return (NULL);
1956 	run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
1957 	    pagesize_2pow));
1958 	/* Update page map. */
1959 	arena_run_split(arena, run, size);
1960 	return (run);
1961 }
1962 
1963 static void
1964 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
1965 {
1966 	arena_chunk_t *chunk;
1967 	unsigned run_ind, run_pages;
1968 
1969 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1970 
1971 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1972 	    >> pagesize_2pow);
1973 	assert(run_ind >= arena_chunk_header_npages);
1974 	assert(run_ind < (chunksize >> pagesize_2pow));
1975 	run_pages = (unsigned)(size >> pagesize_2pow);
1976 	assert(run_pages == chunk->map[run_ind].npages);
1977 
1978 	/* Subtract pages from count of pages used in chunk. */
1979 	chunk->pages_used -= run_pages;
1980 
1981 	/* Mark run as deallocated. */
1982 	assert(chunk->map[run_ind].npages == run_pages);
1983 	chunk->map[run_ind].pos = POS_FREE;
1984 	assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
1985 	chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
1986 
1987 	/*
1988 	 * Tell the kernel that we don't need the data in this run, but only if
1989 	 * requested via runtime configuration.
1990 	 */
1991 	if (opt_hint)
1992 		madvise(run, size, MADV_FREE);
1993 
1994 	/* Try to coalesce with neighboring runs. */
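	/*
	 * For example, freeing a 2 page run that sits between a 3 page free
	 * run and a 4 page free run leaves a single 9 page free run, recorded
	 * in the first and last map entries of the coalesced region.
	 */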
1995 	if (run_ind > arena_chunk_header_npages &&
1996 	    chunk->map[run_ind - 1].pos == POS_FREE) {
1997 		unsigned prev_npages;
1998 
1999 		/* Coalesce with previous run. */
2000 		prev_npages = chunk->map[run_ind - 1].npages;
2001 		run_ind -= prev_npages;
2002 		assert(chunk->map[run_ind].npages == prev_npages);
2003 		assert(chunk->map[run_ind].pos == POS_FREE);
2004 		run_pages += prev_npages;
2005 
2006 		chunk->map[run_ind].npages = run_pages;
2007 		assert(chunk->map[run_ind].pos == POS_FREE);
2008 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2009 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2010 	}
2011 
2012 	if (run_ind + run_pages < chunk_npages &&
2013 	    chunk->map[run_ind + run_pages].pos == POS_FREE) {
2014 		unsigned next_npages;
2015 
2016 		/* Coalesce with next run. */
2017 		next_npages = chunk->map[run_ind + run_pages].npages;
2018 		run_pages += next_npages;
2019 		assert(chunk->map[run_ind + run_pages - 1].npages ==
2020 		    next_npages);
2021 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2022 
2023 		chunk->map[run_ind].npages = run_pages;
2024 		chunk->map[run_ind].pos = POS_FREE;
2025 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2026 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2027 	}
2028 
2029 	if (chunk->map[run_ind].npages > chunk->max_frun_npages)
2030 		chunk->max_frun_npages = chunk->map[run_ind].npages;
2031 	if (run_ind < chunk->min_frun_ind)
2032 		chunk->min_frun_ind = run_ind;
2033 
2034 	/* Deallocate chunk if it is now completely unused. */
2035 	if (chunk->pages_used == 0)
2036 		arena_chunk_dealloc(arena, chunk);
2037 }
2038 
2039 static arena_run_t *
2040 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2041 {
2042 	arena_run_t *run;
2043 	unsigned i, remainder;
2044 
2045 	/* Look for a usable run. */
2046 	/* LINTED */
2047 	if ((run = RB_MIN(arena_run_tree_s, &bin->runs)) != NULL) {
2048 		/* run is guaranteed to have available space. */
2049 		/* LINTED */
2050 		RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2051 #ifdef MALLOC_STATS
2052 		bin->stats.reruns++;
2053 #endif
2054 		return (run);
2055 	}
2056 	/* No existing runs have any space available. */
2057 
2058 	/* Allocate a new run. */
2059 	run = arena_run_alloc(arena, bin->run_size);
2060 	if (run == NULL)
2061 		return (NULL);
2062 
2063 	/* Initialize run internals. */
2064 	run->bin = bin;
2065 
2066 	for (i = 0; i < bin->regs_mask_nelms; i++)
2067 		run->regs_mask[i] = UINT_MAX;
2068 	remainder = bin->nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1);
2069 	if (remainder != 0) {
2070 		/* The last element has spare bits that need to be unset. */
2071 		run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2072 		    - remainder));
2073 	}
2074 
2075 	run->regs_minelm = 0;
2076 
2077 	run->nfree = bin->nregs;
2078 #ifdef MALLOC_DEBUG
2079 	run->magic = ARENA_RUN_MAGIC;
2080 #endif
2081 
2082 #ifdef MALLOC_STATS
2083 	bin->stats.nruns++;
2084 	bin->stats.curruns++;
2085 	if (bin->stats.curruns > bin->stats.highruns)
2086 		bin->stats.highruns = bin->stats.curruns;
2087 #endif
2088 	return (run);
2089 }
2090 
2091 /* bin->runcur must have space available before this function is called. */
2092 static inline void *
2093 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2094 {
2095 	void *ret;
2096 
2097 	assert(run->magic == ARENA_RUN_MAGIC);
2098 	assert(run->nfree > 0);
2099 
2100 	ret = arena_run_reg_alloc(run, bin);
2101 	assert(ret != NULL);
2102 	run->nfree--;
2103 
2104 	return (ret);
2105 }
2106 
2107 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2108 static void *
2109 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2110 {
2111 
2112 	bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2113 	if (bin->runcur == NULL)
2114 		return (NULL);
2115 	assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2116 	assert(bin->runcur->nfree > 0);
2117 
2118 	return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2119 }
2120 
2121 /*
2122  * Calculate bin->run_size such that it meets the following constraints:
2123  *
2124  *   *) bin->run_size >= min_run_size
2125  *   *) bin->run_size <= arena_maxclass
2126  *   *) bin->run_size <= RUN_MAX_SMALL
2127  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2128  *
2129  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2130  * also calculated here, since these settings are all interdependent.
2131  */
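/*
 * As a rough illustration (hypothetical numbers, assuming a 4 kB page and
 * 32-bit unsigned masks): for reg_size == 16, the initial estimate is just
 * under 256 regions per page-sized run, which would need 8 bitmask words in
 * the run header; the inner do..while loop below then drops one region at a
 * time until sizeof(arena_run_t) plus the mask words fits in the space before
 * region 0 (try_reg0_offset).
 */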
2132 static size_t
2133 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
2134 {
2135 	size_t try_run_size, good_run_size;
2136 	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
2137 	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
2138 	float max_ovrhd = RUN_MAX_OVRHD;
2139 
2140 	assert(min_run_size >= pagesize);
2141 	assert(min_run_size <= arena_maxclass);
2142 	assert(min_run_size <= RUN_MAX_SMALL);
2143 
2144 	/*
2145 	 * Calculate known-valid settings before entering the run_size
2146 	 * expansion loop, so that the first part of the loop always copies
2147 	 * valid settings.
2148 	 *
2149 	 * The do..while loop iteratively reduces the number of regions until
2150 	 * the run header and the regions no longer overlap.  A closed formula
2151 	 * would be quite messy, since there is an interdependency between the
2152 	 * header's mask length and the number of regions.
2153 	 */
2154 	try_run_size = min_run_size;
2155 	try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2156 	    bin->reg_size) + 1); /* Counter-act the first line of the loop. */
2157 	do {
2158 		try_nregs--;
2159 		try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2160 		    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
2161 		try_reg0_offset = (unsigned)(try_run_size -
2162 		    (try_nregs * bin->reg_size));
2163 	} while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
2164 	    > try_reg0_offset);
2165 
2166 	/* run_size expansion loop. */
2167 	do {
2168 		/*
2169 		 * Copy valid settings before trying more aggressive settings.
2170 		 */
2171 		good_run_size = try_run_size;
2172 		good_nregs = try_nregs;
2173 		good_mask_nelms = try_mask_nelms;
2174 		good_reg0_offset = try_reg0_offset;
2175 
2176 		/* Try more aggressive settings. */
2177 		try_run_size += pagesize;
2178 		try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2179 		    bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
2180 		do {
2181 			try_nregs--;
2182 			try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2183 			    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ?
2184 			    1 : 0);
2185 			try_reg0_offset = (unsigned)(try_run_size - (try_nregs *
2186 			    bin->reg_size));
2187 		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
2188 		    (try_mask_nelms - 1)) > try_reg0_offset);
2189 	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
2190 	    && max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3))
2191 	    && ((float)(try_reg0_offset)) / ((float)(try_run_size)) >
2192 	    max_ovrhd);
2193 
2194 	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
2195 	    <= good_reg0_offset);
2196 	assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
2197 
2198 	/* Copy final settings. */
2199 	bin->run_size = good_run_size;
2200 	bin->nregs = good_nregs;
2201 	bin->regs_mask_nelms = good_mask_nelms;
2202 	bin->reg0_offset = good_reg0_offset;
2203 
2204 	return (good_run_size);
2205 }
2206 
2207 static void *
2208 arena_malloc(arena_t *arena, size_t size)
2209 {
2210 	void *ret;
2211 
2212 	assert(arena != NULL);
2213 	assert(arena->magic == ARENA_MAGIC);
2214 	assert(size != 0);
2215 	assert(QUANTUM_CEILING(size) <= arena_maxclass);
2216 
2217 	if (size <= bin_maxclass) {
2218 		arena_bin_t *bin;
2219 		arena_run_t *run;
2220 
2221 		/* Small allocation. */
2222 
2223 		if (size < small_min) {
2224 			/* Tiny. */
2225 			size = pow2_ceil(size);
2226 			bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
2227 			    1)))];
2228 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2229 			/*
2230 			 * Bin calculation is always correct, but we may need
2231 			 * to fix size for the purposes of assertions and/or
2232 			 * stats accuracy.
2233 			 */
2234 			if (size < (1 << TINY_MIN_2POW))
2235 				size = (1 << TINY_MIN_2POW);
2236 #endif
2237 		} else if (size <= small_max) {
2238 			/* Quantum-spaced. */
2239 			size = QUANTUM_CEILING(size);
2240 			bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2241 			    - 1];
2242 		} else {
2243 			/* Sub-page. */
2244 			size = pow2_ceil(size);
2245 			bin = &arena->bins[ntbins + nqbins
2246 			    + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
2247 		}
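		/*
		 * For example, assuming the runtime defaults (16 byte
		 * quantum, 512 byte small_max, tiny classes starting at 2
		 * bytes): a 7 byte request rounds to 8 and lands in a tiny
		 * bin, a 100 byte request rounds to 112 and lands in a
		 * quantum-spaced bin, and a 700 byte request rounds to 1024
		 * and lands in a sub-page bin.
		 */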
2248 		assert(size == bin->reg_size);
2249 
2250 		malloc_mutex_lock(&arena->mtx);
2251 		if ((run = bin->runcur) != NULL && run->nfree > 0)
2252 			ret = arena_bin_malloc_easy(arena, bin, run);
2253 		else
2254 			ret = arena_bin_malloc_hard(arena, bin);
2255 
2256 		if (ret == NULL) {
2257 			malloc_mutex_unlock(&arena->mtx);
2258 			return (NULL);
2259 		}
2260 
2261 #ifdef MALLOC_STATS
2262 		bin->stats.nrequests++;
2263 		arena->stats.nmalloc_small++;
2264 		arena->stats.allocated_small += size;
2265 #endif
2266 	} else {
2267 		/* Large allocation. */
2268 		size = PAGE_CEILING(size);
2269 		malloc_mutex_lock(&arena->mtx);
2270 		ret = (void *)arena_run_alloc(arena, size);
2271 		if (ret == NULL) {
2272 			malloc_mutex_unlock(&arena->mtx);
2273 			return (NULL);
2274 		}
2275 #ifdef MALLOC_STATS
2276 		arena->stats.nmalloc_large++;
2277 		arena->stats.allocated_large += size;
2278 #endif
2279 	}
2280 
2281 	malloc_mutex_unlock(&arena->mtx);
2282 
2283 	if (opt_junk)
2284 		memset(ret, 0xa5, size);
2285 	else if (opt_zero)
2286 		memset(ret, 0, size);
2287 	return (ret);
2288 }
2289 
2290 static inline void
2291 arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
2292     unsigned npages)
2293 {
2294 	unsigned i;
2295 
2296 	assert(npages > 0);
2297 
2298 	/*
2299 	 * Modify the map such that arena_run_dalloc() sees the run as
2300 	 * separately allocated.
2301 	 */
2302 	for (i = 0; i < npages; i++) {
2303 		chunk->map[pageind + i].npages = npages;
2304 		chunk->map[pageind + i].pos = i;
2305 	}
2306 	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
2307 	    pagesize_2pow)), npages << pagesize_2pow);
2308 }
2309 
2310 /* Only handles large allocations that require more than page alignment. */
2311 static void *
2312 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
2313 {
2314 	void *ret;
2315 	size_t offset;
2316 	arena_chunk_t *chunk;
2317 	unsigned pageind, i, npages;
2318 
2319 	assert((size & pagesize_mask) == 0);
2320 	assert((alignment & pagesize_mask) == 0);
2321 
2322 	npages = (unsigned)(size >> pagesize_2pow);
2323 
2324 	malloc_mutex_lock(&arena->mtx);
2325 	ret = (void *)arena_run_alloc(arena, alloc_size);
2326 	if (ret == NULL) {
2327 		malloc_mutex_unlock(&arena->mtx);
2328 		return (NULL);
2329 	}
2330 
2331 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
2332 
2333 	offset = (uintptr_t)ret & (alignment - 1);
2334 	assert((offset & pagesize_mask) == 0);
2335 	assert(offset < alloc_size);
2336 	if (offset == 0) {
2337 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2338 		    pagesize_2pow);
2339 
2340 		/* Update the map for the run to be kept. */
2341 		for (i = 0; i < npages; i++) {
2342 			chunk->map[pageind + i].npages = npages;
2343 			assert(chunk->map[pageind + i].pos == i);
2344 		}
2345 
2346 		/* Trim trailing space. */
2347 		arena_palloc_trim(arena, chunk, pageind + npages,
2348 		    (unsigned)((alloc_size - size) >> pagesize_2pow));
2349 	} else {
2350 		size_t leadsize, trailsize;
2351 
2352 		leadsize = alignment - offset;
2353 		ret = (void *)((uintptr_t)ret + leadsize);
2354 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2355 		    pagesize_2pow);
2356 
2357 		/* Update the map for the run to be kept. */
2358 		for (i = 0; i < npages; i++) {
2359 			chunk->map[pageind + i].npages = npages;
2360 			chunk->map[pageind + i].pos = i;
2361 		}
2362 
2363 		/* Trim leading space. */
2364 		arena_palloc_trim(arena, chunk,
2365 		    (unsigned)(pageind - (leadsize >> pagesize_2pow)),
2366 		    (unsigned)(leadsize >> pagesize_2pow));
2367 
2368 		trailsize = alloc_size - leadsize - size;
2369 		if (trailsize != 0) {
2370 			/* Trim trailing space. */
2371 			assert(trailsize < alloc_size);
2372 			arena_palloc_trim(arena, chunk, pageind + npages,
2373 			    (unsigned)(trailsize >> pagesize_2pow));
2374 		}
2375 	}
2376 
2377 #ifdef MALLOC_STATS
2378 	arena->stats.nmalloc_large++;
2379 	arena->stats.allocated_large += size;
2380 #endif
2381 	malloc_mutex_unlock(&arena->mtx);
2382 
2383 	if (opt_junk)
2384 		memset(ret, 0xa5, size);
2385 	else if (opt_zero)
2386 		memset(ret, 0, size);
2387 	return (ret);
2388 }
2389 
2390 /* Return the size of the allocation pointed to by ptr. */
2391 static size_t
2392 arena_salloc(const void *ptr)
2393 {
2394 	size_t ret;
2395 	arena_chunk_t *chunk;
2396 	arena_chunk_map_t *mapelm;
2397 	unsigned pageind;
2398 
2399 	assert(ptr != NULL);
2400 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2401 
2402 	/*
2403 	 * No arena data structures that we query here can change in a way that
2404 	 * affects this function, so we don't need to lock.
2405 	 */
2406 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2407 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2408 	    pagesize_2pow);
2409 	mapelm = &chunk->map[pageind];
2410 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2411 	    pagesize_2pow)) {
2412 		arena_run_t *run;
2413 
2414 		pageind -= mapelm->pos;
2415 
2416 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2417 		    pagesize_2pow));
2418 		assert(run->magic == ARENA_RUN_MAGIC);
2419 		ret = run->bin->reg_size;
2420 	} else
2421 		ret = mapelm->npages << pagesize_2pow;
2422 
2423 	return (ret);
2424 }
2425 
2426 static void *
2427 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2428 {
2429 	void *ret;
2430 
2431 	/* Avoid moving the allocation if the size class would not change. */
2432 	if (size < small_min) {
2433 		if (oldsize < small_min &&
2434 		    ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
2435 		    == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
2436 			goto IN_PLACE;
2437 	} else if (size <= small_max) {
2438 		if (oldsize >= small_min && oldsize <= small_max &&
2439 		    (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2440 		    == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2441 			goto IN_PLACE;
2442 	} else {
2443 		/*
2444 		 * We make no attempt to resize runs here, though it would be
2445 		 * possible to do so.
2446 		 */
2447 		if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
2448 			goto IN_PLACE;
2449 	}
2450 
2451 	/*
2452 	 * If we get here, then size and oldsize are different enough that we
2453 	 * need to use a different size class.  In that case, fall back to
2454 	 * allocating new space and copying.
2455 	 */
2456 	ret = arena_malloc(choose_arena(), size);
2457 	if (ret == NULL)
2458 		return (NULL);
2459 
2460 	/* Junk/zero filling was already done by arena_malloc(). */
2461 	if (size < oldsize)
2462 		memcpy(ret, ptr, size);
2463 	else
2464 		memcpy(ret, ptr, oldsize);
2465 	idalloc(ptr);
2466 	return (ret);
2467 IN_PLACE:
2468 	if (opt_junk && size < oldsize)
2469 		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
2470 	else if (opt_zero && size > oldsize)
2471 		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
2472 	return (ptr);
2473 }
2474 
2475 static void
2476 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2477 {
2478 	unsigned pageind;
2479 	arena_chunk_map_t *mapelm;
2480 	size_t size;
2481 
2482 	assert(arena != NULL);
2483 	assert(arena->magic == ARENA_MAGIC);
2484 	assert(chunk->arena == arena);
2485 	assert(ptr != NULL);
2486 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2487 
2488 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2489 	    pagesize_2pow);
2490 	mapelm = &chunk->map[pageind];
2491 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2492 	    pagesize_2pow)) {
2493 		arena_run_t *run;
2494 		arena_bin_t *bin;
2495 
2496 		/* Small allocation. */
2497 
2498 		pageind -= mapelm->pos;
2499 
2500 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2501 		    pagesize_2pow));
2502 		assert(run->magic == ARENA_RUN_MAGIC);
2503 		bin = run->bin;
2504 		size = bin->reg_size;
2505 
2506 		if (opt_junk)
2507 			memset(ptr, 0x5a, size);
2508 
2509 		malloc_mutex_lock(&arena->mtx);
2510 		arena_run_reg_dalloc(run, bin, ptr, size);
2511 		run->nfree++;
2512 
2513 		if (run->nfree == bin->nregs) {
2514 			/* Deallocate run. */
2515 			if (run == bin->runcur)
2516 				bin->runcur = NULL;
2517 			else if (bin->nregs != 1) {
2518 				/*
2519 				 * This block's conditional is necessary because
2520 				 * if the run only contains one region, then it
2521 				 * never gets inserted into the non-full runs
2522 				 * tree.
2523 				 */
2524 				/* LINTED */
2525 				RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2526 			}
2527 #ifdef MALLOC_DEBUG
2528 			run->magic = 0;
2529 #endif
2530 			arena_run_dalloc(arena, run, bin->run_size);
2531 #ifdef MALLOC_STATS
2532 			bin->stats.curruns--;
2533 #endif
2534 		} else if (run->nfree == 1 && run != bin->runcur) {
2535 			/*
2536 			 * Make sure that bin->runcur always refers to the
2537 			 * lowest non-full run, if one exists.
2538 			 */
2539 			if (bin->runcur == NULL)
2540 				bin->runcur = run;
2541 			else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
2542 				/* Switch runcur. */
2543 				if (bin->runcur->nfree > 0) {
2544 					/* Insert runcur. */
2545 					/* LINTED */
2546 					RB_INSERT(arena_run_tree_s, &bin->runs,
2547 					    bin->runcur);
2548 				}
2549 				bin->runcur = run;
2550 			} else {
2551 				/* LINTED */
2552 				RB_INSERT(arena_run_tree_s, &bin->runs, run);
2553 			}
2554 		}
2555 #ifdef MALLOC_STATS
2556 		arena->stats.allocated_small -= size;
2557 		arena->stats.ndalloc_small++;
2558 #endif
2559 	} else {
2560 		/* Large allocation. */
2561 
2562 		size = mapelm->npages << pagesize_2pow;
2563 		assert((((uintptr_t)ptr) & pagesize_mask) == 0);
2564 
2565 		if (opt_junk)
2566 			memset(ptr, 0x5a, size);
2567 
2568 		malloc_mutex_lock(&arena->mtx);
2569 		arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2570 #ifdef MALLOC_STATS
2571 		arena->stats.allocated_large -= size;
2572 		arena->stats.ndalloc_large++;
2573 #endif
2574 	}
2575 
2576 	malloc_mutex_unlock(&arena->mtx);
2577 }
2578 
2579 static bool
2580 arena_new(arena_t *arena)
2581 {
2582 	unsigned i;
2583 	arena_bin_t *bin;
2584 	size_t prev_run_size;
2585 
2586 	malloc_mutex_init(&arena->mtx);
2587 
2588 #ifdef MALLOC_STATS
2589 	memset(&arena->stats, 0, sizeof(arena_stats_t));
2590 #endif
2591 
2592 	/* Initialize chunks. */
2593 	RB_INIT(&arena->chunks);
2594 	arena->spare = NULL;
2595 
2596 	/* Initialize bins. */
2597 	prev_run_size = pagesize;
2598 
2599 	/* (2^n)-spaced tiny bins. */
2600 	for (i = 0; i < ntbins; i++) {
2601 		bin = &arena->bins[i];
2602 		bin->runcur = NULL;
2603 		RB_INIT(&bin->runs);
2604 
2605 		bin->reg_size = (1 << (TINY_MIN_2POW + i));
2606 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2607 
2608 #ifdef MALLOC_STATS
2609 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2610 #endif
2611 	}
2612 
2613 	/* Quantum-spaced bins. */
2614 	for (; i < ntbins + nqbins; i++) {
2615 		bin = &arena->bins[i];
2616 		bin->runcur = NULL;
2617 		RB_INIT(&bin->runs);
2618 
2619 		bin->reg_size = quantum * (i - ntbins + 1);
2620 /*
2621 		pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2622 */
2623 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2624 
2625 #ifdef MALLOC_STATS
2626 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2627 #endif
2628 	}
2629 
2630 	/* (2^n)-spaced sub-page bins. */
2631 	for (; i < ntbins + nqbins + nsbins; i++) {
2632 		bin = &arena->bins[i];
2633 		bin->runcur = NULL;
2634 		RB_INIT(&bin->runs);
2635 
2636 		bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2637 
2638 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2639 
2640 #ifdef MALLOC_STATS
2641 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2642 #endif
2643 	}
2644 
2645 #ifdef MALLOC_DEBUG
2646 	arena->magic = ARENA_MAGIC;
2647 #endif
2648 
2649 	return (false);
2650 }
2651 
2652 /* Create a new arena and insert it into the arenas array at index ind. */
2653 static arena_t *
2654 arenas_extend(unsigned ind)
2655 {
2656 	arena_t *ret;
2657 
2658 	/* Allocate enough space for trailing bins. */
2659 	ret = (arena_t *)base_alloc(sizeof(arena_t)
2660 	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2661 	if (ret != NULL && arena_new(ret) == false) {
2662 		arenas[ind] = ret;
2663 		return (ret);
2664 	}
2665 	/* Only reached if there is an OOM error. */
2666 
2667 	/*
2668 	 * OOM here is quite inconvenient to propagate, since dealing with it
2669 	 * would require a check for failure in the fast path.  Instead, punt
2670 	 * by using arenas[0].  In practice, this is an extremely unlikely
2671 	 * failure.
2672 	 */
2673 	_malloc_message(_getprogname(),
2674 	    ": (malloc) Error initializing arena\n", "", "");
2675 	if (opt_abort)
2676 		abort();
2677 
2678 	return (arenas[0]);
2679 }
2680 
2681 /*
2682  * End arena.
2683  */
2684 /******************************************************************************/
2685 /*
2686  * Begin general internal functions.
2687  */
2688 
2689 static void *
2690 huge_malloc(size_t size)
2691 {
2692 	void *ret;
2693 	size_t csize;
2694 	chunk_node_t *node;
2695 
2696 	/* Allocate one or more contiguous chunks for this request. */
2697 
2698 	csize = CHUNK_CEILING(size);
2699 	if (csize == 0) {
2700 		/* size is large enough to cause size_t wrap-around. */
2701 		return (NULL);
2702 	}
2703 
2704 	/* Allocate a chunk node with which to track the chunk. */
2705 	node = base_chunk_node_alloc();
2706 	if (node == NULL)
2707 		return (NULL);
2708 
2709 	ret = chunk_alloc(csize);
2710 	if (ret == NULL) {
2711 		base_chunk_node_dealloc(node);
2712 		return (NULL);
2713 	}
2714 
2715 	/* Insert node into huge. */
2716 	node->chunk = ret;
2717 	node->size = csize;
2718 
2719 	malloc_mutex_lock(&chunks_mtx);
2720 	RB_INSERT(chunk_tree_s, &huge, node);
2721 #ifdef MALLOC_STATS
2722 	huge_nmalloc++;
2723 	huge_allocated += csize;
2724 #endif
2725 	malloc_mutex_unlock(&chunks_mtx);
2726 
2727 	if (opt_junk)
2728 		memset(ret, 0xa5, csize);
2729 	else if (opt_zero)
2730 		memset(ret, 0, csize);
2731 
2732 	return (ret);
2733 }
2734 
2735 /* Only handles large allocations that require more than chunk alignment. */
2736 static void *
2737 huge_palloc(size_t alignment, size_t size)
2738 {
2739 	void *ret;
2740 	size_t alloc_size, chunk_size, offset;
2741 	chunk_node_t *node;
2742 
2743 	/*
2744 	 * This allocation requires alignment that is even larger than chunk
2745 	 * alignment.  This means that huge_malloc() isn't good enough.
2746 	 *
2747 	 * Allocate almost twice as many chunks as are demanded by the size or
2748 	 * alignment, in order to ensure the alignment can be achieved, then
2749 	 * unmap leading and trailing chunks.
2750 	 */
2751 	assert(alignment >= chunksize);
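	/*
	 * For example (a sketch, assuming the default 1 MB chunk size): a
	 * 1 MB request with 4 MB alignment allocates (4 MB << 1) - 1 MB ==
	 * 7 MB, which is guaranteed to contain a 4 MB-aligned chunk; the
	 * leading and trailing excess is unmapped below.
	 */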
2752 
2753 	chunk_size = CHUNK_CEILING(size);
2754 
2755 	if (size >= alignment)
2756 		alloc_size = chunk_size + alignment - chunksize;
2757 	else
2758 		alloc_size = (alignment << 1) - chunksize;
2759 
2760 	/* Allocate a chunk node with which to track the chunk. */
2761 	node = base_chunk_node_alloc();
2762 	if (node == NULL)
2763 		return (NULL);
2764 
2765 	ret = chunk_alloc(alloc_size);
2766 	if (ret == NULL) {
2767 		base_chunk_node_dealloc(node);
2768 		return (NULL);
2769 	}
2770 
2771 	offset = (uintptr_t)ret & (alignment - 1);
2772 	assert((offset & chunksize_mask) == 0);
2773 	assert(offset < alloc_size);
2774 	if (offset == 0) {
2775 		/* Trim trailing space. */
2776 		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
2777 		    - chunk_size);
2778 	} else {
2779 		size_t trailsize;
2780 
2781 		/* Trim leading space. */
2782 		chunk_dealloc(ret, alignment - offset);
2783 
2784 		ret = (void *)((uintptr_t)ret + (alignment - offset));
2785 
2786 		trailsize = alloc_size - (alignment - offset) - chunk_size;
2787 		if (trailsize != 0) {
2788 		    /* Trim trailing space. */
2789 		    assert(trailsize < alloc_size);
2790 		    chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
2791 			trailsize);
2792 		}
2793 	}
2794 
2795 	/* Insert node into huge. */
2796 	node->chunk = ret;
2797 	node->size = chunk_size;
2798 
2799 	malloc_mutex_lock(&chunks_mtx);
2800 	RB_INSERT(chunk_tree_s, &huge, node);
2801 #ifdef MALLOC_STATS
2802 	huge_nmalloc++;
2803 	huge_allocated += chunk_size;
2804 #endif
2805 	malloc_mutex_unlock(&chunks_mtx);
2806 
2807 	if (opt_junk)
2808 		memset(ret, 0xa5, chunk_size);
2809 	else if (opt_zero)
2810 		memset(ret, 0, chunk_size);
2811 
2812 	return (ret);
2813 }
2814 
2815 static void *
2816 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2817 {
2818 	void *ret;
2819 
2820 	/* Avoid moving the allocation if the size class would not change. */
2821 	if (oldsize > arena_maxclass &&
2822 	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
2823 		if (opt_junk && size < oldsize) {
2824 			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
2825 			    - size);
2826 		} else if (opt_zero && size > oldsize) {
2827 			memset((void *)((uintptr_t)ptr + oldsize), 0, size
2828 			    - oldsize);
2829 		}
2830 		return (ptr);
2831 	}
2832 
2833 	if (CHUNK_ADDR2BASE(ptr) == ptr
2834 #ifdef USE_BRK
2835 	    && ((uintptr_t)ptr < (uintptr_t)brk_base
2836 	    || (uintptr_t)ptr >= (uintptr_t)brk_max)
2837 #endif
2838 	    ) {
2839 		chunk_node_t *node, key;
2840 		void *newptr;
2841 		size_t oldcsize;
2842 		size_t newcsize;
2843 
2844 		newcsize = CHUNK_CEILING(size);
2845 		oldcsize = CHUNK_CEILING(oldsize);
2846 		assert(oldcsize != newcsize);
2847 		if (newcsize == 0) {
2848 			/* size_t wrap-around */
2849 			return (NULL);
2850 		}
2851 		newptr = mremap(ptr, oldcsize, NULL, newcsize,
2852 		    MAP_ALIGNED(chunksize_2pow));
2853 		if (newptr != MAP_FAILED) {
2854 			assert(CHUNK_ADDR2BASE(newptr) == newptr);
2855 
2856 			/* update tree */
2857 			malloc_mutex_lock(&chunks_mtx);
2858 			key.chunk = __DECONST(void *, ptr);
2859 			/* LINTED */
2860 			node = RB_FIND(chunk_tree_s, &huge, &key);
2861 			assert(node != NULL);
2862 			assert(node->chunk == ptr);
2863 			assert(node->size == oldcsize);
2864 			node->size = newcsize;
2865 			if (ptr != newptr) {
2866 				RB_REMOVE(chunk_tree_s, &huge, node);
2867 				node->chunk = newptr;
2868 				RB_INSERT(chunk_tree_s, &huge, node);
2869 			}
2870 #ifdef MALLOC_STATS
2871 			huge_nralloc++;
2872 			huge_allocated += newcsize - oldcsize;
2873 			if (newcsize > oldcsize) {
2874 				stats_chunks.curchunks +=
2875 				    (newcsize - oldcsize) / chunksize;
2876 				if (stats_chunks.curchunks >
2877 				    stats_chunks.highchunks)
2878 					stats_chunks.highchunks =
2879 					    stats_chunks.curchunks;
2880 			} else {
2881 				stats_chunks.curchunks -=
2882 				    (oldcsize - newcsize) / chunksize;
2883 			}
2884 #endif
2885 			malloc_mutex_unlock(&chunks_mtx);
2886 
2887 			if (opt_junk && size < oldsize) {
2888 				memset((void *)((uintptr_t)newptr + size), 0x5a,
2889 				    newcsize - size);
2890 			} else if (opt_zero && size > oldsize) {
2891 				memset((void *)((uintptr_t)newptr + oldsize), 0,
2892 				    size - oldsize);
2893 			}
2894 			return (newptr);
2895 		}
2896 	}
2897 
2898 	/*
2899 	 * If we get here, then size and oldsize are different enough that we
2900 	 * need to use a different size class.  In that case, fall back to
2901 	 * allocating new space and copying.
2902 	 */
2903 	ret = huge_malloc(size);
2904 	if (ret == NULL)
2905 		return (NULL);
2906 
2907 	if (CHUNK_ADDR2BASE(ptr) == ptr) {
2908 		/* The old allocation is a chunk. */
2909 		if (size < oldsize)
2910 			memcpy(ret, ptr, size);
2911 		else
2912 			memcpy(ret, ptr, oldsize);
2913 	} else {
2914 		/* The old allocation is a region. */
2915 		assert(oldsize < size);
2916 		memcpy(ret, ptr, oldsize);
2917 	}
2918 	idalloc(ptr);
2919 	return (ret);
2920 }
2921 
2922 static void
2923 huge_dalloc(void *ptr)
2924 {
2925 	chunk_node_t key;
2926 	chunk_node_t *node;
2927 
2928 	malloc_mutex_lock(&chunks_mtx);
2929 
2930 	/* Extract from tree of huge allocations. */
2931 	key.chunk = ptr;
2932 	/* LINTED */
2933 	node = RB_FIND(chunk_tree_s, &huge, &key);
2934 	assert(node != NULL);
2935 	assert(node->chunk == ptr);
2936 	/* LINTED */
2937 	RB_REMOVE(chunk_tree_s, &huge, node);
2938 
2939 #ifdef MALLOC_STATS
2940 	huge_ndalloc++;
2941 	huge_allocated -= node->size;
2942 #endif
2943 
2944 	malloc_mutex_unlock(&chunks_mtx);
2945 
2946 	/* Unmap chunk. */
2947 #ifdef USE_BRK
2948 	if (opt_junk)
2949 		memset(node->chunk, 0x5a, node->size);
2950 #endif
2951 	chunk_dealloc(node->chunk, node->size);
2952 
2953 	base_chunk_node_dealloc(node);
2954 }
2955 
2956 static void *
2957 imalloc(size_t size)
2958 {
2959 	void *ret;
2960 
2961 	assert(size != 0);
2962 
2963 	if (size <= arena_maxclass)
2964 		ret = arena_malloc(choose_arena(), size);
2965 	else
2966 		ret = huge_malloc(size);
2967 
2968 	return (ret);
2969 }
2970 
2971 static void *
2972 ipalloc(size_t alignment, size_t size)
2973 {
2974 	void *ret;
2975 	size_t ceil_size;
2976 
2977 	/*
2978 	 * Round size up to the nearest multiple of alignment.
2979 	 *
2980 	 * This done, we can take advantage of the fact that for each small
2981 	 * size class, every object is aligned at the smallest power of two
2982 	 * that is non-zero in the base two representation of the size.  For
2983 	 * example:
2984 	 *
2985 	 *   Size |   Base 2 | Minimum alignment
2986 	 *   -----+----------+------------------
2987 	 *     96 |  1100000 |  32
2988 	 *    144 | 10100000 |  32
2989 	 *    192 | 11000000 |  64
2990 	 *
2991 	 * Depending on runtime settings, it is possible that arena_malloc()
2992 	 * will further round up to a power of two, but that never causes
2993 	 * correctness issues.
2994 	 */
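	/*
	 * For example, size == 100 with alignment == 32 gives ceil_size ==
	 * (100 + 31) & -32 == 128, and every object in the 128 byte size
	 * class is 128-byte aligned, which more than satisfies the request.
	 */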
2995 	ceil_size = (size + (alignment - 1)) & (-alignment);
2996 	/*
2997 	 * (ceil_size < size) protects against the combination of maximal
2998 	 * alignment and size greater than maximal alignment.
2999 	 */
3000 	if (ceil_size < size) {
3001 		/* size_t overflow. */
3002 		return (NULL);
3003 	}
3004 
3005 	if (ceil_size <= pagesize || (alignment <= pagesize
3006 	    && ceil_size <= arena_maxclass))
3007 		ret = arena_malloc(choose_arena(), ceil_size);
3008 	else {
3009 		size_t run_size;
3010 
3011 		/*
3012 		 * We can't achieve sub-page alignment, so round up alignment
3013 		 * permanently; it makes later calculations simpler.
3014 		 */
3015 		alignment = PAGE_CEILING(alignment);
3016 		ceil_size = PAGE_CEILING(size);
3017 		/*
3018 		 * (ceil_size < size) protects against very large sizes within
3019 		 * pagesize of SIZE_T_MAX.
3020 		 *
3021 		 * (ceil_size + alignment < ceil_size) protects against the
3022 		 * combination of maximal alignment and ceil_size large enough
3023 		 * to cause overflow.  This is similar to the first overflow
3024 		 * check above, but it needs to be repeated due to the new
3025 		 * ceil_size value, which may now be *equal* to maximal
3026 		 * alignment, whereas before we only detected overflow if the
3027 		 * original size was *greater* than maximal alignment.
3028 		 */
3029 		if (ceil_size < size || ceil_size + alignment < ceil_size) {
3030 			/* size_t overflow. */
3031 			return (NULL);
3032 		}
3033 
3034 		/*
3035 		 * Calculate the size of the over-size run that arena_palloc()
3036 		 * would need to allocate in order to guarantee the alignment.
3037 		 */
3038 		if (ceil_size >= alignment)
3039 			run_size = ceil_size + alignment - pagesize;
3040 		else {
3041 			/*
3042 			 * It is possible that (alignment << 1) will cause
3043 			 * overflow, but it doesn't matter because we also
3044 			 * subtract pagesize, which in the case of overflow
3045 			 * leaves us with a very large run_size.  That causes
3046 			 * the first conditional below to fail, which means
3047 			 * that the bogus run_size value never gets used for
3048 			 * anything important.
3049 			 */
3050 			run_size = (alignment << 1) - pagesize;
3051 		}
3052 
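		/*
		 * For instance, with a 4 kB page, a 16 kB alignment and a
		 * 16 kB ceil_size, run_size == 16 kB + 16 kB - 4 kB == 28 kB:
		 * any page-aligned 28 kB run contains a 16 kB-aligned address
		 * followed by at least 16 kB of space.
		 */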
3053 		if (run_size <= arena_maxclass) {
3054 			ret = arena_palloc(choose_arena(), alignment, ceil_size,
3055 			    run_size);
3056 		} else if (alignment <= chunksize)
3057 			ret = huge_malloc(ceil_size);
3058 		else
3059 			ret = huge_palloc(alignment, ceil_size);
3060 	}
3061 
3062 	assert(((uintptr_t)ret & (alignment - 1)) == 0);
3063 	return (ret);
3064 }
3065 
3066 static void *
3067 icalloc(size_t size)
3068 {
3069 	void *ret;
3070 
3071 	if (size <= arena_maxclass) {
3072 		ret = arena_malloc(choose_arena(), size);
3073 		if (ret == NULL)
3074 			return (NULL);
3075 		memset(ret, 0, size);
3076 	} else {
3077 		/*
3078 		 * The virtual memory system provides zero-filled pages, so
3079 		 * there is no need to do so manually, unless opt_junk is
3080 		 * enabled, in which case huge_malloc() fills huge allocations
3081 		 * with junk.
3082 		 */
3083 		ret = huge_malloc(size);
3084 		if (ret == NULL)
3085 			return (NULL);
3086 
3087 		if (opt_junk)
3088 			memset(ret, 0, size);
3089 #ifdef USE_BRK
3090 		else if ((uintptr_t)ret >= (uintptr_t)brk_base
3091 		    && (uintptr_t)ret < (uintptr_t)brk_max) {
3092 			/*
3093 			 * This may be a re-used brk chunk.  Therefore, zero
3094 			 * the memory.
3095 			 */
3096 			memset(ret, 0, size);
3097 		}
3098 #endif
3099 	}
3100 
3101 	return (ret);
3102 }
3103 
3104 static size_t
3105 isalloc(const void *ptr)
3106 {
3107 	size_t ret;
3108 	arena_chunk_t *chunk;
3109 
3110 	assert(ptr != NULL);
3111 
3112 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3113 	if (chunk != ptr) {
3114 		/* Region. */
3115 		assert(chunk->arena->magic == ARENA_MAGIC);
3116 
3117 		ret = arena_salloc(ptr);
3118 	} else {
3119 		chunk_node_t *node, key;
3120 
3121 		/* Chunk (huge allocation). */
3122 
3123 		malloc_mutex_lock(&chunks_mtx);
3124 
3125 		/* Extract from tree of huge allocations. */
3126 		key.chunk = __DECONST(void *, ptr);
3127 		/* LINTED */
3128 		node = RB_FIND(chunk_tree_s, &huge, &key);
3129 		assert(node != NULL);
3130 
3131 		ret = node->size;
3132 
3133 		malloc_mutex_unlock(&chunks_mtx);
3134 	}
3135 
3136 	return (ret);
3137 }
3138 
3139 static void *
3140 iralloc(void *ptr, size_t size)
3141 {
3142 	void *ret;
3143 	size_t oldsize;
3144 
3145 	assert(ptr != NULL);
3146 	assert(size != 0);
3147 
3148 	oldsize = isalloc(ptr);
3149 
3150 	if (size <= arena_maxclass)
3151 		ret = arena_ralloc(ptr, size, oldsize);
3152 	else
3153 		ret = huge_ralloc(ptr, size, oldsize);
3154 
3155 	return (ret);
3156 }
3157 
3158 static void
3159 idalloc(void *ptr)
3160 {
3161 	arena_chunk_t *chunk;
3162 
3163 	assert(ptr != NULL);
3164 
3165 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3166 	if (chunk != ptr) {
3167 		/* Region. */
3168 		arena_dalloc(chunk->arena, chunk, ptr);
3169 	} else
3170 		huge_dalloc(ptr);
3171 }
3172 
3173 static void
3174 malloc_print_stats(void)
3175 {
3176 
3177 	if (opt_print_stats) {
3178 		char s[UMAX2S_BUFSIZE];
3179 		_malloc_message("___ Begin malloc statistics ___\n", "", "",
3180 		    "");
3181 		_malloc_message("Assertions ",
3182 #ifdef NDEBUG
3183 		    "disabled",
3184 #else
3185 		    "enabled",
3186 #endif
3187 		    "\n", "");
3188 		_malloc_message("Boolean MALLOC_OPTIONS: ",
3189 		    opt_abort ? "A" : "a",
3190 		    opt_junk ? "J" : "j",
3191 		    opt_hint ? "H" : "h");
3192 		_malloc_message(opt_utrace ? "PU" : "Pu",
3193 		    opt_sysv ? "V" : "v",
3194 		    opt_xmalloc ? "X" : "x",
3195 		    opt_zero ? "Z\n" : "z\n");
3196 
3197 		_malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
3198 		_malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
3199 		_malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
3200 		    "\n", "");
3201 		_malloc_message("Quantum size: ", umax2s(quantum, s), "\n", "");
3202 		_malloc_message("Max small size: ", umax2s(small_max, s), "\n",
3203 		    "");
3204 
3205 		_malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
3206 		_malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
3207 
3208 #ifdef MALLOC_STATS
3209 		{
3210 			size_t allocated, mapped;
3211 			unsigned i;
3212 			arena_t *arena;
3213 
3214 			/* Calculate and print allocated/mapped stats. */
3215 
3216 			/* arenas. */
3217 			for (i = 0, allocated = 0; i < narenas; i++) {
3218 				if (arenas[i] != NULL) {
3219 					malloc_mutex_lock(&arenas[i]->mtx);
3220 					allocated +=
3221 					    arenas[i]->stats.allocated_small;
3222 					allocated +=
3223 					    arenas[i]->stats.allocated_large;
3224 					malloc_mutex_unlock(&arenas[i]->mtx);
3225 				}
3226 			}
3227 
3228 			/* huge/base. */
3229 			malloc_mutex_lock(&chunks_mtx);
3230 			allocated += huge_allocated;
3231 			mapped = stats_chunks.curchunks * chunksize;
3232 			malloc_mutex_unlock(&chunks_mtx);
3233 
3234 			malloc_mutex_lock(&base_mtx);
3235 			mapped += base_mapped;
3236 			malloc_mutex_unlock(&base_mtx);
3237 
3238 			malloc_printf("Allocated: %zu, mapped: %zu\n",
3239 			    allocated, mapped);
3240 
3241 			/* Print chunk stats. */
3242 			{
3243 				chunk_stats_t chunks_stats;
3244 
3245 				malloc_mutex_lock(&chunks_mtx);
3246 				chunks_stats = stats_chunks;
3247 				malloc_mutex_unlock(&chunks_mtx);
3248 
3249 				malloc_printf("chunks: nchunks   "
3250 				    "highchunks    curchunks\n");
3251 				malloc_printf("  %13llu%13lu%13lu\n",
3252 				    chunks_stats.nchunks,
3253 				    chunks_stats.highchunks,
3254 				    chunks_stats.curchunks);
3255 			}
3256 
3257 			/* Print huge allocation stats. */
3258 			malloc_printf(
3259 			    "huge: nmalloc      ndalloc      "
3260 			    "nralloc    allocated\n");
3261 			malloc_printf(" %12llu %12llu %12llu %12zu\n",
3262 			    huge_nmalloc, huge_ndalloc, huge_nralloc,
3263 			    huge_allocated);
3264 
3265 			/* Print stats for each arena. */
3266 			for (i = 0; i < narenas; i++) {
3267 				arena = arenas[i];
3268 				if (arena != NULL) {
3269 					malloc_printf(
3270 					    "\narenas[%u] @ %p\n", i, arena);
3271 					malloc_mutex_lock(&arena->mtx);
3272 					stats_print(arena);
3273 					malloc_mutex_unlock(&arena->mtx);
3274 				}
3275 			}
3276 		}
3277 #endif /* #ifdef MALLOC_STATS */
3278 		_malloc_message("--- End malloc statistics ---\n", "", "", "");
3279 	}
3280 }
3281 
3282 /*
3283  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
3284  * implementation has to take pains to avoid infinite recursion during
3285  * initialization.
3286  */
3287 static inline bool
3288 malloc_init(void)
3289 {
3290 
3291 	if (malloc_initialized == false)
3292 		return (malloc_init_hard());
3293 
3294 	return (false);
3295 }
3296 
3297 static bool
3298 malloc_init_hard(void)
3299 {
3300 	unsigned i, j;
3301 	ssize_t linklen;
3302 	char buf[PATH_MAX + 1];
3303 	const char *opts = "";
3304 
3305 	malloc_mutex_lock(&init_lock);
3306 	if (malloc_initialized) {
3307 		/*
3308 		 * Another thread initialized the allocator before this one
3309 		 * acquired init_lock.
3310 		 */
3311 		malloc_mutex_unlock(&init_lock);
3312 		return (false);
3313 	}
3314 
3315 	/* Get number of CPUs. */
3316 	{
3317 		int mib[2];
3318 		size_t len;
3319 
3320 		mib[0] = CTL_HW;
3321 		mib[1] = HW_NCPU;
3322 		len = sizeof(ncpus);
3323 		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3324 			/* Error. */
3325 			ncpus = 1;
3326 		}
3327 	}
3328 
3329 	/* Get page size. */
3330 	{
3331 		long result;
3332 
3333 		result = sysconf(_SC_PAGESIZE);
3334 		assert(result != -1);
3335 		pagesize = (unsigned) result;
3336 
3337 		/*
3338 		 * We assume that pagesize is a power of 2 when calculating
3339 		 * pagesize_mask and pagesize_2pow.
3340 		 */
3341 		assert(((result - 1) & result) == 0);
3342 		pagesize_mask = result - 1;
3343 		pagesize_2pow = ffs((int)result) - 1;
3344 	}
3345 
3346 	for (i = 0; i < 3; i++) {
3347 		/* Get runtime configuration. */
3348 		switch (i) {
3349 		case 0:
3350 			if ((linklen = readlink("/etc/malloc.conf", buf,
3351 						sizeof(buf) - 1)) != -1) {
3352 				/*
3353 				 * Use the contents of the "/etc/malloc.conf"
3354 				 * symbolic link's name.
3355 				 */
3356 				buf[linklen] = '\0';
3357 				opts = buf;
3358 			} else {
3359 				/* No configuration specified. */
3360 				buf[0] = '\0';
3361 				opts = buf;
3362 			}
3363 			break;
3364 		case 1:
3365 			if (issetugid() == 0 && (opts =
3366 			    getenv("MALLOC_OPTIONS")) != NULL) {
3367 				/*
3368 				 * Do nothing; opts is already initialized to
3369 				 * the value of the MALLOC_OPTIONS environment
3370 				 * variable.
3371 				 */
3372 			} else {
3373 				/* No configuration specified. */
3374 				buf[0] = '\0';
3375 				opts = buf;
3376 			}
3377 			break;
3378 		case 2:
3379 			if (_malloc_options != NULL) {
3380 			    /*
3381 			     * Use options that were compiled into the program.
3382 			     */
3383 			    opts = _malloc_options;
3384 			} else {
3385 				/* No configuration specified. */
3386 				buf[0] = '\0';
3387 				opts = buf;
3388 			}
3389 			break;
3390 		default:
3391 			/* NOTREACHED */
3392 			/* LINTED */
3393 			assert(false);
3394 		}
3395 
3396 		for (j = 0; opts[j] != '\0'; j++) {
3397 			switch (opts[j]) {
3398 			case 'a':
3399 				opt_abort = false;
3400 				break;
3401 			case 'A':
3402 				opt_abort = true;
3403 				break;
3404 			case 'h':
3405 				opt_hint = false;
3406 				break;
3407 			case 'H':
3408 				opt_hint = true;
3409 				break;
3410 			case 'j':
3411 				opt_junk = false;
3412 				break;
3413 			case 'J':
3414 				opt_junk = true;
3415 				break;
3416 			case 'k':
3417 				/*
3418 				 * Chunks always require at least one header
3419 				 * page, so chunks can never be smaller than
3420 				 * two pages.
3421 				 */
3422 				if (opt_chunk_2pow > pagesize_2pow + 1)
3423 					opt_chunk_2pow--;
3424 				break;
3425 			case 'K':
3426 				/*
3427 				 * There must be fewer pages in a chunk than
3428 				 * can be recorded by the pos field of
3429 				 * arena_chunk_map_t, in order to make POS_FREE
3430 				 * special.
3431 				 */
3432 				if (opt_chunk_2pow - pagesize_2pow
3433 				    < (sizeof(uint32_t) << 3) - 1)
3434 					opt_chunk_2pow++;
3435 				break;
3436 			case 'n':
3437 				opt_narenas_lshift--;
3438 				break;
3439 			case 'N':
3440 				opt_narenas_lshift++;
3441 				break;
3442 			case 'p':
3443 				opt_print_stats = false;
3444 				break;
3445 			case 'P':
3446 				opt_print_stats = true;
3447 				break;
3448 			case 'q':
3449 				if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3450 					opt_quantum_2pow--;
3451 				break;
3452 			case 'Q':
3453 				if (opt_quantum_2pow < pagesize_2pow - 1)
3454 					opt_quantum_2pow++;
3455 				break;
3456 			case 's':
3457 				if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3458 					opt_small_max_2pow--;
3459 				break;
3460 			case 'S':
3461 				if (opt_small_max_2pow < pagesize_2pow - 1)
3462 					opt_small_max_2pow++;
3463 				break;
3464 			case 'u':
3465 				opt_utrace = false;
3466 				break;
3467 			case 'U':
3468 				opt_utrace = true;
3469 				break;
3470 			case 'v':
3471 				opt_sysv = false;
3472 				break;
3473 			case 'V':
3474 				opt_sysv = true;
3475 				break;
3476 			case 'x':
3477 				opt_xmalloc = false;
3478 				break;
3479 			case 'X':
3480 				opt_xmalloc = true;
3481 				break;
3482 			case 'z':
3483 				opt_zero = false;
3484 				break;
3485 			case 'Z':
3486 				opt_zero = true;
3487 				break;
3488 			default: {
3489 				char cbuf[2];
3490 
3491 				cbuf[0] = opts[j];
3492 				cbuf[1] = '\0';
3493 				_malloc_message(_getprogname(),
3494 				    ": (malloc) Unsupported character in "
3495 				    "malloc options: '", cbuf, "'\n");
3496 			}
3497 			}
3498 		}
3499 	}
3500 
3501 	/* Take care to call atexit() only once. */
3502 	if (opt_print_stats) {
3503 		/* Print statistics at exit. */
3504 		atexit(malloc_print_stats);
3505 	}
3506 
3507 	/* Set variables according to the value of opt_small_max_2pow. */
3508 	if (opt_small_max_2pow < opt_quantum_2pow)
3509 		opt_small_max_2pow = opt_quantum_2pow;
3510 	small_max = (1 << opt_small_max_2pow);
3511 
3512 	/* Set bin-related variables. */
3513 	bin_maxclass = (pagesize >> 1);
3514 	assert(opt_quantum_2pow >= TINY_MIN_2POW);
3515 	ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW);
3516 	assert(ntbins <= opt_quantum_2pow);
3517 	nqbins = (unsigned)(small_max >> opt_quantum_2pow);
3518 	nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1);
3519 
3520 	/* Set variables according to the value of opt_quantum_2pow. */
3521 	quantum = (1 << opt_quantum_2pow);
3522 	quantum_mask = quantum - 1;
3523 	if (ntbins > 0)
3524 		small_min = (quantum >> 1) + 1;
3525 	else
3526 		small_min = 1;
3527 	assert(small_min <= quantum);
3528 
3529 	/* Set variables according to the value of opt_chunk_2pow. */
3530 	chunksize = (1LU << opt_chunk_2pow);
3531 	chunksize_mask = chunksize - 1;
3532 	chunksize_2pow = (unsigned)opt_chunk_2pow;
3533 	chunk_npages = (unsigned)(chunksize >> pagesize_2pow);
3534 	{
3535 		unsigned header_size;
3536 
3537 		header_size = (unsigned)(sizeof(arena_chunk_t) +
3538 		    (sizeof(arena_chunk_map_t) * (chunk_npages - 1)));
3539 		arena_chunk_header_npages = (header_size >> pagesize_2pow);
3540 		if ((header_size & pagesize_mask) != 0)
3541 			arena_chunk_header_npages++;
3542 	}
3543 	arena_maxclass = chunksize - (arena_chunk_header_npages <<
3544 	    pagesize_2pow);
3545 
3546 	UTRACE(0, 0, 0);
3547 
3548 #ifdef MALLOC_STATS
3549 	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3550 #endif
3551 
3552 	/* Various sanity checks on the runtime configuration. */
3553 	assert(quantum >= sizeof(void *));
3554 	assert(quantum <= pagesize);
3555 	assert(chunksize >= pagesize);
3556 	assert(quantum * 4 <= chunksize);
3557 
3558 	/* Initialize chunks data. */
3559 	malloc_mutex_init(&chunks_mtx);
3560 	RB_INIT(&huge);
3561 #ifdef USE_BRK
3562 	malloc_mutex_init(&brk_mtx);
3563 	brk_base = sbrk(0);
3564 	brk_prev = brk_base;
3565 	brk_max = brk_base;
3566 #endif
3567 #ifdef MALLOC_STATS
3568 	huge_nmalloc = 0;
3569 	huge_ndalloc = 0;
3570 	huge_nralloc = 0;
3571 	huge_allocated = 0;
3572 #endif
3573 	RB_INIT(&old_chunks);
3574 
3575 	/* Initialize base allocation data structures. */
3576 #ifdef MALLOC_STATS
3577 	base_mapped = 0;
3578 #endif
3579 #ifdef USE_BRK
3580 	/*
3581 	 * Allocate a base chunk here, since it doesn't actually have to be
3582 	 * chunk-aligned.  Doing this before allocating any other chunks allows
3583 	 * the use of space that would otherwise be wasted.
3584 	 */
3585 	base_pages_alloc(0);
3586 #endif
3587 	base_chunk_nodes = NULL;
3588 	malloc_mutex_init(&base_mtx);
3589 
3590 	if (ncpus > 1) {
3591 		/*
3592 		 * For SMP systems, create four times as many arenas as there
3593 		 * are CPUs by default.
3594 		 */
3595 		opt_narenas_lshift += 2;
3596 	}
3597 
3598 #ifdef NO_TLS
3599 	/* Initialize arena key. */
3600 	(void)thr_keycreate(&arenas_map_key, NULL);
3601 #endif
3602 
3603 	/* Determine how many arenas to use. */
3604 	narenas = ncpus;
3605 	if (opt_narenas_lshift > 0) {
3606 		if ((narenas << opt_narenas_lshift) > narenas)
3607 			narenas <<= opt_narenas_lshift;
3608 		/*
3609 		 * Make sure not to exceed the limits of what base_alloc()
3610 		 * can handle.
3611 		 */
3612 		if (narenas * sizeof(arena_t *) > chunksize)
3613 			narenas = (unsigned)(chunksize / sizeof(arena_t *));
3614 	} else if (opt_narenas_lshift < 0) {
3615 		if ((narenas << opt_narenas_lshift) < narenas)
3616 			narenas <<= opt_narenas_lshift;
3617 		/* Make sure there is at least one arena. */
3618 		if (narenas == 0)
3619 			narenas = 1;
3620 	}
3621 
3622 	next_arena = 0;
3623 
3624 	/* Allocate and initialize arenas. */
3625 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3626 	if (arenas == NULL) {
3627 		malloc_mutex_unlock(&init_lock);
3628 		return (true);
3629 	}
3630 	/*
3631 	 * Zero the array.  In practice, this should always be pre-zeroed,
3632 	 * since it was just mmap()ed, but let's be sure.
3633 	 */
3634 	memset(arenas, 0, sizeof(arena_t *) * narenas);
3635 
3636 	/*
3637 	 * Initialize one arena here.  The rest are lazily created in
3638 	 * arena_choose_hard().
3639 	 */
3640 	arenas_extend(0);
3641 	if (arenas[0] == NULL) {
3642 		malloc_mutex_unlock(&init_lock);
3643 		return (true);
3644 	}
3645 
3646 	malloc_mutex_init(&arenas_mtx);
3647 
3648 	malloc_initialized = true;
3649 	malloc_mutex_unlock(&init_lock);
3650 	return (false);
3651 }
3652 
3653 /*
3654  * End general internal functions.
3655  */
3656 /******************************************************************************/
3657 /*
3658  * Begin malloc(3)-compatible functions.
3659  */
3660 
3661 void *
3662 malloc(size_t size)
3663 {
3664 	void *ret;
3665 
3666 	if (malloc_init()) {
3667 		ret = NULL;
3668 		goto RETURN;
3669 	}
3670 
3671 	if (size == 0) {
3672 		if (opt_sysv == false)
3673 			size = 1;
3674 		else {
3675 			ret = NULL;
3676 			goto RETURN;
3677 		}
3678 	}
3679 
3680 	ret = imalloc(size);
3681 
3682 RETURN:
3683 	if (ret == NULL) {
3684 		if (opt_xmalloc) {
3685 			_malloc_message(_getprogname(),
3686 			    ": (malloc) Error in malloc(): out of memory\n", "",
3687 			    "");
3688 			abort();
3689 		}
3690 		errno = ENOMEM;
3691 	}
3692 
3693 	UTRACE(0, size, ret);
3694 	return (ret);
3695 }
3696 
3697 int
3698 posix_memalign(void **memptr, size_t alignment, size_t size)
3699 {
3700 	int ret;
3701 	void *result;
3702 
3703 	if (malloc_init())
3704 		result = NULL;
3705 	else {
3706 		/* Make sure that alignment is a large enough power of 2. */
3707 		if (((alignment - 1) & alignment) != 0
3708 		    || alignment < sizeof(void *)) {
3709 			if (opt_xmalloc) {
3710 				_malloc_message(_getprogname(),
3711 				    ": (malloc) Error in posix_memalign(): "
3712 				    "invalid alignment\n", "", "");
3713 				abort();
3714 			}
3715 			result = NULL;
3716 			ret = EINVAL;
3717 			goto RETURN;
3718 		}
3719 
3720 		result = ipalloc(alignment, size);
3721 	}
3722 
3723 	if (result == NULL) {
3724 		if (opt_xmalloc) {
3725 			_malloc_message(_getprogname(),
3726 			": (malloc) Error in posix_memalign(): out of memory\n",
3727 			"", "");
3728 			abort();
3729 		}
3730 		ret = ENOMEM;
3731 		goto RETURN;
3732 	}
3733 
3734 	*memptr = result;
3735 	ret = 0;
3736 
3737 RETURN:
3738 	UTRACE(0, size, result);
3739 	return (ret);
3740 }
3741 
3742 void *
3743 calloc(size_t num, size_t size)
3744 {
3745 	void *ret;
3746 	size_t num_size;
3747 
3748 	if (malloc_init()) {
3749 		num_size = 0;
3750 		ret = NULL;
3751 		goto RETURN;
3752 	}
3753 
3754 	num_size = num * size;
3755 	if (num_size == 0) {
3756 		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
3757 			num_size = 1;
3758 		else {
3759 			ret = NULL;
3760 			goto RETURN;
3761 		}
3762 	/*
3763 	 * Try to avoid division here.  We know that it isn't possible to
3764 	 * overflow during multiplication if neither operand uses any of the
3765 	 * most significant half of the bits in a size_t.
3766 	 */
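	/*
	 * For example, with a 64-bit size_t the mask below is the upper 32
	 * bits (SIZE_T_MAX << 32).  If neither num nor size has any of those
	 * bits set, both are less than 2^32, so num * size is less than 2^64
	 * and cannot wrap; the division is only paid for when that cheap test
	 * fails.
	 */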
3767 	} else if ((unsigned long long)((num | size) &
3768 	   ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
3769 	   (num_size / size != num)) {
3770 		/* size_t overflow. */
3771 		ret = NULL;
3772 		goto RETURN;
3773 	}
3774 
3775 	ret = icalloc(num_size);
3776 
3777 RETURN:
3778 	if (ret == NULL) {
3779 		if (opt_xmalloc) {
3780 			_malloc_message(_getprogname(),
3781 			    ": (malloc) Error in calloc(): out of memory\n", "",
3782 			    "");
3783 			abort();
3784 		}
3785 		errno = ENOMEM;
3786 	}
3787 
3788 	UTRACE(0, num_size, ret);
3789 	return (ret);
3790 }
3791 
3792 void *
3793 realloc(void *ptr, size_t size)
3794 {
3795 	void *ret;
3796 
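	/*
	 * With SysV semantics (opt_sysv), realloc(ptr, 0) frees ptr and
	 * returns NULL; otherwise a zero-size request is rounded up to one
	 * byte.
	 */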
3797 	if (size == 0) {
3798 		if (opt_sysv == false)
3799 			size = 1;
3800 		else {
3801 			if (ptr != NULL)
3802 				idalloc(ptr);
3803 			ret = NULL;
3804 			goto RETURN;
3805 		}
3806 	}
3807 
3808 	if (ptr != NULL) {
3809 		assert(malloc_initialized);
3810 
3811 		ret = iralloc(ptr, size);
3812 
3813 		if (ret == NULL) {
3814 			if (opt_xmalloc) {
3815 				_malloc_message(_getprogname(),
3816 				    ": (malloc) Error in realloc(): out of "
3817 				    "memory\n", "", "");
3818 				abort();
3819 			}
3820 			errno = ENOMEM;
3821 		}
3822 	} else {
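		/* ptr is NULL: behave like malloc(size). */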
3823 		if (malloc_init())
3824 			ret = NULL;
3825 		else
3826 			ret = imalloc(size);
3827 
3828 		if (ret == NULL) {
3829 			if (opt_xmalloc) {
3830 				_malloc_message(_getprogname(),
3831 				    ": (malloc) Error in realloc(): out of "
3832 				    "memory\n", "", "");
3833 				abort();
3834 			}
3835 			errno = ENOMEM;
3836 		}
3837 	}
3838 
3839 RETURN:
3840 	UTRACE(ptr, size, ret);
3841 	return (ret);
3842 }
3843 
3844 void
3845 free(void *ptr)
3846 {
3847 
3848 	UTRACE(ptr, 0, 0);
3849 	if (ptr != NULL) {
3850 		assert(malloc_initialized);
3851 
3852 		idalloc(ptr);
3853 	}
3854 }
3855 
3856 /*
3857  * End malloc(3)-compatible functions.
3858  */
3859 /******************************************************************************/
3860 /*
3861  * Begin non-standard functions.
3862  */
3863 #ifndef __NetBSD__
3864 size_t
3865 malloc_usable_size(const void *ptr)
3866 {
3867 
3868 	assert(ptr != NULL);
3869 
3870 	return (isalloc(ptr));
3871 }
3872 #endif
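
/*
 * Illustrative usage (only on builds where malloc_usable_size() is compiled
 * in, i.e. not under __NetBSD__): the return value is the size of the backing
 * allocation, which can exceed the request because of size-class rounding.
 * Assuming the default 16 byte quantum described at the top of this file:
 *
 *	void *p = malloc(100);
 *	size_t n = malloc_usable_size(p);	-> n == 112, not 100
 */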
3873 
3874 /*
3875  * End non-standard functions.
3876  */
3877 /******************************************************************************/
3878 /*
3879  * Begin library-private functions, used by threading libraries for protection
3880  * of malloc during fork().  These functions are only called if the program is
3881  * running in threaded mode, so there is no need to check whether the program
3882  * is threaded here.
3883  */
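
/*
 * Illustrative sketch (not part of this file): one way a threading library
 * could arrange for these hooks to run around fork(2) is pthread_atfork(3),
 * installing the prefork hook as the prepare handler and the postfork hook
 * in both the parent and the child afterwards:
 *
 *	pthread_atfork(_malloc_prefork, _malloc_postfork, _malloc_postfork);
 */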
3884 
3885 void
3886 _malloc_prefork(void)
3887 {
3888 	unsigned i;
3889 
3890 	/* Acquire all mutexes in a safe order. */
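	/*
	 * fork(2) copies only the calling thread into the child, so any
	 * allocator mutex held by another thread at fork time would never be
	 * released there.  Taking every mutex here guarantees the allocator's
	 * data structures are quiescent when the address space is copied.
	 */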
3891 
3892 	malloc_mutex_lock(&arenas_mtx);
3893 	for (i = 0; i < narenas; i++) {
3894 		if (arenas[i] != NULL)
3895 			malloc_mutex_lock(&arenas[i]->mtx);
3896 	}
3897 	malloc_mutex_unlock(&arenas_mtx);
3898 
3899 	malloc_mutex_lock(&base_mtx);
3900 
3901 	malloc_mutex_lock(&chunks_mtx);
3902 }
3903 
3904 void
3905 _malloc_postfork(void)
3906 {
3907 	unsigned i;
3908 
3909 	/* Release all mutexes, now that fork() has completed. */
3910 
3911 	malloc_mutex_unlock(&chunks_mtx);
3912 
3913 	malloc_mutex_unlock(&base_mtx);
3914 
3915 	malloc_mutex_lock(&arenas_mtx);
3916 	for (i = 0; i < narenas; i++) {
3917 		if (arenas[i] != NULL)
3918 			malloc_mutex_unlock(&arenas[i]->mtx);
3919 	}
3920 	malloc_mutex_unlock(&arenas_mtx);
3921 }
3922 
3923 /*
3924  * End library-private functions.
3925  */
3926 /******************************************************************************/
3927