xref: /netbsd-src/lib/libc/stdlib/jemalloc.c (revision aad9773e38ed2370a628a6416e098f9008fc10a7)
1 /*	$NetBSD: jemalloc.c,v 1.36 2014/09/19 17:42:19 matt Exp $	*/
2 
3 /*-
4  * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice(s), this list of conditions and the following disclaimer as
12  *    the first lines of this file unmodified other than the possible
13  *    addition of one or more copyright notices.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice(s), this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
20  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  *******************************************************************************
32  *
33  * This allocator implementation is designed to provide scalable performance
34  * for multi-threaded programs on multi-processor systems.  The following
35  * features are included for this purpose:
36  *
37  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
38  *     contention and cache sloshing.
39  *
40  *   + Cache line sharing between arenas is avoided for internal data
41  *     structures.
42  *
43  *   + Memory is managed in chunks and runs (chunks can be split into runs),
44  *     rather than as individual pages.  This provides a constant-time
45  *     mechanism for associating allocations with particular arenas.
46  *
47  * Allocation requests are rounded up to the nearest size class, and no record
48  * of the original request size is maintained.  Allocations are broken into
49  * categories according to size class.  Assuming runtime defaults, 4 kB pages
50  * and a 16 byte quantum, the size classes in each category are as follows:
51  *
52  *   |=====================================|
53  *   | Category | Subcategory    |    Size |
54  *   |=====================================|
55  *   | Small    | Tiny           |       2 |
56  *   |          |                |       4 |
57  *   |          |                |       8 |
58  *   |          |----------------+---------|
59  *   |          | Quantum-spaced |      16 |
60  *   |          |                |      32 |
61  *   |          |                |      48 |
62  *   |          |                |     ... |
63  *   |          |                |     480 |
64  *   |          |                |     496 |
65  *   |          |                |     512 |
66  *   |          |----------------+---------|
67  *   |          | Sub-page       |    1 kB |
68  *   |          |                |    2 kB |
69  *   |=====================================|
70  *   | Large                     |    4 kB |
71  *   |                           |    8 kB |
72  *   |                           |   12 kB |
73  *   |                           |     ... |
74  *   |                           | 1012 kB |
75  *   |                           | 1016 kB |
76  *   |                           | 1020 kB |
77  *   |=====================================|
78  *   | Huge                      |    1 MB |
79  *   |                           |    2 MB |
80  *   |                           |    3 MB |
81  *   |                           |     ... |
82  *   |=====================================|
83  *
84  * A different mechanism is used for each category:
85  *
86  *   Small : Each size class is segregated into its own set of runs.  Each run
87  *           maintains a bitmap of which regions are free/allocated.
88  *
89  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
90  *           in the associated arena chunk header maps.
91  *
92  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
93  *          Metadata are stored in a separate red-black tree.
94  *
95  *******************************************************************************
96  */
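/*
 * Worked examples, assuming the runtime defaults above (16 byte quantum,
 * 4 kB pages, 1 MB chunks); these are illustrative only:
 *
 *   malloc(7)       -> tiny,           rounded up to 8 bytes.
 *   malloc(100)     -> quantum-spaced, rounded up to 112 bytes.
 *   malloc(700)     -> sub-page,       rounded up to 1 kB.
 *   malloc(10000)   -> large,          backed by a dedicated 12 kB run.
 *   malloc(1500000) -> huge,           backed by two contiguous 1 MB chunks.
 */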
97 
98 /* LINTLIBRARY */
99 
100 #ifdef __NetBSD__
101 #  define xutrace(a, b)		utrace("malloc", (a), (b))
102 #  define __DECONST(x, y)	((x)__UNCONST(y))
103 #  define NO_TLS
104 #else
105 #  define xutrace(a, b)		utrace((a), (b))
106 #endif	/* __NetBSD__ */
107 
108 /*
109  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
110  * defaults the A and J runtime options to off.  These settings are appropriate
111  * for production systems.
112  */
113 #define MALLOC_PRODUCTION
114 
115 #ifndef MALLOC_PRODUCTION
116 #  define MALLOC_DEBUG
117 #endif
118 
119 #include <sys/cdefs.h>
120 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
121 __RCSID("$NetBSD: jemalloc.c,v 1.36 2014/09/19 17:42:19 matt Exp $");
122 
123 #ifdef __FreeBSD__
124 #include "libc_private.h"
125 #ifdef MALLOC_DEBUG
126 #  define _LOCK_DEBUG
127 #endif
128 #include "spinlock.h"
129 #endif
130 #include "namespace.h"
131 #include <sys/mman.h>
132 #include <sys/param.h>
133 #ifdef __FreeBSD__
134 #include <sys/stddef.h>
135 #endif
136 #include <sys/time.h>
137 #include <sys/types.h>
138 #include <sys/sysctl.h>
139 #include <sys/tree.h>
140 #include <sys/uio.h>
141 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
142 
143 #ifdef __FreeBSD__
144 #include <machine/atomic.h>
145 #include <machine/cpufunc.h>
146 #endif
147 #include <machine/vmparam.h>
148 
149 #include <errno.h>
150 #include <limits.h>
151 #include <pthread.h>
152 #include <sched.h>
153 #include <stdarg.h>
154 #include <stdbool.h>
155 #include <stdio.h>
156 #include <stdint.h>
157 #include <stdlib.h>
158 #include <string.h>
159 #include <strings.h>
160 #include <unistd.h>
161 
162 #ifdef __NetBSD__
163 #  include <reentrant.h>
164 #  include "extern.h"
165 
166 #define STRERROR_R(a, b, c)	__strerror_r(a, b, c);
167 /*
 168  * A non-localized version of strerror that avoids bringing in stdio and
 169  * the locale code.  All the malloc messages are in English, so why
 170  * bother with locales?
171  */
172 static int
173 __strerror_r(int e, char *s, size_t l)
174 {
175 	int rval;
176 	size_t slen;
177 
178 	if (e >= 0 && e < sys_nerr) {
179 		slen = strlcpy(s, sys_errlist[e], l);
180 		rval = 0;
181 	} else {
 182 		slen = snprintf_ss(s, l, "Unknown error %d", e);
183 		rval = EINVAL;
184 	}
185 	return slen >= l ? ERANGE : rval;
186 }
187 #endif
188 
189 #ifdef __FreeBSD__
190 #define STRERROR_R(a, b, c)	strerror_r(a, b, c);
191 #include "un-namespace.h"
192 #endif
193 
194 /* MALLOC_STATS enables statistics calculation. */
195 #ifndef MALLOC_PRODUCTION
196 #  define MALLOC_STATS
197 #endif
198 
199 #ifdef MALLOC_DEBUG
200 #  ifdef NDEBUG
201 #    undef NDEBUG
202 #  endif
203 #else
204 #  ifndef NDEBUG
205 #    define NDEBUG
206 #  endif
207 #endif
208 #include <assert.h>
209 
210 #ifdef MALLOC_DEBUG
211    /* Disable inlining to make debugging easier. */
212 #  define inline
213 #endif
214 
215 /* Size of stack-allocated buffer passed to strerror_r(). */
216 #define	STRERROR_BUF		64
217 
218 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
219 
220 /*
221  * If you touch the TINY_MIN_2POW definition for any architecture, please
222  * make sure to adjust the corresponding definition for JEMALLOC_TINY_MIN_2POW
223  * in the gcc 4.8 tree in dist/gcc/tree-ssa-ccp.c and verify that a native
224  * gcc is still buildable!
225  */
226 
227 #ifdef __i386__
228 #  define QUANTUM_2POW_MIN	4
229 #  define SIZEOF_PTR_2POW	2
230 #  define USE_BRK
231 #endif
232 #ifdef __ia64__
233 #  define QUANTUM_2POW_MIN	4
234 #  define SIZEOF_PTR_2POW	3
235 #endif
236 #ifdef __aarch64__
237 #  define QUANTUM_2POW_MIN	4
238 #  define SIZEOF_PTR_2POW	3
239 #  define NO_TLS
240 #endif
241 #ifdef __alpha__
242 #  define QUANTUM_2POW_MIN	4
243 #  define SIZEOF_PTR_2POW	3
244 #  define TINY_MIN_2POW		3
245 #  define NO_TLS
246 #endif
247 #ifdef __sparc64__
248 #  define QUANTUM_2POW_MIN	4
249 #  define SIZEOF_PTR_2POW	3
250 #  define TINY_MIN_2POW		3
251 #  define NO_TLS
252 #endif
253 #ifdef __amd64__
254 #  define QUANTUM_2POW_MIN	4
255 #  define SIZEOF_PTR_2POW	3
256 #  define TINY_MIN_2POW		3
257 #endif
258 #ifdef __arm__
259 #  define QUANTUM_2POW_MIN	3
260 #  define SIZEOF_PTR_2POW	2
261 #  define USE_BRK
262 #  ifdef __ARM_EABI__
263 #    define TINY_MIN_2POW	3
264 #  endif
265 #  define NO_TLS
266 #endif
267 #ifdef __powerpc__
268 #  define QUANTUM_2POW_MIN	4
269 #  define SIZEOF_PTR_2POW	2
270 #  define USE_BRK
271 #  define TINY_MIN_2POW		3
272 #endif
273 #if defined(__sparc__) && !defined(__sparc64__)
274 #  define QUANTUM_2POW_MIN	4
275 #  define SIZEOF_PTR_2POW	2
276 #  define USE_BRK
277 #endif
278 #ifdef __or1k__
279 #  define QUANTUM_2POW_MIN	4
280 #  define SIZEOF_PTR_2POW	2
281 #  define USE_BRK
282 #endif
283 #ifdef __vax__
284 #  define QUANTUM_2POW_MIN	4
285 #  define SIZEOF_PTR_2POW	2
286 #  define USE_BRK
287 #endif
288 #ifdef __sh__
289 #  define QUANTUM_2POW_MIN	4
290 #  define SIZEOF_PTR_2POW	2
291 #  define USE_BRK
292 #endif
293 #ifdef __m68k__
294 #  define QUANTUM_2POW_MIN	4
295 #  define SIZEOF_PTR_2POW	2
296 #  define USE_BRK
297 #endif
298 #if defined(__mips__) || defined(__riscv__)
299 # ifdef _LP64
300 #  define SIZEOF_PTR_2POW	3
301 #  define TINY_MIN_2POW		3
302 # else
303 #  define SIZEOF_PTR_2POW	2
304 # endif
305 # define QUANTUM_2POW_MIN	4
306 # define USE_BRK
307 #endif
308 #ifdef __hppa__
309 #  define QUANTUM_2POW_MIN     4
310 #  define SIZEOF_PTR_2POW      2
311 #  define USE_BRK
312 #endif
313 
314 #define	SIZEOF_PTR		(1 << SIZEOF_PTR_2POW)
315 
316 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
317 #ifndef SIZEOF_INT_2POW
318 #  define SIZEOF_INT_2POW	2
319 #endif
320 
321 /*
322  * Size and alignment of memory chunks that are allocated by the OS's virtual
323  * memory system.
324  */
325 #define	CHUNK_2POW_DEFAULT	20
326 
327 /*
328  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
329  * so over-estimates are okay (up to a point), but under-estimates will
330  * negatively affect performance.
331  */
332 #define	CACHELINE_2POW		6
333 #define	CACHELINE		((size_t)(1 << CACHELINE_2POW))
334 
335 /* Smallest size class to support. */
336 #ifndef TINY_MIN_2POW
337 #define	TINY_MIN_2POW		2
338 #endif
339 
340 /*
341  * Maximum size class that is a multiple of the quantum, but not (necessarily)
342  * a power of 2.  Above this size, allocations are rounded up to the nearest
343  * power of 2.
344  */
345 #define	SMALL_MAX_2POW_DEFAULT	9
346 #define	SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)
347 
348 /*
349  * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
350  * as small as possible such that this setting is still honored, without
351  * violating other constraints.  The goal is to make runs as small as possible
352  * without exceeding a per run external fragmentation threshold.
353  *
354  * We use binary fixed point math for overhead computations, where the binary
355  * point is implicitly RUN_BFP bits to the left.
356  *
357  * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
358  * honored for some/all object sizes, since there is one bit of header overhead
359  * per object (plus a constant).  This constraint is relaxed (ignored) for runs
360  * that are so small that the per-region overhead is greater than:
361  *
 362  *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
363  */
364 #define RUN_BFP			12
365 /*                              \/   Implicit binary fixed point. */
366 #define RUN_MAX_OVRHD		0x0000003dU
367 #define RUN_MAX_OVRHD_RELAX	0x00001800U
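/*
 * Reading the constants above as RUN_BFP (12 bit) fixed-point values, for
 * example: RUN_MAX_OVRHD is 0x3d/2^12, i.e. roughly 1.5% desired maximum
 * run header overhead, and RUN_MAX_OVRHD_RELAX is 0x1800/2^12, i.e. 1.5.
 */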
368 
369 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
370 #define RUN_MAX_SMALL_2POW	15
371 #define RUN_MAX_SMALL		(1 << RUN_MAX_SMALL_2POW)
372 
373 /******************************************************************************/
374 
375 #ifdef __FreeBSD__
376 /*
377  * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
378  * they require malloc()ed memory.
379  */
380 typedef struct {
381 	spinlock_t	lock;
382 } malloc_mutex_t;
383 
384 /* Set to true once the allocator has been initialized. */
385 static bool malloc_initialized = false;
386 
387 /* Used to avoid initialization races. */
388 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
389 #else
390 #define	malloc_mutex_t	mutex_t
391 
392 /* Set to true once the allocator has been initialized. */
393 static bool malloc_initialized = false;
394 
395 /* Used to avoid initialization races. */
396 static mutex_t init_lock = MUTEX_INITIALIZER;
397 #endif
398 
399 /******************************************************************************/
400 /*
401  * Statistics data structures.
402  */
403 
404 #ifdef MALLOC_STATS
405 
406 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
407 struct malloc_bin_stats_s {
408 	/*
409 	 * Number of allocation requests that corresponded to the size of this
410 	 * bin.
411 	 */
412 	uint64_t	nrequests;
413 
414 	/* Total number of runs created for this bin's size class. */
415 	uint64_t	nruns;
416 
417 	/*
418 	 * Total number of runs reused by extracting them from the runs tree for
419 	 * this bin's size class.
420 	 */
421 	uint64_t	reruns;
422 
423 	/* High-water mark for this bin. */
424 	unsigned long	highruns;
425 
426 	/* Current number of runs in this bin. */
427 	unsigned long	curruns;
428 };
429 
430 typedef struct arena_stats_s arena_stats_t;
431 struct arena_stats_s {
432 	/* Number of bytes currently mapped. */
433 	size_t		mapped;
434 
435 	/* Per-size-category statistics. */
436 	size_t		allocated_small;
437 	uint64_t	nmalloc_small;
438 	uint64_t	ndalloc_small;
439 
440 	size_t		allocated_large;
441 	uint64_t	nmalloc_large;
442 	uint64_t	ndalloc_large;
443 };
444 
445 typedef struct chunk_stats_s chunk_stats_t;
446 struct chunk_stats_s {
447 	/* Number of chunks that were allocated. */
448 	uint64_t	nchunks;
449 
450 	/* High-water mark for number of chunks allocated. */
451 	unsigned long	highchunks;
452 
453 	/*
 454 	 * Current number of chunks allocated.  This value is not useful by
 455 	 * itself; it is tracked only so that highchunks can be kept up to
 456 	 * date.
457 	 */
458 	unsigned long	curchunks;
459 };
460 
461 #endif /* #ifdef MALLOC_STATS */
462 
463 /******************************************************************************/
464 /*
465  * Chunk data structures.
466  */
467 
468 /* Tree of chunks. */
469 typedef struct chunk_node_s chunk_node_t;
470 struct chunk_node_s {
471 	/* Linkage for the chunk tree. */
472 	RB_ENTRY(chunk_node_s) link;
473 
474 	/*
475 	 * Pointer to the chunk that this tree node is responsible for.  In some
476 	 * (but certainly not all) cases, this data structure is placed at the
477 	 * beginning of the corresponding chunk, so this field may point to this
478 	 * node.
479 	 */
480 	void	*chunk;
481 
482 	/* Total chunk size. */
483 	size_t	size;
484 };
485 typedef struct chunk_tree_s chunk_tree_t;
486 RB_HEAD(chunk_tree_s, chunk_node_s);
487 
488 /******************************************************************************/
489 /*
490  * Arena data structures.
491  */
492 
493 typedef struct arena_s arena_t;
494 typedef struct arena_bin_s arena_bin_t;
495 
496 typedef struct arena_chunk_map_s arena_chunk_map_t;
497 struct arena_chunk_map_s {
498 	/* Number of pages in run. */
499 	uint32_t	npages;
500 	/*
501 	 * Position within run.  For a free run, this is POS_FREE for the first
502 	 * and last pages.  The POS_FREE special value makes it possible to
503 	 * quickly coalesce free runs.
504 	 *
505 	 * This is the limiting factor for chunksize; there can be at most 2^31
506 	 * pages in a run.
507 	 */
508 #define POS_FREE ((uint32_t)0xffffffffU)
509 	uint32_t	pos;
510 };
511 
512 /* Arena chunk header. */
513 typedef struct arena_chunk_s arena_chunk_t;
514 struct arena_chunk_s {
515 	/* Arena that owns the chunk. */
516 	arena_t *arena;
517 
518 	/* Linkage for the arena's chunk tree. */
519 	RB_ENTRY(arena_chunk_s) link;
520 
521 	/*
522 	 * Number of pages in use.  This is maintained in order to make
523 	 * detection of empty chunks fast.
524 	 */
525 	uint32_t pages_used;
526 
527 	/*
528 	 * Every time a free run larger than this value is created/coalesced,
529 	 * this value is increased.  The only way that the value decreases is if
530 	 * arena_run_alloc() fails to find a free run as large as advertised by
531 	 * this value.
532 	 */
533 	uint32_t max_frun_npages;
534 
535 	/*
536 	 * Every time a free run that starts at an earlier page than this value
537 	 * is created/coalesced, this value is decreased.  It is reset in a
538 	 * similar fashion to max_frun_npages.
539 	 */
540 	uint32_t min_frun_ind;
541 
542 	/*
543 	 * Map of pages within chunk that keeps track of free/large/small.  For
544 	 * free runs, only the map entries for the first and last pages are
545 	 * kept up to date, so that free runs can be quickly coalesced.
546 	 */
547 	arena_chunk_map_t map[1]; /* Dynamically sized. */
548 };
549 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
550 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
551 
552 typedef struct arena_run_s arena_run_t;
553 struct arena_run_s {
554 	/* Linkage for run trees. */
555 	RB_ENTRY(arena_run_s) link;
556 
557 #ifdef MALLOC_DEBUG
558 	uint32_t	magic;
559 #  define ARENA_RUN_MAGIC 0x384adf93
560 #endif
561 
562 	/* Bin this run is associated with. */
563 	arena_bin_t	*bin;
564 
565 	/* Index of first element that might have a free region. */
566 	unsigned	regs_minelm;
567 
568 	/* Number of free regions in run. */
569 	unsigned	nfree;
570 
571 	/* Bitmask of in-use regions (0: in use, 1: free). */
572 	unsigned	regs_mask[1]; /* Dynamically sized. */
573 };
574 typedef struct arena_run_tree_s arena_run_tree_t;
575 RB_HEAD(arena_run_tree_s, arena_run_s);
576 
577 struct arena_bin_s {
578 	/*
579 	 * Current run being used to service allocations of this bin's size
580 	 * class.
581 	 */
582 	arena_run_t	*runcur;
583 
584 	/*
585 	 * Tree of non-full runs.  This tree is used when looking for an
586 	 * existing run when runcur is no longer usable.  We choose the
587 	 * non-full run that is lowest in memory; this policy tends to keep
588 	 * objects packed well, and it can also help reduce the number of
589 	 * almost-empty chunks.
590 	 */
591 	arena_run_tree_t runs;
592 
593 	/* Size of regions in a run for this bin's size class. */
594 	size_t		reg_size;
595 
596 	/* Total size of a run for this bin's size class. */
597 	size_t		run_size;
598 
599 	/* Total number of regions in a run for this bin's size class. */
600 	uint32_t	nregs;
601 
602 	/* Number of elements in a run's regs_mask for this bin's size class. */
603 	uint32_t	regs_mask_nelms;
604 
605 	/* Offset of first region in a run for this bin's size class. */
606 	uint32_t	reg0_offset;
607 
608 #ifdef MALLOC_STATS
609 	/* Bin statistics. */
610 	malloc_bin_stats_t stats;
611 #endif
612 };
613 
614 struct arena_s {
615 #ifdef MALLOC_DEBUG
616 	uint32_t		magic;
617 #  define ARENA_MAGIC 0x947d3d24
618 #endif
619 
620 	/* All operations on this arena require that mtx be locked. */
621 	malloc_mutex_t		mtx;
622 
623 #ifdef MALLOC_STATS
624 	arena_stats_t		stats;
625 #endif
626 
627 	/*
628 	 * Tree of chunks this arena manages.
629 	 */
630 	arena_chunk_tree_t	chunks;
631 
632 	/*
633 	 * In order to avoid rapid chunk allocation/deallocation when an arena
634 	 * oscillates right on the cusp of needing a new chunk, cache the most
635 	 * recently freed chunk.  This caching is disabled by opt_hint.
636 	 *
637 	 * There is one spare chunk per arena, rather than one spare total, in
638 	 * order to avoid interactions between multiple threads that could make
639 	 * a single spare inadequate.
640 	 */
641 	arena_chunk_t *spare;
642 
643 	/*
 644 	 * bins is used to store runs of free regions of the following sizes,
645 	 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
646 	 *
647 	 *   bins[i] | size |
648 	 *   --------+------+
649 	 *        0  |    2 |
650 	 *        1  |    4 |
651 	 *        2  |    8 |
652 	 *   --------+------+
653 	 *        3  |   16 |
654 	 *        4  |   32 |
655 	 *        5  |   48 |
656 	 *        6  |   64 |
657 	 *           :      :
658 	 *           :      :
659 	 *       33  |  496 |
660 	 *       34  |  512 |
661 	 *   --------+------+
662 	 *       35  | 1024 |
663 	 *       36  | 2048 |
664 	 *   --------+------+
665 	 */
666 	arena_bin_t		bins[1]; /* Dynamically sized. */
667 };
668 
669 /******************************************************************************/
670 /*
671  * Data.
672  */
673 
674 /* Number of CPUs. */
675 static unsigned		ncpus;
676 
677 /* VM page size. */
678 static size_t		pagesize;
679 static size_t		pagesize_mask;
680 static int		pagesize_2pow;
681 
682 /* Various bin-related settings. */
683 static size_t		bin_maxclass; /* Max size class for bins. */
684 static unsigned		ntbins; /* Number of (2^n)-spaced tiny bins. */
685 static unsigned		nqbins; /* Number of quantum-spaced bins. */
686 static unsigned		nsbins; /* Number of (2^n)-spaced sub-page bins. */
687 static size_t		small_min;
688 static size_t		small_max;
689 
690 /* Various quantum-related settings. */
691 static size_t		quantum;
692 static size_t		quantum_mask; /* (quantum - 1). */
693 
694 /* Various chunk-related settings. */
695 static size_t		chunksize;
696 static size_t		chunksize_mask; /* (chunksize - 1). */
697 static int		chunksize_2pow;
698 static unsigned		chunk_npages;
699 static unsigned		arena_chunk_header_npages;
700 static size_t		arena_maxclass; /* Max size class for arenas. */
701 
702 /********/
703 /*
704  * Chunks.
705  */
706 
707 /* Protects chunk-related data structures. */
708 static malloc_mutex_t	chunks_mtx;
709 
710 /* Tree of chunks that are stand-alone huge allocations. */
711 static chunk_tree_t	huge;
712 
713 #ifdef USE_BRK
714 /*
715  * Try to use brk for chunk-size allocations, due to address space constraints.
716  */
717 /*
718  * Protects sbrk() calls.  This must be separate from chunks_mtx, since
719  * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
720  * could cause recursive lock acquisition).
721  */
722 static malloc_mutex_t	brk_mtx;
723 /* Result of first sbrk(0) call. */
724 static void		*brk_base;
725 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
726 static void		*brk_prev;
727 /* Current upper limit on brk addresses. */
728 static void		*brk_max;
729 #endif
730 
731 #ifdef MALLOC_STATS
732 /* Huge allocation statistics. */
733 static uint64_t		huge_nmalloc;
734 static uint64_t		huge_ndalloc;
735 static uint64_t		huge_nralloc;
736 static size_t		huge_allocated;
737 #endif
738 
739 /*
740  * Tree of chunks that were previously allocated.  This is used when allocating
741  * chunks, in an attempt to re-use address space.
742  */
743 static chunk_tree_t	old_chunks;
744 
745 /****************************/
746 /*
747  * base (internal allocation).
748  */
749 
750 /*
751  * Current pages that are being used for internal memory allocations.  These
752  * pages are carved up in cacheline-size quanta, so that there is no chance of
753  * false cache line sharing.
754  */
755 static void		*base_pages;
756 static void		*base_next_addr;
757 static void		*base_past_addr; /* Addr immediately past base_pages. */
758 static chunk_node_t	*base_chunk_nodes; /* LIFO cache of chunk nodes. */
759 static malloc_mutex_t	base_mtx;
760 #ifdef MALLOC_STATS
761 static size_t		base_mapped;
762 #endif
763 
764 /********/
765 /*
766  * Arenas.
767  */
768 
769 /*
770  * Arenas that are used to service external requests.  Not all elements of the
771  * arenas array are necessarily used; arenas are created lazily as needed.
772  */
773 static arena_t		**arenas;
774 static unsigned		narenas;
775 static unsigned		next_arena;
776 static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */
777 
778 #ifndef NO_TLS
779 /*
780  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
781  * for allocations.
782  */
783 static __thread arena_t	*arenas_map;
784 #define	get_arenas_map()	(arenas_map)
785 #define	set_arenas_map(x)	(arenas_map = x)
786 #else
787 static thread_key_t arenas_map_key;
788 #define	get_arenas_map()	thr_getspecific(arenas_map_key)
789 #define	set_arenas_map(x)	thr_setspecific(arenas_map_key, x)
790 #endif
791 
792 #ifdef MALLOC_STATS
793 /* Chunk statistics. */
794 static chunk_stats_t	stats_chunks;
795 #endif
796 
797 /*******************************/
798 /*
799  * Runtime configuration options.
800  */
801 const char	*_malloc_options;
802 
803 #ifndef MALLOC_PRODUCTION
804 static bool	opt_abort = true;
805 static bool	opt_junk = true;
806 #else
807 static bool	opt_abort = false;
808 static bool	opt_junk = false;
809 #endif
810 static bool	opt_hint = false;
811 static bool	opt_print_stats = false;
812 static int	opt_quantum_2pow = QUANTUM_2POW_MIN;
813 static int	opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
814 static int	opt_chunk_2pow = CHUNK_2POW_DEFAULT;
815 static bool	opt_utrace = false;
816 static bool	opt_sysv = false;
817 static bool	opt_xmalloc = false;
818 static bool	opt_zero = false;
819 static int32_t	opt_narenas_lshift = 0;
820 
821 typedef struct {
822 	void	*p;
823 	size_t	s;
824 	void	*r;
825 } malloc_utrace_t;
826 
827 #define	UTRACE(a, b, c)							\
828 	if (opt_utrace) {						\
829 		malloc_utrace_t ut;					\
830 		ut.p = a;						\
831 		ut.s = b;						\
832 		ut.r = c;						\
833 		xutrace(&ut, sizeof(ut));				\
834 	}
835 
836 /******************************************************************************/
837 /*
838  * Begin function prototypes for non-inline static functions.
839  */
840 
841 static void	wrtmessage(const char *p1, const char *p2, const char *p3,
842 		const char *p4);
843 #ifdef MALLOC_STATS
844 static void	malloc_printf(const char *format, ...);
845 #endif
846 static char	*size_t2s(size_t x, char *s);
847 static bool	base_pages_alloc(size_t minsize);
848 static void	*base_alloc(size_t size);
849 static chunk_node_t *base_chunk_node_alloc(void);
850 static void	base_chunk_node_dealloc(chunk_node_t *node);
851 #ifdef MALLOC_STATS
852 static void	stats_print(arena_t *arena);
853 #endif
854 static void	*pages_map(void *addr, size_t size);
855 static void	*pages_map_align(void *addr, size_t size, int align);
856 static void	pages_unmap(void *addr, size_t size);
857 static void	*chunk_alloc(size_t size);
858 static void	chunk_dealloc(void *chunk, size_t size);
859 static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
860 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
861 static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
862 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
863 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
864 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
865 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
866 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
867 static void	*arena_malloc(arena_t *arena, size_t size);
868 static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
869     size_t alloc_size);
870 static size_t	arena_salloc(const void *ptr);
871 static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
872 static void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
873 static bool	arena_new(arena_t *arena);
874 static arena_t	*arenas_extend(unsigned ind);
875 static void	*huge_malloc(size_t size);
876 static void	*huge_palloc(size_t alignment, size_t size);
877 static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
878 static void	huge_dalloc(void *ptr);
879 static void	*imalloc(size_t size);
880 static void	*ipalloc(size_t alignment, size_t size);
881 static void	*icalloc(size_t size);
882 static size_t	isalloc(const void *ptr);
883 static void	*iralloc(void *ptr, size_t size);
884 static void	idalloc(void *ptr);
885 static void	malloc_print_stats(void);
886 static bool	malloc_init_hard(void);
887 
888 /*
889  * End function prototypes.
890  */
891 /******************************************************************************/
892 /*
893  * Begin mutex.
894  */
895 
896 #ifdef __NetBSD__
897 #define	malloc_mutex_init(m)	mutex_init(m, NULL)
898 #define	malloc_mutex_lock(m)	mutex_lock(m)
899 #define	malloc_mutex_unlock(m)	mutex_unlock(m)
900 #else	/* __NetBSD__ */
901 static inline void
902 malloc_mutex_init(malloc_mutex_t *a_mutex)
903 {
904 	static const spinlock_t lock = _SPINLOCK_INITIALIZER;
905 
906 	a_mutex->lock = lock;
907 }
908 
909 static inline void
910 malloc_mutex_lock(malloc_mutex_t *a_mutex)
911 {
912 
913 	if (__isthreaded)
914 		_SPINLOCK(&a_mutex->lock);
915 }
916 
917 static inline void
918 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
919 {
920 
921 	if (__isthreaded)
922 		_SPINUNLOCK(&a_mutex->lock);
923 }
924 #endif	/* __NetBSD__ */
925 
926 /*
927  * End mutex.
928  */
929 /******************************************************************************/
930 /*
931  * Begin Utility functions/macros.
932  */
933 
934 /* Return the chunk address for allocation address a. */
935 #define	CHUNK_ADDR2BASE(a)						\
936 	((void *)((uintptr_t)(a) & ~chunksize_mask))
937 
938 /* Return the chunk offset of address a. */
939 #define	CHUNK_ADDR2OFFSET(a)						\
940 	((size_t)((uintptr_t)(a) & chunksize_mask))
941 
942 /* Return the smallest chunk multiple that is >= s. */
943 #define	CHUNK_CEILING(s)						\
944 	(((s) + chunksize_mask) & ~chunksize_mask)
945 
946 /* Return the smallest cacheline multiple that is >= s. */
947 #define	CACHELINE_CEILING(s)						\
948 	(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
949 
950 /* Return the smallest quantum multiple that is >= a. */
951 #define	QUANTUM_CEILING(a)						\
952 	(((a) + quantum_mask) & ~quantum_mask)
953 
954 /* Return the smallest pagesize multiple that is >= s. */
955 #define	PAGE_CEILING(s)							\
956 	(((s) + pagesize_mask) & ~pagesize_mask)
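/*
 * Worked examples of the macros above, assuming the runtime defaults
 * (1 MB chunks, 16 byte quantum, 4 kB pages); illustrative only:
 *
 *   CHUNK_ADDR2BASE(0x12345678)   == (void *)0x12300000
 *   CHUNK_ADDR2OFFSET(0x12345678) == 0x45678
 *   QUANTUM_CEILING(100)          == 112
 *   PAGE_CEILING(5000)            == 8192
 */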
957 
958 /* Compute the smallest power of 2 that is >= x. */
959 static inline size_t
960 pow2_ceil(size_t x)
961 {
962 
963 	x--;
964 	x |= x >> 1;
965 	x |= x >> 2;
966 	x |= x >> 4;
967 	x |= x >> 8;
968 	x |= x >> 16;
969 #if (SIZEOF_PTR == 8)
970 	x |= x >> 32;
971 #endif
972 	x++;
973 	return (x);
974 }
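/*
 * The decrement/smear/increment sequence above propagates the highest set bit
 * into every lower bit position before rounding up, so for example
 * pow2_ceil(0x13) == 0x20 and pow2_ceil(0x40) == 0x40.
 */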
975 
976 static void
977 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
978 {
979 
980 	write(STDERR_FILENO, p1, strlen(p1));
981 	write(STDERR_FILENO, p2, strlen(p2));
982 	write(STDERR_FILENO, p3, strlen(p3));
983 	write(STDERR_FILENO, p4, strlen(p4));
984 }
985 
986 void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
987 	    const char *p4) = wrtmessage;
988 
989 #ifdef MALLOC_STATS
990 /*
991  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
992  */
993 static void
994 malloc_printf(const char *format, ...)
995 {
996 	char buf[4096];
997 	va_list ap;
998 
999 	va_start(ap, format);
1000 	vsnprintf(buf, sizeof(buf), format, ap);
1001 	va_end(ap);
1002 	_malloc_message(buf, "", "", "");
1003 }
1004 #endif
1005 
1006 /*
1007  * We don't want to depend on vsnprintf() for production builds, since that can
1008  * cause unnecessary bloat for static binaries.  size_t2s() provides minimal
1009  * integer printing functionality, so that malloc_printf() use can be limited to
1010  * MALLOC_STATS code.
1011  */
1012 #define UMAX2S_BUFSIZE	21
1013 static char *
1014 size_t2s(size_t x, char *s)
1015 {
1016 	unsigned i;
1017 
1018 	/* Make sure UMAX2S_BUFSIZE is large enough. */
1019 	/* LINTED */
1020 	assert(sizeof(size_t) <= 8);
1021 
1022 	i = UMAX2S_BUFSIZE - 1;
1023 	s[i] = '\0';
1024 	do {
1025 		i--;
1026 		s[i] = "0123456789"[(int)x % 10];
1027 		x /= (uintmax_t)10LL;
1028 	} while (x > 0);
1029 
1030 	return (&s[i]);
1031 }
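/*
 * Typical (illustrative) usage, with a caller-supplied stack buffer; the
 * returned pointer points into the buffer, so it must remain live while the
 * string is in use:
 *
 *	char buf[UMAX2S_BUFSIZE];
 *	_malloc_message("mapped: ", size_t2s(base_mapped, buf), "\n", "");
 */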
1032 
1033 /******************************************************************************/
1034 
1035 static bool
1036 base_pages_alloc(size_t minsize)
1037 {
1038 	size_t csize = 0;
1039 
1040 #ifdef USE_BRK
1041 	/*
1042 	 * Do special brk allocation here, since base allocations don't need to
1043 	 * be chunk-aligned.
1044 	 */
1045 	if (brk_prev != (void *)-1) {
1046 		void *brk_cur;
1047 		intptr_t incr;
1048 
1049 		if (minsize != 0)
1050 			csize = CHUNK_CEILING(minsize);
1051 
1052 		malloc_mutex_lock(&brk_mtx);
1053 		do {
1054 			/* Get the current end of brk. */
1055 			brk_cur = sbrk(0);
1056 
1057 			/*
1058 			 * Calculate how much padding is necessary to
1059 			 * chunk-align the end of brk.  Don't worry about
1060 			 * brk_cur not being chunk-aligned though.
1061 			 */
1062 			incr = (intptr_t)chunksize
1063 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
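			/*
			 * For example, with 1 MB chunks and brk_cur sitting at
			 * offset 0x300 within a chunk, incr starts out as
			 * 0x100000 - 0x300 == 0xffd00; if that is still less
			 * than minsize, another csize bytes are requested.
			 */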
1064 			assert(incr >= 0);
1065 			if ((size_t)incr < minsize)
1066 				incr += csize;
1067 
1068 			brk_prev = sbrk(incr);
1069 			if (brk_prev == brk_cur) {
1070 				/* Success. */
1071 				malloc_mutex_unlock(&brk_mtx);
1072 				base_pages = brk_cur;
1073 				base_next_addr = base_pages;
1074 				base_past_addr = (void *)((uintptr_t)base_pages
1075 				    + incr);
1076 #ifdef MALLOC_STATS
1077 				base_mapped += incr;
1078 #endif
1079 				return (false);
1080 			}
1081 		} while (brk_prev != (void *)-1);
1082 		malloc_mutex_unlock(&brk_mtx);
1083 	}
1084 	if (minsize == 0) {
1085 		/*
1086 		 * Failure during initialization doesn't matter, so avoid
1087 		 * falling through to the mmap-based page mapping code.
1088 		 */
1089 		return (true);
1090 	}
1091 #endif
1092 	assert(minsize != 0);
1093 	csize = PAGE_CEILING(minsize);
1094 	base_pages = pages_map(NULL, csize);
1095 	if (base_pages == NULL)
1096 		return (true);
1097 	base_next_addr = base_pages;
1098 	base_past_addr = (void *)((uintptr_t)base_pages + csize);
1099 #ifdef MALLOC_STATS
1100 	base_mapped += csize;
1101 #endif
1102 	return (false);
1103 }
1104 
1105 static void *
1106 base_alloc(size_t size)
1107 {
1108 	void *ret;
1109 	size_t csize;
1110 
1111 	/* Round size up to nearest multiple of the cacheline size. */
1112 	csize = CACHELINE_CEILING(size);
1113 
1114 	malloc_mutex_lock(&base_mtx);
1115 
1116 	/* Make sure there's enough space for the allocation. */
1117 	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1118 		if (base_pages_alloc(csize)) {
1119 			ret = NULL;
1120 			goto RETURN;
1121 		}
1122 	}
1123 
1124 	/* Allocate. */
1125 	ret = base_next_addr;
1126 	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1127 
1128 RETURN:
1129 	malloc_mutex_unlock(&base_mtx);
1130 	return (ret);
1131 }
1132 
1133 static chunk_node_t *
1134 base_chunk_node_alloc(void)
1135 {
1136 	chunk_node_t *ret;
1137 
1138 	malloc_mutex_lock(&base_mtx);
1139 	if (base_chunk_nodes != NULL) {
1140 		ret = base_chunk_nodes;
1141 		/* LINTED */
1142 		base_chunk_nodes = *(chunk_node_t **)ret;
1143 		malloc_mutex_unlock(&base_mtx);
1144 	} else {
1145 		malloc_mutex_unlock(&base_mtx);
1146 		ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1147 	}
1148 
1149 	return (ret);
1150 }
1151 
1152 static void
1153 base_chunk_node_dealloc(chunk_node_t *node)
1154 {
1155 
1156 	malloc_mutex_lock(&base_mtx);
1157 	/* LINTED */
1158 	*(chunk_node_t **)node = base_chunk_nodes;
1159 	base_chunk_nodes = node;
1160 	malloc_mutex_unlock(&base_mtx);
1161 }
1162 
1163 /******************************************************************************/
1164 
1165 #ifdef MALLOC_STATS
1166 static void
1167 stats_print(arena_t *arena)
1168 {
1169 	unsigned i;
1170 	int gap_start;
1171 
1172 	malloc_printf(
1173 	    "          allocated/mapped            nmalloc      ndalloc\n");
1174 
1175 	malloc_printf("small: %12zu %-12s %12llu %12llu\n",
1176 	    arena->stats.allocated_small, "", arena->stats.nmalloc_small,
1177 	    arena->stats.ndalloc_small);
1178 	malloc_printf("large: %12zu %-12s %12llu %12llu\n",
1179 	    arena->stats.allocated_large, "", arena->stats.nmalloc_large,
1180 	    arena->stats.ndalloc_large);
1181 	malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
1182 	    arena->stats.allocated_small + arena->stats.allocated_large,
1183 	    arena->stats.mapped,
1184 	    arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1185 	    arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1186 
1187 	malloc_printf("bins:     bin   size regs pgs  requests   newruns"
1188 	    "    reruns maxruns curruns\n");
1189 	for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1190 		if (arena->bins[i].stats.nrequests == 0) {
1191 			if (gap_start == -1)
1192 				gap_start = i;
1193 		} else {
1194 			if (gap_start != -1) {
1195 				if (i > gap_start + 1) {
1196 					/* Gap of more than one size class. */
1197 					malloc_printf("[%u..%u]\n",
1198 					    gap_start, i - 1);
1199 				} else {
1200 					/* Gap of one size class. */
1201 					malloc_printf("[%u]\n", gap_start);
1202 				}
1203 				gap_start = -1;
1204 			}
1205 			malloc_printf(
 1206 			    "%13u %1s %4zu %4u %3zu %9llu %9llu"
1207 			    " %9llu %7lu %7lu\n",
1208 			    i,
1209 			    i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1210 			    arena->bins[i].reg_size,
1211 			    arena->bins[i].nregs,
1212 			    arena->bins[i].run_size >> pagesize_2pow,
1213 			    arena->bins[i].stats.nrequests,
1214 			    arena->bins[i].stats.nruns,
1215 			    arena->bins[i].stats.reruns,
1216 			    arena->bins[i].stats.highruns,
1217 			    arena->bins[i].stats.curruns);
1218 		}
1219 	}
1220 	if (gap_start != -1) {
1221 		if (i > gap_start + 1) {
1222 			/* Gap of more than one size class. */
1223 			malloc_printf("[%u..%u]\n", gap_start, i - 1);
1224 		} else {
1225 			/* Gap of one size class. */
1226 			malloc_printf("[%u]\n", gap_start);
1227 		}
1228 	}
1229 }
1230 #endif
1231 
1232 /*
1233  * End Utility functions/macros.
1234  */
1235 /******************************************************************************/
1236 /*
1237  * Begin chunk management functions.
1238  */
1239 
1240 #ifndef lint
1241 static inline int
1242 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1243 {
1244 
1245 	assert(a != NULL);
1246 	assert(b != NULL);
1247 
1248 	if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1249 		return (-1);
1250 	else if (a->chunk == b->chunk)
1251 		return (0);
1252 	else
1253 		return (1);
1254 }
1255 
1256 /* Generate red-black tree code for chunks. */
1257 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1258 #endif
1259 
1260 static void *
1261 pages_map_align(void *addr, size_t size, int align)
1262 {
1263 	void *ret;
1264 
1265 	/*
1266 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
1267 	 * of existing mappings, and we only want to create new mappings.
1268 	 */
1269 	ret = mmap(addr, size, PROT_READ | PROT_WRITE,
1270 	    MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(align), -1, 0);
1271 	assert(ret != NULL);
1272 
1273 	if (ret == MAP_FAILED)
1274 		ret = NULL;
1275 	else if (addr != NULL && ret != addr) {
1276 		/*
1277 		 * We succeeded in mapping memory, but not in the right place.
1278 		 */
1279 		if (munmap(ret, size) == -1) {
1280 			char buf[STRERROR_BUF];
1281 
1282 			STRERROR_R(errno, buf, sizeof(buf));
1283 			_malloc_message(getprogname(),
1284 			    ": (malloc) Error in munmap(): ", buf, "\n");
1285 			if (opt_abort)
1286 				abort();
1287 		}
1288 		ret = NULL;
1289 	}
1290 
1291 	assert(ret == NULL || (addr == NULL && ret != addr)
1292 	    || (addr != NULL && ret == addr));
1293 	return (ret);
1294 }
1295 
1296 static void *
1297 pages_map(void *addr, size_t size)
1298 {
1299 
1300 	return pages_map_align(addr, size, 0);
1301 }
1302 
1303 static void
1304 pages_unmap(void *addr, size_t size)
1305 {
1306 
1307 	if (munmap(addr, size) == -1) {
1308 		char buf[STRERROR_BUF];
1309 
1310 		STRERROR_R(errno, buf, sizeof(buf));
1311 		_malloc_message(getprogname(),
1312 		    ": (malloc) Error in munmap(): ", buf, "\n");
1313 		if (opt_abort)
1314 			abort();
1315 	}
1316 }
1317 
1318 static void *
1319 chunk_alloc(size_t size)
1320 {
1321 	void *ret, *chunk;
1322 	chunk_node_t *tchunk, *delchunk;
1323 
1324 	assert(size != 0);
1325 	assert((size & chunksize_mask) == 0);
1326 
1327 	malloc_mutex_lock(&chunks_mtx);
1328 
1329 	if (size == chunksize) {
1330 		/*
1331 		 * Check for address ranges that were previously chunks and try
1332 		 * to use them.
1333 		 */
1334 
1335 		/* LINTED */
1336 		tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1337 		while (tchunk != NULL) {
1338 			/* Found an address range.  Try to recycle it. */
1339 
1340 			chunk = tchunk->chunk;
1341 			delchunk = tchunk;
1342 			/* LINTED */
1343 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1344 
1345 			/* Remove delchunk from the tree. */
1346 			/* LINTED */
1347 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1348 			base_chunk_node_dealloc(delchunk);
1349 
1350 #ifdef USE_BRK
1351 			if ((uintptr_t)chunk >= (uintptr_t)brk_base
1352 			    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1353 				/* Re-use a previously freed brk chunk. */
1354 				ret = chunk;
1355 				goto RETURN;
1356 			}
1357 #endif
1358 			if ((ret = pages_map(chunk, size)) != NULL) {
1359 				/* Success. */
1360 				goto RETURN;
1361 			}
1362 		}
1363 	}
1364 
1365 	/*
 1366 	 * Ask for a chunk-aligned mapping, but allow the OS to place the
 1367 	 * allocation anywhere.  Beware of size_t wrap-around.
1368 	 */
1369 	if (size + chunksize > size) {
1370 		if ((ret = pages_map_align(NULL, size, chunksize_2pow))
1371 		    != NULL) {
1372 			goto RETURN;
1373 		}
1374 	}
1375 
1376 #ifdef USE_BRK
1377 	/*
1378 	 * Try to create allocations in brk, in order to make full use of
1379 	 * limited address space.
1380 	 */
1381 	if (brk_prev != (void *)-1) {
1382 		void *brk_cur;
1383 		intptr_t incr;
1384 
1385 		/*
1386 		 * The loop is necessary to recover from races with other
1387 		 * threads that are using brk for something other than malloc.
1388 		 */
1389 		malloc_mutex_lock(&brk_mtx);
1390 		do {
1391 			/* Get the current end of brk. */
1392 			brk_cur = sbrk(0);
1393 
1394 			/*
1395 			 * Calculate how much padding is necessary to
1396 			 * chunk-align the end of brk.
1397 			 */
1398 			incr = (intptr_t)size
1399 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1400 			if (incr == (intptr_t)size) {
1401 				ret = brk_cur;
1402 			} else {
1403 				ret = (void *)((intptr_t)brk_cur + incr);
1404 				incr += size;
1405 			}
1406 
1407 			brk_prev = sbrk(incr);
1408 			if (brk_prev == brk_cur) {
1409 				/* Success. */
1410 				malloc_mutex_unlock(&brk_mtx);
1411 				brk_max = (void *)((intptr_t)ret + size);
1412 				goto RETURN;
1413 			}
1414 		} while (brk_prev != (void *)-1);
1415 		malloc_mutex_unlock(&brk_mtx);
1416 	}
1417 #endif
1418 
1419 	/* All strategies for allocation failed. */
1420 	ret = NULL;
1421 RETURN:
1422 	if (ret != NULL) {
1423 		chunk_node_t key;
1424 		/*
1425 		 * Clean out any entries in old_chunks that overlap with the
1426 		 * memory we just allocated.
1427 		 */
1428 		key.chunk = ret;
1429 		/* LINTED */
1430 		tchunk = RB_NFIND(chunk_tree_s, &old_chunks, &key);
1431 		while (tchunk != NULL
1432 		    && (uintptr_t)tchunk->chunk >= (uintptr_t)ret
1433 		    && (uintptr_t)tchunk->chunk < (uintptr_t)ret + size) {
1434 			delchunk = tchunk;
1435 			/* LINTED */
1436 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1437 			/* LINTED */
1438 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1439 			base_chunk_node_dealloc(delchunk);
1440 		}
1441 
1442 	}
1443 #ifdef MALLOC_STATS
1444 	if (ret != NULL) {
1445 		stats_chunks.nchunks += (size / chunksize);
1446 		stats_chunks.curchunks += (size / chunksize);
1447 	}
1448 	if (stats_chunks.curchunks > stats_chunks.highchunks)
1449 		stats_chunks.highchunks = stats_chunks.curchunks;
1450 #endif
1451 	malloc_mutex_unlock(&chunks_mtx);
1452 
1453 	assert(CHUNK_ADDR2BASE(ret) == ret);
1454 	return (ret);
1455 }
1456 
1457 static void
1458 chunk_dealloc(void *chunk, size_t size)
1459 {
1460 	chunk_node_t *node;
1461 
1462 	assert(chunk != NULL);
1463 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
1464 	assert(size != 0);
1465 	assert((size & chunksize_mask) == 0);
1466 
1467 	malloc_mutex_lock(&chunks_mtx);
1468 
1469 #ifdef USE_BRK
1470 	if ((uintptr_t)chunk >= (uintptr_t)brk_base
1471 	    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1472 		void *brk_cur;
1473 
1474 		malloc_mutex_lock(&brk_mtx);
1475 		/* Get the current end of brk. */
1476 		brk_cur = sbrk(0);
1477 
1478 		/*
1479 		 * Try to shrink the data segment if this chunk is at the end
1480 		 * of the data segment.  The sbrk() call here is subject to a
1481 		 * race condition with threads that use brk(2) or sbrk(2)
1482 		 * directly, but the alternative would be to leak memory for
1483 		 * the sake of poorly designed multi-threaded programs.
1484 		 */
1485 		if (brk_cur == brk_max
1486 		    && (void *)((uintptr_t)chunk + size) == brk_max
1487 		    && sbrk(-(intptr_t)size) == brk_max) {
1488 			malloc_mutex_unlock(&brk_mtx);
1489 			if (brk_prev == brk_max) {
1490 				/* Success. */
1491 				brk_prev = (void *)((intptr_t)brk_max
1492 				    - (intptr_t)size);
1493 				brk_max = brk_prev;
1494 			}
1495 		} else {
1496 			size_t offset;
1497 
1498 			malloc_mutex_unlock(&brk_mtx);
1499 			madvise(chunk, size, MADV_FREE);
1500 
1501 			/*
1502 			 * Iteratively create records of each chunk-sized
 1503 			 * memory region that 'chunk' is composed of, so that
1504 			 * the address range can be recycled if memory usage
1505 			 * increases later on.
1506 			 */
1507 			for (offset = 0; offset < size; offset += chunksize) {
1508 				node = base_chunk_node_alloc();
1509 				if (node == NULL)
1510 					break;
1511 
1512 				node->chunk = (void *)((uintptr_t)chunk
1513 				    + (uintptr_t)offset);
1514 				node->size = chunksize;
1515 				/* LINTED */
1516 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1517 			}
1518 		}
1519 	} else {
1520 #endif
1521 		pages_unmap(chunk, size);
1522 
1523 		/*
1524 		 * Make a record of the chunk's address, so that the address
1525 		 * range can be recycled if memory usage increases later on.
1526 		 * Don't bother to create entries if (size > chunksize), since
1527 		 * doing so could cause scalability issues for truly gargantuan
1528 		 * objects (many gigabytes or larger).
1529 		 */
1530 		if (size == chunksize) {
1531 			node = base_chunk_node_alloc();
1532 			if (node != NULL) {
1533 				node->chunk = (void *)(uintptr_t)chunk;
1534 				node->size = chunksize;
1535 				/* LINTED */
1536 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1537 			}
1538 		}
1539 #ifdef USE_BRK
1540 	}
1541 #endif
1542 
1543 #ifdef MALLOC_STATS
1544 	stats_chunks.curchunks -= (size / chunksize);
1545 #endif
1546 	malloc_mutex_unlock(&chunks_mtx);
1547 }
1548 
1549 /*
1550  * End chunk management functions.
1551  */
1552 /******************************************************************************/
1553 /*
1554  * Begin arena.
1555  */
1556 
1557 /*
1558  * Choose an arena based on a per-thread and (optimistically) per-CPU value.
1559  *
1560  * We maintain at least one block of arenas.  Usually there are more.
1561  * The blocks are $ncpu arenas in size.  Whole blocks are 'hashed'
1562  * amongst threads.  To accomplish this, next_arena advances only in
1563  * ncpu steps.
1564  */
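/*
 * For example, with ncpus == 2 and narenas == 8, successive calls hand out
 * the blocks {0,1}, {2,3}, {4,5}, {6,7}, {0,1}, ..., and each thread then
 * indexes into its block with the CPU it is currently running on.
 */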
1565 static __noinline arena_t *
1566 choose_arena_hard(void)
1567 {
1568 	unsigned i, curcpu;
1569 	arena_t **map;
1570 
1571 	/* Initialize the current block of arenas and advance to next. */
1572 	malloc_mutex_lock(&arenas_mtx);
1573 	assert(next_arena % ncpus == 0);
1574 	assert(narenas % ncpus == 0);
1575 	map = &arenas[next_arena];
1576 	set_arenas_map(map);
1577 	for (i = 0; i < ncpus; i++) {
1578 		if (arenas[next_arena] == NULL)
1579 			arenas_extend(next_arena);
1580 		next_arena = (next_arena + 1) % narenas;
1581 	}
1582 	malloc_mutex_unlock(&arenas_mtx);
1583 
1584 	/*
1585 	 * If we were unable to allocate an arena above, then default to
1586 	 * the first arena, which is always present.
1587 	 */
1588 	curcpu = thr_curcpu();
1589 	if (map[curcpu] != NULL)
1590 		return map[curcpu];
1591 	return arenas[0];
1592 }
1593 
1594 static inline arena_t *
1595 choose_arena(void)
1596 {
1597 	unsigned curcpu;
1598 	arena_t **map;
1599 
1600 	map = get_arenas_map();
1601 	curcpu = thr_curcpu();
1602 	if (__predict_true(map != NULL && map[curcpu] != NULL))
1603 		return map[curcpu];
1604 
 1605 	return choose_arena_hard();
1606 }
1607 
1608 #ifndef lint
1609 static inline int
1610 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1611 {
1612 
1613 	assert(a != NULL);
1614 	assert(b != NULL);
1615 
1616 	if ((uintptr_t)a < (uintptr_t)b)
1617 		return (-1);
1618 	else if (a == b)
1619 		return (0);
1620 	else
1621 		return (1);
1622 }
1623 
1624 /* Generate red-black tree code for arena chunks. */
1625 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1626 #endif
1627 
1628 #ifndef lint
1629 static inline int
1630 arena_run_comp(arena_run_t *a, arena_run_t *b)
1631 {
1632 
1633 	assert(a != NULL);
1634 	assert(b != NULL);
1635 
1636 	if ((uintptr_t)a < (uintptr_t)b)
1637 		return (-1);
1638 	else if (a == b)
1639 		return (0);
1640 	else
1641 		return (1);
1642 }
1643 
1644 /* Generate red-black tree code for arena runs. */
1645 RB_GENERATE_STATIC(arena_run_tree_s, arena_run_s, link, arena_run_comp);
1646 #endif
1647 
1648 static inline void *
1649 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1650 {
1651 	void *ret;
1652 	unsigned i, mask, bit, regind;
1653 
1654 	assert(run->magic == ARENA_RUN_MAGIC);
1655 	assert(run->regs_minelm < bin->regs_mask_nelms);
1656 
1657 	/*
1658 	 * Move the first check outside the loop, so that run->regs_minelm can
1659 	 * be updated unconditionally, without the possibility of updating it
1660 	 * multiple times.
1661 	 */
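	/*
	 * Each regs_mask element covers (1 << (SIZEOF_INT_2POW + 3)) regions,
	 * i.e. 32 with the usual 32-bit int; for example, element i == 1 with
	 * lowest set bit 3 corresponds to free region index 35.
	 */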
1662 	i = run->regs_minelm;
1663 	mask = run->regs_mask[i];
1664 	if (mask != 0) {
1665 		/* Usable allocation found. */
1666 		bit = ffs((int)mask) - 1;
1667 
1668 		regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1669 		ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1670 		    + (bin->reg_size * regind));
1671 
1672 		/* Clear bit. */
1673 		mask ^= (1 << bit);
1674 		run->regs_mask[i] = mask;
1675 
1676 		return (ret);
1677 	}
1678 
1679 	for (i++; i < bin->regs_mask_nelms; i++) {
1680 		mask = run->regs_mask[i];
1681 		if (mask != 0) {
1682 			/* Usable allocation found. */
1683 			bit = ffs((int)mask) - 1;
1684 
1685 			regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1686 			ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1687 			    + (bin->reg_size * regind));
1688 
1689 			/* Clear bit. */
1690 			mask ^= (1 << bit);
1691 			run->regs_mask[i] = mask;
1692 
1693 			/*
1694 			 * Make a note that nothing before this element
1695 			 * contains a free region.
1696 			 */
1697 			run->regs_minelm = i; /* Low payoff: + (mask == 0); */
1698 
1699 			return (ret);
1700 		}
1701 	}
1702 	/* Not reached. */
1703 	/* LINTED */
1704 	assert(0);
1705 	return (NULL);
1706 }
1707 
1708 static inline void
1709 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1710 {
1711 	/*
1712 	 * To divide by a number D that is not a power of two we multiply
1713 	 * by (2^21 / D) and then right shift by 21 positions.
1714 	 *
1715 	 *   X / D
1716 	 *
1717 	 * becomes
1718 	 *
1719 	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
1720 	 */
1721 #define SIZE_INV_SHIFT 21
1722 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
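	/*
	 * For example, assuming QUANTUM_2POW_MIN == 4 and a 48 byte size
	 * class: SIZE_INV(3) == (2^21 / 48) + 1 == 43691, so a region offset
	 * (diff) of 96 bytes yields (96 * 43691) >> 21 == 2, matching 96 / 48.
	 */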
1723 	static const unsigned size_invs[] = {
1724 	    SIZE_INV(3),
1725 	    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1726 	    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1727 	    SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1728 	    SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1729 	    SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1730 	    SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1731 	    SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1732 #if (QUANTUM_2POW_MIN < 4)
1733 	    ,
1734 	    SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1735 	    SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1736 	    SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1737 	    SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1738 	    SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1739 	    SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1740 	    SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1741 	    SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1742 #endif
1743 	};
1744 	unsigned diff, regind, elm, bit;
1745 
1746 	/* LINTED */
1747 	assert(run->magic == ARENA_RUN_MAGIC);
1748 	assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1749 	    >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1750 
1751 	/*
1752 	 * Avoid doing division with a variable divisor if possible.  Using
1753 	 * actual division here can reduce allocator throughput by over 20%!
1754 	 */
1755 	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1756 	if ((size & (size - 1)) == 0) {
1757 		/*
1758 		 * log2_table allows fast division of a power of two in the
1759 		 * [1..128] range.
1760 		 *
1761 		 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1762 		 */
1763 		static const unsigned char log2_table[] = {
1764 		    0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1765 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1766 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1767 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1768 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1769 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1770 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1771 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1772 		};
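		/*
		 * For example, size == 64 gives log2_table[63] == 6, so
		 * regind == diff >> 6; size == 256 falls into the second
		 * range and shifts by 8 + log2_table[0] == 8.
		 */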
1773 
1774 		if (size <= 128)
1775 			regind = (diff >> log2_table[size - 1]);
1776 		else if (size <= 32768)
1777 			regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1778 		else {
1779 			/*
1780 			 * The page size is too large for us to use the lookup
1781 			 * table.  Use real division.
1782 			 */
1783 			regind = (unsigned)(diff / size);
1784 		}
1785 	} else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
1786 	    << QUANTUM_2POW_MIN) + 2) {
1787 		regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1788 		regind >>= SIZE_INV_SHIFT;
1789 	} else {
1790 		/*
1791 		 * size_invs isn't large enough to handle this size class, so
1792 		 * calculate regind using actual division.  This only happens
1793 		 * if the user increases small_max via the 'S' runtime
1794 		 * configuration option.
1795 		 */
1796 		regind = (unsigned)(diff / size);
 1797 	}
1798 	assert(diff == regind * size);
1799 	assert(regind < bin->nregs);
1800 
1801 	elm = regind >> (SIZEOF_INT_2POW + 3);
1802 	if (elm < run->regs_minelm)
1803 		run->regs_minelm = elm;
1804 	bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1805 	assert((run->regs_mask[elm] & (1 << bit)) == 0);
1806 	run->regs_mask[elm] |= (1 << bit);
1807 #undef SIZE_INV
1808 #undef SIZE_INV_SHIFT
1809 }
1810 
1811 static void
1812 arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
1813 {
1814 	arena_chunk_t *chunk;
1815 	unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
1816 	unsigned i;
1817 
1818 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1819 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1820 	    >> pagesize_2pow);
1821 	total_pages = chunk->map[run_ind].npages;
1822 	need_pages = (unsigned)(size >> pagesize_2pow);
1823 	assert(need_pages <= total_pages);
1824 	rem_pages = total_pages - need_pages;
1825 
1826 	/* Split enough pages from the front of run to fit allocation size. */
1827 	map_offset = run_ind;
1828 	for (i = 0; i < need_pages; i++) {
1829 		chunk->map[map_offset + i].npages = need_pages;
1830 		chunk->map[map_offset + i].pos = i;
1831 	}
1832 
1833 	/* Keep track of trailing unused pages for later use. */
1834 	if (rem_pages > 0) {
1835 		/* Update map for trailing pages. */
1836 		map_offset += need_pages;
1837 		chunk->map[map_offset].npages = rem_pages;
1838 		chunk->map[map_offset].pos = POS_FREE;
1839 		chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
1840 		chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
1841 	}
1842 
1843 	chunk->pages_used += need_pages;
1844 }
1845 
1846 static arena_chunk_t *
1847 arena_chunk_alloc(arena_t *arena)
1848 {
1849 	arena_chunk_t *chunk;
1850 
1851 	if (arena->spare != NULL) {
1852 		chunk = arena->spare;
1853 		arena->spare = NULL;
1854 
1855 		/* LINTED */
1856 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1857 	} else {
1858 		chunk = (arena_chunk_t *)chunk_alloc(chunksize);
1859 		if (chunk == NULL)
1860 			return (NULL);
1861 #ifdef MALLOC_STATS
1862 		arena->stats.mapped += chunksize;
1863 #endif
1864 
1865 		chunk->arena = arena;
1866 
1867 		/* LINTED */
1868 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1869 
1870 		/*
1871 		 * Claim that no pages are in use, since the header is merely
1872 		 * overhead.
1873 		 */
1874 		chunk->pages_used = 0;
1875 
1876 		chunk->max_frun_npages = chunk_npages -
1877 		    arena_chunk_header_npages;
1878 		chunk->min_frun_ind = arena_chunk_header_npages;
1879 
1880 		/*
1881 		 * Initialize enough of the map to support one maximal free run.
1882 		 */
1883 		chunk->map[arena_chunk_header_npages].npages = chunk_npages -
1884 		    arena_chunk_header_npages;
1885 		chunk->map[arena_chunk_header_npages].pos = POS_FREE;
1886 		chunk->map[chunk_npages - 1].npages = chunk_npages -
1887 		    arena_chunk_header_npages;
1888 		chunk->map[chunk_npages - 1].pos = POS_FREE;
1889 	}
1890 
1891 	return (chunk);
1892 }
1893 
1894 static void
1895 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
1896 {
1897 
1898 	/*
1899 	 * Remove chunk from the chunk tree, regardless of whether this chunk
1900 	 * will be cached, so that the arena does not use it.
1901 	 */
1902 	/* LINTED */
1903 	RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1904 
1905 	if (opt_hint == false) {
1906 		if (arena->spare != NULL) {
1907 			chunk_dealloc((void *)arena->spare, chunksize);
1908 #ifdef MALLOC_STATS
1909 			arena->stats.mapped -= chunksize;
1910 #endif
1911 		}
1912 		arena->spare = chunk;
1913 	} else {
1914 		assert(arena->spare == NULL);
1915 		chunk_dealloc((void *)chunk, chunksize);
1916 #ifdef MALLOC_STATS
1917 		arena->stats.mapped -= chunksize;
1918 #endif
1919 	}
1920 }
1921 
1922 static arena_run_t *
1923 arena_run_alloc(arena_t *arena, size_t size)
1924 {
1925 	arena_chunk_t *chunk;
1926 	arena_run_t *run;
1927 	unsigned need_npages, limit_pages, compl_need_npages;
1928 
1929 	assert(size <= (chunksize - (arena_chunk_header_npages <<
1930 	    pagesize_2pow)));
1931 	assert((size & pagesize_mask) == 0);
1932 
1933 	/*
1934 	 * Search through the arena's chunks in address order for a free run
1935 	 * that is large enough.  Look for the first fit.
1936 	 */
1937 	need_npages = (unsigned)(size >> pagesize_2pow);
1938 	limit_pages = chunk_npages - arena_chunk_header_npages;
1939 	compl_need_npages = limit_pages - need_npages;
1940 	/* LINTED */
1941 	RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
1942 		/*
1943 		 * Avoid searching this chunk if there are not enough
1944 		 * contiguous free pages for there to possibly be a large
1945 		 * enough free run.
1946 		 */
1947 		if (chunk->pages_used <= compl_need_npages &&
1948 		    need_npages <= chunk->max_frun_npages) {
1949 			arena_chunk_map_t *mapelm;
1950 			unsigned i;
1951 			unsigned max_frun_npages = 0;
1952 			unsigned min_frun_ind = chunk_npages;
1953 
1954 			assert(chunk->min_frun_ind >=
1955 			    arena_chunk_header_npages);
1956 			for (i = chunk->min_frun_ind; i < chunk_npages;) {
1957 				mapelm = &chunk->map[i];
1958 				if (mapelm->pos == POS_FREE) {
1959 					if (mapelm->npages >= need_npages) {
1960 						run = (arena_run_t *)
1961 						    ((uintptr_t)chunk + (i <<
1962 						    pagesize_2pow));
1963 						/* Update page map. */
1964 						arena_run_split(arena, run,
1965 						    size);
1966 						return (run);
1967 					}
1968 					if (mapelm->npages >
1969 					    max_frun_npages) {
1970 						max_frun_npages =
1971 						    mapelm->npages;
1972 					}
1973 					if (i < min_frun_ind) {
1974 						min_frun_ind = i;
1975 						if (i < chunk->min_frun_ind)
1976 							chunk->min_frun_ind = i;
1977 					}
1978 				}
1979 				i += mapelm->npages;
1980 			}
1981 			/*
1982 			 * Search failure.  Reset cached chunk->max_frun_npages.
1983 			 * chunk->min_frun_ind was already reset above (if
1984 			 * necessary).
1985 			 */
1986 			chunk->max_frun_npages = max_frun_npages;
1987 		}
1988 	}
1989 
1990 	/*
1991 	 * No usable runs.  Create a new chunk from which to allocate the run.
1992 	 */
1993 	chunk = arena_chunk_alloc(arena);
1994 	if (chunk == NULL)
1995 		return (NULL);
1996 	run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
1997 	    pagesize_2pow));
1998 	/* Update page map. */
1999 	arena_run_split(arena, run, size);
2000 	return (run);
2001 }
2002 
2003 static void
2004 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
2005 {
2006 	arena_chunk_t *chunk;
2007 	unsigned run_ind, run_pages;
2008 
2009 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2010 
2011 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
2012 	    >> pagesize_2pow);
2013 	assert(run_ind >= arena_chunk_header_npages);
2014 	assert(run_ind < (chunksize >> pagesize_2pow));
2015 	run_pages = (unsigned)(size >> pagesize_2pow);
2016 	assert(run_pages == chunk->map[run_ind].npages);
2017 
2018 	/* Subtract pages from count of pages used in chunk. */
2019 	chunk->pages_used -= run_pages;
2020 
2021 	/* Mark run as deallocated. */
2022 	assert(chunk->map[run_ind].npages == run_pages);
2023 	chunk->map[run_ind].pos = POS_FREE;
2024 	assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
2025 	chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
2026 
2027 	/*
2028 	 * Tell the kernel that we don't need the data in this run, but only if
2029 	 * requested via runtime configuration.
2030 	 */
2031 	if (opt_hint)
2032 		madvise(run, size, MADV_FREE);
2033 
2034 	/* Try to coalesce with neighboring runs. */
2035 	if (run_ind > arena_chunk_header_npages &&
2036 	    chunk->map[run_ind - 1].pos == POS_FREE) {
2037 		unsigned prev_npages;
2038 
2039 		/* Coalesce with previous run. */
2040 		prev_npages = chunk->map[run_ind - 1].npages;
2041 		run_ind -= prev_npages;
2042 		assert(chunk->map[run_ind].npages == prev_npages);
2043 		assert(chunk->map[run_ind].pos == POS_FREE);
2044 		run_pages += prev_npages;
2045 
2046 		chunk->map[run_ind].npages = run_pages;
2047 		assert(chunk->map[run_ind].pos == POS_FREE);
2048 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2049 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2050 	}
2051 
2052 	if (run_ind + run_pages < chunk_npages &&
2053 	    chunk->map[run_ind + run_pages].pos == POS_FREE) {
2054 		unsigned next_npages;
2055 
2056 		/* Coalesce with next run. */
2057 		next_npages = chunk->map[run_ind + run_pages].npages;
2058 		run_pages += next_npages;
2059 		assert(chunk->map[run_ind + run_pages - 1].npages ==
2060 		    next_npages);
2061 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2062 
2063 		chunk->map[run_ind].npages = run_pages;
2064 		chunk->map[run_ind].pos = POS_FREE;
2065 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2066 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2067 	}
2068 
2069 	if (chunk->map[run_ind].npages > chunk->max_frun_npages)
2070 		chunk->max_frun_npages = chunk->map[run_ind].npages;
2071 	if (run_ind < chunk->min_frun_ind)
2072 		chunk->min_frun_ind = run_ind;
2073 
2074 	/* Deallocate chunk if it is now completely unused. */
2075 	if (chunk->pages_used == 0)
2076 		arena_chunk_dealloc(arena, chunk);
2077 }
2078 
2079 static arena_run_t *
2080 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2081 {
2082 	arena_run_t *run;
2083 	unsigned i, remainder;
2084 
2085 	/* Look for a usable run. */
2086 	/* LINTED */
2087 	if ((run = RB_MIN(arena_run_tree_s, &bin->runs)) != NULL) {
2088 		/* run is guaranteed to have available space. */
2089 		/* LINTED */
2090 		RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2091 #ifdef MALLOC_STATS
2092 		bin->stats.reruns++;
2093 #endif
2094 		return (run);
2095 	}
2096 	/* No existing runs have any space available. */
2097 
2098 	/* Allocate a new run. */
2099 	run = arena_run_alloc(arena, bin->run_size);
2100 	if (run == NULL)
2101 		return (NULL);
2102 
2103 	/* Initialize run internals. */
2104 	run->bin = bin;
2105 
2106 	for (i = 0; i < bin->regs_mask_nelms; i++)
2107 		run->regs_mask[i] = UINT_MAX;
2108 	remainder = bin->nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1);
2109 	if (remainder != 0) {
2110 		/* The last element has spare bits that need to be unset. */
2111 		run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2112 		    - remainder));
2113 	}
2114 
2115 	run->regs_minelm = 0;
2116 
2117 	run->nfree = bin->nregs;
2118 #ifdef MALLOC_DEBUG
2119 	run->magic = ARENA_RUN_MAGIC;
2120 #endif
2121 
2122 #ifdef MALLOC_STATS
2123 	bin->stats.nruns++;
2124 	bin->stats.curruns++;
2125 	if (bin->stats.curruns > bin->stats.highruns)
2126 		bin->stats.highruns = bin->stats.curruns;
2127 #endif
2128 	return (run);
2129 }
2130 
2131 /* bin->runcur must have space available before this function is called. */
2132 static inline void *
2133 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2134 {
2135 	void *ret;
2136 
2137 	assert(run->magic == ARENA_RUN_MAGIC);
2138 	assert(run->nfree > 0);
2139 
2140 	ret = arena_run_reg_alloc(run, bin);
2141 	assert(ret != NULL);
2142 	run->nfree--;
2143 
2144 	return (ret);
2145 }
2146 
2147 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2148 static void *
2149 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2150 {
2151 
2152 	bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2153 	if (bin->runcur == NULL)
2154 		return (NULL);
2155 	assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2156 	assert(bin->runcur->nfree > 0);
2157 
2158 	return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2159 }
2160 
2161 /*
2162  * Calculate bin->run_size such that it meets the following constraints:
2163  *
2164  *   *) bin->run_size >= min_run_size
2165  *   *) bin->run_size <= arena_maxclass
2166  *   *) bin->run_size <= RUN_MAX_SMALL
2167  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2168  *
2169  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2170  * also calculated here, since these settings are all interdependent.
2171  */
2172 static size_t
2173 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
2174 {
2175 	size_t try_run_size, good_run_size;
2176 	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
2177 	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
2178 
2179 	assert(min_run_size >= pagesize);
2180 	assert(min_run_size <= arena_maxclass);
2181 	assert(min_run_size <= RUN_MAX_SMALL);
2182 
2183 	/*
2184 	 * Calculate known-valid settings before entering the run_size
2185 	 * expansion loop, so that the first part of the loop always copies
2186 	 * valid settings.
2187 	 *
2188 	 * The do..while loop iteratively reduces the number of regions until
2189 	 * the run header and the regions no longer overlap.  A closed formula
2190 	 * would be quite messy, since there is an interdependency between the
2191 	 * header's mask length and the number of regions.
2192 	 */
2193 	try_run_size = min_run_size;
2194 	try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2195 	    bin->reg_size) + 1); /* Counteract try_nregs-- in loop. */
2196 	do {
2197 		try_nregs--;
2198 		try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2199 		    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
2200 		try_reg0_offset = (unsigned)(try_run_size -
2201 		    (try_nregs * bin->reg_size));
2202 	} while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
2203 	    > try_reg0_offset);
2204 
2205 	/* run_size expansion loop. */
2206 	do {
2207 		/*
2208 		 * Copy valid settings before trying more aggressive settings.
2209 		 */
2210 		good_run_size = try_run_size;
2211 		good_nregs = try_nregs;
2212 		good_mask_nelms = try_mask_nelms;
2213 		good_reg0_offset = try_reg0_offset;
2214 
2215 		/* Try more aggressive settings. */
2216 		try_run_size += pagesize;
2217 		try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2218 		    bin->reg_size) + 1); /* Counteract try_nregs-- in loop. */
2219 		do {
2220 			try_nregs--;
2221 			try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2222 			    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ?
2223 			    1 : 0);
2224 			try_reg0_offset = (unsigned)(try_run_size - (try_nregs *
2225 			    bin->reg_size));
2226 		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
2227 		    (try_mask_nelms - 1)) > try_reg0_offset);
2228 	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
2229 	    && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
2230 	    && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
2231 
2232 	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
2233 	    <= good_reg0_offset);
2234 	assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
2235 
2236 	/* Copy final settings. */
2237 	bin->run_size = good_run_size;
2238 	bin->nregs = good_nregs;
2239 	bin->regs_mask_nelms = good_mask_nelms;
2240 	bin->reg0_offset = good_reg0_offset;
2241 
2242 	return (good_run_size);
2243 }
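
/*
 * Illustrative sketch (not part of the original source): the inner fitting
 * loop above, extracted with hypothetical numbers -- a 4096 byte run,
 * 16 byte regions, 32-bit mask words and a 40 byte fixed header -- to show
 * how the region count shrinks until the header no longer overlaps region 0.
 */
static unsigned
example_run_fit(void)
{
	unsigned run_size = 4096, reg_size = 16, header_fixed = 40;
	unsigned nregs, mask_nelms, reg0_offset;

	nregs = ((run_size - header_fixed) / reg_size) + 1;
	do {
		nregs--;
		mask_nelms = (nregs >> 5) + ((nregs & 31) ? 1 : 0);
		reg0_offset = run_size - (nregs * reg_size);
	} while (header_fixed + (sizeof(unsigned) * (mask_nelms - 1))
	    > reg0_offset);

	/* With these inputs: nregs == 251, reg0_offset == 80. */
	return (nregs);
}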
2244 
2245 static void *
2246 arena_malloc(arena_t *arena, size_t size)
2247 {
2248 	void *ret;
2249 
2250 	assert(arena != NULL);
2251 	assert(arena->magic == ARENA_MAGIC);
2252 	assert(size != 0);
2253 	assert(QUANTUM_CEILING(size) <= arena_maxclass);
2254 
2255 	if (size <= bin_maxclass) {
2256 		arena_bin_t *bin;
2257 		arena_run_t *run;
2258 
2259 		/* Small allocation. */
2260 
2261 		if (size < small_min) {
2262 			/* Tiny. */
2263 			size = pow2_ceil(size);
2264 			bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
2265 			    1)))];
2266 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2267 			/*
2268 			 * Bin calculation is always correct, but we may need
2269 			 * to fix size for the purposes of assertions and/or
2270 			 * stats accuracy.
2271 			 */
2272 			if (size < (1 << TINY_MIN_2POW))
2273 				size = (1 << TINY_MIN_2POW);
2274 #endif
2275 		} else if (size <= small_max) {
2276 			/* Quantum-spaced. */
2277 			size = QUANTUM_CEILING(size);
2278 			bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2279 			    - 1];
2280 		} else {
2281 			/* Sub-page. */
2282 			size = pow2_ceil(size);
2283 			bin = &arena->bins[ntbins + nqbins
2284 			    + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
2285 		}
2286 		assert(size == bin->reg_size);
2287 
2288 		malloc_mutex_lock(&arena->mtx);
2289 		if ((run = bin->runcur) != NULL && run->nfree > 0)
2290 			ret = arena_bin_malloc_easy(arena, bin, run);
2291 		else
2292 			ret = arena_bin_malloc_hard(arena, bin);
2293 
2294 		if (ret == NULL) {
2295 			malloc_mutex_unlock(&arena->mtx);
2296 			return (NULL);
2297 		}
2298 
2299 #ifdef MALLOC_STATS
2300 		bin->stats.nrequests++;
2301 		arena->stats.nmalloc_small++;
2302 		arena->stats.allocated_small += size;
2303 #endif
2304 	} else {
2305 		/* Large allocation. */
2306 		size = PAGE_CEILING(size);
2307 		malloc_mutex_lock(&arena->mtx);
2308 		ret = (void *)arena_run_alloc(arena, size);
2309 		if (ret == NULL) {
2310 			malloc_mutex_unlock(&arena->mtx);
2311 			return (NULL);
2312 		}
2313 #ifdef MALLOC_STATS
2314 		arena->stats.nmalloc_large++;
2315 		arena->stats.allocated_large += size;
2316 #endif
2317 	}
2318 
2319 	malloc_mutex_unlock(&arena->mtx);
2320 
2321 	if (opt_junk)
2322 		memset(ret, 0xa5, size);
2323 	else if (opt_zero)
2324 		memset(ret, 0, size);
2325 	return (ret);
2326 }
2327 
2328 static inline void
2329 arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
2330     unsigned npages)
2331 {
2332 	unsigned i;
2333 
2334 	assert(npages > 0);
2335 
2336 	/*
2337 	 * Modify the map such that arena_run_dalloc() sees the run as
2338 	 * separately allocated.
2339 	 */
2340 	for (i = 0; i < npages; i++) {
2341 		chunk->map[pageind + i].npages = npages;
2342 		chunk->map[pageind + i].pos = i;
2343 	}
2344 	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
2345 	    pagesize_2pow)), npages << pagesize_2pow);
2346 }
2347 
2348 /* Only handles large allocations that require more than page alignment. */
2349 static void *
2350 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
2351 {
2352 	void *ret;
2353 	size_t offset;
2354 	arena_chunk_t *chunk;
2355 	unsigned pageind, i, npages;
2356 
2357 	assert((size & pagesize_mask) == 0);
2358 	assert((alignment & pagesize_mask) == 0);
2359 
2360 	npages = (unsigned)(size >> pagesize_2pow);
2361 
2362 	malloc_mutex_lock(&arena->mtx);
2363 	ret = (void *)arena_run_alloc(arena, alloc_size);
2364 	if (ret == NULL) {
2365 		malloc_mutex_unlock(&arena->mtx);
2366 		return (NULL);
2367 	}
2368 
2369 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
2370 
2371 	offset = (uintptr_t)ret & (alignment - 1);
2372 	assert((offset & pagesize_mask) == 0);
2373 	assert(offset < alloc_size);
2374 	if (offset == 0) {
2375 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2376 		    pagesize_2pow);
2377 
2378 		/* Update the map for the run to be kept. */
2379 		for (i = 0; i < npages; i++) {
2380 			chunk->map[pageind + i].npages = npages;
2381 			assert(chunk->map[pageind + i].pos == i);
2382 		}
2383 
2384 		/* Trim trailing space. */
2385 		arena_palloc_trim(arena, chunk, pageind + npages,
2386 		    (unsigned)((alloc_size - size) >> pagesize_2pow));
2387 	} else {
2388 		size_t leadsize, trailsize;
2389 
2390 		leadsize = alignment - offset;
2391 		ret = (void *)((uintptr_t)ret + leadsize);
2392 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2393 		    pagesize_2pow);
2394 
2395 		/* Update the map for the run to be kept. */
2396 		for (i = 0; i < npages; i++) {
2397 			chunk->map[pageind + i].npages = npages;
2398 			chunk->map[pageind + i].pos = i;
2399 		}
2400 
2401 		/* Trim leading space. */
2402 		arena_palloc_trim(arena, chunk,
2403 		    (unsigned)(pageind - (leadsize >> pagesize_2pow)),
2404 		    (unsigned)(leadsize >> pagesize_2pow));
2405 
2406 		trailsize = alloc_size - leadsize - size;
2407 		if (trailsize != 0) {
2408 			/* Trim trailing space. */
2409 			assert(trailsize < alloc_size);
2410 			arena_palloc_trim(arena, chunk, pageind + npages,
2411 			    (unsigned)(trailsize >> pagesize_2pow));
2412 		}
2413 	}
2414 
2415 #ifdef MALLOC_STATS
2416 	arena->stats.nmalloc_large++;
2417 	arena->stats.allocated_large += size;
2418 #endif
2419 	malloc_mutex_unlock(&arena->mtx);
2420 
2421 	if (opt_junk)
2422 		memset(ret, 0xa5, size);
2423 	else if (opt_zero)
2424 		memset(ret, 0, size);
2425 	return (ret);
2426 }
2427 
2428 /* Return the size of the allocation pointed to by ptr. */
2429 static size_t
2430 arena_salloc(const void *ptr)
2431 {
2432 	size_t ret;
2433 	arena_chunk_t *chunk;
2434 	arena_chunk_map_t *mapelm;
2435 	unsigned pageind;
2436 
2437 	assert(ptr != NULL);
2438 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2439 
2440 	/*
2441 	 * No arena data structures that we query here can change in a way that
2442 	 * affects this function, so we don't need to lock.
2443 	 */
2444 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2445 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2446 	    pagesize_2pow);
2447 	mapelm = &chunk->map[pageind];
2448 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2449 	    pagesize_2pow)) {
2450 		arena_run_t *run;
2451 
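		/*
		 * Each page's pos field records its index within its run,
		 * so subtracting pos from pageind yields the run's first
		 * page.  Illustrative example: for a pointer into the third
		 * page of a run, pos == 2 and pageind - 2 addresses the
		 * page that holds the run header.
		 */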
2452 		pageind -= mapelm->pos;
2453 
2454 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2455 		    pagesize_2pow));
2456 		assert(run->magic == ARENA_RUN_MAGIC);
2457 		ret = run->bin->reg_size;
2458 	} else
2459 		ret = mapelm->npages << pagesize_2pow;
2460 
2461 	return (ret);
2462 }
2463 
2464 static void *
2465 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2466 {
2467 	void *ret;
2468 
2469 	/* Avoid moving the allocation if the size class would not change. */
2470 	if (size < small_min) {
2471 		if (oldsize < small_min &&
2472 		    ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
2473 		    == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
2474 			goto IN_PLACE;
2475 	} else if (size <= small_max) {
2476 		if (oldsize >= small_min && oldsize <= small_max &&
2477 		    (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2478 		    == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2479 			goto IN_PLACE;
2480 	} else {
2481 		/*
2482 		 * We make no attempt to resize runs here, though it would be
2483 		 * possible to do so.
2484 		 */
2485 		if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
2486 			goto IN_PLACE;
2487 	}
2488 
2489 	/*
2490 	 * If we get here, then size and oldsize are different enough that we
2491 	 * need to use a different size class.  In that case, fall back to
2492 	 * allocating new space and copying.
2493 	 */
2494 	ret = arena_malloc(choose_arena(), size);
2495 	if (ret == NULL)
2496 		return (NULL);
2497 
2498 	/* Junk/zero-filling were already done by arena_malloc(). */
2499 	if (size < oldsize)
2500 		memcpy(ret, ptr, size);
2501 	else
2502 		memcpy(ret, ptr, oldsize);
2503 	idalloc(ptr);
2504 	return (ret);
2505 IN_PLACE:
2506 	if (opt_junk && size < oldsize)
2507 		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
2508 	else if (opt_zero && size > oldsize)
2509 		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
2510 	return (ptr);
2511 }
2512 
2513 static void
2514 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2515 {
2516 	unsigned pageind;
2517 	arena_chunk_map_t *mapelm;
2518 	size_t size;
2519 
2520 	assert(arena != NULL);
2521 	assert(arena->magic == ARENA_MAGIC);
2522 	assert(chunk->arena == arena);
2523 	assert(ptr != NULL);
2524 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2525 
2526 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2527 	    pagesize_2pow);
2528 	mapelm = &chunk->map[pageind];
2529 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2530 	    pagesize_2pow)) {
2531 		arena_run_t *run;
2532 		arena_bin_t *bin;
2533 
2534 		/* Small allocation. */
2535 
2536 		pageind -= mapelm->pos;
2537 
2538 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2539 		    pagesize_2pow));
2540 		assert(run->magic == ARENA_RUN_MAGIC);
2541 		bin = run->bin;
2542 		size = bin->reg_size;
2543 
2544 		if (opt_junk)
2545 			memset(ptr, 0x5a, size);
2546 
2547 		malloc_mutex_lock(&arena->mtx);
2548 		arena_run_reg_dalloc(run, bin, ptr, size);
2549 		run->nfree++;
2550 
2551 		if (run->nfree == bin->nregs) {
2552 			/* Deallocate run. */
2553 			if (run == bin->runcur)
2554 				bin->runcur = NULL;
2555 			else if (bin->nregs != 1) {
2556 				/*
2557 				 * This block's conditional is necessary because
2558 				 * if the run only contains one region, then it
2559 				 * never gets inserted into the non-full runs
2560 				 * tree.
2561 				 */
2562 				/* LINTED */
2563 				RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2564 			}
2565 #ifdef MALLOC_DEBUG
2566 			run->magic = 0;
2567 #endif
2568 			arena_run_dalloc(arena, run, bin->run_size);
2569 #ifdef MALLOC_STATS
2570 			bin->stats.curruns--;
2571 #endif
2572 		} else if (run->nfree == 1 && run != bin->runcur) {
2573 			/*
2574 			 * Make sure that bin->runcur always refers to the
2575 			 * lowest non-full run, if one exists.
2576 			 */
2577 			if (bin->runcur == NULL)
2578 				bin->runcur = run;
2579 			else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
2580 				/* Switch runcur. */
2581 				if (bin->runcur->nfree > 0) {
2582 					/* Insert runcur. */
2583 					/* LINTED */
2584 					RB_INSERT(arena_run_tree_s, &bin->runs,
2585 					    bin->runcur);
2586 				}
2587 				bin->runcur = run;
2588 			} else {
2589 				/* LINTED */
2590 				RB_INSERT(arena_run_tree_s, &bin->runs, run);
2591 			}
2592 		}
2593 #ifdef MALLOC_STATS
2594 		arena->stats.allocated_small -= size;
2595 		arena->stats.ndalloc_small++;
2596 #endif
2597 	} else {
2598 		/* Large allocation. */
2599 
2600 		size = mapelm->npages << pagesize_2pow;
2601 		assert((((uintptr_t)ptr) & pagesize_mask) == 0);
2602 
2603 		if (opt_junk)
2604 			memset(ptr, 0x5a, size);
2605 
2606 		malloc_mutex_lock(&arena->mtx);
2607 		arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2608 #ifdef MALLOC_STATS
2609 		arena->stats.allocated_large -= size;
2610 		arena->stats.ndalloc_large++;
2611 #endif
2612 	}
2613 
2614 	malloc_mutex_unlock(&arena->mtx);
2615 }
2616 
2617 static bool
2618 arena_new(arena_t *arena)
2619 {
2620 	unsigned i;
2621 	arena_bin_t *bin;
2622 	size_t prev_run_size;
2623 
2624 	malloc_mutex_init(&arena->mtx);
2625 
2626 #ifdef MALLOC_STATS
2627 	memset(&arena->stats, 0, sizeof(arena_stats_t));
2628 #endif
2629 
2630 	/* Initialize chunks. */
2631 	RB_INIT(&arena->chunks);
2632 	arena->spare = NULL;
2633 
2634 	/* Initialize bins. */
2635 	prev_run_size = pagesize;
2636 
2637 	/* (2^n)-spaced tiny bins. */
2638 	for (i = 0; i < ntbins; i++) {
2639 		bin = &arena->bins[i];
2640 		bin->runcur = NULL;
2641 		RB_INIT(&bin->runs);
2642 
2643 		bin->reg_size = (1 << (TINY_MIN_2POW + i));
2644 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2645 
2646 #ifdef MALLOC_STATS
2647 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2648 #endif
2649 	}
2650 
2651 	/* Quantum-spaced bins. */
2652 	for (; i < ntbins + nqbins; i++) {
2653 		bin = &arena->bins[i];
2654 		bin->runcur = NULL;
2655 		RB_INIT(&bin->runs);
2656 
2657 		bin->reg_size = quantum * (i - ntbins + 1);
2658 /*
2659 		pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2660 */
2661 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2662 
2663 #ifdef MALLOC_STATS
2664 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2665 #endif
2666 	}
2667 
2668 	/* (2^n)-spaced sub-page bins. */
2669 	for (; i < ntbins + nqbins + nsbins; i++) {
2670 		bin = &arena->bins[i];
2671 		bin->runcur = NULL;
2672 		RB_INIT(&bin->runs);
2673 
2674 		bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2675 
2676 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2677 
2678 #ifdef MALLOC_STATS
2679 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2680 #endif
2681 	}
2682 
2683 #ifdef MALLOC_DEBUG
2684 	arena->magic = ARENA_MAGIC;
2685 #endif
2686 
2687 	return (false);
2688 }
2689 
2690 /* Create a new arena and insert it into the arenas array at index ind. */
2691 static arena_t *
2692 arenas_extend(unsigned ind)
2693 {
2694 	arena_t *ret;
2695 
2696 	/* Allocate enough space for trailing bins. */
2697 	ret = (arena_t *)base_alloc(sizeof(arena_t)
2698 	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2699 	if (ret != NULL && arena_new(ret) == false) {
2700 		arenas[ind] = ret;
2701 		return (ret);
2702 	}
2703 	/* Only reached if there is an OOM error. */
2704 
2705 	/*
2706 	 * OOM here is quite inconvenient to propagate, since dealing with it
2707 	 * would require a check for failure in the fast path.  Instead, punt
2708 	 * by using arenas[0].  In practice, this is an extremely unlikely
2709 	 * failure.
2710 	 */
2711 	_malloc_message(getprogname(),
2712 	    ": (malloc) Error initializing arena\n", "", "");
2713 	if (opt_abort)
2714 		abort();
2715 
2716 	return (arenas[0]);
2717 }
2718 
2719 /*
2720  * End arena.
2721  */
2722 /******************************************************************************/
2723 /*
2724  * Begin general internal functions.
2725  */
2726 
2727 static void *
2728 huge_malloc(size_t size)
2729 {
2730 	void *ret;
2731 	size_t csize;
2732 	chunk_node_t *node;
2733 
2734 	/* Allocate one or more contiguous chunks for this request. */
2735 
2736 	csize = CHUNK_CEILING(size);
2737 	if (csize == 0) {
2738 		/* size is large enough to cause size_t wrap-around. */
2739 		return (NULL);
2740 	}
2741 
2742 	/* Allocate a chunk node with which to track the chunk. */
2743 	node = base_chunk_node_alloc();
2744 	if (node == NULL)
2745 		return (NULL);
2746 
2747 	ret = chunk_alloc(csize);
2748 	if (ret == NULL) {
2749 		base_chunk_node_dealloc(node);
2750 		return (NULL);
2751 	}
2752 
2753 	/* Insert node into huge. */
2754 	node->chunk = ret;
2755 	node->size = csize;
2756 
2757 	malloc_mutex_lock(&chunks_mtx);
2758 	RB_INSERT(chunk_tree_s, &huge, node);
2759 #ifdef MALLOC_STATS
2760 	huge_nmalloc++;
2761 	huge_allocated += csize;
2762 #endif
2763 	malloc_mutex_unlock(&chunks_mtx);
2764 
2765 	if (opt_junk)
2766 		memset(ret, 0xa5, csize);
2767 	else if (opt_zero)
2768 		memset(ret, 0, csize);
2769 
2770 	return (ret);
2771 }
2772 
2773 /* Only handles large allocations that require more than chunk alignment. */
2774 static void *
2775 huge_palloc(size_t alignment, size_t size)
2776 {
2777 	void *ret;
2778 	size_t alloc_size, chunk_size, offset;
2779 	chunk_node_t *node;
2780 
2781 	/*
2782 	 * This allocation requires alignment that is even larger than chunk
2783 	 * alignment.  This means that huge_malloc() isn't good enough.
2784 	 *
2785 	 * Allocate almost twice as many chunks as are demanded by the size or
2786 	 * alignment, in order to ensure that the alignment can be achieved, then
2787 	 * unmap leading and trailing chunks.
2788 	 */
2789 	assert(alignment >= chunksize);
2790 
2791 	chunk_size = CHUNK_CEILING(size);
2792 
2793 	if (size >= alignment)
2794 		alloc_size = chunk_size + alignment - chunksize;
2795 	else
2796 		alloc_size = (alignment << 1) - chunksize;
2797 
2798 	/* Allocate a chunk node with which to track the chunk. */
2799 	node = base_chunk_node_alloc();
2800 	if (node == NULL)
2801 		return (NULL);
2802 
2803 	ret = chunk_alloc(alloc_size);
2804 	if (ret == NULL) {
2805 		base_chunk_node_dealloc(node);
2806 		return (NULL);
2807 	}
2808 
2809 	offset = (uintptr_t)ret & (alignment - 1);
2810 	assert((offset & chunksize_mask) == 0);
2811 	assert(offset < alloc_size);
2812 	if (offset == 0) {
2813 		/* Trim trailing space. */
2814 		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
2815 		    - chunk_size);
2816 	} else {
2817 		size_t trailsize;
2818 
2819 		/* Trim leading space. */
2820 		chunk_dealloc(ret, alignment - offset);
2821 
2822 		ret = (void *)((uintptr_t)ret + (alignment - offset));
2823 
2824 		trailsize = alloc_size - (alignment - offset) - chunk_size;
2825 		if (trailsize != 0) {
2826 		    /* Trim trailing space. */
2827 		    assert(trailsize < alloc_size);
2828 		    chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
2829 			trailsize);
2830 		}
2831 	}
2832 
2833 	/* Insert node into huge. */
2834 	node->chunk = ret;
2835 	node->size = chunk_size;
2836 
2837 	malloc_mutex_lock(&chunks_mtx);
2838 	RB_INSERT(chunk_tree_s, &huge, node);
2839 #ifdef MALLOC_STATS
2840 	huge_nmalloc++;
2841 	huge_allocated += chunk_size;
2842 #endif
2843 	malloc_mutex_unlock(&chunks_mtx);
2844 
2845 	if (opt_junk)
2846 		memset(ret, 0xa5, chunk_size);
2847 	else if (opt_zero)
2848 		memset(ret, 0, chunk_size);
2849 
2850 	return (ret);
2851 }
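
/*
 * Illustrative sketch (not part of the original source): the arithmetic
 * huge_palloc() performs above, using hypothetical example values (1 MB
 * chunks, a 4 MB alignment request, size <= chunksize, and a pretend return
 * address from chunk_alloc()).
 */
static void
example_huge_align(void)
{
	size_t chunksz = 0x100000;			/* 1 MB chunks */
	size_t align = 0x400000;			/* 4 MB alignment */
	size_t chunk_size = chunksz;			/* CHUNK_CEILING(size) */
	size_t alloc_size = (align << 1) - chunksz;	/* 7 MB over-allocation */
	uintptr_t base = 0x20300000;		/* pretend chunk_alloc() result */
	size_t offset = base & (align - 1);		/* 0x300000: misaligned */
	size_t lead = align - offset;		/* 1 MB unmapped at the front */
	uintptr_t ret = base + lead;		/* 0x20400000: aligned start */
	size_t trail = alloc_size - lead - chunk_size;	/* 5 MB unmapped behind */

	(void)ret;
	(void)trail;
}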
2852 
2853 static void *
2854 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2855 {
2856 	void *ret;
2857 
2858 	/* Avoid moving the allocation if the size class would not change. */
2859 	if (oldsize > arena_maxclass &&
2860 	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
2861 		if (opt_junk && size < oldsize) {
2862 			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
2863 			    - size);
2864 		} else if (opt_zero && size > oldsize) {
2865 			memset((void *)((uintptr_t)ptr + oldsize), 0, size
2866 			    - oldsize);
2867 		}
2868 		return (ptr);
2869 	}
2870 
2871 	if (CHUNK_ADDR2BASE(ptr) == ptr
2872 #ifdef USE_BRK
2873 	    && ((uintptr_t)ptr < (uintptr_t)brk_base
2874 	    || (uintptr_t)ptr >= (uintptr_t)brk_max)
2875 #endif
2876 	    ) {
2877 		chunk_node_t *node, key;
2878 		void *newptr;
2879 		size_t oldcsize;
2880 		size_t newcsize;
2881 
2882 		newcsize = CHUNK_CEILING(size);
2883 		oldcsize = CHUNK_CEILING(oldsize);
2884 		assert(oldcsize != newcsize);
2885 		if (newcsize == 0) {
2886 			/* size_t wrap-around */
2887 			return (NULL);
2888 		}
2889 
2890 		/*
2891 		 * Remove the old region from the tree now.  If mremap()
2892 		 * returns the region to the system, another thread may
2893 		 * map it for the same huge allocation and insert it into
2894 		 * the tree before we acquire the mutex again.
2895 		 */
2896 		malloc_mutex_lock(&chunks_mtx);
2897 		key.chunk = __DECONST(void *, ptr);
2898 		/* LINTED */
2899 		node = RB_FIND(chunk_tree_s, &huge, &key);
2900 		assert(node != NULL);
2901 		assert(node->chunk == ptr);
2902 		assert(node->size == oldcsize);
2903 		RB_REMOVE(chunk_tree_s, &huge, node);
2904 		malloc_mutex_unlock(&chunks_mtx);
2905 
2906 		newptr = mremap(ptr, oldcsize, NULL, newcsize,
2907 		    MAP_ALIGNED(chunksize_2pow));
2908 		if (newptr == MAP_FAILED) {
2909 			/* We still own the old region. */
2910 			malloc_mutex_lock(&chunks_mtx);
2911 			RB_INSERT(chunk_tree_s, &huge, node);
2912 			malloc_mutex_unlock(&chunks_mtx);
2913 		} else {
2914 			assert(CHUNK_ADDR2BASE(newptr) == newptr);
2915 
2916 			/* Insert new or resized old region. */
2917 			malloc_mutex_lock(&chunks_mtx);
2918 			node->size = newcsize;
2919 			node->chunk = newptr;
2920 			RB_INSERT(chunk_tree_s, &huge, node);
2921 #ifdef MALLOC_STATS
2922 			huge_nralloc++;
2923 			huge_allocated += newcsize - oldcsize;
2924 			if (newcsize > oldcsize) {
2925 				stats_chunks.curchunks +=
2926 				    (newcsize - oldcsize) / chunksize;
2927 				if (stats_chunks.curchunks >
2928 				    stats_chunks.highchunks)
2929 					stats_chunks.highchunks =
2930 					    stats_chunks.curchunks;
2931 			} else {
2932 				stats_chunks.curchunks -=
2933 				    (oldcsize - newcsize) / chunksize;
2934 			}
2935 #endif
2936 			malloc_mutex_unlock(&chunks_mtx);
2937 
2938 			if (opt_junk && size < oldsize) {
2939 				memset((void *)((uintptr_t)newptr + size), 0x5a,
2940 				    newcsize - size);
2941 			} else if (opt_zero && size > oldsize) {
2942 				memset((void *)((uintptr_t)newptr + oldsize), 0,
2943 				    size - oldsize);
2944 			}
2945 			return (newptr);
2946 		}
2947 	}
2948 
2949 	/*
2950 	 * If we get here, then size and oldsize are different enough that we
2951 	 * need to use a different size class.  In that case, fall back to
2952 	 * allocating new space and copying.
2953 	 */
2954 	ret = huge_malloc(size);
2955 	if (ret == NULL)
2956 		return (NULL);
2957 
2958 	if (CHUNK_ADDR2BASE(ptr) == ptr) {
2959 		/* The old allocation is a chunk. */
2960 		if (size < oldsize)
2961 			memcpy(ret, ptr, size);
2962 		else
2963 			memcpy(ret, ptr, oldsize);
2964 	} else {
2965 		/* The old allocation is a region. */
2966 		assert(oldsize < size);
2967 		memcpy(ret, ptr, oldsize);
2968 	}
2969 	idalloc(ptr);
2970 	return (ret);
2971 }
2972 
2973 static void
2974 huge_dalloc(void *ptr)
2975 {
2976 	chunk_node_t key;
2977 	chunk_node_t *node;
2978 
2979 	malloc_mutex_lock(&chunks_mtx);
2980 
2981 	/* Extract from tree of huge allocations. */
2982 	key.chunk = ptr;
2983 	/* LINTED */
2984 	node = RB_FIND(chunk_tree_s, &huge, &key);
2985 	assert(node != NULL);
2986 	assert(node->chunk == ptr);
2987 	/* LINTED */
2988 	RB_REMOVE(chunk_tree_s, &huge, node);
2989 
2990 #ifdef MALLOC_STATS
2991 	huge_ndalloc++;
2992 	huge_allocated -= node->size;
2993 #endif
2994 
2995 	malloc_mutex_unlock(&chunks_mtx);
2996 
2997 	/* Unmap chunk. */
2998 #ifdef USE_BRK
2999 	if (opt_junk)
3000 		memset(node->chunk, 0x5a, node->size);
3001 #endif
3002 	chunk_dealloc(node->chunk, node->size);
3003 
3004 	base_chunk_node_dealloc(node);
3005 }
3006 
3007 static void *
3008 imalloc(size_t size)
3009 {
3010 	void *ret;
3011 
3012 	assert(size != 0);
3013 
3014 	if (size <= arena_maxclass)
3015 		ret = arena_malloc(choose_arena(), size);
3016 	else
3017 		ret = huge_malloc(size);
3018 
3019 	return (ret);
3020 }
3021 
3022 static void *
3023 ipalloc(size_t alignment, size_t size)
3024 {
3025 	void *ret;
3026 	size_t ceil_size;
3027 
3028 	/*
3029 	 * Round size up to the nearest multiple of alignment.
3030 	 *
3031 	 * This done, we can take advantage of the fact that for each small
3032 	 * size class, every object is aligned at the smallest power of two
3033 	 * that is non-zero in the base two representation of the size.  For
3034 	 * example:
3035 	 *
3036 	 *   Size |   Base 2 | Minimum alignment
3037 	 *   -----+----------+------------------
3038 	 *     96 |  1100000 |  32
3039 	 *    144 | 10010000 |  16
3040 	 *    192 | 11000000 |  64
3041 	 *
3042 	 * Depending on runtime settings, it is possible that arena_malloc()
3043 	 * will further round up to a power of two, but that never causes
3044 	 * correctness issues.
3045 	 */
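	/*
	 * Worked example (illustrative): size == 100 with alignment == 32
	 * rounds up to ceil_size == 128.  128 is 10000000 in base 2, so the
	 * 128 byte size class guarantees 128 byte alignment, which satisfies
	 * the requested 32 byte alignment.
	 */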
3046 	ceil_size = (size + (alignment - 1)) & (-alignment);
3047 	/*
3048 	 * (ceil_size < size) protects against the combination of maximal
3049 	 * alignment and size greater than maximal alignment.
3050 	 */
3051 	if (ceil_size < size) {
3052 		/* size_t overflow. */
3053 		return (NULL);
3054 	}
3055 
3056 	if (ceil_size <= pagesize || (alignment <= pagesize
3057 	    && ceil_size <= arena_maxclass))
3058 		ret = arena_malloc(choose_arena(), ceil_size);
3059 	else {
3060 		size_t run_size;
3061 
3062 		/*
3063 		 * We can't achieve sub-page alignment, so round up alignment
3064 		 * permanently; it makes later calculations simpler.
3065 		 */
3066 		alignment = PAGE_CEILING(alignment);
3067 		ceil_size = PAGE_CEILING(size);
3068 		/*
3069 		 * (ceil_size < size) protects against very large sizes within
3070 		 * pagesize of SIZE_T_MAX.
3071 		 *
3072 		 * (ceil_size + alignment < ceil_size) protects against the
3073 		 * combination of maximal alignment and ceil_size large enough
3074 		 * to cause overflow.  This is similar to the first overflow
3075 		 * check above, but it needs to be repeated due to the new
3076 		 * ceil_size value, which may now be *equal* to maximal
3077 		 * alignment, whereas before we only detected overflow if the
3078 		 * original size was *greater* than maximal alignment.
3079 		 */
3080 		if (ceil_size < size || ceil_size + alignment < ceil_size) {
3081 			/* size_t overflow. */
3082 			return (NULL);
3083 		}
3084 
3085 		/*
3086 		 * Calculate the size of the over-size run that arena_palloc()
3087 		 * would need to allocate in order to guarantee the alignment.
3088 		 */
3089 		if (ceil_size >= alignment)
3090 			run_size = ceil_size + alignment - pagesize;
3091 		else {
3092 			/*
3093 			 * It is possible that (alignment << 1) will cause
3094 			 * overflow, but it doesn't matter because we also
3095 			 * subtract pagesize, which in the case of overflow
3096 			 * leaves us with a very large run_size.  That causes
3097 			 * the first conditional below to fail, which means
3098 			 * that the bogus run_size value never gets used for
3099 			 * anything important.
3100 			 */
3101 			run_size = (alignment << 1) - pagesize;
3102 		}
3103 
3104 		if (run_size <= arena_maxclass) {
3105 			ret = arena_palloc(choose_arena(), alignment, ceil_size,
3106 			    run_size);
3107 		} else if (alignment <= chunksize)
3108 			ret = huge_malloc(ceil_size);
3109 		else
3110 			ret = huge_palloc(alignment, ceil_size);
3111 	}
3112 
3113 	assert(((uintptr_t)ret & (alignment - 1)) == 0);
3114 	return (ret);
3115 }
3116 
3117 static void *
3118 icalloc(size_t size)
3119 {
3120 	void *ret;
3121 
3122 	if (size <= arena_maxclass) {
3123 		ret = arena_malloc(choose_arena(), size);
3124 		if (ret == NULL)
3125 			return (NULL);
3126 		memset(ret, 0, size);
3127 	} else {
3128 		/*
3129 		 * The virtual memory system provides zero-filled pages, so
3130 		 * there is no need to do so manually, unless opt_junk is
3131 		 * enabled, in which case huge_malloc() fills huge allocations
3132 		 * with junk.
3133 		 */
3134 		ret = huge_malloc(size);
3135 		if (ret == NULL)
3136 			return (NULL);
3137 
3138 		if (opt_junk)
3139 			memset(ret, 0, size);
3140 #ifdef USE_BRK
3141 		else if ((uintptr_t)ret >= (uintptr_t)brk_base
3142 		    && (uintptr_t)ret < (uintptr_t)brk_max) {
3143 			/*
3144 			 * This may be a re-used brk chunk.  Therefore, zero
3145 			 * the memory.
3146 			 */
3147 			memset(ret, 0, size);
3148 		}
3149 #endif
3150 	}
3151 
3152 	return (ret);
3153 }
3154 
3155 static size_t
3156 isalloc(const void *ptr)
3157 {
3158 	size_t ret;
3159 	arena_chunk_t *chunk;
3160 
3161 	assert(ptr != NULL);
3162 
3163 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3164 	if (chunk != ptr) {
3165 		/* Region. */
3166 		assert(chunk->arena->magic == ARENA_MAGIC);
3167 
3168 		ret = arena_salloc(ptr);
3169 	} else {
3170 		chunk_node_t *node, key;
3171 
3172 		/* Chunk (huge allocation). */
3173 
3174 		malloc_mutex_lock(&chunks_mtx);
3175 
3176 		/* Extract from tree of huge allocations. */
3177 		key.chunk = __DECONST(void *, ptr);
3178 		/* LINTED */
3179 		node = RB_FIND(chunk_tree_s, &huge, &key);
3180 		assert(node != NULL);
3181 
3182 		ret = node->size;
3183 
3184 		malloc_mutex_unlock(&chunks_mtx);
3185 	}
3186 
3187 	return (ret);
3188 }
3189 
3190 static void *
3191 iralloc(void *ptr, size_t size)
3192 {
3193 	void *ret;
3194 	size_t oldsize;
3195 
3196 	assert(ptr != NULL);
3197 	assert(size != 0);
3198 
3199 	oldsize = isalloc(ptr);
3200 
3201 	if (size <= arena_maxclass)
3202 		ret = arena_ralloc(ptr, size, oldsize);
3203 	else
3204 		ret = huge_ralloc(ptr, size, oldsize);
3205 
3206 	return (ret);
3207 }
3208 
3209 static void
3210 idalloc(void *ptr)
3211 {
3212 	arena_chunk_t *chunk;
3213 
3214 	assert(ptr != NULL);
3215 
3216 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3217 	if (chunk != ptr) {
3218 		/* Region. */
3219 		arena_dalloc(chunk->arena, chunk, ptr);
3220 	} else
3221 		huge_dalloc(ptr);
3222 }
3223 
3224 static void
3225 malloc_print_stats(void)
3226 {
3227 
3228 	if (opt_print_stats) {
3229 		char s[UMAX2S_BUFSIZE];
3230 		_malloc_message("___ Begin malloc statistics ___\n", "", "",
3231 		    "");
3232 		_malloc_message("Assertions ",
3233 #ifdef NDEBUG
3234 		    "disabled",
3235 #else
3236 		    "enabled",
3237 #endif
3238 		    "\n", "");
3239 		_malloc_message("Boolean MALLOC_OPTIONS: ",
3240 		    opt_abort ? "A" : "a",
3241 		    opt_junk ? "J" : "j",
3242 		    opt_hint ? "H" : "h");
3243 		_malloc_message(opt_utrace ? "PU" : "Pu",
3244 		    opt_sysv ? "V" : "v",
3245 		    opt_xmalloc ? "X" : "x",
3246 		    opt_zero ? "Z\n" : "z\n");
3247 
3248 		_malloc_message("CPUs: ", size_t2s(ncpus, s), "\n", "");
3249 		_malloc_message("Max arenas: ", size_t2s(narenas, s), "\n", "");
3250 		_malloc_message("Pointer size: ", size_t2s(sizeof(void *), s),
3251 		    "\n", "");
3252 		_malloc_message("Quantum size: ", size_t2s(quantum, s), "\n", "");
3253 		_malloc_message("Max small size: ", size_t2s(small_max, s), "\n",
3254 		    "");
3255 
3256 		_malloc_message("Chunk size: ", size_t2s(chunksize, s), "", "");
3257 		_malloc_message(" (2^", size_t2s((size_t)opt_chunk_2pow, s),
3258 		    ")\n", "");
3259 
3260 #ifdef MALLOC_STATS
3261 		{
3262 			size_t allocated, mapped;
3263 			unsigned i;
3264 			arena_t *arena;
3265 
3266 			/* Calculate and print allocated/mapped stats. */
3267 
3268 			/* arenas. */
3269 			for (i = 0, allocated = 0; i < narenas; i++) {
3270 				if (arenas[i] != NULL) {
3271 					malloc_mutex_lock(&arenas[i]->mtx);
3272 					allocated +=
3273 					    arenas[i]->stats.allocated_small;
3274 					allocated +=
3275 					    arenas[i]->stats.allocated_large;
3276 					malloc_mutex_unlock(&arenas[i]->mtx);
3277 				}
3278 			}
3279 
3280 			/* huge/base. */
3281 			malloc_mutex_lock(&chunks_mtx);
3282 			allocated += huge_allocated;
3283 			mapped = stats_chunks.curchunks * chunksize;
3284 			malloc_mutex_unlock(&chunks_mtx);
3285 
3286 			malloc_mutex_lock(&base_mtx);
3287 			mapped += base_mapped;
3288 			malloc_mutex_unlock(&base_mtx);
3289 
3290 			malloc_printf("Allocated: %zu, mapped: %zu\n",
3291 			    allocated, mapped);
3292 
3293 			/* Print chunk stats. */
3294 			{
3295 				chunk_stats_t chunks_stats;
3296 
3297 				malloc_mutex_lock(&chunks_mtx);
3298 				chunks_stats = stats_chunks;
3299 				malloc_mutex_unlock(&chunks_mtx);
3300 
3301 				malloc_printf("chunks: nchunks   "
3302 				    "highchunks    curchunks\n");
3303 				malloc_printf("  %13llu%13lu%13lu\n",
3304 				    chunks_stats.nchunks,
3305 				    chunks_stats.highchunks,
3306 				    chunks_stats.curchunks);
3307 			}
3308 
3309 			/* Print huge allocation stats. */
3310 			malloc_printf(
3311 			    "huge: nmalloc      ndalloc      "
3312 			    "nralloc    allocated\n");
3313 			malloc_printf(" %12llu %12llu %12llu %12zu\n",
3314 			    huge_nmalloc, huge_ndalloc, huge_nralloc,
3315 			    huge_allocated);
3316 
3317 			/* Print stats for each arena. */
3318 			for (i = 0; i < narenas; i++) {
3319 				arena = arenas[i];
3320 				if (arena != NULL) {
3321 					malloc_printf(
3322 					    "\narenas[%u] @ %p\n", i, arena);
3323 					malloc_mutex_lock(&arena->mtx);
3324 					stats_print(arena);
3325 					malloc_mutex_unlock(&arena->mtx);
3326 				}
3327 			}
3328 		}
3329 #endif /* #ifdef MALLOC_STATS */
3330 		_malloc_message("--- End malloc statistics ---\n", "", "", "");
3331 	}
3332 }
3333 
3334 /*
3335  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
3336  * implementation has to take pains to avoid infinite recursion during
3337  * initialization.
3338  */
3339 static inline bool
3340 malloc_init(void)
3341 {
3342 
3343 	if (malloc_initialized == false)
3344 		return (malloc_init_hard());
3345 
3346 	return (false);
3347 }
3348 
3349 static bool
3350 malloc_init_hard(void)
3351 {
3352 	unsigned i, j;
3353 	ssize_t linklen;
3354 	char buf[PATH_MAX + 1];
3355 	const char *opts = "";
3356 	int serrno;
3357 
3358 	malloc_mutex_lock(&init_lock);
3359 	if (malloc_initialized) {
3360 		/*
3361 		 * Another thread initialized the allocator before this one
3362 		 * acquired init_lock.
3363 		 */
3364 		malloc_mutex_unlock(&init_lock);
3365 		return (false);
3366 	}
3367 
3368 	serrno = errno;
3369 	/* Get number of CPUs. */
3370 	{
3371 		int mib[2];
3372 		size_t len;
3373 
3374 		mib[0] = CTL_HW;
3375 		mib[1] = HW_NCPU;
3376 		len = sizeof(ncpus);
3377 		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3378 			/* Error. */
3379 			ncpus = 1;
3380 		}
3381 	}
3382 
3383 	/* Get page size. */
3384 	{
3385 		long result;
3386 
3387 		result = sysconf(_SC_PAGESIZE);
3388 		assert(result != -1);
3389 		pagesize = (unsigned) result;
3390 
3391 		/*
3392 		 * We assume that pagesize is a power of 2 when calculating
3393 		 * pagesize_mask and pagesize_2pow.
3394 		 */
3395 		assert(((result - 1) & result) == 0);
3396 		pagesize_mask = result - 1;
3397 		pagesize_2pow = ffs((int)result) - 1;
3398 	}
3399 
3400 	for (i = 0; i < 3; i++) {
3401 		/* Get runtime configuration. */
3402 		switch (i) {
3403 		case 0:
3404 			if ((linklen = readlink("/etc/malloc.conf", buf,
3405 						sizeof(buf) - 1)) != -1) {
3406 				/*
3407 				 * Use the contents of the "/etc/malloc.conf"
3408 				 * symbolic link's name.
3409 				 */
3410 				buf[linklen] = '\0';
3411 				opts = buf;
3412 			} else {
3413 				/* No configuration specified. */
3414 				buf[0] = '\0';
3415 				opts = buf;
3416 			}
3417 			break;
3418 		case 1:
3419 			if ((opts = getenv("MALLOC_OPTIONS")) != NULL &&
3420 			    issetugid() == 0) {
3421 				/*
3422 				 * Do nothing; opts is already initialized to
3423 				 * the value of the MALLOC_OPTIONS environment
3424 				 * variable.
3425 				 */
3426 			} else {
3427 				/* No configuration specified. */
3428 				buf[0] = '\0';
3429 				opts = buf;
3430 			}
3431 			break;
3432 		case 2:
3433 			if (_malloc_options != NULL) {
3434 			    /*
3435 			     * Use options that were compiled into the program.
3436 			     */
3437 			    opts = _malloc_options;
3438 			} else {
3439 				/* No configuration specified. */
3440 				buf[0] = '\0';
3441 				opts = buf;
3442 			}
3443 			break;
3444 		default:
3445 			/* NOTREACHED */
3446 			/* LINTED */
3447 			assert(false);
3448 		}
3449 
3450 		for (j = 0; opts[j] != '\0'; j++) {
3451 			switch (opts[j]) {
3452 			case 'a':
3453 				opt_abort = false;
3454 				break;
3455 			case 'A':
3456 				opt_abort = true;
3457 				break;
3458 			case 'h':
3459 				opt_hint = false;
3460 				break;
3461 			case 'H':
3462 				opt_hint = true;
3463 				break;
3464 			case 'j':
3465 				opt_junk = false;
3466 				break;
3467 			case 'J':
3468 				opt_junk = true;
3469 				break;
3470 			case 'k':
3471 				/*
3472 				 * Chunks always require at least one header
3473 				 * page, so chunks can never be smaller than
3474 				 * two pages.
3475 				 */
3476 				if (opt_chunk_2pow > pagesize_2pow + 1)
3477 					opt_chunk_2pow--;
3478 				break;
3479 			case 'K':
3480 				if (opt_chunk_2pow + 1 <
3481 				    (int)(sizeof(size_t) << 3))
3482 					opt_chunk_2pow++;
3483 				break;
3484 			case 'n':
3485 				opt_narenas_lshift--;
3486 				break;
3487 			case 'N':
3488 				opt_narenas_lshift++;
3489 				break;
3490 			case 'p':
3491 				opt_print_stats = false;
3492 				break;
3493 			case 'P':
3494 				opt_print_stats = true;
3495 				break;
3496 			case 'q':
3497 				if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3498 					opt_quantum_2pow--;
3499 				break;
3500 			case 'Q':
3501 				if (opt_quantum_2pow < pagesize_2pow - 1)
3502 					opt_quantum_2pow++;
3503 				break;
3504 			case 's':
3505 				if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3506 					opt_small_max_2pow--;
3507 				break;
3508 			case 'S':
3509 				if (opt_small_max_2pow < pagesize_2pow - 1)
3510 					opt_small_max_2pow++;
3511 				break;
3512 			case 'u':
3513 				opt_utrace = false;
3514 				break;
3515 			case 'U':
3516 				opt_utrace = true;
3517 				break;
3518 			case 'v':
3519 				opt_sysv = false;
3520 				break;
3521 			case 'V':
3522 				opt_sysv = true;
3523 				break;
3524 			case 'x':
3525 				opt_xmalloc = false;
3526 				break;
3527 			case 'X':
3528 				opt_xmalloc = true;
3529 				break;
3530 			case 'z':
3531 				opt_zero = false;
3532 				break;
3533 			case 'Z':
3534 				opt_zero = true;
3535 				break;
3536 			default: {
3537 				char cbuf[2];
3538 
3539 				cbuf[0] = opts[j];
3540 				cbuf[1] = '\0';
3541 				_malloc_message(getprogname(),
3542 				    ": (malloc) Unsupported character in "
3543 				    "malloc options: '", cbuf, "'\n");
3544 			}
3545 			}
3546 		}
3547 	}
3548 	errno = serrno;
3549 
3550 	/* Take care to call atexit() only once. */
3551 	if (opt_print_stats) {
3552 		/* Print statistics at exit. */
3553 		atexit(malloc_print_stats);
3554 	}
3555 
3556 	/* Set variables according to the value of opt_small_max_2pow. */
3557 	if (opt_small_max_2pow < opt_quantum_2pow)
3558 		opt_small_max_2pow = opt_quantum_2pow;
3559 	small_max = (1 << opt_small_max_2pow);
3560 
3561 	/* Set bin-related variables. */
3562 	bin_maxclass = (pagesize >> 1);
3563 	assert(opt_quantum_2pow >= TINY_MIN_2POW);
3564 	ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW);
3565 	assert(ntbins <= opt_quantum_2pow);
3566 	nqbins = (unsigned)(small_max >> opt_quantum_2pow);
3567 	nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1);
3568 
3569 	/* Set variables according to the value of opt_quantum_2pow. */
3570 	quantum = (1 << opt_quantum_2pow);
3571 	quantum_mask = quantum - 1;
3572 	if (ntbins > 0)
3573 		small_min = (quantum >> 1) + 1;
3574 	else
3575 		small_min = 1;
3576 	assert(small_min <= quantum);
3577 
3578 	/* Set variables according to the value of opt_chunk_2pow. */
3579 	chunksize = (1LU << opt_chunk_2pow);
3580 	chunksize_mask = chunksize - 1;
3581 	chunksize_2pow = (unsigned)opt_chunk_2pow;
3582 	chunk_npages = (unsigned)(chunksize >> pagesize_2pow);
3583 	{
3584 		unsigned header_size;
3585 
3586 		header_size = (unsigned)(sizeof(arena_chunk_t) +
3587 		    (sizeof(arena_chunk_map_t) * (chunk_npages - 1)));
3588 		arena_chunk_header_npages = (header_size >> pagesize_2pow);
3589 		if ((header_size & pagesize_mask) != 0)
3590 			arena_chunk_header_npages++;
3591 	}
3592 	arena_maxclass = chunksize - (arena_chunk_header_npages <<
3593 	    pagesize_2pow);
3594 
3595 	UTRACE(0, 0, 0);
3596 
3597 #ifdef MALLOC_STATS
3598 	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3599 #endif
3600 
3601 	/* Various sanity checks that regard configuration. */
3602 	assert(quantum >= sizeof(void *));
3603 	assert(quantum <= pagesize);
3604 	assert(chunksize >= pagesize);
3605 	assert(quantum * 4 <= chunksize);
3606 
3607 	/* Initialize chunks data. */
3608 	malloc_mutex_init(&chunks_mtx);
3609 	RB_INIT(&huge);
3610 #ifdef USE_BRK
3611 	malloc_mutex_init(&brk_mtx);
3612 	brk_base = sbrk(0);
3613 	brk_prev = brk_base;
3614 	brk_max = brk_base;
3615 #endif
3616 #ifdef MALLOC_STATS
3617 	huge_nmalloc = 0;
3618 	huge_ndalloc = 0;
3619 	huge_nralloc = 0;
3620 	huge_allocated = 0;
3621 #endif
3622 	RB_INIT(&old_chunks);
3623 
3624 	/* Initialize base allocation data structures. */
3625 #ifdef MALLOC_STATS
3626 	base_mapped = 0;
3627 #endif
3628 #ifdef USE_BRK
3629 	/*
3630 	 * Allocate a base chunk here, since it doesn't actually have to be
3631 	 * chunk-aligned.  Doing this before allocating any other chunks allows
3632 	 * the use of space that would otherwise be wasted.
3633 	 */
3634 	base_pages_alloc(0);
3635 #endif
3636 	base_chunk_nodes = NULL;
3637 	malloc_mutex_init(&base_mtx);
3638 
3639 	if (ncpus > 1) {
3640 		/*
3641 		 * For SMP systems, create four times as many arenas as there
3642 		 * are CPUs by default.
3643 		 */
3644 		opt_narenas_lshift += 2;
3645 	}
3646 
3647 #ifdef NO_TLS
3648 	/* Initialize arena key. */
3649 	(void)thr_keycreate(&arenas_map_key, NULL);
3650 #endif
3651 
3652 	/* Determine how many arenas to use. */
3653 	narenas = ncpus;
3654 	if (opt_narenas_lshift > 0) {
3655 		if ((narenas << opt_narenas_lshift) > narenas)
3656 			narenas <<= opt_narenas_lshift;
3657 		/*
3658 		 * Make sure not to exceed the limits of what base_alloc()
3659 		 * can handle.
3660 		 */
3661 		if (narenas * sizeof(arena_t *) > chunksize)
3662 			narenas = (unsigned)(chunksize / sizeof(arena_t *));
3663 	} else if (opt_narenas_lshift < 0) {
3664 		if ((narenas << opt_narenas_lshift) < narenas)
3665 			narenas <<= opt_narenas_lshift;
3666 		/* Make sure there is at least one arena. */
3667 		if (narenas == 0)
3668 			narenas = 1;
3669 	}
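	/*
	 * Worked example (illustrative, assuming the compiled-in default of
	 * opt_narenas_lshift == 0): on a 4 CPU system the SMP adjustment
	 * above raises opt_narenas_lshift to 2, so narenas becomes
	 * 4 << 2 == 16.
	 */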
3670 
3671 	next_arena = 0;
3672 
3673 	/* Allocate and initialize arenas. */
3674 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3675 	if (arenas == NULL) {
3676 		malloc_mutex_unlock(&init_lock);
3677 		return (true);
3678 	}
3679 	/*
3680 	 * Zero the array.  In practice, this should always be pre-zeroed,
3681 	 * since it was just mmap()ed, but let's be sure.
3682 	 */
3683 	memset(arenas, 0, sizeof(arena_t *) * narenas);
3684 
3685 	/*
3686 	 * Initialize one arena here.  The rest are lazily created in
3687 	 * arena_choose_hard().
3688 	 */
3689 	arenas_extend(0);
3690 	if (arenas[0] == NULL) {
3691 		malloc_mutex_unlock(&init_lock);
3692 		return (true);
3693 	}
3694 
3695 	malloc_mutex_init(&arenas_mtx);
3696 
3697 	malloc_initialized = true;
3698 	malloc_mutex_unlock(&init_lock);
3699 	return (false);
3700 }
3701 
3702 /*
3703  * End general internal functions.
3704  */
3705 /******************************************************************************/
3706 /*
3707  * Begin malloc(3)-compatible functions.
3708  */
3709 
3710 void *
3711 malloc(size_t size)
3712 {
3713 	void *ret;
3714 
3715 	if (malloc_init()) {
3716 		ret = NULL;
3717 		goto RETURN;
3718 	}
3719 
3720 	if (size == 0) {
3721 		if (opt_sysv == false)
3722 			size = 1;
3723 		else {
3724 			ret = NULL;
3725 			goto RETURN;
3726 		}
3727 	}
3728 
3729 	ret = imalloc(size);
3730 
3731 RETURN:
3732 	if (ret == NULL) {
3733 		if (opt_xmalloc) {
3734 			_malloc_message(getprogname(),
3735 			    ": (malloc) Error in malloc(): out of memory\n", "",
3736 			    "");
3737 			abort();
3738 		}
3739 		errno = ENOMEM;
3740 	}
3741 
3742 	UTRACE(0, size, ret);
3743 	return (ret);
3744 }
3745 
3746 int
3747 posix_memalign(void **memptr, size_t alignment, size_t size)
3748 {
3749 	int ret;
3750 	void *result;
3751 
3752 	if (malloc_init())
3753 		result = NULL;
3754 	else {
3755 		/* Make sure that alignment is a large enough power of 2. */
3756 		if (((alignment - 1) & alignment) != 0
3757 		    || alignment < sizeof(void *)) {
3758 			if (opt_xmalloc) {
3759 				_malloc_message(getprogname(),
3760 				    ": (malloc) Error in posix_memalign(): "
3761 				    "invalid alignment\n", "", "");
3762 				abort();
3763 			}
3764 			result = NULL;
3765 			ret = EINVAL;
3766 			goto RETURN;
3767 		}
3768 
3769 		result = ipalloc(alignment, size);
3770 	}
3771 
3772 	if (result == NULL) {
3773 		if (opt_xmalloc) {
3774 			_malloc_message(getprogname(),
3775 			": (malloc) Error in posix_memalign(): out of memory\n",
3776 			"", "");
3777 			abort();
3778 		}
3779 		ret = ENOMEM;
3780 		goto RETURN;
3781 	}
3782 
3783 	*memptr = result;
3784 	ret = 0;
3785 
3786 RETURN:
3787 	UTRACE(0, size, result);
3788 	return (ret);
3789 }
3790 
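/*
 * Standalone sketch of the alignment validation used above; the helper name
 * is illustrative and not part of this file.  A power of two has a single
 * bit set, so ((a - 1) & a) == 0 detects it: a = 64 gives 63 & 64 == 0
 * (accepted), while a = 48 gives 47 & 48 == 32 (rejected).  Since
 * sizeof(void *) is itself a power of two, any accepted value that is at
 * least sizeof(void *) is also a multiple of it, as POSIX requires.
 */
#include <stdbool.h>
#include <stddef.h>

static bool
alignment_ok(size_t alignment)
{
	/* More than one bit set means not a power of two. */
	if (((alignment - 1) & alignment) != 0)
		return (false);
	/* Zero and values smaller than sizeof(void *) are rejected here. */
	return (alignment >= sizeof(void *));
}
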
3791 void *
3792 calloc(size_t num, size_t size)
3793 {
3794 	void *ret;
3795 	size_t num_size;
3796 
3797 	if (malloc_init()) {
3798 		num_size = 0;
3799 		ret = NULL;
3800 		goto RETURN;
3801 	}
3802 
3803 	num_size = num * size;
3804 	if (num_size == 0) {
3805 		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
3806 			num_size = 1;
3807 		else {
3808 			ret = NULL;
3809 			goto RETURN;
3810 		}
3811 	/*
3812 	 * Try to avoid division here.  We know that it isn't possible to
3813 	 * overflow during multiplication if neither operand uses any of the
3814 	 * most significant half of the bits in a size_t.
3815 	 */
3816 	} else if ((unsigned long long)((num | size) &
3817 	   ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
3818 	   (num_size / size != num)) {
3819 		/* size_t overflow. */
3820 		ret = NULL;
3821 		goto RETURN;
3822 	}
3823 
3824 	ret = icalloc(num_size);
3825 
3826 RETURN:
3827 	if (ret == NULL) {
3828 		if (opt_xmalloc) {
3829 			_malloc_message(getprogname(),
3830 			    ": (malloc) Error in calloc(): out of memory\n", "",
3831 			    "");
3832 			abort();
3833 		}
3834 		errno = ENOMEM;
3835 	}
3836 
3837 	UTRACE(0, num_size, ret);
3838 	return (ret);
3839 }
3840 
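/*
 * Standalone sketch of the overflow test used in calloc() above, with
 * illustrative names (standard SIZE_MAX from <stdint.h> stands in for
 * SIZE_T_MAX).  If neither operand uses the most significant half of the
 * bits in a size_t, the product cannot overflow, so the exact division-based
 * check is only needed when a large operand is involved.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool
mul_overflows(size_t num, size_t size, size_t *prod)
{
	/* Mask covering the most significant half of a size_t. */
	const size_t high_half = SIZE_MAX << (sizeof(size_t) * 4);

	*prod = num * size;
	if (((num | size) & high_half) == 0)
		return (false);		/* Both operands are small. */
	/* Fall back to the exact check; size == 0 cannot overflow. */
	return (size != 0 && *prod / size != num);
}
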
3841 void *
3842 realloc(void *ptr, size_t size)
3843 {
3844 	void *ret;
3845 
3846 	if (size == 0) {
3847 		if (opt_sysv == false)
3848 			size = 1;
3849 		else {
3850 			if (ptr != NULL)
3851 				idalloc(ptr);
3852 			ret = NULL;
3853 			goto RETURN;
3854 		}
3855 	}
3856 
3857 	if (ptr != NULL) {
3858 		assert(malloc_initialized);
3859 
3860 		ret = iralloc(ptr, size);
3861 
3862 		if (ret == NULL) {
3863 			if (opt_xmalloc) {
3864 				_malloc_message(getprogname(),
3865 				    ": (malloc) Error in realloc(): out of "
3866 				    "memory\n", "", "");
3867 				abort();
3868 			}
3869 			errno = ENOMEM;
3870 		}
3871 	} else {
3872 		if (malloc_init())
3873 			ret = NULL;
3874 		else
3875 			ret = imalloc(size);
3876 
3877 		if (ret == NULL) {
3878 			if (opt_xmalloc) {
3879 				_malloc_message(getprogname(),
3880 				    ": (malloc) Error in realloc(): out of "
3881 				    "memory\n", "", "");
3882 				abort();
3883 			}
3884 			errno = ENOMEM;
3885 		}
3886 	}
3887 
3888 RETURN:
3889 	UTRACE(ptr, size, ret);
3890 	return (ret);
3891 }
3892 
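/*
 * Caller-side sketch of the size == 0 path above (illustrative only, not
 * part of this file).  When opt_sysv is set, realloc(p, 0) frees p and
 * returns NULL; otherwise the call behaves like realloc(p, 1) and the
 * returned pointer must still be passed to free() by the caller.
 */
#include <stdlib.h>

static void *
shrink_to_zero(void *p)
{
	/* A NULL result here is not necessarily an allocation failure. */
	return (realloc(p, 0));
}
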
3893 void
3894 free(void *ptr)
3895 {
3896 
3897 	UTRACE(ptr, 0, 0);
3898 	if (ptr != NULL) {
3899 		assert(malloc_initialized);
3900 
3901 		idalloc(ptr);
3902 	}
3903 }
3904 
3905 /*
3906  * End malloc(3)-compatible functions.
3907  */
3908 /******************************************************************************/
3909 /*
3910  * Begin non-standard functions.
3911  */
3912 #ifndef __NetBSD__
3913 size_t
3914 malloc_usable_size(const void *ptr)
3915 {
3916 
3917 	assert(ptr != NULL);
3918 
3919 	return (isalloc(ptr));
3920 }
3921 #endif
3922 
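/*
 * Usage sketch for malloc_usable_size() (caller side only; this wrapper and
 * the explicit prototype are illustrative, not part of this file).  The
 * value returned reflects the size of the allocation actually backing ptr,
 * which may be larger than the size the caller originally requested.
 */
#include <stdlib.h>
#include <string.h>

size_t	malloc_usable_size(const void *ptr);

static void
fill_whole_allocation(void *p)
{
	/* The full usable size may be used, not just the requested size. */
	memset(p, 0, malloc_usable_size(p));
}
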
3923 /*
3924  * End non-standard functions.
3925  */
3926 /******************************************************************************/
3927 /*
3928  * Begin library-private functions, used by threading libraries for protection
3929  * of malloc during fork().  These functions are only called if the program is
3930  * running in threaded mode, so there is no need to check whether the program
3931  * is threaded here.
3932  */
3933 
3934 void
3935 _malloc_prefork(void)
3936 {
3937 	unsigned i;
3938 
3939 	/* Acquire all mutexes in a safe order. */
3940 
3941 	malloc_mutex_lock(&arenas_mtx);
3942 	for (i = 0; i < narenas; i++) {
3943 		if (arenas[i] != NULL)
3944 			malloc_mutex_lock(&arenas[i]->mtx);
3945 	}
3946 
3947 	malloc_mutex_lock(&base_mtx);
3948 
3949 	malloc_mutex_lock(&chunks_mtx);
3950 }
3951 
3952 void
3953 _malloc_postfork(void)
3954 {
3955 	unsigned i;
3956 
3957 	/* Release all mutexes, now that fork() has completed. */
3958 
3959 	malloc_mutex_unlock(&chunks_mtx);
3960 
3961 	malloc_mutex_unlock(&base_mtx);
3962 
3963 	for (i = 0; i < narenas; i++) {
3964 		if (arenas[i] != NULL)
3965 			malloc_mutex_unlock(&arenas[i]->mtx);
3966 	}
3967 	malloc_mutex_unlock(&arenas_mtx);
3968 }
3969 
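/*
 * Sketch of how a threading library might arrange for the hooks above to run
 * around fork(); the registration function shown here is illustrative and
 * not part of this file.  Holding every allocator mutex across fork()
 * ensures the child never inherits a lock owned by a thread that does not
 * exist in the child.
 */
#include <pthread.h>

void	_malloc_prefork(void);
void	_malloc_postfork(void);

static void
register_malloc_fork_hooks(void)
{
	/* prepare: lock everything; parent and child: unlock everything. */
	(void)pthread_atfork(_malloc_prefork, _malloc_postfork,
	    _malloc_postfork);
}
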
3970 /*
3971  * End library-private functions.
3972  */
3973 /******************************************************************************/
3974