1 /*	$NetBSD: jemalloc.c,v 1.35 2014/09/03 19:29:40 matt Exp $	*/
2 
3 /*-
4  * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice(s), this list of conditions and the following disclaimer as
12  *    the first lines of this file unmodified other than the possible
13  *    addition of one or more copyright notices.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice(s), this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
20  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  *******************************************************************************
32  *
33  * This allocator implementation is designed to provide scalable performance
34  * for multi-threaded programs on multi-processor systems.  The following
35  * features are included for this purpose:
36  *
37  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
38  *     contention and cache sloshing.
39  *
40  *   + Cache line sharing between arenas is avoided for internal data
41  *     structures.
42  *
43  *   + Memory is managed in chunks and runs (chunks can be split into runs),
44  *     rather than as individual pages.  This provides a constant-time
45  *     mechanism for associating allocations with particular arenas.
46  *
47  * Allocation requests are rounded up to the nearest size class, and no record
48  * of the original request size is maintained.  Allocations are broken into
49  * categories according to size class.  Assuming runtime defaults, 4 kB pages
50  * and a 16 byte quantum, the size classes in each category are as follows:
51  *
52  *   |=====================================|
53  *   | Category | Subcategory    |    Size |
54  *   |=====================================|
55  *   | Small    | Tiny           |       2 |
56  *   |          |                |       4 |
57  *   |          |                |       8 |
58  *   |          |----------------+---------|
59  *   |          | Quantum-spaced |      16 |
60  *   |          |                |      32 |
61  *   |          |                |      48 |
62  *   |          |                |     ... |
63  *   |          |                |     480 |
64  *   |          |                |     496 |
65  *   |          |                |     512 |
66  *   |          |----------------+---------|
67  *   |          | Sub-page       |    1 kB |
68  *   |          |                |    2 kB |
69  *   |=====================================|
70  *   | Large                     |    4 kB |
71  *   |                           |    8 kB |
72  *   |                           |   12 kB |
73  *   |                           |     ... |
74  *   |                           | 1012 kB |
75  *   |                           | 1016 kB |
76  *   |                           | 1020 kB |
77  *   |=====================================|
78  *   | Huge                      |    1 MB |
79  *   |                           |    2 MB |
80  *   |                           |    3 MB |
81  *   |                           |     ... |
82  *   |=====================================|
83  *
84  * A different mechanism is used for each category:
85  *
86  *   Small : Each size class is segregated into its own set of runs.  Each run
87  *           maintains a bitmap of which regions are free/allocated.
88  *
89  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
90  *           in the associated arena chunk header maps.
91  *
92  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
93  *          Metadata are stored in a separate red-black tree.
94  *
95  *******************************************************************************
96  */
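
/*
 * For example, under the defaults in the table above, a request for
 * 100 bytes is rounded up to the 112 byte quantum-spaced class, a
 * 3000 byte request becomes a 4 kB "large" run, a 300 kB request
 * (already a 4 kB multiple) is served from a dedicated run, and a
 * 2.5 MB request is rounded up to three 1 MB chunks.  The actual
 * thresholds are derived at initialization time from the page size,
 * quantum, and chunk size, so these numbers are illustrative only.
 */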
97 
98 /* LINTLIBRARY */
99 
100 #ifdef __NetBSD__
101 #  define xutrace(a, b)		utrace("malloc", (a), (b))
102 #  define __DECONST(x, y)	((x)__UNCONST(y))
103 #  define NO_TLS
104 #else
105 #  define xutrace(a, b)		utrace((a), (b))
106 #endif	/* __NetBSD__ */
107 
108 /*
109  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
110  * defaults the A and J runtime options to off.  These settings are appropriate
111  * for production systems.
112  */
113 #define MALLOC_PRODUCTION
114 
115 #ifndef MALLOC_PRODUCTION
116 #  define MALLOC_DEBUG
117 #endif
118 
119 #include <sys/cdefs.h>
120 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
121 __RCSID("$NetBSD: jemalloc.c,v 1.35 2014/09/03 19:29:40 matt Exp $");
122 
123 #ifdef __FreeBSD__
124 #include "libc_private.h"
125 #ifdef MALLOC_DEBUG
126 #  define _LOCK_DEBUG
127 #endif
128 #include "spinlock.h"
129 #endif
130 #include "namespace.h"
131 #include <sys/mman.h>
132 #include <sys/param.h>
133 #ifdef __FreeBSD__
134 #include <sys/stddef.h>
135 #endif
136 #include <sys/time.h>
137 #include <sys/types.h>
138 #include <sys/sysctl.h>
139 #include <sys/tree.h>
140 #include <sys/uio.h>
141 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
142 
143 #ifdef __FreeBSD__
144 #include <machine/atomic.h>
145 #include <machine/cpufunc.h>
146 #endif
147 #include <machine/vmparam.h>
148 
149 #include <errno.h>
150 #include <limits.h>
151 #include <pthread.h>
152 #include <sched.h>
153 #include <stdarg.h>
154 #include <stdbool.h>
155 #include <stdio.h>
156 #include <stdint.h>
157 #include <stdlib.h>
158 #include <string.h>
159 #include <strings.h>
160 #include <unistd.h>
161 
162 #ifdef __NetBSD__
163 #  include <reentrant.h>
164 #  include "extern.h"
165 
166 #define STRERROR_R(a, b, c)	__strerror_r(a, b, c);
167 /*
168  * A non-localized version of strerror() that avoids bringing in
169  * stdio and the locale code.  All the malloc messages are in English,
170  * so why bother?
171  */
172 static int
173 __strerror_r(int e, char *s, size_t l)
174 {
175 	int rval;
176 	size_t slen;
177 
178 	if (e >= 0 && e < sys_nerr) {
179 		slen = strlcpy(s, sys_errlist[e], l);
180 		rval = 0;
181 	} else {
182 		slen = snprintf_ss(s, l, "Unknown error %u", e);
183 		rval = EINVAL;
184 	}
185 	return slen >= l ? ERANGE : rval;
186 }
187 #endif
188 
189 #ifdef __FreeBSD__
190 #define STRERROR_R(a, b, c)	strerror_r(a, b, c);
191 #include "un-namespace.h"
192 #endif
193 
194 /* MALLOC_STATS enables statistics calculation. */
195 #ifndef MALLOC_PRODUCTION
196 #  define MALLOC_STATS
197 #endif
198 
199 #ifdef MALLOC_DEBUG
200 #  ifdef NDEBUG
201 #    undef NDEBUG
202 #  endif
203 #else
204 #  ifndef NDEBUG
205 #    define NDEBUG
206 #  endif
207 #endif
208 #include <assert.h>
209 
210 #ifdef MALLOC_DEBUG
211    /* Disable inlining to make debugging easier. */
212 #  define inline
213 #endif
214 
215 /* Size of stack-allocated buffer passed to strerror_r(). */
216 #define	STRERROR_BUF		64
217 
218 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
219 
220 /*
221  * If you touch the TINY_MIN_2POW definition for any architecture, please
222  * make sure to adjust the corresponding definition for JEMALLOC_TINY_MIN_2POW
223  * in the gcc 4.8 tree in dist/gcc/tree-ssa-ccp.c and verify that a native
224  * gcc is still buildable!
225  */
226 
227 #ifdef __i386__
228 #  define QUANTUM_2POW_MIN	4
229 #  define SIZEOF_PTR_2POW	2
230 #  define USE_BRK
231 #endif
232 #ifdef __ia64__
233 #  define QUANTUM_2POW_MIN	4
234 #  define SIZEOF_PTR_2POW	3
235 #endif
236 #ifdef __aarch64__
237 #  define QUANTUM_2POW_MIN	4
238 #  define SIZEOF_PTR_2POW	3
239 #  define NO_TLS
240 #endif
241 #ifdef __alpha__
242 #  define QUANTUM_2POW_MIN	4
243 #  define SIZEOF_PTR_2POW	3
244 #  define TINY_MIN_2POW		3
245 #  define NO_TLS
246 #endif
247 #ifdef __sparc64__
248 #  define QUANTUM_2POW_MIN	4
249 #  define SIZEOF_PTR_2POW	3
250 #  define TINY_MIN_2POW		3
251 #  define NO_TLS
252 #endif
253 #ifdef __amd64__
254 #  define QUANTUM_2POW_MIN	4
255 #  define SIZEOF_PTR_2POW	3
256 #  define TINY_MIN_2POW		3
257 #endif
258 #ifdef __arm__
259 #  define QUANTUM_2POW_MIN	3
260 #  define SIZEOF_PTR_2POW	2
261 #  define USE_BRK
262 #  ifdef __ARM_EABI__
263 #    define TINY_MIN_2POW	3
264 #  endif
265 #  define NO_TLS
266 #endif
267 #ifdef __powerpc__
268 #  define QUANTUM_2POW_MIN	4
269 #  define SIZEOF_PTR_2POW	2
270 #  define USE_BRK
271 #  define TINY_MIN_2POW		3
272 #endif
273 #if defined(__sparc__) && !defined(__sparc64__)
274 #  define QUANTUM_2POW_MIN	4
275 #  define SIZEOF_PTR_2POW	2
276 #  define USE_BRK
277 #endif
278 #ifdef __or1k__
279 #  define QUANTUM_2POW_MIN	4
280 #  define SIZEOF_PTR_2POW	2
281 #  define USE_BRK
282 #endif
283 #ifdef __vax__
284 #  define QUANTUM_2POW_MIN	4
285 #  define SIZEOF_PTR_2POW	2
286 #  define USE_BRK
287 #endif
288 #ifdef __sh__
289 #  define QUANTUM_2POW_MIN	4
290 #  define SIZEOF_PTR_2POW	2
291 #  define USE_BRK
292 #endif
293 #ifdef __m68k__
294 #  define QUANTUM_2POW_MIN	4
295 #  define SIZEOF_PTR_2POW	2
296 #  define USE_BRK
297 #endif
298 #ifdef __mips__
299 #  define QUANTUM_2POW_MIN	4
300 #  define SIZEOF_PTR_2POW	2
301 #  define USE_BRK
302 #endif
303 #ifdef __hppa__
304 #  define QUANTUM_2POW_MIN     4
305 #  define SIZEOF_PTR_2POW      2
306 #  define USE_BRK
307 #endif
308 
309 #define	SIZEOF_PTR		(1 << SIZEOF_PTR_2POW)
310 
311 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
312 #ifndef SIZEOF_INT_2POW
313 #  define SIZEOF_INT_2POW	2
314 #endif
315 
316 /*
317  * Size and alignment of memory chunks that are allocated by the OS's virtual
318  * memory system.
319  */
320 #define	CHUNK_2POW_DEFAULT	20
321 
322 /*
323  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
324  * so over-estimates are okay (up to a point), but under-estimates will
325  * negatively affect performance.
326  */
327 #define	CACHELINE_2POW		6
328 #define	CACHELINE		((size_t)(1 << CACHELINE_2POW))
329 
330 /* Smallest size class to support. */
331 #ifndef TINY_MIN_2POW
332 #define	TINY_MIN_2POW		2
333 #endif
334 
335 /*
336  * Maximum size class that is a multiple of the quantum, but not (necessarily)
337  * a power of 2.  Above this size, allocations are rounded up to the nearest
338  * power of 2.
339  */
340 #define	SMALL_MAX_2POW_DEFAULT	9
341 #define	SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)
342 
343 /*
344  * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
345  * as small as possible such that this setting is still honored, without
346  * violating other constraints.  The goal is to make runs as small as possible
347  * without exceeding a per run external fragmentation threshold.
348  *
349  * We use binary fixed point math for overhead computations, where the binary
350  * point is implicitly RUN_BFP bits to the left.
351  *
352  * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
353  * honored for some/all object sizes, since there is one bit of header overhead
354  * per object (plus a constant).  This constraint is relaxed (ignored) for runs
355  * that are so small that the per-region overhead is greater than:
356  *
357  *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
358  */
359 #define RUN_BFP			12
360 /*                              \/   Implicit binary fixed point. */
361 #define RUN_MAX_OVRHD		0x0000003dU
362 #define RUN_MAX_OVRHD_RELAX	0x00001800U
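
/*
 * Worked example of the fixed point format above: with RUN_BFP == 12,
 * RUN_MAX_OVRHD == 0x3d corresponds to 61/4096, i.e. a target header
 * overhead of roughly 1.5% of the run size.  The sketch below (not
 * compiled, and not the exact expression used later in
 * arena_bin_run_size_calc(), which also honors RUN_MAX_OVRHD_RELAX)
 * shows the basic comparison for a hypothetical header of hdr_size
 * bytes in a run of run_size bytes.
 */
#if 0
static bool
run_ovrhd_ok_example(size_t hdr_size, size_t run_size)
{

	return (((hdr_size << RUN_BFP) / run_size) <= RUN_MAX_OVRHD);
}
#endif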
363 
364 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
365 #define RUN_MAX_SMALL_2POW	15
366 #define RUN_MAX_SMALL		(1 << RUN_MAX_SMALL_2POW)
367 
368 /******************************************************************************/
369 
370 #ifdef __FreeBSD__
371 /*
372  * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
373  * they require malloc()ed memory.
374  */
375 typedef struct {
376 	spinlock_t	lock;
377 } malloc_mutex_t;
378 
379 /* Set to true once the allocator has been initialized. */
380 static bool malloc_initialized = false;
381 
382 /* Used to avoid initialization races. */
383 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
384 #else
385 #define	malloc_mutex_t	mutex_t
386 
387 /* Set to true once the allocator has been initialized. */
388 static bool malloc_initialized = false;
389 
390 /* Used to avoid initialization races. */
391 static mutex_t init_lock = MUTEX_INITIALIZER;
392 #endif
393 
394 /******************************************************************************/
395 /*
396  * Statistics data structures.
397  */
398 
399 #ifdef MALLOC_STATS
400 
401 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
402 struct malloc_bin_stats_s {
403 	/*
404 	 * Number of allocation requests that corresponded to the size of this
405 	 * bin.
406 	 */
407 	uint64_t	nrequests;
408 
409 	/* Total number of runs created for this bin's size class. */
410 	uint64_t	nruns;
411 
412 	/*
413 	 * Total number of runs reused by extracting them from the runs tree for
414 	 * this bin's size class.
415 	 */
416 	uint64_t	reruns;
417 
418 	/* High-water mark for this bin. */
419 	unsigned long	highruns;
420 
421 	/* Current number of runs in this bin. */
422 	unsigned long	curruns;
423 };
424 
425 typedef struct arena_stats_s arena_stats_t;
426 struct arena_stats_s {
427 	/* Number of bytes currently mapped. */
428 	size_t		mapped;
429 
430 	/* Per-size-category statistics. */
431 	size_t		allocated_small;
432 	uint64_t	nmalloc_small;
433 	uint64_t	ndalloc_small;
434 
435 	size_t		allocated_large;
436 	uint64_t	nmalloc_large;
437 	uint64_t	ndalloc_large;
438 };
439 
440 typedef struct chunk_stats_s chunk_stats_t;
441 struct chunk_stats_s {
442 	/* Number of chunks that were allocated. */
443 	uint64_t	nchunks;
444 
445 	/* High-water mark for number of chunks allocated. */
446 	unsigned long	highchunks;
447 
448 	/*
449 	 * Current number of chunks allocated.  This value is not needed for
450 	 * any other purpose; it is maintained only so that highchunks can be
451 	 * updated.
452 	 */
453 	unsigned long	curchunks;
454 };
455 
456 #endif /* #ifdef MALLOC_STATS */
457 
458 /******************************************************************************/
459 /*
460  * Chunk data structures.
461  */
462 
463 /* Tree of chunks. */
464 typedef struct chunk_node_s chunk_node_t;
465 struct chunk_node_s {
466 	/* Linkage for the chunk tree. */
467 	RB_ENTRY(chunk_node_s) link;
468 
469 	/*
470 	 * Pointer to the chunk that this tree node is responsible for.  In some
471 	 * (but certainly not all) cases, this data structure is placed at the
472 	 * beginning of the corresponding chunk, so this field may point to this
473 	 * node.
474 	 */
475 	void	*chunk;
476 
477 	/* Total chunk size. */
478 	size_t	size;
479 };
480 typedef struct chunk_tree_s chunk_tree_t;
481 RB_HEAD(chunk_tree_s, chunk_node_s);
482 
483 /******************************************************************************/
484 /*
485  * Arena data structures.
486  */
487 
488 typedef struct arena_s arena_t;
489 typedef struct arena_bin_s arena_bin_t;
490 
491 typedef struct arena_chunk_map_s arena_chunk_map_t;
492 struct arena_chunk_map_s {
493 	/* Number of pages in run. */
494 	uint32_t	npages;
495 	/*
496 	 * Position within run.  For a free run, this is POS_FREE for the first
497 	 * and last pages.  The POS_FREE special value makes it possible to
498 	 * quickly coalesce free runs.
499 	 *
500 	 * This is the limiting factor for chunksize; there can be at most 2^31
501 	 * pages in a run.
502 	 */
503 #define POS_FREE ((uint32_t)0xffffffffU)
504 	uint32_t	pos;
505 };
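
/*
 * Example of the POS_FREE convention above: a free run spanning pages
 * 10..14 of a chunk has map[10] and map[14] both set to
 * { npages = 5, pos = POS_FREE }, while the entries for pages 11..13
 * are not kept up to date.  A deallocation can therefore look one map
 * entry backward and one entry past the freed run to decide whether
 * neighboring free runs should be coalesced.
 */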
506 
507 /* Arena chunk header. */
508 typedef struct arena_chunk_s arena_chunk_t;
509 struct arena_chunk_s {
510 	/* Arena that owns the chunk. */
511 	arena_t *arena;
512 
513 	/* Linkage for the arena's chunk tree. */
514 	RB_ENTRY(arena_chunk_s) link;
515 
516 	/*
517 	 * Number of pages in use.  This is maintained in order to make
518 	 * detection of empty chunks fast.
519 	 */
520 	uint32_t pages_used;
521 
522 	/*
523 	 * Every time a free run larger than this value is created/coalesced,
524 	 * this value is increased.  The only way that the value decreases is if
525 	 * arena_run_alloc() fails to find a free run as large as advertised by
526 	 * this value.
527 	 */
528 	uint32_t max_frun_npages;
529 
530 	/*
531 	 * Every time a free run that starts at an earlier page than this value
532 	 * is created/coalesced, this value is decreased.  It is reset in a
533 	 * similar fashion to max_frun_npages.
534 	 */
535 	uint32_t min_frun_ind;
536 
537 	/*
538 	 * Map of pages within chunk that keeps track of free/large/small.  For
539 	 * free runs, only the map entries for the first and last pages are
540 	 * kept up to date, so that free runs can be quickly coalesced.
541 	 */
542 	arena_chunk_map_t map[1]; /* Dynamically sized. */
543 };
544 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
545 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
546 
547 typedef struct arena_run_s arena_run_t;
548 struct arena_run_s {
549 	/* Linkage for run trees. */
550 	RB_ENTRY(arena_run_s) link;
551 
552 #ifdef MALLOC_DEBUG
553 	uint32_t	magic;
554 #  define ARENA_RUN_MAGIC 0x384adf93
555 #endif
556 
557 	/* Bin this run is associated with. */
558 	arena_bin_t	*bin;
559 
560 	/* Index of first element that might have a free region. */
561 	unsigned	regs_minelm;
562 
563 	/* Number of free regions in run. */
564 	unsigned	nfree;
565 
566 	/* Bitmask of in-use regions (0: in use, 1: free). */
567 	unsigned	regs_mask[1]; /* Dynamically sized. */
568 };
569 typedef struct arena_run_tree_s arena_run_tree_t;
570 RB_HEAD(arena_run_tree_s, arena_run_s);
571 
572 struct arena_bin_s {
573 	/*
574 	 * Current run being used to service allocations of this bin's size
575 	 * class.
576 	 */
577 	arena_run_t	*runcur;
578 
579 	/*
580 	 * Tree of non-full runs.  This tree is used when looking for an
581 	 * existing run when runcur is no longer usable.  We choose the
582 	 * non-full run that is lowest in memory; this policy tends to keep
583 	 * objects packed well, and it can also help reduce the number of
584 	 * almost-empty chunks.
585 	 */
586 	arena_run_tree_t runs;
587 
588 	/* Size of regions in a run for this bin's size class. */
589 	size_t		reg_size;
590 
591 	/* Total size of a run for this bin's size class. */
592 	size_t		run_size;
593 
594 	/* Total number of regions in a run for this bin's size class. */
595 	uint32_t	nregs;
596 
597 	/* Number of elements in a run's regs_mask for this bin's size class. */
598 	uint32_t	regs_mask_nelms;
599 
600 	/* Offset of first region in a run for this bin's size class. */
601 	uint32_t	reg0_offset;
602 
603 #ifdef MALLOC_STATS
604 	/* Bin statistics. */
605 	malloc_bin_stats_t stats;
606 #endif
607 };
608 
609 struct arena_s {
610 #ifdef MALLOC_DEBUG
611 	uint32_t		magic;
612 #  define ARENA_MAGIC 0x947d3d24
613 #endif
614 
615 	/* All operations on this arena require that mtx be locked. */
616 	malloc_mutex_t		mtx;
617 
618 #ifdef MALLOC_STATS
619 	arena_stats_t		stats;
620 #endif
621 
622 	/*
623 	 * Tree of chunks this arena manages.
624 	 */
625 	arena_chunk_tree_t	chunks;
626 
627 	/*
628 	 * In order to avoid rapid chunk allocation/deallocation when an arena
629 	 * oscillates right on the cusp of needing a new chunk, cache the most
630 	 * recently freed chunk.  This caching is disabled by opt_hint.
631 	 *
632 	 * There is one spare chunk per arena, rather than one spare total, in
633 	 * order to avoid interactions between multiple threads that could make
634 	 * a single spare inadequate.
635 	 */
636 	arena_chunk_t *spare;
637 
638 	/*
639 	 * bins is used to store trees of free regions of the following sizes,
640 	 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
641 	 *
642 	 *   bins[i] | size |
643 	 *   --------+------+
644 	 *        0  |    2 |
645 	 *        1  |    4 |
646 	 *        2  |    8 |
647 	 *   --------+------+
648 	 *        3  |   16 |
649 	 *        4  |   32 |
650 	 *        5  |   48 |
651 	 *        6  |   64 |
652 	 *           :      :
653 	 *           :      :
654 	 *       33  |  496 |
655 	 *       34  |  512 |
656 	 *   --------+------+
657 	 *       35  | 1024 |
658 	 *       36  | 2048 |
659 	 *   --------+------+
660 	 */
661 	arena_bin_t		bins[1]; /* Dynamically sized. */
662 };
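
/*
 * Sketch of how a (pre-rounded) small size maps onto the bins[] layout
 * in the table above, assuming that exact layout: three tiny bins at
 * 2/4/8, quantum-spaced bins every 16 bytes up to 512, then 1 kB and
 * 2 kB sub-page bins.  The helper name and hard-coded constants are
 * hypothetical; the allocator derives the equivalent mapping from
 * ntbins/nqbins/nsbins at initialization time.  Not compiled.
 */
#if 0
static unsigned
bin_index_example(size_t size)
{

	if (size <= 8)			/* Tiny: 2, 4, 8 -> bins 0..2. */
		return ((unsigned)ffs((int)size) - 2);
	else if (size <= 512)		/* Quantum-spaced -> bins 3..34. */
		return ((unsigned)(size >> 4) + 2);
	else				/* Sub-page: 1 kB, 2 kB -> 35, 36. */
		return ((unsigned)ffs((int)size) + 24);
}
#endif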
663 
664 /******************************************************************************/
665 /*
666  * Data.
667  */
668 
669 /* Number of CPUs. */
670 static unsigned		ncpus;
671 
672 /* VM page size. */
673 static size_t		pagesize;
674 static size_t		pagesize_mask;
675 static int		pagesize_2pow;
676 
677 /* Various bin-related settings. */
678 static size_t		bin_maxclass; /* Max size class for bins. */
679 static unsigned		ntbins; /* Number of (2^n)-spaced tiny bins. */
680 static unsigned		nqbins; /* Number of quantum-spaced bins. */
681 static unsigned		nsbins; /* Number of (2^n)-spaced sub-page bins. */
682 static size_t		small_min;
683 static size_t		small_max;
684 
685 /* Various quantum-related settings. */
686 static size_t		quantum;
687 static size_t		quantum_mask; /* (quantum - 1). */
688 
689 /* Various chunk-related settings. */
690 static size_t		chunksize;
691 static size_t		chunksize_mask; /* (chunksize - 1). */
692 static int		chunksize_2pow;
693 static unsigned		chunk_npages;
694 static unsigned		arena_chunk_header_npages;
695 static size_t		arena_maxclass; /* Max size class for arenas. */
696 
697 /********/
698 /*
699  * Chunks.
700  */
701 
702 /* Protects chunk-related data structures. */
703 static malloc_mutex_t	chunks_mtx;
704 
705 /* Tree of chunks that are stand-alone huge allocations. */
706 static chunk_tree_t	huge;
707 
708 #ifdef USE_BRK
709 /*
710  * Try to use brk for chunk-size allocations, due to address space constraints.
711  */
712 /*
713  * Protects sbrk() calls.  This must be separate from chunks_mtx, since
714  * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
715  * could cause recursive lock acquisition).
716  */
717 static malloc_mutex_t	brk_mtx;
718 /* Result of first sbrk(0) call. */
719 static void		*brk_base;
720 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
721 static void		*brk_prev;
722 /* Current upper limit on brk addresses. */
723 static void		*brk_max;
724 #endif
725 
726 #ifdef MALLOC_STATS
727 /* Huge allocation statistics. */
728 static uint64_t		huge_nmalloc;
729 static uint64_t		huge_ndalloc;
730 static uint64_t		huge_nralloc;
731 static size_t		huge_allocated;
732 #endif
733 
734 /*
735  * Tree of chunks that were previously allocated.  This is used when allocating
736  * chunks, in an attempt to re-use address space.
737  */
738 static chunk_tree_t	old_chunks;
739 
740 /****************************/
741 /*
742  * base (internal allocation).
743  */
744 
745 /*
746  * Current pages that are being used for internal memory allocations.  These
747  * pages are carved up in cacheline-size quanta, so that there is no chance of
748  * false cache line sharing.
749  */
750 static void		*base_pages;
751 static void		*base_next_addr;
752 static void		*base_past_addr; /* Addr immediately past base_pages. */
753 static chunk_node_t	*base_chunk_nodes; /* LIFO cache of chunk nodes. */
754 static malloc_mutex_t	base_mtx;
755 #ifdef MALLOC_STATS
756 static size_t		base_mapped;
757 #endif
758 
759 /********/
760 /*
761  * Arenas.
762  */
763 
764 /*
765  * Arenas that are used to service external requests.  Not all elements of the
766  * arenas array are necessarily used; arenas are created lazily as needed.
767  */
768 static arena_t		**arenas;
769 static unsigned		narenas;
770 static unsigned		next_arena;
771 static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */
772 
773 #ifndef NO_TLS
774 /*
775  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
776  * for allocations.
777  */
778 static __thread arena_t	*arenas_map;
779 #define	get_arenas_map()	(arenas_map)
780 #define	set_arenas_map(x)	(arenas_map = x)
781 #else
782 static thread_key_t arenas_map_key;
783 #define	get_arenas_map()	thr_getspecific(arenas_map_key)
784 #define	set_arenas_map(x)	thr_setspecific(arenas_map_key, x)
785 #endif
786 
787 #ifdef MALLOC_STATS
788 /* Chunk statistics. */
789 static chunk_stats_t	stats_chunks;
790 #endif
791 
792 /*******************************/
793 /*
794  * Runtime configuration options.
795  */
796 const char	*_malloc_options;
797 
798 #ifndef MALLOC_PRODUCTION
799 static bool	opt_abort = true;
800 static bool	opt_junk = true;
801 #else
802 static bool	opt_abort = false;
803 static bool	opt_junk = false;
804 #endif
805 static bool	opt_hint = false;
806 static bool	opt_print_stats = false;
807 static int	opt_quantum_2pow = QUANTUM_2POW_MIN;
808 static int	opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
809 static int	opt_chunk_2pow = CHUNK_2POW_DEFAULT;
810 static bool	opt_utrace = false;
811 static bool	opt_sysv = false;
812 static bool	opt_xmalloc = false;
813 static bool	opt_zero = false;
814 static int32_t	opt_narenas_lshift = 0;
815 
816 typedef struct {
817 	void	*p;
818 	size_t	s;
819 	void	*r;
820 } malloc_utrace_t;
821 
822 #define	UTRACE(a, b, c)							\
823 	if (opt_utrace) {						\
824 		malloc_utrace_t ut;					\
825 		ut.p = a;						\
826 		ut.s = b;						\
827 		ut.r = c;						\
828 		xutrace(&ut, sizeof(ut));				\
829 	}
830 
831 /******************************************************************************/
832 /*
833  * Begin function prototypes for non-inline static functions.
834  */
835 
836 static void	wrtmessage(const char *p1, const char *p2, const char *p3,
837 		const char *p4);
838 #ifdef MALLOC_STATS
839 static void	malloc_printf(const char *format, ...);
840 #endif
841 static char	*size_t2s(size_t x, char *s);
842 static bool	base_pages_alloc(size_t minsize);
843 static void	*base_alloc(size_t size);
844 static chunk_node_t *base_chunk_node_alloc(void);
845 static void	base_chunk_node_dealloc(chunk_node_t *node);
846 #ifdef MALLOC_STATS
847 static void	stats_print(arena_t *arena);
848 #endif
849 static void	*pages_map(void *addr, size_t size);
850 static void	*pages_map_align(void *addr, size_t size, int align);
851 static void	pages_unmap(void *addr, size_t size);
852 static void	*chunk_alloc(size_t size);
853 static void	chunk_dealloc(void *chunk, size_t size);
854 static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
855 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
856 static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
857 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
858 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
859 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
860 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
861 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
862 static void	*arena_malloc(arena_t *arena, size_t size);
863 static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
864     size_t alloc_size);
865 static size_t	arena_salloc(const void *ptr);
866 static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
867 static void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
868 static bool	arena_new(arena_t *arena);
869 static arena_t	*arenas_extend(unsigned ind);
870 static void	*huge_malloc(size_t size);
871 static void	*huge_palloc(size_t alignment, size_t size);
872 static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
873 static void	huge_dalloc(void *ptr);
874 static void	*imalloc(size_t size);
875 static void	*ipalloc(size_t alignment, size_t size);
876 static void	*icalloc(size_t size);
877 static size_t	isalloc(const void *ptr);
878 static void	*iralloc(void *ptr, size_t size);
879 static void	idalloc(void *ptr);
880 static void	malloc_print_stats(void);
881 static bool	malloc_init_hard(void);
882 
883 /*
884  * End function prototypes.
885  */
886 /******************************************************************************/
887 /*
888  * Begin mutex.
889  */
890 
891 #ifdef __NetBSD__
892 #define	malloc_mutex_init(m)	mutex_init(m, NULL)
893 #define	malloc_mutex_lock(m)	mutex_lock(m)
894 #define	malloc_mutex_unlock(m)	mutex_unlock(m)
895 #else	/* __NetBSD__ */
896 static inline void
897 malloc_mutex_init(malloc_mutex_t *a_mutex)
898 {
899 	static const spinlock_t lock = _SPINLOCK_INITIALIZER;
900 
901 	a_mutex->lock = lock;
902 }
903 
904 static inline void
905 malloc_mutex_lock(malloc_mutex_t *a_mutex)
906 {
907 
908 	if (__isthreaded)
909 		_SPINLOCK(&a_mutex->lock);
910 }
911 
912 static inline void
913 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
914 {
915 
916 	if (__isthreaded)
917 		_SPINUNLOCK(&a_mutex->lock);
918 }
919 #endif	/* __NetBSD__ */
920 
921 /*
922  * End mutex.
923  */
924 /******************************************************************************/
925 /*
926  * Begin Utility functions/macros.
927  */
928 
929 /* Return the chunk address for allocation address a. */
930 #define	CHUNK_ADDR2BASE(a)						\
931 	((void *)((uintptr_t)(a) & ~chunksize_mask))
932 
933 /* Return the chunk offset of address a. */
934 #define	CHUNK_ADDR2OFFSET(a)						\
935 	((size_t)((uintptr_t)(a) & chunksize_mask))
936 
937 /* Return the smallest chunk multiple that is >= s. */
938 #define	CHUNK_CEILING(s)						\
939 	(((s) + chunksize_mask) & ~chunksize_mask)
940 
941 /* Return the smallest cacheline multiple that is >= s. */
942 #define	CACHELINE_CEILING(s)						\
943 	(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
944 
945 /* Return the smallest quantum multiple that is >= a. */
946 #define	QUANTUM_CEILING(a)						\
947 	(((a) + quantum_mask) & ~quantum_mask)
948 
949 /* Return the smallest pagesize multiple that is >= s. */
950 #define	PAGE_CEILING(s)							\
951 	(((s) + pagesize_mask) & ~pagesize_mask)
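
/*
 * Worked example of the ceiling macros, assuming 4 kB pages
 * (pagesize_mask == 0xfff): PAGE_CEILING(1) == 4096,
 * PAGE_CEILING(4096) == 4096, and PAGE_CEILING(4097) == 8192.  The
 * chunk, cacheline, and quantum variants follow the same
 * round-up-to-the-next-multiple pattern with their respective masks.
 */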
952 
953 /* Compute the smallest power of 2 that is >= x. */
954 static inline size_t
955 pow2_ceil(size_t x)
956 {
957 
958 	x--;
959 	x |= x >> 1;
960 	x |= x >> 2;
961 	x |= x >> 4;
962 	x |= x >> 8;
963 	x |= x >> 16;
964 #if (SIZEOF_PTR == 8)
965 	x |= x >> 32;
966 #endif
967 	x++;
968 	return (x);
969 }
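
/*
 * For example, pow2_ceil(1) == 1, pow2_ceil(5) == 8, and
 * pow2_ceil(4096) == 4096: the shift cascade smears the highest set
 * bit of (x - 1) into every lower bit, so the final increment lands on
 * the next power of two (or on x itself when x is already one).
 */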
970 
971 static void
972 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
973 {
974 
975 	write(STDERR_FILENO, p1, strlen(p1));
976 	write(STDERR_FILENO, p2, strlen(p2));
977 	write(STDERR_FILENO, p3, strlen(p3));
978 	write(STDERR_FILENO, p4, strlen(p4));
979 }
980 
981 void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
982 	    const char *p4) = wrtmessage;
983 
984 #ifdef MALLOC_STATS
985 /*
986  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
987  */
988 static void
989 malloc_printf(const char *format, ...)
990 {
991 	char buf[4096];
992 	va_list ap;
993 
994 	va_start(ap, format);
995 	vsnprintf(buf, sizeof(buf), format, ap);
996 	va_end(ap);
997 	_malloc_message(buf, "", "", "");
998 }
999 #endif
1000 
1001 /*
1002  * We don't want to depend on vsnprintf() for production builds, since that can
1003  * cause unnecessary bloat for static binaries.  size_t2s() provides minimal
1004  * integer printing functionality, so that malloc_printf() use can be limited to
1005  * MALLOC_STATS code.
1006  */
1007 #define UMAX2S_BUFSIZE	21
1008 static char *
1009 size_t2s(size_t x, char *s)
1010 {
1011 	unsigned i;
1012 
1013 	/* Make sure UMAX2S_BUFSIZE is large enough. */
1014 	/* LINTED */
1015 	assert(sizeof(size_t) <= 8);
1016 
1017 	i = UMAX2S_BUFSIZE - 1;
1018 	s[i] = '\0';
1019 	do {
1020 		i--;
1021 		s[i] = "0123456789"[(int)x % 10];
1022 		x /= (uintmax_t)10LL;
1023 	} while (x > 0);
1024 
1025 	return (&s[i]);
1026 }
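
/*
 * size_t2s() fills the buffer from the end and returns a pointer into
 * it, so callers pass a UMAX2S_BUFSIZE-sized buffer and print the
 * returned pointer rather than the buffer itself.  A minimal usage
 * sketch (the helper name is hypothetical, and the snippet is not
 * compiled):
 */
#if 0
static void
report_size_example(size_t size)
{
	char buf[UMAX2S_BUFSIZE];

	_malloc_message("allocated: ", size_t2s(size, buf), " bytes\n", "");
}
#endif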
1027 
1028 /******************************************************************************/
1029 
1030 static bool
1031 base_pages_alloc(size_t minsize)
1032 {
1033 	size_t csize = 0;
1034 
1035 #ifdef USE_BRK
1036 	/*
1037 	 * Do special brk allocation here, since base allocations don't need to
1038 	 * be chunk-aligned.
1039 	 */
1040 	if (brk_prev != (void *)-1) {
1041 		void *brk_cur;
1042 		intptr_t incr;
1043 
1044 		if (minsize != 0)
1045 			csize = CHUNK_CEILING(minsize);
1046 
1047 		malloc_mutex_lock(&brk_mtx);
1048 		do {
1049 			/* Get the current end of brk. */
1050 			brk_cur = sbrk(0);
1051 
1052 			/*
1053 			 * Calculate how much padding is necessary to
1054 			 * chunk-align the end of brk.  Don't worry about
1055 			 * brk_cur not being chunk-aligned though.
1056 			 */
1057 			incr = (intptr_t)chunksize
1058 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1059 			assert(incr >= 0);
1060 			if ((size_t)incr < minsize)
1061 				incr += csize;
1062 
1063 			brk_prev = sbrk(incr);
1064 			if (brk_prev == brk_cur) {
1065 				/* Success. */
1066 				malloc_mutex_unlock(&brk_mtx);
1067 				base_pages = brk_cur;
1068 				base_next_addr = base_pages;
1069 				base_past_addr = (void *)((uintptr_t)base_pages
1070 				    + incr);
1071 #ifdef MALLOC_STATS
1072 				base_mapped += incr;
1073 #endif
1074 				return (false);
1075 			}
1076 		} while (brk_prev != (void *)-1);
1077 		malloc_mutex_unlock(&brk_mtx);
1078 	}
1079 	if (minsize == 0) {
1080 		/*
1081 		 * Failure during initialization doesn't matter, so avoid
1082 		 * falling through to the mmap-based page mapping code.
1083 		 */
1084 		return (true);
1085 	}
1086 #endif
1087 	assert(minsize != 0);
1088 	csize = PAGE_CEILING(minsize);
1089 	base_pages = pages_map(NULL, csize);
1090 	if (base_pages == NULL)
1091 		return (true);
1092 	base_next_addr = base_pages;
1093 	base_past_addr = (void *)((uintptr_t)base_pages + csize);
1094 #ifdef MALLOC_STATS
1095 	base_mapped += csize;
1096 #endif
1097 	return (false);
1098 }
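
/*
 * Worked example of the brk padding computation in base_pages_alloc()
 * above, assuming 1 MB chunks: if sbrk(0) returns an address whose
 * offset within its chunk is 0x12345, then incr starts out as
 * 0x100000 - 0x12345 == 0xedcbb, i.e. just enough to advance brk to
 * the next chunk boundary; csize is added on top only when that
 * padding alone would not satisfy minsize.
 */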
1099 
1100 static void *
1101 base_alloc(size_t size)
1102 {
1103 	void *ret;
1104 	size_t csize;
1105 
1106 	/* Round size up to nearest multiple of the cacheline size. */
1107 	csize = CACHELINE_CEILING(size);
1108 
1109 	malloc_mutex_lock(&base_mtx);
1110 
1111 	/* Make sure there's enough space for the allocation. */
1112 	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1113 		if (base_pages_alloc(csize)) {
1114 			ret = NULL;
1115 			goto RETURN;
1116 		}
1117 	}
1118 
1119 	/* Allocate. */
1120 	ret = base_next_addr;
1121 	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1122 
1123 RETURN:
1124 	malloc_mutex_unlock(&base_mtx);
1125 	return (ret);
1126 }
1127 
1128 static chunk_node_t *
1129 base_chunk_node_alloc(void)
1130 {
1131 	chunk_node_t *ret;
1132 
1133 	malloc_mutex_lock(&base_mtx);
1134 	if (base_chunk_nodes != NULL) {
1135 		ret = base_chunk_nodes;
1136 		/* LINTED */
1137 		base_chunk_nodes = *(chunk_node_t **)ret;
1138 		malloc_mutex_unlock(&base_mtx);
1139 	} else {
1140 		malloc_mutex_unlock(&base_mtx);
1141 		ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1142 	}
1143 
1144 	return (ret);
1145 }
1146 
1147 static void
1148 base_chunk_node_dealloc(chunk_node_t *node)
1149 {
1150 
1151 	malloc_mutex_lock(&base_mtx);
1152 	/* LINTED */
1153 	*(chunk_node_t **)node = base_chunk_nodes;
1154 	base_chunk_nodes = node;
1155 	malloc_mutex_unlock(&base_mtx);
1156 }
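
/*
 * The node cache above is an intrusive LIFO: base_chunk_node_dealloc()
 * overwrites the first word of a freed chunk_node_t with the old list
 * head and makes base_chunk_nodes point at the freed node, and
 * base_chunk_node_alloc() pops by reversing that store.  No separate
 * "next" field is needed, so cached nodes cost nothing beyond their
 * own storage.
 */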
1157 
1158 /******************************************************************************/
1159 
1160 #ifdef MALLOC_STATS
1161 static void
1162 stats_print(arena_t *arena)
1163 {
1164 	unsigned i;
1165 	int gap_start;
1166 
1167 	malloc_printf(
1168 	    "          allocated/mapped            nmalloc      ndalloc\n");
1169 
1170 	malloc_printf("small: %12zu %-12s %12llu %12llu\n",
1171 	    arena->stats.allocated_small, "", arena->stats.nmalloc_small,
1172 	    arena->stats.ndalloc_small);
1173 	malloc_printf("large: %12zu %-12s %12llu %12llu\n",
1174 	    arena->stats.allocated_large, "", arena->stats.nmalloc_large,
1175 	    arena->stats.ndalloc_large);
1176 	malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
1177 	    arena->stats.allocated_small + arena->stats.allocated_large,
1178 	    arena->stats.mapped,
1179 	    arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1180 	    arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1181 
1182 	malloc_printf("bins:     bin   size regs pgs  requests   newruns"
1183 	    "    reruns maxruns curruns\n");
1184 	for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1185 		if (arena->bins[i].stats.nrequests == 0) {
1186 			if (gap_start == -1)
1187 				gap_start = i;
1188 		} else {
1189 			if (gap_start != -1) {
1190 				if (i > gap_start + 1) {
1191 					/* Gap of more than one size class. */
1192 					malloc_printf("[%u..%u]\n",
1193 					    gap_start, i - 1);
1194 				} else {
1195 					/* Gap of one size class. */
1196 					malloc_printf("[%u]\n", gap_start);
1197 				}
1198 				gap_start = -1;
1199 			}
1200 			malloc_printf(
1201 			    "%13u %1s %4u %4u %3u %9llu %9llu"
1202 			    " %9llu %7lu %7lu\n",
1203 			    i,
1204 			    i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1205 			    arena->bins[i].reg_size,
1206 			    arena->bins[i].nregs,
1207 			    arena->bins[i].run_size >> pagesize_2pow,
1208 			    arena->bins[i].stats.nrequests,
1209 			    arena->bins[i].stats.nruns,
1210 			    arena->bins[i].stats.reruns,
1211 			    arena->bins[i].stats.highruns,
1212 			    arena->bins[i].stats.curruns);
1213 		}
1214 	}
1215 	if (gap_start != -1) {
1216 		if (i > gap_start + 1) {
1217 			/* Gap of more than one size class. */
1218 			malloc_printf("[%u..%u]\n", gap_start, i - 1);
1219 		} else {
1220 			/* Gap of one size class. */
1221 			malloc_printf("[%u]\n", gap_start);
1222 		}
1223 	}
1224 }
1225 #endif
1226 
1227 /*
1228  * End Utility functions/macros.
1229  */
1230 /******************************************************************************/
1231 /*
1232  * Begin chunk management functions.
1233  */
1234 
1235 #ifndef lint
1236 static inline int
1237 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1238 {
1239 
1240 	assert(a != NULL);
1241 	assert(b != NULL);
1242 
1243 	if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1244 		return (-1);
1245 	else if (a->chunk == b->chunk)
1246 		return (0);
1247 	else
1248 		return (1);
1249 }
1250 
1251 /* Generate red-black tree code for chunks. */
1252 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1253 #endif
1254 
1255 static void *
1256 pages_map_align(void *addr, size_t size, int align)
1257 {
1258 	void *ret;
1259 
1260 	/*
1261 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
1262 	 * of existing mappings, and we only want to create new mappings.
1263 	 */
1264 	ret = mmap(addr, size, PROT_READ | PROT_WRITE,
1265 	    MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(align), -1, 0);
1266 	assert(ret != NULL);
1267 
1268 	if (ret == MAP_FAILED)
1269 		ret = NULL;
1270 	else if (addr != NULL && ret != addr) {
1271 		/*
1272 		 * We succeeded in mapping memory, but not in the right place.
1273 		 */
1274 		if (munmap(ret, size) == -1) {
1275 			char buf[STRERROR_BUF];
1276 
1277 			STRERROR_R(errno, buf, sizeof(buf));
1278 			_malloc_message(getprogname(),
1279 			    ": (malloc) Error in munmap(): ", buf, "\n");
1280 			if (opt_abort)
1281 				abort();
1282 		}
1283 		ret = NULL;
1284 	}
1285 
1286 	assert(ret == NULL || (addr == NULL && ret != addr)
1287 	    || (addr != NULL && ret == addr));
1288 	return (ret);
1289 }
1290 
1291 static void *
1292 pages_map(void *addr, size_t size)
1293 {
1294 
1295 	return pages_map_align(addr, size, 0);
1296 }
1297 
1298 static void
1299 pages_unmap(void *addr, size_t size)
1300 {
1301 
1302 	if (munmap(addr, size) == -1) {
1303 		char buf[STRERROR_BUF];
1304 
1305 		STRERROR_R(errno, buf, sizeof(buf));
1306 		_malloc_message(getprogname(),
1307 		    ": (malloc) Error in munmap(): ", buf, "\n");
1308 		if (opt_abort)
1309 			abort();
1310 	}
1311 }
1312 
1313 static void *
1314 chunk_alloc(size_t size)
1315 {
1316 	void *ret, *chunk;
1317 	chunk_node_t *tchunk, *delchunk;
1318 
1319 	assert(size != 0);
1320 	assert((size & chunksize_mask) == 0);
1321 
1322 	malloc_mutex_lock(&chunks_mtx);
1323 
1324 	if (size == chunksize) {
1325 		/*
1326 		 * Check for address ranges that were previously chunks and try
1327 		 * to use them.
1328 		 */
1329 
1330 		/* LINTED */
1331 		tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1332 		while (tchunk != NULL) {
1333 			/* Found an address range.  Try to recycle it. */
1334 
1335 			chunk = tchunk->chunk;
1336 			delchunk = tchunk;
1337 			/* LINTED */
1338 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1339 
1340 			/* Remove delchunk from the tree. */
1341 			/* LINTED */
1342 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1343 			base_chunk_node_dealloc(delchunk);
1344 
1345 #ifdef USE_BRK
1346 			if ((uintptr_t)chunk >= (uintptr_t)brk_base
1347 			    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1348 				/* Re-use a previously freed brk chunk. */
1349 				ret = chunk;
1350 				goto RETURN;
1351 			}
1352 #endif
1353 			if ((ret = pages_map(chunk, size)) != NULL) {
1354 				/* Success. */
1355 				goto RETURN;
1356 			}
1357 		}
1358 	}
1359 
1360 	/*
1361 	 * Try to over-allocate, but allow the OS to place the allocation
1362 	 * anywhere.  Beware of size_t wrap-around.
1363 	 */
1364 	if (size + chunksize > size) {
1365 		if ((ret = pages_map_align(NULL, size, chunksize_2pow))
1366 		    != NULL) {
1367 			goto RETURN;
1368 		}
1369 	}
1370 
1371 #ifdef USE_BRK
1372 	/*
1373 	 * Try to create allocations in brk, in order to make full use of
1374 	 * limited address space.
1375 	 */
1376 	if (brk_prev != (void *)-1) {
1377 		void *brk_cur;
1378 		intptr_t incr;
1379 
1380 		/*
1381 		 * The loop is necessary to recover from races with other
1382 		 * threads that are using brk for something other than malloc.
1383 		 */
1384 		malloc_mutex_lock(&brk_mtx);
1385 		do {
1386 			/* Get the current end of brk. */
1387 			brk_cur = sbrk(0);
1388 
1389 			/*
1390 			 * Calculate how much padding is necessary to
1391 			 * chunk-align the end of brk.
1392 			 */
1393 			incr = (intptr_t)size
1394 			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1395 			if (incr == (intptr_t)size) {
1396 				ret = brk_cur;
1397 			} else {
1398 				ret = (void *)((intptr_t)brk_cur + incr);
1399 				incr += size;
1400 			}
1401 
1402 			brk_prev = sbrk(incr);
1403 			if (brk_prev == brk_cur) {
1404 				/* Success. */
1405 				malloc_mutex_unlock(&brk_mtx);
1406 				brk_max = (void *)((intptr_t)ret + size);
1407 				goto RETURN;
1408 			}
1409 		} while (brk_prev != (void *)-1);
1410 		malloc_mutex_unlock(&brk_mtx);
1411 	}
1412 #endif
1413 
1414 	/* All strategies for allocation failed. */
1415 	ret = NULL;
1416 RETURN:
1417 	if (ret != NULL) {
1418 		chunk_node_t key;
1419 		/*
1420 		 * Clean out any entries in old_chunks that overlap with the
1421 		 * memory we just allocated.
1422 		 */
1423 		key.chunk = ret;
1424 		/* LINTED */
1425 		tchunk = RB_NFIND(chunk_tree_s, &old_chunks, &key);
1426 		while (tchunk != NULL
1427 		    && (uintptr_t)tchunk->chunk >= (uintptr_t)ret
1428 		    && (uintptr_t)tchunk->chunk < (uintptr_t)ret + size) {
1429 			delchunk = tchunk;
1430 			/* LINTED */
1431 			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1432 			/* LINTED */
1433 			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1434 			base_chunk_node_dealloc(delchunk);
1435 		}
1436 
1437 	}
1438 #ifdef MALLOC_STATS
1439 	if (ret != NULL) {
1440 		stats_chunks.nchunks += (size / chunksize);
1441 		stats_chunks.curchunks += (size / chunksize);
1442 	}
1443 	if (stats_chunks.curchunks > stats_chunks.highchunks)
1444 		stats_chunks.highchunks = stats_chunks.curchunks;
1445 #endif
1446 	malloc_mutex_unlock(&chunks_mtx);
1447 
1448 	assert(CHUNK_ADDR2BASE(ret) == ret);
1449 	return (ret);
1450 }
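
/*
 * The "size + chunksize > size" test in chunk_alloc() above is a plain
 * overflow guard: when a request comes within one chunk of SIZE_MAX,
 * the sum wraps around, so the chunk-aligned mmap attempt is skipped
 * instead of being issued for a request that cannot be satisfied
 * anyway.
 */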
1451 
1452 static void
1453 chunk_dealloc(void *chunk, size_t size)
1454 {
1455 	chunk_node_t *node;
1456 
1457 	assert(chunk != NULL);
1458 	assert(CHUNK_ADDR2BASE(chunk) == chunk);
1459 	assert(size != 0);
1460 	assert((size & chunksize_mask) == 0);
1461 
1462 	malloc_mutex_lock(&chunks_mtx);
1463 
1464 #ifdef USE_BRK
1465 	if ((uintptr_t)chunk >= (uintptr_t)brk_base
1466 	    && (uintptr_t)chunk < (uintptr_t)brk_max) {
1467 		void *brk_cur;
1468 
1469 		malloc_mutex_lock(&brk_mtx);
1470 		/* Get the current end of brk. */
1471 		brk_cur = sbrk(0);
1472 
1473 		/*
1474 		 * Try to shrink the data segment if this chunk is at the end
1475 		 * of the data segment.  The sbrk() call here is subject to a
1476 		 * race condition with threads that use brk(2) or sbrk(2)
1477 		 * directly, but the alternative would be to leak memory for
1478 		 * the sake of poorly designed multi-threaded programs.
1479 		 */
1480 		if (brk_cur == brk_max
1481 		    && (void *)((uintptr_t)chunk + size) == brk_max
1482 		    && sbrk(-(intptr_t)size) == brk_max) {
1483 			malloc_mutex_unlock(&brk_mtx);
1484 			if (brk_prev == brk_max) {
1485 				/* Success. */
1486 				brk_prev = (void *)((intptr_t)brk_max
1487 				    - (intptr_t)size);
1488 				brk_max = brk_prev;
1489 			}
1490 		} else {
1491 			size_t offset;
1492 
1493 			malloc_mutex_unlock(&brk_mtx);
1494 			madvise(chunk, size, MADV_FREE);
1495 
1496 			/*
1497 			 * Iteratively create records of each chunk-sized
1498 			 * memory region that 'chunk' is composed of, so that
1499 			 * the address range can be recycled if memory usage
1500 			 * increases later on.
1501 			 */
1502 			for (offset = 0; offset < size; offset += chunksize) {
1503 				node = base_chunk_node_alloc();
1504 				if (node == NULL)
1505 					break;
1506 
1507 				node->chunk = (void *)((uintptr_t)chunk
1508 				    + (uintptr_t)offset);
1509 				node->size = chunksize;
1510 				/* LINTED */
1511 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1512 			}
1513 		}
1514 	} else {
1515 #endif
1516 		pages_unmap(chunk, size);
1517 
1518 		/*
1519 		 * Make a record of the chunk's address, so that the address
1520 		 * range can be recycled if memory usage increases later on.
1521 		 * Don't bother to create entries if (size > chunksize), since
1522 		 * doing so could cause scalability issues for truly gargantuan
1523 		 * objects (many gigabytes or larger).
1524 		 */
1525 		if (size == chunksize) {
1526 			node = base_chunk_node_alloc();
1527 			if (node != NULL) {
1528 				node->chunk = (void *)(uintptr_t)chunk;
1529 				node->size = chunksize;
1530 				/* LINTED */
1531 				RB_INSERT(chunk_tree_s, &old_chunks, node);
1532 			}
1533 		}
1534 #ifdef USE_BRK
1535 	}
1536 #endif
1537 
1538 #ifdef MALLOC_STATS
1539 	stats_chunks.curchunks -= (size / chunksize);
1540 #endif
1541 	malloc_mutex_unlock(&chunks_mtx);
1542 }
1543 
1544 /*
1545  * End chunk management functions.
1546  */
1547 /******************************************************************************/
1548 /*
1549  * Begin arena.
1550  */
1551 
1552 /*
1553  * Choose an arena based on a per-thread and (optimistically) per-CPU value.
1554  *
1555  * We maintain at least one block of arenas.  Usually there are more.
1556  * The blocks are $ncpu arenas in size.  Whole blocks are 'hashed'
1557  * amongst threads.  To accomplish this, next_arena advances only in
1558  * ncpu steps.
1559  */
1560 static __noinline arena_t *
1561 choose_arena_hard(void)
1562 {
1563 	unsigned i, curcpu;
1564 	arena_t **map;
1565 
1566 	/* Initialize the current block of arenas and advance to next. */
1567 	malloc_mutex_lock(&arenas_mtx);
1568 	assert(next_arena % ncpus == 0);
1569 	assert(narenas % ncpus == 0);
1570 	map = &arenas[next_arena];
1571 	set_arenas_map(map);
1572 	for (i = 0; i < ncpus; i++) {
1573 		if (arenas[next_arena] == NULL)
1574 			arenas_extend(next_arena);
1575 		next_arena = (next_arena + 1) % narenas;
1576 	}
1577 	malloc_mutex_unlock(&arenas_mtx);
1578 
1579 	/*
1580 	 * If we were unable to allocate an arena above, then default to
1581 	 * the first arena, which is always present.
1582 	 */
1583 	curcpu = thr_curcpu();
1584 	if (map[curcpu] != NULL)
1585 		return map[curcpu];
1586 	return arenas[0];
1587 }
1588 
1589 static inline arena_t *
1590 choose_arena(void)
1591 {
1592 	unsigned curcpu;
1593 	arena_t **map;
1594 
1595 	map = get_arenas_map();
1596 	curcpu = thr_curcpu();
1597 	if (__predict_true(map != NULL && map[curcpu] != NULL))
1598 		return map[curcpu];
1599 
1600 	return choose_arena_hard();
1601 }
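
/*
 * Sketch of the thread -> arena mapping described in the comment above
 * choose_arena_hard(), with illustrative numbers: on a 4-CPU machine
 * (ncpus == 4) with narenas == 8, the first thread to fault in a map
 * gets &arenas[0] and is served by arenas[0..3], indexed by the CPU it
 * is currently running on; the second thread gets &arenas[4..7], and a
 * third wraps back around to &arenas[0..3].
 */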
1602 
1603 #ifndef lint
1604 static inline int
1605 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1606 {
1607 
1608 	assert(a != NULL);
1609 	assert(b != NULL);
1610 
1611 	if ((uintptr_t)a < (uintptr_t)b)
1612 		return (-1);
1613 	else if (a == b)
1614 		return (0);
1615 	else
1616 		return (1);
1617 }
1618 
1619 /* Generate red-black tree code for arena chunks. */
1620 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1621 #endif
1622 
1623 #ifndef lint
1624 static inline int
1625 arena_run_comp(arena_run_t *a, arena_run_t *b)
1626 {
1627 
1628 	assert(a != NULL);
1629 	assert(b != NULL);
1630 
1631 	if ((uintptr_t)a < (uintptr_t)b)
1632 		return (-1);
1633 	else if (a == b)
1634 		return (0);
1635 	else
1636 		return (1);
1637 }
1638 
1639 /* Generate red-black tree code for arena runs. */
1640 RB_GENERATE_STATIC(arena_run_tree_s, arena_run_s, link, arena_run_comp);
1641 #endif
1642 
1643 static inline void *
1644 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1645 {
1646 	void *ret;
1647 	unsigned i, mask, bit, regind;
1648 
1649 	assert(run->magic == ARENA_RUN_MAGIC);
1650 	assert(run->regs_minelm < bin->regs_mask_nelms);
1651 
1652 	/*
1653 	 * Move the first check outside the loop, so that run->regs_minelm can
1654 	 * be updated unconditionally, without the possibility of updating it
1655 	 * multiple times.
1656 	 */
1657 	i = run->regs_minelm;
1658 	mask = run->regs_mask[i];
1659 	if (mask != 0) {
1660 		/* Usable allocation found. */
1661 		bit = ffs((int)mask) - 1;
1662 
1663 		regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1664 		ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1665 		    + (bin->reg_size * regind));
1666 
1667 		/* Clear bit. */
1668 		mask ^= (1 << bit);
1669 		run->regs_mask[i] = mask;
1670 
1671 		return (ret);
1672 	}
1673 
1674 	for (i++; i < bin->regs_mask_nelms; i++) {
1675 		mask = run->regs_mask[i];
1676 		if (mask != 0) {
1677 			/* Usable allocation found. */
1678 			bit = ffs((int)mask) - 1;
1679 
1680 			regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1681 			ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1682 			    + (bin->reg_size * regind));
1683 
1684 			/* Clear bit. */
1685 			mask ^= (1 << bit);
1686 			run->regs_mask[i] = mask;
1687 
1688 			/*
1689 			 * Make a note that nothing before this element
1690 			 * contains a free region.
1691 			 */
1692 			run->regs_minelm = i; /* Low payoff: + (mask == 0); */
1693 
1694 			return (ret);
1695 		}
1696 	}
1697 	/* Not reached. */
1698 	/* LINTED */
1699 	assert(0);
1700 	return (NULL);
1701 }
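
/*
 * Worked example of the index arithmetic in arena_run_reg_alloc(),
 * assuming 32-bit ints (SIZEOF_INT_2POW == 2, so each regs_mask[]
 * element covers 32 regions): if the first free bit found is bit 5 of
 * regs_mask[2], then regind == (2 << 5) + 5 == 69 and the returned
 * region starts at run + reg0_offset + 69 * reg_size.
 */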
1702 
1703 static inline void
1704 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1705 {
1706 	/*
1707 	 * To divide by a number D that is not a power of two we multiply
1708 	 * by (2^21 / D) and then right shift by 21 positions.
1709 	 *
1710 	 *   X / D
1711 	 *
1712 	 * becomes
1713 	 *
1714 	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
1715 	 */
1716 #define SIZE_INV_SHIFT 21
1717 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
1718 	static const unsigned size_invs[] = {
1719 	    SIZE_INV(3),
1720 	    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1721 	    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1722 	    SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1723 	    SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1724 	    SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1725 	    SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1726 	    SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1727 #if (QUANTUM_2POW_MIN < 4)
1728 	    ,
1729 	    SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1730 	    SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1731 	    SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1732 	    SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1733 	    SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1734 	    SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1735 	    SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1736 	    SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1737 #endif
1738 	};
1739 	unsigned diff, regind, elm, bit;
1740 
1741 	/* LINTED */
1742 	assert(run->magic == ARENA_RUN_MAGIC);
1743 	assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1744 	    >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1745 
1746 	/*
1747 	 * Avoid doing division with a variable divisor if possible.  Using
1748 	 * actual division here can reduce allocator throughput by over 20%!
1749 	 */
1750 	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1751 	if ((size & (size - 1)) == 0) {
1752 		/*
1753 		 * log2_table allows fast division of a power of two in the
1754 		 * [1..128] range.
1755 		 *
1756 		 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1757 		 */
1758 		static const unsigned char log2_table[] = {
1759 		    0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1760 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1761 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1762 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1763 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1764 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1765 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1766 		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1767 		};
1768 
1769 		if (size <= 128)
1770 			regind = (diff >> log2_table[size - 1]);
1771 		else if (size <= 32768)
1772 			regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1773 		else {
1774 			/*
1775 			 * The page size is too large for us to use the lookup
1776 			 * table.  Use real division.
1777 			 */
1778 			regind = (unsigned)(diff / size);
1779 		}
1780 	} else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
1781 	    << QUANTUM_2POW_MIN) + 2) {
1782 		regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1783 		regind >>= SIZE_INV_SHIFT;
1784 	} else {
1785 		/*
1786 		 * size_invs isn't large enough to handle this size class, so
1787 		 * calculate regind using actual division.  This only happens
1788 		 * if the user increases small_max via the 'S' runtime
1789 		 * configuration option.
1790 		 */
1791 		regind = (unsigned)(diff / size);
1792 	}
1793 	assert(diff == regind * size);
1794 	assert(regind < bin->nregs);
1795 
1796 	elm = regind >> (SIZEOF_INT_2POW + 3);
1797 	if (elm < run->regs_minelm)
1798 		run->regs_minelm = elm;
1799 	bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1800 	assert((run->regs_mask[elm] & (1 << bit)) == 0);
1801 	run->regs_mask[elm] |= (1 << bit);
1802 #undef SIZE_INV
1803 #undef SIZE_INV_SHIFT
1804 }
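
/*
 * Worked example of the reciprocal trick in arena_run_reg_dalloc(),
 * assuming QUANTUM_2POW_MIN == 4 and a 48 byte size class: the table
 * entry is SIZE_INV(3) == (2^21 / 48) + 1 == 43691, so for diff == 96
 * the product is 96 * 43691 == 4194336 and 4194336 >> 21 == 2, which
 * matches 96 / 48.  The "+ 1" in SIZE_INV() biases the truncated
 * reciprocal upward so that the shifted product agrees with exact
 * division for the region offsets that can actually occur, which the
 * assertion on diff above double-checks in debug builds.
 */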
1805 
1806 static void
1807 arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
1808 {
1809 	arena_chunk_t *chunk;
1810 	unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
1811 	unsigned i;
1812 
1813 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1814 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1815 	    >> pagesize_2pow);
1816 	total_pages = chunk->map[run_ind].npages;
1817 	need_pages = (unsigned)(size >> pagesize_2pow);
1818 	assert(need_pages <= total_pages);
1819 	rem_pages = total_pages - need_pages;
1820 
1821 	/* Split enough pages from the front of run to fit allocation size. */
1822 	map_offset = run_ind;
1823 	for (i = 0; i < need_pages; i++) {
1824 		chunk->map[map_offset + i].npages = need_pages;
1825 		chunk->map[map_offset + i].pos = i;
1826 	}
1827 
1828 	/* Keep track of trailing unused pages for later use. */
1829 	if (rem_pages > 0) {
1830 		/* Update map for trailing pages. */
1831 		map_offset += need_pages;
1832 		chunk->map[map_offset].npages = rem_pages;
1833 		chunk->map[map_offset].pos = POS_FREE;
1834 		chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
1835 		chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
1836 	}
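	/*
	 * Worked example of the two updates above (illustrative, not from
	 * the sources): splitting a 3 page allocation off a 16 page free run
	 * leaves rem_pages == 13; the first 3 map entries get npages == 3
	 * and pos == 0..2, and the boundary entries of the 13 page remainder
	 * are updated to npages == 13 with POS_FREE.
	 */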
1837 
1838 	chunk->pages_used += need_pages;
1839 }
1840 
1841 static arena_chunk_t *
1842 arena_chunk_alloc(arena_t *arena)
1843 {
1844 	arena_chunk_t *chunk;
1845 
1846 	if (arena->spare != NULL) {
1847 		chunk = arena->spare;
1848 		arena->spare = NULL;
1849 
1850 		/* LINTED */
1851 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1852 	} else {
1853 		chunk = (arena_chunk_t *)chunk_alloc(chunksize);
1854 		if (chunk == NULL)
1855 			return (NULL);
1856 #ifdef MALLOC_STATS
1857 		arena->stats.mapped += chunksize;
1858 #endif
1859 
1860 		chunk->arena = arena;
1861 
1862 		/* LINTED */
1863 		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1864 
1865 		/*
1866 		 * Claim that no pages are in use, since the header is merely
1867 		 * overhead.
1868 		 */
1869 		chunk->pages_used = 0;
1870 
1871 		chunk->max_frun_npages = chunk_npages -
1872 		    arena_chunk_header_npages;
1873 		chunk->min_frun_ind = arena_chunk_header_npages;
1874 
1875 		/*
1876 		 * Initialize enough of the map to support one maximal free run.
1877 		 */
1878 		chunk->map[arena_chunk_header_npages].npages = chunk_npages -
1879 		    arena_chunk_header_npages;
1880 		chunk->map[arena_chunk_header_npages].pos = POS_FREE;
1881 		chunk->map[chunk_npages - 1].npages = chunk_npages -
1882 		    arena_chunk_header_npages;
1883 		chunk->map[chunk_npages - 1].pos = POS_FREE;
1884 	}
1885 
1886 	return (chunk);
1887 }
1888 
1889 static void
1890 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
1891 {
1892 
1893 	/*
1894 	 * Remove chunk from the chunk tree, regardless of whether this chunk
1895 	 * will be cached, so that the arena does not use it.
1896 	 */
1897 	/* LINTED */
1898 	RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1899 
1900 	if (opt_hint == false) {
1901 		if (arena->spare != NULL) {
1902 			chunk_dealloc((void *)arena->spare, chunksize);
1903 #ifdef MALLOC_STATS
1904 			arena->stats.mapped -= chunksize;
1905 #endif
1906 		}
1907 		arena->spare = chunk;
1908 	} else {
1909 		assert(arena->spare == NULL);
1910 		chunk_dealloc((void *)chunk, chunksize);
1911 #ifdef MALLOC_STATS
1912 		arena->stats.mapped -= chunksize;
1913 #endif
1914 	}
1915 }
1916 
1917 static arena_run_t *
1918 arena_run_alloc(arena_t *arena, size_t size)
1919 {
1920 	arena_chunk_t *chunk;
1921 	arena_run_t *run;
1922 	unsigned need_npages, limit_pages, compl_need_npages;
1923 
1924 	assert(size <= (chunksize - (arena_chunk_header_npages <<
1925 	    pagesize_2pow)));
1926 	assert((size & pagesize_mask) == 0);
1927 
1928 	/*
1929 	 * Search through the arena's chunks in address order for a free run
1930 	 * that is large enough.  Look for the first fit.
1931 	 */
1932 	need_npages = (unsigned)(size >> pagesize_2pow);
1933 	limit_pages = chunk_npages - arena_chunk_header_npages;
1934 	compl_need_npages = limit_pages - need_npages;
1935 	/* LINTED */
1936 	RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
1937 		/*
1938 		 * Avoid searching this chunk if there are not enough
1939 		 * contiguous free pages for there to possibly be a large
1940 		 * enough free run.
1941 		 */
1942 		if (chunk->pages_used <= compl_need_npages &&
1943 		    need_npages <= chunk->max_frun_npages) {
1944 			arena_chunk_map_t *mapelm;
1945 			unsigned i;
1946 			unsigned max_frun_npages = 0;
1947 			unsigned min_frun_ind = chunk_npages;
1948 
1949 			assert(chunk->min_frun_ind >=
1950 			    arena_chunk_header_npages);
1951 			for (i = chunk->min_frun_ind; i < chunk_npages;) {
1952 				mapelm = &chunk->map[i];
1953 				if (mapelm->pos == POS_FREE) {
1954 					if (mapelm->npages >= need_npages) {
1955 						run = (arena_run_t *)
1956 						    ((uintptr_t)chunk + (i <<
1957 						    pagesize_2pow));
1958 						/* Update page map. */
1959 						arena_run_split(arena, run,
1960 						    size);
1961 						return (run);
1962 					}
1963 					if (mapelm->npages >
1964 					    max_frun_npages) {
1965 						max_frun_npages =
1966 						    mapelm->npages;
1967 					}
1968 					if (i < min_frun_ind) {
1969 						min_frun_ind = i;
1970 						if (i < chunk->min_frun_ind)
1971 							chunk->min_frun_ind = i;
1972 					}
1973 				}
1974 				i += mapelm->npages;
1975 			}
1976 			/*
1977 			 * Search failure.  Reset cached chunk->max_frun_npages.
1978 			 * chunk->min_frun_ind was already reset above (if
1979 			 * necessary).
1980 			 */
1981 			chunk->max_frun_npages = max_frun_npages;
1982 		}
1983 	}
1984 
1985 	/*
1986 	 * No usable runs.  Create a new chunk from which to allocate the run.
1987 	 */
1988 	chunk = arena_chunk_alloc(arena);
1989 	if (chunk == NULL)
1990 		return (NULL);
1991 	run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
1992 	    pagesize_2pow));
1993 	/* Update page map. */
1994 	arena_run_split(arena, run, size);
1995 	return (run);
1996 }
1997 
1998 static void
1999 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
2000 {
2001 	arena_chunk_t *chunk;
2002 	unsigned run_ind, run_pages;
2003 
2004 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2005 
2006 	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
2007 	    >> pagesize_2pow);
2008 	assert(run_ind >= arena_chunk_header_npages);
2009 	assert(run_ind < (chunksize >> pagesize_2pow));
2010 	run_pages = (unsigned)(size >> pagesize_2pow);
2011 	assert(run_pages == chunk->map[run_ind].npages);
2012 
2013 	/* Subtract pages from count of pages used in chunk. */
2014 	chunk->pages_used -= run_pages;
2015 
2016 	/* Mark run as deallocated. */
2017 	assert(chunk->map[run_ind].npages == run_pages);
2018 	chunk->map[run_ind].pos = POS_FREE;
2019 	assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
2020 	chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
2021 
2022 	/*
2023 	 * Tell the kernel that we don't need the data in this run, but only if
2024 	 * requested via runtime configuration.
2025 	 */
2026 	if (opt_hint)
2027 		madvise(run, size, MADV_FREE);
2028 
2029 	/* Try to coalesce with neighboring runs. */
2030 	if (run_ind > arena_chunk_header_npages &&
2031 	    chunk->map[run_ind - 1].pos == POS_FREE) {
2032 		unsigned prev_npages;
2033 
2034 		/* Coalesce with previous run. */
2035 		prev_npages = chunk->map[run_ind - 1].npages;
2036 		run_ind -= prev_npages;
2037 		assert(chunk->map[run_ind].npages == prev_npages);
2038 		assert(chunk->map[run_ind].pos == POS_FREE);
2039 		run_pages += prev_npages;
2040 
2041 		chunk->map[run_ind].npages = run_pages;
2042 		assert(chunk->map[run_ind].pos == POS_FREE);
2043 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2044 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2045 	}
2046 
2047 	if (run_ind + run_pages < chunk_npages &&
2048 	    chunk->map[run_ind + run_pages].pos == POS_FREE) {
2049 		unsigned next_npages;
2050 
2051 		/* Coalesce with next run. */
2052 		next_npages = chunk->map[run_ind + run_pages].npages;
2053 		run_pages += next_npages;
2054 		assert(chunk->map[run_ind + run_pages - 1].npages ==
2055 		    next_npages);
2056 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2057 
2058 		chunk->map[run_ind].npages = run_pages;
2059 		chunk->map[run_ind].pos = POS_FREE;
2060 		chunk->map[run_ind + run_pages - 1].npages = run_pages;
2061 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2062 	}
2063 
2064 	if (chunk->map[run_ind].npages > chunk->max_frun_npages)
2065 		chunk->max_frun_npages = chunk->map[run_ind].npages;
2066 	if (run_ind < chunk->min_frun_ind)
2067 		chunk->min_frun_ind = run_ind;
2068 
2069 	/* Deallocate chunk if it is now completely unused. */
2070 	if (chunk->pages_used == 0)
2071 		arena_chunk_dealloc(arena, chunk);
2072 }
2073 
2074 static arena_run_t *
2075 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2076 {
2077 	arena_run_t *run;
2078 	unsigned i, remainder;
2079 
2080 	/* Look for a usable run. */
2081 	/* LINTED */
2082 	if ((run = RB_MIN(arena_run_tree_s, &bin->runs)) != NULL) {
2083 		/* run is guaranteed to have available space. */
2084 		/* LINTED */
2085 		RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2086 #ifdef MALLOC_STATS
2087 		bin->stats.reruns++;
2088 #endif
2089 		return (run);
2090 	}
2091 	/* No existing runs have any space available. */
2092 
2093 	/* Allocate a new run. */
2094 	run = arena_run_alloc(arena, bin->run_size);
2095 	if (run == NULL)
2096 		return (NULL);
2097 
2098 	/* Initialize run internals. */
2099 	run->bin = bin;
2100 
2101 	for (i = 0; i < bin->regs_mask_nelms; i++)
2102 		run->regs_mask[i] = UINT_MAX;
2103 	remainder = bin->nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1);
2104 	if (remainder != 0) {
2105 		/* The last element has spare bits that need to be unset. */
2106 		run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2107 		    - remainder));
2108 	}
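	/*
	 * Illustrative example (not part of the original sources): with
	 * 32-bit mask words and bin->nregs == 70, remainder == 70 & 31 == 6,
	 * and UINT_MAX >> (32 - 6) == 0x3f has exactly 6 bits set, one per
	 * region in the partially used last word.
	 */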
2109 
2110 	run->regs_minelm = 0;
2111 
2112 	run->nfree = bin->nregs;
2113 #ifdef MALLOC_DEBUG
2114 	run->magic = ARENA_RUN_MAGIC;
2115 #endif
2116 
2117 #ifdef MALLOC_STATS
2118 	bin->stats.nruns++;
2119 	bin->stats.curruns++;
2120 	if (bin->stats.curruns > bin->stats.highruns)
2121 		bin->stats.highruns = bin->stats.curruns;
2122 #endif
2123 	return (run);
2124 }
2125 
2126 /* bin->runcur must have space available before this function is called. */
2127 static inline void *
2128 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2129 {
2130 	void *ret;
2131 
2132 	assert(run->magic == ARENA_RUN_MAGIC);
2133 	assert(run->nfree > 0);
2134 
2135 	ret = arena_run_reg_alloc(run, bin);
2136 	assert(ret != NULL);
2137 	run->nfree--;
2138 
2139 	return (ret);
2140 }
2141 
2142 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2143 static void *
2144 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2145 {
2146 
2147 	bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2148 	if (bin->runcur == NULL)
2149 		return (NULL);
2150 	assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2151 	assert(bin->runcur->nfree > 0);
2152 
2153 	return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2154 }
2155 
2156 /*
2157  * Calculate bin->run_size such that it meets the following constraints:
2158  *
2159  *   *) bin->run_size >= min_run_size
2160  *   *) bin->run_size <= arena_maxclass
2161  *   *) bin->run_size <= RUN_MAX_SMALL
2162  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2163  *
2164  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2165  * also calculated here, since these settings are all interdependent.
2166  */
2167 static size_t
2168 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
2169 {
2170 	size_t try_run_size, good_run_size;
2171 	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
2172 	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
2173 
2174 	assert(min_run_size >= pagesize);
2175 	assert(min_run_size <= arena_maxclass);
2176 	assert(min_run_size <= RUN_MAX_SMALL);
2177 
2178 	/*
2179 	 * Calculate known-valid settings before entering the run_size
2180 	 * expansion loop, so that the first part of the loop always copies
2181 	 * valid settings.
2182 	 *
2183 	 * The do..while loop iteratively reduces the number of regions until
2184 	 * the run header and the regions no longer overlap.  A closed formula
2185 	 * would be quite messy, since there is an interdependency between the
2186 	 * header's mask length and the number of regions.
2187 	 */
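	/*
	 * Hedged worked setup (values are illustrative, not taken from the
	 * sources): with 4 kB pages and reg_size == 16, the pass below
	 * starts from try_nregs == (4096 - sizeof(arena_run_t)) / 16 + 1 and
	 * shrinks it until the header plus its mask words fit in front of
	 * region 0, i.e. within try_reg0_offset bytes.
	 */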
2188 	try_run_size = min_run_size;
2189 	try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2190 	    bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
2191 	do {
2192 		try_nregs--;
2193 		try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2194 		    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
2195 		try_reg0_offset = (unsigned)(try_run_size -
2196 		    (try_nregs * bin->reg_size));
2197 	} while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
2198 	    > try_reg0_offset);
2199 
2200 	/* run_size expansion loop. */
2201 	do {
2202 		/*
2203 		 * Copy valid settings before trying more aggressive settings.
2204 		 */
2205 		good_run_size = try_run_size;
2206 		good_nregs = try_nregs;
2207 		good_mask_nelms = try_mask_nelms;
2208 		good_reg0_offset = try_reg0_offset;
2209 
2210 		/* Try more aggressive settings. */
2211 		try_run_size += pagesize;
2212 		try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2213 		    bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
2214 		do {
2215 			try_nregs--;
2216 			try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2217 			    ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ?
2218 			    1 : 0);
2219 			try_reg0_offset = (unsigned)(try_run_size - (try_nregs *
2220 			    bin->reg_size));
2221 		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
2222 		    (try_mask_nelms - 1)) > try_reg0_offset);
2223 	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
2224 	    && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
2225 	    && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
2226 
2227 	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
2228 	    <= good_reg0_offset);
2229 	assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
2230 
2231 	/* Copy final settings. */
2232 	bin->run_size = good_run_size;
2233 	bin->nregs = good_nregs;
2234 	bin->regs_mask_nelms = good_mask_nelms;
2235 	bin->reg0_offset = good_reg0_offset;
2236 
2237 	return (good_run_size);
2238 }
2239 
2240 static void *
2241 arena_malloc(arena_t *arena, size_t size)
2242 {
2243 	void *ret;
2244 
2245 	assert(arena != NULL);
2246 	assert(arena->magic == ARENA_MAGIC);
2247 	assert(size != 0);
2248 	assert(QUANTUM_CEILING(size) <= arena_maxclass);
2249 
2250 	if (size <= bin_maxclass) {
2251 		arena_bin_t *bin;
2252 		arena_run_t *run;
2253 
2254 		/* Small allocation. */
2255 
2256 		if (size < small_min) {
2257 			/* Tiny. */
2258 			size = pow2_ceil(size);
2259 			bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
2260 			    1)))];
2261 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2262 			/*
2263 			 * Bin calculation is always correct, but we may need
2264 			 * to fix size for the purposes of assertions and/or
2265 			 * stats accuracy.
2266 			 */
2267 			if (size < (1 << TINY_MIN_2POW))
2268 				size = (1 << TINY_MIN_2POW);
2269 #endif
2270 		} else if (size <= small_max) {
2271 			/* Quantum-spaced. */
2272 			size = QUANTUM_CEILING(size);
2273 			bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2274 			    - 1];
2275 		} else {
2276 			/* Sub-page. */
2277 			size = pow2_ceil(size);
2278 			bin = &arena->bins[ntbins + nqbins
2279 			    + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
2280 		}
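		/*
		 * Illustrative example (not part of the original sources),
		 * assuming a 16 byte quantum, 512 byte small_max and 4 kB
		 * pages: a 5 byte request maps to the 8 byte tiny class, a
		 * 20 byte request to the 32 byte quantum-spaced class, and a
		 * 600 byte request to the 1024 byte sub-page class; the
		 * assert below checks the chosen bin's reg_size in each case.
		 */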
2281 		assert(size == bin->reg_size);
2282 
2283 		malloc_mutex_lock(&arena->mtx);
2284 		if ((run = bin->runcur) != NULL && run->nfree > 0)
2285 			ret = arena_bin_malloc_easy(arena, bin, run);
2286 		else
2287 			ret = arena_bin_malloc_hard(arena, bin);
2288 
2289 		if (ret == NULL) {
2290 			malloc_mutex_unlock(&arena->mtx);
2291 			return (NULL);
2292 		}
2293 
2294 #ifdef MALLOC_STATS
2295 		bin->stats.nrequests++;
2296 		arena->stats.nmalloc_small++;
2297 		arena->stats.allocated_small += size;
2298 #endif
2299 	} else {
2300 		/* Large allocation. */
2301 		size = PAGE_CEILING(size);
2302 		malloc_mutex_lock(&arena->mtx);
2303 		ret = (void *)arena_run_alloc(arena, size);
2304 		if (ret == NULL) {
2305 			malloc_mutex_unlock(&arena->mtx);
2306 			return (NULL);
2307 		}
2308 #ifdef MALLOC_STATS
2309 		arena->stats.nmalloc_large++;
2310 		arena->stats.allocated_large += size;
2311 #endif
2312 	}
2313 
2314 	malloc_mutex_unlock(&arena->mtx);
2315 
2316 	if (opt_junk)
2317 		memset(ret, 0xa5, size);
2318 	else if (opt_zero)
2319 		memset(ret, 0, size);
2320 	return (ret);
2321 }
2322 
2323 static inline void
2324 arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
2325     unsigned npages)
2326 {
2327 	unsigned i;
2328 
2329 	assert(npages > 0);
2330 
2331 	/*
2332 	 * Modify the map such that arena_run_dalloc() sees the run as
2333 	 * separately allocated.
2334 	 */
2335 	for (i = 0; i < npages; i++) {
2336 		chunk->map[pageind + i].npages = npages;
2337 		chunk->map[pageind + i].pos = i;
2338 	}
2339 	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
2340 	    pagesize_2pow)), npages << pagesize_2pow);
2341 }
2342 
2343 /* Only handles large allocations that require more than page alignment. */
2344 static void *
2345 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
2346 {
2347 	void *ret;
2348 	size_t offset;
2349 	arena_chunk_t *chunk;
2350 	unsigned pageind, i, npages;
2351 
2352 	assert((size & pagesize_mask) == 0);
2353 	assert((alignment & pagesize_mask) == 0);
2354 
2355 	npages = (unsigned)(size >> pagesize_2pow);
2356 
2357 	malloc_mutex_lock(&arena->mtx);
2358 	ret = (void *)arena_run_alloc(arena, alloc_size);
2359 	if (ret == NULL) {
2360 		malloc_mutex_unlock(&arena->mtx);
2361 		return (NULL);
2362 	}
2363 
2364 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
2365 
2366 	offset = (uintptr_t)ret & (alignment - 1);
2367 	assert((offset & pagesize_mask) == 0);
2368 	assert(offset < alloc_size);
2369 	if (offset == 0) {
2370 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2371 		    pagesize_2pow);
2372 
2373 		/* Update the map for the run to be kept. */
2374 		for (i = 0; i < npages; i++) {
2375 			chunk->map[pageind + i].npages = npages;
2376 			assert(chunk->map[pageind + i].pos == i);
2377 		}
2378 
2379 		/* Trim trailing space. */
2380 		arena_palloc_trim(arena, chunk, pageind + npages,
2381 		    (unsigned)((alloc_size - size) >> pagesize_2pow));
2382 	} else {
2383 		size_t leadsize, trailsize;
2384 
2385 		leadsize = alignment - offset;
2386 		ret = (void *)((uintptr_t)ret + leadsize);
2387 		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2388 		    pagesize_2pow);
2389 
2390 		/* Update the map for the run to be kept. */
2391 		for (i = 0; i < npages; i++) {
2392 			chunk->map[pageind + i].npages = npages;
2393 			chunk->map[pageind + i].pos = i;
2394 		}
2395 
2396 		/* Trim leading space. */
2397 		arena_palloc_trim(arena, chunk,
2398 		    (unsigned)(pageind - (leadsize >> pagesize_2pow)),
2399 		    (unsigned)(leadsize >> pagesize_2pow));
2400 
2401 		trailsize = alloc_size - leadsize - size;
2402 		if (trailsize != 0) {
2403 			/* Trim trailing space. */
2404 			assert(trailsize < alloc_size);
2405 			arena_palloc_trim(arena, chunk, pageind + npages,
2406 			    (unsigned)(trailsize >> pagesize_2pow));
2407 		}
2408 	}
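	/*
	 * Illustrative example (not part of the original sources): with
	 * 4 kB pages, a one page request aligned to 8 kB arrives with
	 * alloc_size == 12 kB (three pages).  If the run starts on an odd
	 * page, one leading and one trailing page are trimmed and the 8 kB
	 * aligned middle page is kept; if it starts on an even page, the
	 * first page is kept and the two trailing pages are trimmed.
	 */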
2409 
2410 #ifdef MALLOC_STATS
2411 	arena->stats.nmalloc_large++;
2412 	arena->stats.allocated_large += size;
2413 #endif
2414 	malloc_mutex_unlock(&arena->mtx);
2415 
2416 	if (opt_junk)
2417 		memset(ret, 0xa5, size);
2418 	else if (opt_zero)
2419 		memset(ret, 0, size);
2420 	return (ret);
2421 }
2422 
2423 /* Return the size of the allocation pointed to by ptr. */
2424 static size_t
2425 arena_salloc(const void *ptr)
2426 {
2427 	size_t ret;
2428 	arena_chunk_t *chunk;
2429 	arena_chunk_map_t *mapelm;
2430 	unsigned pageind;
2431 
2432 	assert(ptr != NULL);
2433 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2434 
2435 	/*
2436 	 * No arena data structures that we query here can change in a way that
2437 	 * affects this function, so we don't need to lock.
2438 	 */
2439 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2440 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2441 	    pagesize_2pow);
2442 	mapelm = &chunk->map[pageind];
2443 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2444 	    pagesize_2pow)) {
2445 		arena_run_t *run;
2446 
2447 		pageind -= mapelm->pos;
2448 
2449 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2450 		    pagesize_2pow));
2451 		assert(run->magic == ARENA_RUN_MAGIC);
2452 		ret = run->bin->reg_size;
2453 	} else
2454 		ret = mapelm->npages << pagesize_2pow;
2455 
2456 	return (ret);
2457 }
2458 
2459 static void *
2460 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2461 {
2462 	void *ret;
2463 
2464 	/* Avoid moving the allocation if the size class would not change. */
2465 	if (size < small_min) {
2466 		if (oldsize < small_min &&
2467 		    ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
2468 		    == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
2469 			goto IN_PLACE;
2470 	} else if (size <= small_max) {
2471 		if (oldsize >= small_min && oldsize <= small_max &&
2472 		    (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2473 		    == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2474 			goto IN_PLACE;
2475 	} else {
2476 		/*
2477 		 * We make no attempt to resize runs here, though it would be
2478 		 * possible to do so.
2479 		 */
2480 		if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
2481 			goto IN_PLACE;
2482 	}
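	/*
	 * Illustrative example (not part of the original sources), assuming
	 * a 16 byte quantum: growing an allocation from 20 to 30 bytes keeps
	 * QUANTUM_CEILING() at 32, so it is handled in place; growing from
	 * 30 to 40 bytes moves to the 48 byte class and falls through to the
	 * allocate-and-copy path below.
	 */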
2483 
2484 	/*
2485 	 * If we get here, then size and oldsize are different enough that we
2486 	 * need to use a different size class.  In that case, fall back to
2487 	 * allocating new space and copying.
2488 	 */
2489 	ret = arena_malloc(choose_arena(), size);
2490 	if (ret == NULL)
2491 		return (NULL);
2492 
2493 	/* Junk/zero-filling were already done by arena_malloc(). */
2494 	if (size < oldsize)
2495 		memcpy(ret, ptr, size);
2496 	else
2497 		memcpy(ret, ptr, oldsize);
2498 	idalloc(ptr);
2499 	return (ret);
2500 IN_PLACE:
2501 	if (opt_junk && size < oldsize)
2502 		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
2503 	else if (opt_zero && size > oldsize)
2504 		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
2505 	return (ptr);
2506 }
2507 
2508 static void
2509 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2510 {
2511 	unsigned pageind;
2512 	arena_chunk_map_t *mapelm;
2513 	size_t size;
2514 
2515 	assert(arena != NULL);
2516 	assert(arena->magic == ARENA_MAGIC);
2517 	assert(chunk->arena == arena);
2518 	assert(ptr != NULL);
2519 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
2520 
2521 	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2522 	    pagesize_2pow);
2523 	mapelm = &chunk->map[pageind];
2524 	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2525 	    pagesize_2pow)) {
2526 		arena_run_t *run;
2527 		arena_bin_t *bin;
2528 
2529 		/* Small allocation. */
2530 
2531 		pageind -= mapelm->pos;
2532 
2533 		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2534 		    pagesize_2pow));
2535 		assert(run->magic == ARENA_RUN_MAGIC);
2536 		bin = run->bin;
2537 		size = bin->reg_size;
2538 
2539 		if (opt_junk)
2540 			memset(ptr, 0x5a, size);
2541 
2542 		malloc_mutex_lock(&arena->mtx);
2543 		arena_run_reg_dalloc(run, bin, ptr, size);
2544 		run->nfree++;
2545 
2546 		if (run->nfree == bin->nregs) {
2547 			/* Deallocate run. */
2548 			if (run == bin->runcur)
2549 				bin->runcur = NULL;
2550 			else if (bin->nregs != 1) {
2551 				/*
2552 				 * This block's conditional is necessary because
2553 				 * if the run only contains one region, then it
2554 				 * never gets inserted into the non-full runs
2555 				 * tree.
2556 				 */
2557 				/* LINTED */
2558 				RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2559 			}
2560 #ifdef MALLOC_DEBUG
2561 			run->magic = 0;
2562 #endif
2563 			arena_run_dalloc(arena, run, bin->run_size);
2564 #ifdef MALLOC_STATS
2565 			bin->stats.curruns--;
2566 #endif
2567 		} else if (run->nfree == 1 && run != bin->runcur) {
2568 			/*
2569 			 * Make sure that bin->runcur always refers to the
2570 			 * lowest non-full run, if one exists.
2571 			 */
2572 			if (bin->runcur == NULL)
2573 				bin->runcur = run;
2574 			else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
2575 				/* Switch runcur. */
2576 				if (bin->runcur->nfree > 0) {
2577 					/* Insert runcur. */
2578 					/* LINTED */
2579 					RB_INSERT(arena_run_tree_s, &bin->runs,
2580 					    bin->runcur);
2581 				}
2582 				bin->runcur = run;
2583 			} else {
2584 				/* LINTED */
2585 				RB_INSERT(arena_run_tree_s, &bin->runs, run);
2586 			}
2587 		}
2588 #ifdef MALLOC_STATS
2589 		arena->stats.allocated_small -= size;
2590 		arena->stats.ndalloc_small++;
2591 #endif
2592 	} else {
2593 		/* Large allocation. */
2594 
2595 		size = mapelm->npages << pagesize_2pow;
2596 		assert((((uintptr_t)ptr) & pagesize_mask) == 0);
2597 
2598 		if (opt_junk)
2599 			memset(ptr, 0x5a, size);
2600 
2601 		malloc_mutex_lock(&arena->mtx);
2602 		arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2603 #ifdef MALLOC_STATS
2604 		arena->stats.allocated_large -= size;
2605 		arena->stats.ndalloc_large++;
2606 #endif
2607 	}
2608 
2609 	malloc_mutex_unlock(&arena->mtx);
2610 }
2611 
2612 static bool
2613 arena_new(arena_t *arena)
2614 {
2615 	unsigned i;
2616 	arena_bin_t *bin;
2617 	size_t prev_run_size;
2618 
2619 	malloc_mutex_init(&arena->mtx);
2620 
2621 #ifdef MALLOC_STATS
2622 	memset(&arena->stats, 0, sizeof(arena_stats_t));
2623 #endif
2624 
2625 	/* Initialize chunks. */
2626 	RB_INIT(&arena->chunks);
2627 	arena->spare = NULL;
2628 
2629 	/* Initialize bins. */
2630 	prev_run_size = pagesize;
2631 
2632 	/* (2^n)-spaced tiny bins. */
2633 	for (i = 0; i < ntbins; i++) {
2634 		bin = &arena->bins[i];
2635 		bin->runcur = NULL;
2636 		RB_INIT(&bin->runs);
2637 
2638 		bin->reg_size = (1 << (TINY_MIN_2POW + i));
2639 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2640 
2641 #ifdef MALLOC_STATS
2642 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2643 #endif
2644 	}
2645 
2646 	/* Quantum-spaced bins. */
2647 	for (; i < ntbins + nqbins; i++) {
2648 		bin = &arena->bins[i];
2649 		bin->runcur = NULL;
2650 		RB_INIT(&bin->runs);
2651 
2652 		bin->reg_size = quantum * (i - ntbins + 1);
2653 /*
2654 		pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2655 */
2656 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2657 
2658 #ifdef MALLOC_STATS
2659 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2660 #endif
2661 	}
2662 
2663 	/* (2^n)-spaced sub-page bins. */
2664 	for (; i < ntbins + nqbins + nsbins; i++) {
2665 		bin = &arena->bins[i];
2666 		bin->runcur = NULL;
2667 		RB_INIT(&bin->runs);
2668 
2669 		bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2670 
2671 		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2672 
2673 #ifdef MALLOC_STATS
2674 		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2675 #endif
2676 	}
2677 
2678 #ifdef MALLOC_DEBUG
2679 	arena->magic = ARENA_MAGIC;
2680 #endif
2681 
2682 	return (false);
2683 }
2684 
2685 /* Create a new arena and insert it into the arenas array at index ind. */
2686 static arena_t *
2687 arenas_extend(unsigned ind)
2688 {
2689 	arena_t *ret;
2690 
2691 	/* Allocate enough space for trailing bins. */
2692 	ret = (arena_t *)base_alloc(sizeof(arena_t)
2693 	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2694 	if (ret != NULL && arena_new(ret) == false) {
2695 		arenas[ind] = ret;
2696 		return (ret);
2697 	}
2698 	/* Only reached if there is an OOM error. */
2699 
2700 	/*
2701 	 * OOM here is quite inconvenient to propagate, since dealing with it
2702 	 * would require a check for failure in the fast path.  Instead, punt
2703 	 * by using arenas[0].  In practice, this is an extremely unlikely
2704 	 * failure.
2705 	 */
2706 	_malloc_message(getprogname(),
2707 	    ": (malloc) Error initializing arena\n", "", "");
2708 	if (opt_abort)
2709 		abort();
2710 
2711 	return (arenas[0]);
2712 }
2713 
2714 /*
2715  * End arena.
2716  */
2717 /******************************************************************************/
2718 /*
2719  * Begin general internal functions.
2720  */
2721 
2722 static void *
2723 huge_malloc(size_t size)
2724 {
2725 	void *ret;
2726 	size_t csize;
2727 	chunk_node_t *node;
2728 
2729 	/* Allocate one or more contiguous chunks for this request. */
2730 
2731 	csize = CHUNK_CEILING(size);
2732 	if (csize == 0) {
2733 		/* size is large enough to cause size_t wrap-around. */
2734 		return (NULL);
2735 	}
2736 
2737 	/* Allocate a chunk node with which to track the chunk. */
2738 	node = base_chunk_node_alloc();
2739 	if (node == NULL)
2740 		return (NULL);
2741 
2742 	ret = chunk_alloc(csize);
2743 	if (ret == NULL) {
2744 		base_chunk_node_dealloc(node);
2745 		return (NULL);
2746 	}
2747 
2748 	/* Insert node into huge. */
2749 	node->chunk = ret;
2750 	node->size = csize;
2751 
2752 	malloc_mutex_lock(&chunks_mtx);
2753 	RB_INSERT(chunk_tree_s, &huge, node);
2754 #ifdef MALLOC_STATS
2755 	huge_nmalloc++;
2756 	huge_allocated += csize;
2757 #endif
2758 	malloc_mutex_unlock(&chunks_mtx);
2759 
2760 	if (opt_junk)
2761 		memset(ret, 0xa5, csize);
2762 	else if (opt_zero)
2763 		memset(ret, 0, csize);
2764 
2765 	return (ret);
2766 }
2767 
2768 /* Only handles large allocations that require more than chunk alignment. */
2769 static void *
2770 huge_palloc(size_t alignment, size_t size)
2771 {
2772 	void *ret;
2773 	size_t alloc_size, chunk_size, offset;
2774 	chunk_node_t *node;
2775 
2776 	/*
2777 	 * This allocation requires alignment that is even larger than chunk
2778 	 * alignment.  This means that huge_malloc() isn't good enough.
2779 	 *
2780 	 * Allocate almost twice as many chunks as are demanded by the size or
2781 	 * alignment, in order to ensure that the alignment can be achieved, then
2782 	 * unmap leading and trailing chunks.
2783 	 */
2784 	assert(alignment >= chunksize);
2785 
2786 	chunk_size = CHUNK_CEILING(size);
2787 
2788 	if (size >= alignment)
2789 		alloc_size = chunk_size + alignment - chunksize;
2790 	else
2791 		alloc_size = (alignment << 1) - chunksize;
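	/*
	 * Illustrative example (not part of the original sources): with
	 * 1 MB chunks, a 1 MB request aligned to 4 MB takes the else branch,
	 * so alloc_size == 8 MB - 1 MB == 7 MB.  Any 7 MB region contains a
	 * 4 MB aligned address with at least 3 MB after it, so a properly
	 * aligned 1 MB chunk can always be carved out and the rest unmapped.
	 */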
2792 
2793 	/* Allocate a chunk node with which to track the chunk. */
2794 	node = base_chunk_node_alloc();
2795 	if (node == NULL)
2796 		return (NULL);
2797 
2798 	ret = chunk_alloc(alloc_size);
2799 	if (ret == NULL) {
2800 		base_chunk_node_dealloc(node);
2801 		return (NULL);
2802 	}
2803 
2804 	offset = (uintptr_t)ret & (alignment - 1);
2805 	assert((offset & chunksize_mask) == 0);
2806 	assert(offset < alloc_size);
2807 	if (offset == 0) {
2808 		/* Trim trailing space. */
2809 		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
2810 		    - chunk_size);
2811 	} else {
2812 		size_t trailsize;
2813 
2814 		/* Trim leading space. */
2815 		chunk_dealloc(ret, alignment - offset);
2816 
2817 		ret = (void *)((uintptr_t)ret + (alignment - offset));
2818 
2819 		trailsize = alloc_size - (alignment - offset) - chunk_size;
2820 		if (trailsize != 0) {
2821 			/* Trim trailing space. */
2822 			assert(trailsize < alloc_size);
2823 			chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
2824 			    trailsize);
2825 		}
2826 	}
2827 
2828 	/* Insert node into huge. */
2829 	node->chunk = ret;
2830 	node->size = chunk_size;
2831 
2832 	malloc_mutex_lock(&chunks_mtx);
2833 	RB_INSERT(chunk_tree_s, &huge, node);
2834 #ifdef MALLOC_STATS
2835 	huge_nmalloc++;
2836 	huge_allocated += chunk_size;
2837 #endif
2838 	malloc_mutex_unlock(&chunks_mtx);
2839 
2840 	if (opt_junk)
2841 		memset(ret, 0xa5, chunk_size);
2842 	else if (opt_zero)
2843 		memset(ret, 0, chunk_size);
2844 
2845 	return (ret);
2846 }
2847 
2848 static void *
2849 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2850 {
2851 	void *ret;
2852 
2853 	/* Avoid moving the allocation if the size class would not change. */
2854 	if (oldsize > arena_maxclass &&
2855 	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
2856 		if (opt_junk && size < oldsize) {
2857 			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
2858 			    - size);
2859 		} else if (opt_zero && size > oldsize) {
2860 			memset((void *)((uintptr_t)ptr + oldsize), 0, size
2861 			    - oldsize);
2862 		}
2863 		return (ptr);
2864 	}
2865 
2866 	if (CHUNK_ADDR2BASE(ptr) == ptr
2867 #ifdef USE_BRK
2868 	    && ((uintptr_t)ptr < (uintptr_t)brk_base
2869 	    || (uintptr_t)ptr >= (uintptr_t)brk_max)
2870 #endif
2871 	    ) {
2872 		chunk_node_t *node, key;
2873 		void *newptr;
2874 		size_t oldcsize;
2875 		size_t newcsize;
2876 
2877 		newcsize = CHUNK_CEILING(size);
2878 		oldcsize = CHUNK_CEILING(oldsize);
2879 		assert(oldcsize != newcsize);
2880 		if (newcsize == 0) {
2881 			/* size_t wrap-around */
2882 			return (NULL);
2883 		}
2884 
2885 		/*
2886 		 * Remove the old region from the tree now.  If mremap()
2887 		 * returns the region to the system, another thread may
2888 		 * map it for the same huge allocation and insert it into
2889 		 * the tree before we acquire the mutex lock again.
2890 		 */
2891 		malloc_mutex_lock(&chunks_mtx);
2892 		key.chunk = __DECONST(void *, ptr);
2893 		/* LINTED */
2894 		node = RB_FIND(chunk_tree_s, &huge, &key);
2895 		assert(node != NULL);
2896 		assert(node->chunk == ptr);
2897 		assert(node->size == oldcsize);
2898 		RB_REMOVE(chunk_tree_s, &huge, node);
2899 		malloc_mutex_unlock(&chunks_mtx);
2900 
2901 		newptr = mremap(ptr, oldcsize, NULL, newcsize,
2902 		    MAP_ALIGNED(chunksize_2pow));
2903 		if (newptr == MAP_FAILED) {
2904 			/* We still own the old region. */
2905 			malloc_mutex_lock(&chunks_mtx);
2906 			RB_INSERT(chunk_tree_s, &huge, node);
2907 			malloc_mutex_unlock(&chunks_mtx);
2908 		} else {
2909 			assert(CHUNK_ADDR2BASE(newptr) == newptr);
2910 
2911 			/* Insert new or resized old region. */
2912 			malloc_mutex_lock(&chunks_mtx);
2913 			node->size = newcsize;
2914 			node->chunk = newptr;
2915 			RB_INSERT(chunk_tree_s, &huge, node);
2916 #ifdef MALLOC_STATS
2917 			huge_nralloc++;
2918 			huge_allocated += newcsize - oldcsize;
2919 			if (newcsize > oldcsize) {
2920 				stats_chunks.curchunks +=
2921 				    (newcsize - oldcsize) / chunksize;
2922 				if (stats_chunks.curchunks >
2923 				    stats_chunks.highchunks)
2924 					stats_chunks.highchunks =
2925 					    stats_chunks.curchunks;
2926 			} else {
2927 				stats_chunks.curchunks -=
2928 				    (oldcsize - newcsize) / chunksize;
2929 			}
2930 #endif
2931 			malloc_mutex_unlock(&chunks_mtx);
2932 
2933 			if (opt_junk && size < oldsize) {
2934 				memset((void *)((uintptr_t)newptr + size), 0x5a,
2935 				    newcsize - size);
2936 			} else if (opt_zero && size > oldsize) {
2937 				memset((void *)((uintptr_t)newptr + oldsize), 0,
2938 				    size - oldsize);
2939 			}
2940 			return (newptr);
2941 		}
2942 	}
2943 
2944 	/*
2945 	 * If we get here, then size and oldsize are different enough that we
2946 	 * need to use a different size class.  In that case, fall back to
2947 	 * allocating new space and copying.
2948 	 */
2949 	ret = huge_malloc(size);
2950 	if (ret == NULL)
2951 		return (NULL);
2952 
2953 	if (CHUNK_ADDR2BASE(ptr) == ptr) {
2954 		/* The old allocation is a chunk. */
2955 		if (size < oldsize)
2956 			memcpy(ret, ptr, size);
2957 		else
2958 			memcpy(ret, ptr, oldsize);
2959 	} else {
2960 		/* The old allocation is a region. */
2961 		assert(oldsize < size);
2962 		memcpy(ret, ptr, oldsize);
2963 	}
2964 	idalloc(ptr);
2965 	return (ret);
2966 }
2967 
2968 static void
2969 huge_dalloc(void *ptr)
2970 {
2971 	chunk_node_t key;
2972 	chunk_node_t *node;
2973 
2974 	malloc_mutex_lock(&chunks_mtx);
2975 
2976 	/* Extract from tree of huge allocations. */
2977 	key.chunk = ptr;
2978 	/* LINTED */
2979 	node = RB_FIND(chunk_tree_s, &huge, &key);
2980 	assert(node != NULL);
2981 	assert(node->chunk == ptr);
2982 	/* LINTED */
2983 	RB_REMOVE(chunk_tree_s, &huge, node);
2984 
2985 #ifdef MALLOC_STATS
2986 	huge_ndalloc++;
2987 	huge_allocated -= node->size;
2988 #endif
2989 
2990 	malloc_mutex_unlock(&chunks_mtx);
2991 
2992 	/* Unmap chunk. */
2993 #ifdef USE_BRK
2994 	if (opt_junk)
2995 		memset(node->chunk, 0x5a, node->size);
2996 #endif
2997 	chunk_dealloc(node->chunk, node->size);
2998 
2999 	base_chunk_node_dealloc(node);
3000 }
3001 
3002 static void *
3003 imalloc(size_t size)
3004 {
3005 	void *ret;
3006 
3007 	assert(size != 0);
3008 
3009 	if (size <= arena_maxclass)
3010 		ret = arena_malloc(choose_arena(), size);
3011 	else
3012 		ret = huge_malloc(size);
3013 
3014 	return (ret);
3015 }
3016 
3017 static void *
3018 ipalloc(size_t alignment, size_t size)
3019 {
3020 	void *ret;
3021 	size_t ceil_size;
3022 
3023 	/*
3024 	 * Round size up to the nearest multiple of alignment.
3025 	 *
3026 	 * This done, we can take advantage of the fact that for each small
3027 	 * size class, every object is aligned at the smallest power of two
3028 	 * that is non-zero in the base two representation of the size.  For
3029 	 * example:
3030 	 *
3031 	 *   Size |   Base 2 | Minimum alignment
3032 	 *   -----+----------+------------------
3033 	 *     96 |  1100000 |  32
3034 	 *    144 | 10100000 |  32
3035 	 *    192 | 11000000 |  64
3036 	 *
3037 	 * Depending on runtime settings, it is possible that arena_malloc()
3038 	 * will further round up to a power of two, but that never causes
3039 	 * correctness issues.
3040 	 */
3041 	ceil_size = (size + (alignment - 1)) & (-alignment);
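	/*
	 * Illustrative example (not part of the original sources): size ==
	 * 100 and alignment == 32 give ceil_size == (100 + 31) & -32 == 128,
	 * and per the table above every object in the 128 byte class is at
	 * least 128 byte aligned, which satisfies the request.
	 */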
3042 	/*
3043 	 * (ceil_size < size) protects against the combination of maximal
3044 	 * alignment and size greater than maximal alignment.
3045 	 */
3046 	if (ceil_size < size) {
3047 		/* size_t overflow. */
3048 		return (NULL);
3049 	}
3050 
3051 	if (ceil_size <= pagesize || (alignment <= pagesize
3052 	    && ceil_size <= arena_maxclass))
3053 		ret = arena_malloc(choose_arena(), ceil_size);
3054 	else {
3055 		size_t run_size;
3056 
3057 		/*
3058 		 * We can't achieve sub-page alignment, so round up alignment
3059 		 * permanently; it makes later calculations simpler.
3060 		 */
3061 		alignment = PAGE_CEILING(alignment);
3062 		ceil_size = PAGE_CEILING(size);
3063 		/*
3064 		 * (ceil_size < size) protects against very large sizes within
3065 		 * pagesize of SIZE_T_MAX.
3066 		 *
3067 		 * (ceil_size + alignment < ceil_size) protects against the
3068 		 * combination of maximal alignment and ceil_size large enough
3069 		 * to cause overflow.  This is similar to the first overflow
3070 		 * check above, but it needs to be repeated due to the new
3071 		 * ceil_size value, which may now be *equal* to maximal
3072 		 * alignment, whereas before we only detected overflow if the
3073 		 * original size was *greater* than maximal alignment.
3074 		 */
3075 		if (ceil_size < size || ceil_size + alignment < ceil_size) {
3076 			/* size_t overflow. */
3077 			return (NULL);
3078 		}
3079 
3080 		/*
3081 		 * Calculate the size of the over-size run that arena_palloc()
3082 		 * would need to allocate in order to guarantee the alignment.
3083 		 */
3084 		if (ceil_size >= alignment)
3085 			run_size = ceil_size + alignment - pagesize;
3086 		else {
3087 			/*
3088 			 * It is possible that (alignment << 1) will cause
3089 			 * overflow, but it doesn't matter because we also
3090 			 * subtract pagesize, which in the case of overflow
3091 			 * leaves us with a very large run_size.  That causes
3092 			 * the first conditional below to fail, which means
3093 			 * that the bogus run_size value never gets used for
3094 			 * anything important.
3095 			 */
3096 			run_size = (alignment << 1) - pagesize;
3097 		}
3098 
3099 		if (run_size <= arena_maxclass) {
3100 			ret = arena_palloc(choose_arena(), alignment, ceil_size,
3101 			    run_size);
3102 		} else if (alignment <= chunksize)
3103 			ret = huge_malloc(ceil_size);
3104 		else
3105 			ret = huge_palloc(alignment, ceil_size);
3106 	}
3107 
3108 	assert(((uintptr_t)ret & (alignment - 1)) == 0);
3109 	return (ret);
3110 }
3111 
3112 static void *
3113 icalloc(size_t size)
3114 {
3115 	void *ret;
3116 
3117 	if (size <= arena_maxclass) {
3118 		ret = arena_malloc(choose_arena(), size);
3119 		if (ret == NULL)
3120 			return (NULL);
3121 		memset(ret, 0, size);
3122 	} else {
3123 		/*
3124 		 * The virtual memory system provides zero-filled pages, so
3125 		 * there is no need to do so manually, unless opt_junk is
3126 		 * enabled, in which case huge_malloc() fills huge allocations
3127 		 * with junk.
3128 		 */
3129 		ret = huge_malloc(size);
3130 		if (ret == NULL)
3131 			return (NULL);
3132 
3133 		if (opt_junk)
3134 			memset(ret, 0, size);
3135 #ifdef USE_BRK
3136 		else if ((uintptr_t)ret >= (uintptr_t)brk_base
3137 		    && (uintptr_t)ret < (uintptr_t)brk_max) {
3138 			/*
3139 			 * This may be a re-used brk chunk.  Therefore, zero
3140 			 * the memory.
3141 			 */
3142 			memset(ret, 0, size);
3143 		}
3144 #endif
3145 	}
3146 
3147 	return (ret);
3148 }
3149 
3150 static size_t
3151 isalloc(const void *ptr)
3152 {
3153 	size_t ret;
3154 	arena_chunk_t *chunk;
3155 
3156 	assert(ptr != NULL);
3157 
3158 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3159 	if (chunk != ptr) {
3160 		/* Region. */
3161 		assert(chunk->arena->magic == ARENA_MAGIC);
3162 
3163 		ret = arena_salloc(ptr);
3164 	} else {
3165 		chunk_node_t *node, key;
3166 
3167 		/* Chunk (huge allocation). */
3168 
3169 		malloc_mutex_lock(&chunks_mtx);
3170 
3171 		/* Extract from tree of huge allocations. */
3172 		key.chunk = __DECONST(void *, ptr);
3173 		/* LINTED */
3174 		node = RB_FIND(chunk_tree_s, &huge, &key);
3175 		assert(node != NULL);
3176 
3177 		ret = node->size;
3178 
3179 		malloc_mutex_unlock(&chunks_mtx);
3180 	}
3181 
3182 	return (ret);
3183 }
3184 
3185 static void *
3186 iralloc(void *ptr, size_t size)
3187 {
3188 	void *ret;
3189 	size_t oldsize;
3190 
3191 	assert(ptr != NULL);
3192 	assert(size != 0);
3193 
3194 	oldsize = isalloc(ptr);
3195 
3196 	if (size <= arena_maxclass)
3197 		ret = arena_ralloc(ptr, size, oldsize);
3198 	else
3199 		ret = huge_ralloc(ptr, size, oldsize);
3200 
3201 	return (ret);
3202 }
3203 
3204 static void
3205 idalloc(void *ptr)
3206 {
3207 	arena_chunk_t *chunk;
3208 
3209 	assert(ptr != NULL);
3210 
3211 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3212 	if (chunk != ptr) {
3213 		/* Region. */
3214 		arena_dalloc(chunk->arena, chunk, ptr);
3215 	} else
3216 		huge_dalloc(ptr);
3217 }
3218 
3219 static void
3220 malloc_print_stats(void)
3221 {
3222 
3223 	if (opt_print_stats) {
3224 		char s[UMAX2S_BUFSIZE];
3225 		_malloc_message("___ Begin malloc statistics ___\n", "", "",
3226 		    "");
3227 		_malloc_message("Assertions ",
3228 #ifdef NDEBUG
3229 		    "disabled",
3230 #else
3231 		    "enabled",
3232 #endif
3233 		    "\n", "");
3234 		_malloc_message("Boolean MALLOC_OPTIONS: ",
3235 		    opt_abort ? "A" : "a",
3236 		    opt_junk ? "J" : "j",
3237 		    opt_hint ? "H" : "h");
3238 		_malloc_message(opt_utrace ? "PU" : "Pu",
3239 		    opt_sysv ? "V" : "v",
3240 		    opt_xmalloc ? "X" : "x",
3241 		    opt_zero ? "Z\n" : "z\n");
3242 
3243 		_malloc_message("CPUs: ", size_t2s(ncpus, s), "\n", "");
3244 		_malloc_message("Max arenas: ", size_t2s(narenas, s), "\n", "");
3245 		_malloc_message("Pointer size: ", size_t2s(sizeof(void *), s),
3246 		    "\n", "");
3247 		_malloc_message("Quantum size: ", size_t2s(quantum, s), "\n", "");
3248 		_malloc_message("Max small size: ", size_t2s(small_max, s), "\n",
3249 		    "");
3250 
3251 		_malloc_message("Chunk size: ", size_t2s(chunksize, s), "", "");
3252 		_malloc_message(" (2^", size_t2s((size_t)opt_chunk_2pow, s),
3253 		    ")\n", "");
3254 
3255 #ifdef MALLOC_STATS
3256 		{
3257 			size_t allocated, mapped;
3258 			unsigned i;
3259 			arena_t *arena;
3260 
3261 			/* Calculate and print allocated/mapped stats. */
3262 
3263 			/* arenas. */
3264 			for (i = 0, allocated = 0; i < narenas; i++) {
3265 				if (arenas[i] != NULL) {
3266 					malloc_mutex_lock(&arenas[i]->mtx);
3267 					allocated +=
3268 					    arenas[i]->stats.allocated_small;
3269 					allocated +=
3270 					    arenas[i]->stats.allocated_large;
3271 					malloc_mutex_unlock(&arenas[i]->mtx);
3272 				}
3273 			}
3274 
3275 			/* huge/base. */
3276 			malloc_mutex_lock(&chunks_mtx);
3277 			allocated += huge_allocated;
3278 			mapped = stats_chunks.curchunks * chunksize;
3279 			malloc_mutex_unlock(&chunks_mtx);
3280 
3281 			malloc_mutex_lock(&base_mtx);
3282 			mapped += base_mapped;
3283 			malloc_mutex_unlock(&base_mtx);
3284 
3285 			malloc_printf("Allocated: %zu, mapped: %zu\n",
3286 			    allocated, mapped);
3287 
3288 			/* Print chunk stats. */
3289 			{
3290 				chunk_stats_t chunks_stats;
3291 
3292 				malloc_mutex_lock(&chunks_mtx);
3293 				chunks_stats = stats_chunks;
3294 				malloc_mutex_unlock(&chunks_mtx);
3295 
3296 				malloc_printf("chunks: nchunks   "
3297 				    "highchunks    curchunks\n");
3298 				malloc_printf("  %13llu%13lu%13lu\n",
3299 				    chunks_stats.nchunks,
3300 				    chunks_stats.highchunks,
3301 				    chunks_stats.curchunks);
3302 			}
3303 
3304 			/* Print huge allocation stats. */
3305 			malloc_printf(
3306 			    "huge: nmalloc      ndalloc      "
3307 			    "nralloc    allocated\n");
3308 			malloc_printf(" %12llu %12llu %12llu %12zu\n",
3309 			    huge_nmalloc, huge_ndalloc, huge_nralloc,
3310 			    huge_allocated);
3311 
3312 			/* Print stats for each arena. */
3313 			for (i = 0; i < narenas; i++) {
3314 				arena = arenas[i];
3315 				if (arena != NULL) {
3316 					malloc_printf(
3317 					    "\narenas[%u] @ %p\n", i, arena);
3318 					malloc_mutex_lock(&arena->mtx);
3319 					stats_print(arena);
3320 					malloc_mutex_unlock(&arena->mtx);
3321 				}
3322 			}
3323 		}
3324 #endif /* #ifdef MALLOC_STATS */
3325 		_malloc_message("--- End malloc statistics ---\n", "", "", "");
3326 	}
3327 }
3328 
3329 /*
3330  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
3331  * implementation has to take pains to avoid infinite recursion during
3332  * initialization.
3333  */
3334 static inline bool
3335 malloc_init(void)
3336 {
3337 
3338 	if (malloc_initialized == false)
3339 		return (malloc_init_hard());
3340 
3341 	return (false);
3342 }
3343 
3344 static bool
3345 malloc_init_hard(void)
3346 {
3347 	unsigned i, j;
3348 	ssize_t linklen;
3349 	char buf[PATH_MAX + 1];
3350 	const char *opts = "";
3351 	int serrno;
3352 
3353 	malloc_mutex_lock(&init_lock);
3354 	if (malloc_initialized) {
3355 		/*
3356 		 * Another thread initialized the allocator before this one
3357 		 * acquired init_lock.
3358 		 */
3359 		malloc_mutex_unlock(&init_lock);
3360 		return (false);
3361 	}
3362 
3363 	serrno = errno;
3364 	/* Get number of CPUs. */
3365 	{
3366 		int mib[2];
3367 		size_t len;
3368 
3369 		mib[0] = CTL_HW;
3370 		mib[1] = HW_NCPU;
3371 		len = sizeof(ncpus);
3372 		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3373 			/* Error. */
3374 			ncpus = 1;
3375 		}
3376 	}
3377 
3378 	/* Get page size. */
3379 	{
3380 		long result;
3381 
3382 		result = sysconf(_SC_PAGESIZE);
3383 		assert(result != -1);
3384 		pagesize = (unsigned) result;
3385 
3386 		/*
3387 		 * We assume that pagesize is a power of 2 when calculating
3388 		 * pagesize_mask and pagesize_2pow.
3389 		 */
3390 		assert(((result - 1) & result) == 0);
3391 		pagesize_mask = result - 1;
3392 		pagesize_2pow = ffs((int)result) - 1;
3393 	}
3394 
3395 	for (i = 0; i < 3; i++) {
3396 		/* Get runtime configuration. */
3397 		switch (i) {
3398 		case 0:
3399 			if ((linklen = readlink("/etc/malloc.conf", buf,
3400 						sizeof(buf) - 1)) != -1) {
3401 				/*
3402 				 * Use the target name of the
3403 				 * "/etc/malloc.conf" symbolic link as the
3404 				 * option string.
3404 				 */
3405 				buf[linklen] = '\0';
3406 				opts = buf;
3407 			} else {
3408 				/* No configuration specified. */
3409 				buf[0] = '\0';
3410 				opts = buf;
3411 			}
3412 			break;
3413 		case 1:
3414 			if ((opts = getenv("MALLOC_OPTIONS")) != NULL &&
3415 			    issetugid() == 0) {
3416 				/*
3417 				 * Do nothing; opts is already initialized to
3418 				 * the value of the MALLOC_OPTIONS environment
3419 				 * variable.
3420 				 */
3421 			} else {
3422 				/* No configuration specified. */
3423 				buf[0] = '\0';
3424 				opts = buf;
3425 			}
3426 			break;
3427 		case 2:
3428 			if (_malloc_options != NULL) {
3429 			    /*
3430 			     * Use options that were compiled into the program.
3431 			     */
3432 			    opts = _malloc_options;
3433 			} else {
3434 				/* No configuration specified. */
3435 				buf[0] = '\0';
3436 				opts = buf;
3437 			}
3438 			break;
3439 		default:
3440 			/* NOTREACHED */
3441 			/* LINTED */
3442 			assert(false);
3443 		}
3444 
3445 		for (j = 0; opts[j] != '\0'; j++) {
3446 			switch (opts[j]) {
3447 			case 'a':
3448 				opt_abort = false;
3449 				break;
3450 			case 'A':
3451 				opt_abort = true;
3452 				break;
3453 			case 'h':
3454 				opt_hint = false;
3455 				break;
3456 			case 'H':
3457 				opt_hint = true;
3458 				break;
3459 			case 'j':
3460 				opt_junk = false;
3461 				break;
3462 			case 'J':
3463 				opt_junk = true;
3464 				break;
3465 			case 'k':
3466 				/*
3467 				 * Chunks always require at least one header
3468 				 * page, so chunks can never be smaller than
3469 				 * two pages.
3470 				 */
3471 				if (opt_chunk_2pow > pagesize_2pow + 1)
3472 					opt_chunk_2pow--;
3473 				break;
3474 			case 'K':
3475 				if (opt_chunk_2pow + 1 <
3476 				    (int)(sizeof(size_t) << 3))
3477 					opt_chunk_2pow++;
3478 				break;
3479 			case 'n':
3480 				opt_narenas_lshift--;
3481 				break;
3482 			case 'N':
3483 				opt_narenas_lshift++;
3484 				break;
3485 			case 'p':
3486 				opt_print_stats = false;
3487 				break;
3488 			case 'P':
3489 				opt_print_stats = true;
3490 				break;
3491 			case 'q':
3492 				if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3493 					opt_quantum_2pow--;
3494 				break;
3495 			case 'Q':
3496 				if (opt_quantum_2pow < pagesize_2pow - 1)
3497 					opt_quantum_2pow++;
3498 				break;
3499 			case 's':
3500 				if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3501 					opt_small_max_2pow--;
3502 				break;
3503 			case 'S':
3504 				if (opt_small_max_2pow < pagesize_2pow - 1)
3505 					opt_small_max_2pow++;
3506 				break;
3507 			case 'u':
3508 				opt_utrace = false;
3509 				break;
3510 			case 'U':
3511 				opt_utrace = true;
3512 				break;
3513 			case 'v':
3514 				opt_sysv = false;
3515 				break;
3516 			case 'V':
3517 				opt_sysv = true;
3518 				break;
3519 			case 'x':
3520 				opt_xmalloc = false;
3521 				break;
3522 			case 'X':
3523 				opt_xmalloc = true;
3524 				break;
3525 			case 'z':
3526 				opt_zero = false;
3527 				break;
3528 			case 'Z':
3529 				opt_zero = true;
3530 				break;
3531 			default: {
3532 				char cbuf[2];
3533 
3534 				cbuf[0] = opts[j];
3535 				cbuf[1] = '\0';
3536 				_malloc_message(getprogname(),
3537 				    ": (malloc) Unsupported character in "
3538 				    "malloc options: '", cbuf, "'\n");
3539 			}
3540 			}
3541 		}
3542 	}
3543 	errno = serrno;
3544 
3545 	/* Take care to call atexit() only once. */
3546 	if (opt_print_stats) {
3547 		/* Print statistics at exit. */
3548 		atexit(malloc_print_stats);
3549 	}
3550 
3551 	/* Set variables according to the value of opt_small_max_2pow. */
3552 	if (opt_small_max_2pow < opt_quantum_2pow)
3553 		opt_small_max_2pow = opt_quantum_2pow;
3554 	small_max = (1 << opt_small_max_2pow);
3555 
3556 	/* Set bin-related variables. */
3557 	bin_maxclass = (pagesize >> 1);
3558 	assert(opt_quantum_2pow >= TINY_MIN_2POW);
3559 	ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW);
3560 	assert(ntbins <= opt_quantum_2pow);
3561 	nqbins = (unsigned)(small_max >> opt_quantum_2pow);
3562 	nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1);
3563 
3564 	/* Set variables according to the value of opt_quantum_2pow. */
3565 	quantum = (1 << opt_quantum_2pow);
3566 	quantum_mask = quantum - 1;
3567 	if (ntbins > 0)
3568 		small_min = (quantum >> 1) + 1;
3569 	else
3570 		small_min = 1;
3571 	assert(small_min <= quantum);
3572 
3573 	/* Set variables according to the value of opt_chunk_2pow. */
3574 	chunksize = (1LU << opt_chunk_2pow);
3575 	chunksize_mask = chunksize - 1;
3576 	chunksize_2pow = (unsigned)opt_chunk_2pow;
3577 	chunk_npages = (unsigned)(chunksize >> pagesize_2pow);
3578 	{
3579 		unsigned header_size;
3580 
3581 		header_size = (unsigned)(sizeof(arena_chunk_t) +
3582 		    (sizeof(arena_chunk_map_t) * (chunk_npages - 1)));
3583 		arena_chunk_header_npages = (header_size >> pagesize_2pow);
3584 		if ((header_size & pagesize_mask) != 0)
3585 			arena_chunk_header_npages++;
3586 	}
3587 	arena_maxclass = chunksize - (arena_chunk_header_npages <<
3588 	    pagesize_2pow);
3589 
3590 	UTRACE(0, 0, 0);
3591 
3592 #ifdef MALLOC_STATS
3593 	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3594 #endif
3595 
3596 	/* Various sanity checks that regard configuration. */
3597 	assert(quantum >= sizeof(void *));
3598 	assert(quantum <= pagesize);
3599 	assert(chunksize >= pagesize);
3600 	assert(quantum * 4 <= chunksize);
3601 
3602 	/* Initialize chunks data. */
3603 	malloc_mutex_init(&chunks_mtx);
3604 	RB_INIT(&huge);
3605 #ifdef USE_BRK
3606 	malloc_mutex_init(&brk_mtx);
3607 	brk_base = sbrk(0);
3608 	brk_prev = brk_base;
3609 	brk_max = brk_base;
3610 #endif
3611 #ifdef MALLOC_STATS
3612 	huge_nmalloc = 0;
3613 	huge_ndalloc = 0;
3614 	huge_nralloc = 0;
3615 	huge_allocated = 0;
3616 #endif
3617 	RB_INIT(&old_chunks);
3618 
3619 	/* Initialize base allocation data structures. */
3620 #ifdef MALLOC_STATS
3621 	base_mapped = 0;
3622 #endif
3623 #ifdef USE_BRK
3624 	/*
3625 	 * Allocate a base chunk here, since it doesn't actually have to be
3626 	 * chunk-aligned.  Doing this before allocating any other chunks allows
3627 	 * the use of space that would otherwise be wasted.
3628 	 */
3629 	base_pages_alloc(0);
3630 #endif
3631 	base_chunk_nodes = NULL;
3632 	malloc_mutex_init(&base_mtx);
3633 
3634 	if (ncpus > 1) {
3635 		/*
3636 		 * For SMP systems, create four times as many arenas as there
3637 		 * are CPUs by default.
3638 		 */
3639 		opt_narenas_lshift += 2;
3640 	}
3641 
3642 #ifdef NO_TLS
3643 	/* Initialize arena key. */
3644 	(void)thr_keycreate(&arenas_map_key, NULL);
3645 #endif
3646 
3647 	/* Determine how many arenas to use. */
3648 	narenas = ncpus;
3649 	if (opt_narenas_lshift > 0) {
3650 		if ((narenas << opt_narenas_lshift) > narenas)
3651 			narenas <<= opt_narenas_lshift;
3652 		/*
3653 		 * Make sure not to exceed the limits of what base_alloc()
3654 		 * can handle.
3655 		 */
3656 		if (narenas * sizeof(arena_t *) > chunksize)
3657 			narenas = (unsigned)(chunksize / sizeof(arena_t *));
3658 	} else if (opt_narenas_lshift < 0) {
3659 		if ((narenas << opt_narenas_lshift) < narenas)
3660 			narenas <<= opt_narenas_lshift;
3661 		/* Make sure there is at least one arena. */
3662 		if (narenas == 0)
3663 			narenas = 1;
3664 	}
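	/*
	 * Illustrative example (not part of the original sources): on a
	 * 4 CPU machine, the SMP adjustment above adds 2 to
	 * opt_narenas_lshift; assuming it started at zero (no 'n'/'N'
	 * options), narenas becomes 4 << 2 == 16.
	 */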
3665 
3666 	next_arena = 0;
3667 
3668 	/* Allocate and initialize arenas. */
3669 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3670 	if (arenas == NULL) {
3671 		malloc_mutex_unlock(&init_lock);
3672 		return (true);
3673 	}
3674 	/*
3675 	 * Zero the array.  In practice, this should always be pre-zeroed,
3676 	 * since it was just mmap()ed, but let's be sure.
3677 	 */
3678 	memset(arenas, 0, sizeof(arena_t *) * narenas);
3679 
3680 	/*
3681 	 * Initialize one arena here.  The rest are lazily created in
3682 	 * arena_choose_hard().
3683 	 */
3684 	arenas_extend(0);
3685 	if (arenas[0] == NULL) {
3686 		malloc_mutex_unlock(&init_lock);
3687 		return (true);
3688 	}
3689 
3690 	malloc_mutex_init(&arenas_mtx);
3691 
3692 	malloc_initialized = true;
3693 	malloc_mutex_unlock(&init_lock);
3694 	return (false);
3695 }
3696 
3697 /*
3698  * End general internal functions.
3699  */
3700 /******************************************************************************/
3701 /*
3702  * Begin malloc(3)-compatible functions.
3703  */
3704 
3705 void *
3706 malloc(size_t size)
3707 {
3708 	void *ret;
3709 
3710 	if (malloc_init()) {
3711 		ret = NULL;
3712 		goto RETURN;
3713 	}
3714 
3715 	if (size == 0) {
3716 		if (opt_sysv == false)
3717 			size = 1;
3718 		else {
3719 			ret = NULL;
3720 			goto RETURN;
3721 		}
3722 	}
3723 
3724 	ret = imalloc(size);
3725 
3726 RETURN:
3727 	if (ret == NULL) {
3728 		if (opt_xmalloc) {
3729 			_malloc_message(getprogname(),
3730 			    ": (malloc) Error in malloc(): out of memory\n", "",
3731 			    "");
3732 			abort();
3733 		}
3734 		errno = ENOMEM;
3735 	}
3736 
3737 	UTRACE(0, size, ret);
3738 	return (ret);
3739 }
3740 
3741 int
3742 posix_memalign(void **memptr, size_t alignment, size_t size)
3743 {
3744 	int ret;
3745 	void *result;
3746 
3747 	if (malloc_init())
3748 		result = NULL;
3749 	else {
3750 		/* Make sure that alignment is a large enough power of 2. */
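		/*
		 * The mask test rejects non-powers of two: for example, 16
		 * passes (16 & 15 == 0) while 24 fails (24 & 23 == 16), and
		 * anything smaller than sizeof(void *) is rejected as well.
		 */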
3751 		if (((alignment - 1) & alignment) != 0
3752 		    || alignment < sizeof(void *)) {
3753 			if (opt_xmalloc) {
3754 				_malloc_message(getprogname(),
3755 				    ": (malloc) Error in posix_memalign(): "
3756 				    "invalid alignment\n", "", "");
3757 				abort();
3758 			}
3759 			result = NULL;
3760 			ret = EINVAL;
3761 			goto RETURN;
3762 		}
3763 
3764 		result = ipalloc(alignment, size);
3765 	}
3766 
3767 	if (result == NULL) {
3768 		if (opt_xmalloc) {
3769 			_malloc_message(getprogname(),
3770 			": (malloc) Error in posix_memalign(): out of memory\n",
3771 			"", "");
3772 			abort();
3773 		}
3774 		ret = ENOMEM;
3775 		goto RETURN;
3776 	}
3777 
3778 	*memptr = result;
3779 	ret = 0;
3780 
3781 RETURN:
3782 	UTRACE(0, size, result);
3783 	return (ret);
3784 }
3785 
3786 void *
3787 calloc(size_t num, size_t size)
3788 {
3789 	void *ret;
3790 	size_t num_size;
3791 
3792 	if (malloc_init()) {
3793 		num_size = 0;
3794 		ret = NULL;
3795 		goto RETURN;
3796 	}
3797 
3798 	num_size = num * size;
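	/*
	 * A zero num_size is treated like malloc(0) only when one of the
	 * operands really is zero; if both are non-zero and the product
	 * still wrapped to exactly zero, the else branch below returns NULL.
	 */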
3799 	if (num_size == 0) {
3800 		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
3801 			num_size = 1;
3802 		else {
3803 			ret = NULL;
3804 			goto RETURN;
3805 		}
3806 	/*
3807 	 * Try to avoid division here.  We know that it isn't possible to
3808 	 * overflow during multiplication if neither operand uses any of the
3809 	 * most significant half of the bits in a size_t.
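	 * For example, on a 64-bit system the mask is 0xffffffff00000000:
	 * if neither num nor size has any of those bits set, both values
	 * are below 2^32 and the product cannot overflow, so the division
	 * is only performed when the cheap mask test fires.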
3810 	 */
3811 	} else if ((unsigned long long)((num | size) &
3812 	   ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
3813 	   (num_size / size != num)) {
3814 		/* size_t overflow. */
3815 		ret = NULL;
3816 		goto RETURN;
3817 	}
3818 
3819 	ret = icalloc(num_size);
3820 
3821 RETURN:
3822 	if (ret == NULL) {
3823 		if (opt_xmalloc) {
3824 			_malloc_message(getprogname(),
3825 			    ": (malloc) Error in calloc(): out of memory\n", "",
3826 			    "");
3827 			abort();
3828 		}
3829 		errno = ENOMEM;
3830 	}
3831 
3832 	UTRACE(0, num_size, ret);
3833 	return (ret);
3834 }
3835 
3836 void *
3837 realloc(void *ptr, size_t size)
3838 {
3839 	void *ret;
3840 
3841 	if (size == 0) {
3842 		if (opt_sysv == false)
3843 			size = 1;
3844 		else {
3845 			if (ptr != NULL)
3846 				idalloc(ptr);
3847 			ret = NULL;
3848 			goto RETURN;
3849 		}
3850 	}
3851 
3852 	if (ptr != NULL) {
3853 		assert(malloc_initialized);
3854 
3855 		ret = iralloc(ptr, size);
3856 
3857 		if (ret == NULL) {
3858 			if (opt_xmalloc) {
3859 				_malloc_message(getprogname(),
3860 				    ": (malloc) Error in realloc(): out of "
3861 				    "memory\n", "", "");
3862 				abort();
3863 			}
3864 			errno = ENOMEM;
3865 		}
3866 	} else {
3867 		if (malloc_init())
3868 			ret = NULL;
3869 		else
3870 			ret = imalloc(size);
3871 
3872 		if (ret == NULL) {
3873 			if (opt_xmalloc) {
3874 				_malloc_message(getprogname(),
3875 				    ": (malloc) Error in realloc(): out of "
3876 				    "memory\n", "", "");
3877 				abort();
3878 			}
3879 			errno = ENOMEM;
3880 		}
3881 	}
3882 
3883 RETURN:
3884 	UTRACE(ptr, size, ret);
3885 	return (ret);
3886 }
3887 
3888 void
3889 free(void *ptr)
3890 {
3891 
3892 	UTRACE(ptr, 0, 0);
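	/* free(NULL) is required to be a no-op, so NULL never reaches idalloc(). */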
3893 	if (ptr != NULL) {
3894 		assert(malloc_initialized);
3895 
3896 		idalloc(ptr);
3897 	}
3898 }
3899 
3900 /*
3901  * End malloc(3)-compatible functions.
3902  */
3903 /******************************************************************************/
3904 /*
3905  * Begin non-standard functions.
3906  */
3907 #ifndef __NetBSD__
3908 size_t
3909 malloc_usable_size(const void *ptr)
3910 {
3911 
3912 	assert(ptr != NULL);
3913 
3914 	return (isalloc(ptr));
3915 }
3916 #endif
3917 
3918 /*
3919  * End non-standard functions.
3920  */
3921 /******************************************************************************/
3922 /*
3923  * Begin library-private functions, used by threading libraries for protection
3924  * of malloc during fork().  These functions are only called if the program is
3925  * running in threaded mode, so there is no need to check whether the program
3926  * is threaded here.
3927  */
3928 
3929 void
3930 _malloc_prefork(void)
3931 {
3932 	unsigned i;
3933 
3934 	/* Acquire all mutexes in a safe order. */
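	/*
	 * arenas_mtx is taken first, which keeps the set of initialized
	 * arenas stable while the array is walked below.
	 */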
3935 
3936 	malloc_mutex_lock(&arenas_mtx);
3937 	for (i = 0; i < narenas; i++) {
3938 		if (arenas[i] != NULL)
3939 			malloc_mutex_lock(&arenas[i]->mtx);
3940 	}
3941 
3942 	malloc_mutex_lock(&base_mtx);
3943 
3944 	malloc_mutex_lock(&chunks_mtx);
3945 }
3946 
3947 void
3948 _malloc_postfork(void)
3949 {
3950 	unsigned i;
3951 
3952 	/* Release all mutexes, now that fork() has completed. */
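	/* The locks are dropped in the reverse of the order taken in _malloc_prefork(). */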
3953 
3954 	malloc_mutex_unlock(&chunks_mtx);
3955 
3956 	malloc_mutex_unlock(&base_mtx);
3957 
3958 	for (i = 0; i < narenas; i++) {
3959 		if (arenas[i] != NULL)
3960 			malloc_mutex_unlock(&arenas[i]->mtx);
3961 	}
3962 	malloc_mutex_unlock(&arenas_mtx);
3963 }
3964 
3965 /*
3966  * End library-private functions.
3967  */
3968 /******************************************************************************/
3969